diff --git a/.asf.yaml b/.asf.yaml index e3b6db166f76..31c127c945c5 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -51,6 +51,7 @@ github: protected_branches: master: {} + release-2.64.0-postrelease: {} release-2.64: {} release-2.63.0-postrelease: {} release-2.63: {} diff --git a/.github/REVIEWERS.yml b/.github/REVIEWERS.yml index 781316f14f89..fdff50b9329a 100644 --- a/.github/REVIEWERS.yml +++ b/.github/REVIEWERS.yml @@ -20,12 +20,12 @@ labels: - name: Go reviewers: - - lostluck - jrmccluskey - exclusionList: - - youngoli + - lostluck + exclusionList: [] - name: Python reviewers: + - claudevdm - damccorm - jrmccluskey - tvalentyn @@ -34,20 +34,13 @@ labels: exclusionList: [] - name: Java reviewers: - - Abacn - - kennknowles - - robertwb - - m-trieu - - damondouglas - exclusionList: [] - - name: IO - reviewers: - - chamikaramj - - johnjcasey - Abacn - ahmedabu98 - - damondouglas + - chamikaramj + - m-trieu + - kennknowles - shunping + - robertwb exclusionList: [] - name: spanner reviewers: @@ -68,8 +61,8 @@ labels: - sjvanrossum - name: Build reviewers: - - damccorm - Abacn + - damccorm exclusionList: [] - name: website reviewers: @@ -83,7 +76,6 @@ fallbackReviewers: - Abacn - chamikaramj - damccorm - - damondouglas - johnjcasey - jrmccluskey - kennknowles diff --git a/.github/actions/common-rc-validation/action.yaml b/.github/actions/common-rc-validation/action.yaml index 51738e138122..0e38ffb7cfe0 100644 --- a/.github/actions/common-rc-validation/action.yaml +++ b/.github/actions/common-rc-validation/action.yaml @@ -36,19 +36,19 @@ runs: shell: bash run: | echo "---------------------Downloading Python Staging RC----------------------------" - wget ${PYTHON_RC_DOWNLOAD_URL}/${RELEASE_VER}/python/apache-beam-${RELEASE_VER}.tar.gz - wget ${PYTHON_RC_DOWNLOAD_URL}/${RELEASE_VER}/python/apache-beam-${RELEASE_VER}.tar.gz.sha512 - if [[ ! 
-f apache-beam-$RELEASE_VER.tar.gz ]]; then + wget ${PYTHON_RC_DOWNLOAD_URL}/${RELEASE_VER}/python/apache_beam-${RELEASE_VER}.tar.gz + wget ${PYTHON_RC_DOWNLOAD_URL}/${RELEASE_VER}/python/apache_beam-${RELEASE_VER}.tar.gz.sha512 + if [[ ! -f apache_beam-$RELEASE_VER.tar.gz ]]; then { echo "Fail to download Python Staging RC files." ;exit 1; } fi echo "--------------------------Verifying Hashes------------------------------------" - sha512sum -c apache-beam-${RELEASE_VER}.tar.gz.sha512 + sha512sum -c apache_beam-${RELEASE_VER}.tar.gz.sha512 `which pip` install --upgrade pip `which pip` install --upgrade setuptools - name: Installing python SDK shell: bash - run: pip install apache-beam-${RELEASE_VER}.tar.gz[gcp] - \ No newline at end of file + run: pip install apache_beam-${RELEASE_VER}.tar.gz[gcp] + diff --git a/.github/actions/setup-k8s-access/action.yml b/.github/actions/setup-k8s-access/action.yml index 56758d0fea1a..cb00c853738b 100644 --- a/.github/actions/setup-k8s-access/action.yml +++ b/.github/actions/setup-k8s-access/action.yml @@ -68,7 +68,7 @@ runs: run: | kubectl config set-context --current --namespace=${{ steps.replace_namespace.outputs.TEST_NAMESPACE }} - name: Post cleanup - uses: pyTooling/Actions/with-post-step@v0.4.6 + uses: pyTooling/Actions/with-post-step@v4.2.2 with: main: echo "Post Cleanup" post: | diff --git a/.github/autolabeler.yml b/.github/autolabeler.yml index eed6e34d09ee..6cd7516e7440 100644 --- a/.github/autolabeler.yml +++ b/.github/autolabeler.yml @@ -18,7 +18,7 @@ # Please keep the entries sorted lexicographically in each category. 
# General -build: ["assembly.xml", "build.gradle.kts", "buildSrc/**/*", ".gitattributes", ".github/**/*", ".gitignore", "gradle/**/*", ".mailmap", "ownership/**/*", "release/**/*", "sdks/java/build-tools/**/*", "settings.gradle.kts"] +build: ["assembly.xml", "buildSrc/**/*", ".gitattributes", ".github/workflows/*", ".gitignore", "gradle/**/*", ".mailmap", "release/**/*", "sdks/java/build-tools/**/*"] docker: ["runners/flink/job-server-container/**/*", "runners/spark/job-server/container/**/*", "sdks/go/container/**/*", "sdks/java/container/**/*", "sdks/python/container/**/*"] examples: ["examples/**/*", "sdks/go/examples/**/*", "sdks/python/apache_beam/examples/**/*"] go: ["sdks/go/**/*", "sdks/go.mod", "sdks/go.sum", "learning/katas/go/**/*"] diff --git a/.github/trigger_files/IO_Iceberg_Integration_Tests.json b/.github/trigger_files/IO_Iceberg_Integration_Tests.json index 34a6e02150e7..37dd25bf9029 100644 --- a/.github/trigger_files/IO_Iceberg_Integration_Tests.json +++ b/.github/trigger_files/IO_Iceberg_Integration_Tests.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run.", - "modification": 4 + "modification": 3 } diff --git a/.github/trigger_files/beam_PostCommit_Python_Xlang_Gcp_Direct.json b/.github/trigger_files/beam_PostCommit_Python_Xlang_Gcp_Direct.json index f1ba03a243ee..e0266d62f2e0 100644 --- a/.github/trigger_files/beam_PostCommit_Python_Xlang_Gcp_Direct.json +++ b/.github/trigger_files/beam_PostCommit_Python_Xlang_Gcp_Direct.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "modification": 5 + "modification": 4 } diff --git a/.github/trigger_files/beam_PostCommit_XVR_Direct.json b/.github/trigger_files/beam_PostCommit_XVR_Direct.json index 236b7bee8af8..262d546418db 100644 --- a/.github/trigger_files/beam_PostCommit_XVR_Direct.json +++ b/.github/trigger_files/beam_PostCommit_XVR_Direct.json @@ -1,3 +1,4 @@ { - 
"https://github.com/apache/beam/pull/32648": "testing Flink 1.19 support" + "https://github.com/apache/beam/pull/32648": "testing Flink 1.19 support", + "modification": 1 } diff --git a/.github/trigger_files/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.json b/.github/trigger_files/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.json index 9e26dfeeb6e6..bb31ea07c195 100644 --- a/.github/trigger_files/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.json +++ b/.github/trigger_files/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.json @@ -1 +1,3 @@ -{} \ No newline at end of file +{ + "modification": 1 +} \ No newline at end of file diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml index 58c4de11e857..a18def996acd 100644 --- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml +++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml @@ -109,7 +109,7 @@ jobs: -PpythonVersion=3.10 \ -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \ - - name: run Pytorch Language Modeling using Hugging face bert-base-uncased model + - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model uses: ./.github/actions/gradle-command-self-hosted-action timeout-minutes: 180 with: diff --git a/.github/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml b/.github/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml index bb16ef5ea5de..494b4cfc9d96 100644 --- a/.github/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml +++ b/.github/workflows/beam_PostCommit_Java_SingleStoreIO_IT.yml @@ -87,13 +87,13 @@ jobs: kubectl apply -f 
${{github.workspace}}/.test-infra/kubernetes/singlestore/sdb-rbac.yaml kubectl apply -f ${{github.workspace}}/.test-infra/kubernetes/singlestore/sdb-cluster-crd.yaml kubectl apply -f ${{github.workspace}}/.test-infra/kubernetes/singlestore/sdb-operator.yaml - kubectl wait --for=condition=Ready pod -l name=sdb-operator --timeout=120s + kubectl wait --for=condition=Ready pod -l name=sdb-operator --timeout=300s - name: Install SingleStore cluster id: install_singlestore run: | kubectl apply -f ${{github.workspace}}/.test-infra/kubernetes/singlestore/sdb-cluster.yaml - kubectl wait --for=jsonpath='{.status.phase}'=Running memsqlclusters.memsql.com --all --timeout=120s - kubectl wait svc/svc-sdb-cluster-ddl --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=120s + kubectl wait --for=jsonpath='{.status.phase}'=Running memsqlclusters.memsql.com --all --timeout=300s + kubectl wait svc/svc-sdb-cluster-ddl --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=300s loadbalancer_IP=$(kubectl get svc svc-sdb-cluster-ddl -o jsonpath='{.status.loadBalancer.ingress[0].ip}') echo lb_ip=$loadbalancer_IP >> $GITHUB_OUTPUT - name: Run Java SingleStore IO IT diff --git a/.github/workflows/beam_PreCommit_Java.yml b/.github/workflows/beam_PreCommit_Java.yml index 2d89febfd337..d9119afdfd6b 100644 --- a/.github/workflows/beam_PreCommit_Java.yml +++ b/.github/workflows/beam_PreCommit_Java.yml @@ -160,7 +160,7 @@ jobs: matrix: job_name: [beam_PreCommit_Java] job_phrase: [Run Java PreCommit] - timeout-minutes: 120 + timeout-minutes: 180 if: | github.event_name == 'push' || github.event_name == 'pull_request_target' || diff --git a/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml index 32ec15161954..97206d5c368f 100644 --- a/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml @@ -83,7 +83,7 @@ jobs: matrix: job_name: 
["beam_PreCommit_Java_GCP_IO_Direct"] job_phrase: ["Run Java_GCP_IO_Direct PreCommit"] - timeout-minutes: 120 + timeout-minutes: 180 if: | github.event_name == 'push' || github.event_name == 'pull_request_target' || diff --git a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml index dbba0922f882..329995422515 100644 --- a/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml +++ b/.github/workflows/beam_Python_CostBenchmarks_Dataflow.yml @@ -92,6 +92,7 @@ jobs: -PloadTest.mainClass=apache_beam.testing.benchmarks.wordcount.wordcount \ -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/testing/benchmarks/wordcount/requirements.txt \ '-PloadTest.args=${{ env.beam_Python_Cost_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-wordcount-python-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/wordcount/result_wordcount-${{env.NOW_UTC}}.txt' \ - name: Run Tensorflow MNIST Image Classification on Dataflow uses: ./.github/actions/gradle-command-self-hosted-action diff --git a/.github/workflows/build_release_candidate.yml b/.github/workflows/build_release_candidate.yml index 3a31820ae646..9c4205071d5e 100644 --- a/.github/workflows/build_release_candidate.yml +++ b/.github/workflows/build_release_candidate.yml @@ -161,6 +161,7 @@ jobs: stage_python_artifacts: if: ${{ fromJson(github.event.inputs.STAGE).python_artifacts == 'yes'}} + needs: [publish_java_artifacts, build_and_stage_prism] # Enforce ordering to avoid svn conflicts runs-on: ubuntu-22.04 steps: - name: Checkout @@ -417,6 +418,7 @@ jobs: build_and_stage_prism: if: ${{ fromJson(github.event.inputs.STAGE).prism == 'yes'}} + needs: [publish_java_artifacts] # Enforce ordering to avoid svn conflicts runs-on: ubuntu-22.04 steps: - name: Checkout diff --git a/.github/workflows/cost-benchmarks-pipeline-options/python_wordcount.txt 
b/.github/workflows/cost-benchmarks-pipeline-options/python_wordcount.txt index 424936ddad97..352393451838 100644 --- a/.github/workflows/cost-benchmarks-pipeline-options/python_wordcount.txt +++ b/.github/workflows/cost-benchmarks-pipeline-options/python_wordcount.txt @@ -22,6 +22,7 @@ --input_options={} --staging_location=gs://temp-storage-for-perf-tests/loadtests --temp_location=gs://temp-storage-for-perf-tests/loadtests +--requirements_file=apache_beam/testing/benchmarks/wordcount/requirements.txt --publish_to_big_query=true --metrics_dataset=beam_run_inference --metrics_table=python_wordcount diff --git a/.github/workflows/cut_release_branch.yml b/.github/workflows/cut_release_branch.yml index 48dbd68817ff..bedbd91c14a9 100644 --- a/.github/workflows/cut_release_branch.yml +++ b/.github/workflows/cut_release_branch.yml @@ -97,6 +97,12 @@ jobs: steps: - name: Validate Next Version run: | + if [[ ${RELEASE} =~ ([0-9]+\.[0-9]+) ]]; then + echo "RELEASE_CUT_TAG=v${RELEASE}.0-RC00" >> $GITHUB_ENV + else + echo "The input for RELEASE does not match a valid format [0-9]+\.[0-9]+" + exit 1 + fi if [[ $NEXT_RELEASE =~ ([0-9]+\.[0-9]+) ]]; then echo "NEXT_VERSION_IN_BASE_BRANCH=${BASH_REMATCH[1]}.0" >> $GITHUB_ENV else @@ -116,7 +122,9 @@ jobs: sed -i -e "s/master: {}/master: {}\n release-${RELEASE}: {}/g" .asf.yaml - name: Update master branch run: | - bash "${SCRIPT_DIR}/set_version.sh" "${NEXT_VERSION_IN_BASE_BRANCH}" + bash "${SCRIPT_DIR}/set_version.sh" "${NEXT_VERSION_IN_BASE_BRANCH}" "--add-tag" "${RELEASE_CUT_TAG}" + echo "==============tag RC00 to current master branch================" + git push origin tag "${RELEASE_CUT_TAG}" echo "==============Update master branch as following================" git diff echo "===============================================================" diff --git a/.github/workflows/refresh_looker_metrics.yml b/.github/workflows/refresh_looker_metrics.yml index 3866301b039a..e2de65876aad 100644 --- 
a/.github/workflows/refresh_looker_metrics.yml +++ b/.github/workflows/refresh_looker_metrics.yml @@ -18,8 +18,6 @@ name: Refresh Looker Performance Metrics on: - schedule: - - cron: '10 10 * * 1' workflow_dispatch: inputs: READ_ONLY: diff --git a/.github/workflows/republish_released_docker_containers.yml b/.github/workflows/republish_released_docker_containers.yml index 6fab92c9b8d5..d4d4cf9c92e9 100644 --- a/.github/workflows/republish_released_docker_containers.yml +++ b/.github/workflows/republish_released_docker_containers.yml @@ -32,7 +32,7 @@ on: - cron: "0 6 * * 1" env: docker_registry: gcr.io - release: "${{ github.event.inputs.RELEASE || '2.63.0' }}" + release: "${{ github.event.inputs.RELEASE || '2.64.0' }}" rc: "${{ github.event.inputs.RC || '2' }}" jobs: diff --git a/.github/workflows/run_rc_validation.yml b/.github/workflows/run_rc_validation.yml index 801a72d37130..81526edf22b8 100644 --- a/.github/workflows/run_rc_validation.yml +++ b/.github/workflows/run_rc_validation.yml @@ -16,6 +16,7 @@ # under the License. 
# To learn more about GitHub Actions in Apache Beam check the CI.m +# DO NOT USE THIS WORKFLOW AND WILL BE REMOVED SOON name: Run RC Validation on: @@ -74,9 +75,22 @@ jobs: contents: write env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - RELEASE_BRANCH: "release-${{github.event.inputs.RELEASE_VER}}" + RAW_RELEASE_VER: ${{ github.event.inputs.RELEASE_VER }} WORKING_BRANCH: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}_validations" steps: + - name: Set Release Branch Name (Major.Minor) + id: set_branch_name # Optional: Give the step an ID + run: | + VERSION="${{ env.RAW_RELEASE_VER }}" # Use the full version from input/env + # Use shell parameter expansion to remove the last dot and everything after it + MAJOR_MINOR_VERSION="${VERSION%.*}" + # Construct the branch name + BRANCH_NAME="release-$MAJOR_MINOR_VERSION" + echo "Original Version: $VERSION" + echo "Calculated Major.Minor Version: $MAJOR_MINOR_VERSION" + echo "Setting RELEASE_BRANCH Env Var to: $BRANCH_NAME" + # Export the calculated name to the environment for subsequent steps + echo "RELEASE_BRANCH=$BRANCH_NAME" >> $GITHUB_ENV - name: Check out code uses: actions/checkout@v4 with: @@ -117,7 +131,7 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{matrix.py_version}} - + - name: Setup Java JDK uses: actions/setup-java@v4 @@ -130,7 +144,7 @@ jobs: with: RELEASE_VER: ${{env.RELEASE_VER}} PYTHON_RC_DOWNLOAD_URL: ${{env.PYTHON_RC_DOWNLOAD_URL}} - + - name: Setting Taxi Variables @@ -154,7 +168,7 @@ jobs: --num_workers 5 \ --output_topic projects/${USER_GCP_PROJECT}/topics/${SQL_TAXI_TOPIC} \ --beam_services="{\":sdks:java:extensions:sql:expansion-service:shadowJar\": \"${SQL_EXPANSION_SERVICE_JAR}\"}" \ - --sdk_location apache-beam-${RELEASE_VER}.tar.gz || true + --sdk_location apache_beam-${RELEASE_VER}.tar.gz || true - name: Checking Results run: | gcloud pubsub subscriptions pull --project=${USER_GCP_PROJECT} --limit=5 ${SQL_TAXI_SUBSCRIPTION} @@ -243,7 +257,7 @@ jobs: 
--temp_location=${USER_GCS_BUCKET}/temp/ \ --with_metadata \ --beam_services="{\"sdks:java:io:expansion-service:shadowJar\": \"${KAFKA_EXPANSION_SERVICE_JAR}\"}" \ - --sdk_location apache-beam-${RELEASE_VER}.tar.gz || true + --sdk_location apache_beam-${RELEASE_VER}.tar.gz || true - name: Checking executions results run: | bq head -n 10 ${KAFKA_TAXI_DF_DATASET}.xlang_kafka_taxi @@ -294,8 +308,8 @@ jobs: RELEASE_VER: ${{env.RELEASE_VER}} PYTHON_RC_DOWNLOAD_URL: ${{env.PYTHON_RC_DOWNLOAD_URL}} - - name: Setup Maven Action - uses: s4u/setup-maven-action@v1.2.1 + - name: Setup Java and Maven Action + uses: actions/setup-java@v4 with: java-version: 11 - name: Updating Settings @@ -434,7 +448,7 @@ jobs: --dataset ${LEADERBOARD_DF_DATASET} \ --runner DataflowRunner \ --temp_location=${USER_GCS_BUCKET}/temp/ \ - --sdk_location apache-beam-${RELEASE_VER}.tar.gz || true + --sdk_location apache_beam-${RELEASE_VER}.tar.gz || true - name: Checking results run: | bq head -n 10 ${LEADERBOARD_DF_DATASET}.leader_board_users @@ -535,7 +549,7 @@ jobs: --dataset ${GAMESTATS_DF_DATASET} \ --runner DataflowRunner \ --temp_location=${USER_GCS_BUCKET}/temp/ \ - --sdk_location apache-beam-${RELEASE_VER}.tar.gz \ + --sdk_location apache_beam-${RELEASE_VER}.tar.gz \ --fixed_window_duration ${FIXED_WINDOW_DURATION} || true - name: Checking Results run: | diff --git a/.github/workflows/run_rc_validation_java_mobile_gaming.yml b/.github/workflows/run_rc_validation_java_mobile_gaming.yml new file mode 100644 index 000000000000..98106917c8a4 --- /dev/null +++ b/.github/workflows/run_rc_validation_java_mobile_gaming.yml @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Run Java Mobile Gaming RC Validation + +on: + workflow_dispatch: + inputs: + RELEASE_VER: + description: 'Beam Release Version (e.g., 2.64.0)' + required: true + default: '2.64.0' + RC_NUM: + description: 'Release Candidate number (e.g., 1)' + required: true + default: '1' + APACHE_CONTENTS_REPO: + description: 'Apache Staging Repository URL (e.g., https://repository.apache.org/content/repositories/orgapachebeam-1234)' + required: true + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.inputs.RELEASE_VER }}-${{ github.event.inputs.RC_NUM }}' + cancel-in-progress: true + +# Setting explicit permissions for the action +permissions: + actions: write + pull-requests: write # Needed for setup-action potentially + checks: write + contents: read # Needs read to checkout the code + deployments: read + id-token: write # Required for GCP Workload Identity Federation + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + # Define unique names for resources based on run ID to avoid collisions + RUN_ID_SUFFIX: ${{ 
github.run_id }}_${{ github.run_attempt }} + BQ_DATASET: mobilegaming_java_rc_${{ github.run_id }}_${{ github.run_attempt }} + PUBSUB_TOPIC: mobilegaming_java_rc_${{ github.run_id }}_${{ github.run_attempt }} + # Set GCP Project ID and Bucket as constants + GCP_PROJECT_ID: 'apache-beam-testing' + GCS_BUCKET_NAME: 'gs://rc-validation-migration-tests' + APACHE_REPO_URL: ${{ github.event.inputs.APACHE_CONTENTS_REPO }} + RELEASE_VERSION: ${{ github.event.inputs.RELEASE_VER }} + RC_TAG: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}" + +jobs: + run_java_mobile_gaming_rc_validation: + name: Run Java Mobile Gaming RC Validation (${{ github.event.inputs.RELEASE_VER }} RC${{ github.event.inputs.RC_NUM }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 # Adjust timeout as needed + steps: + - name: Extract GCS Bucket Name + run: echo "GCS_BUCKET_NAME=$(echo ${{ github.event.inputs.GCS_BUCKET }} | sed 's/^gs:\/\///')" >> $GITHUB_ENV + + - name: Checkout code at RC tag + uses: actions/checkout@v4 + with: + ref: v${{ github.event.inputs.RELEASE_VER }}-RC${{ github.event.inputs.RC_NUM }} + + # Standard setup actions (consider if setup-action is needed or if manual setup is sufficient) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 11 + + # Setup GCP resources + - name: Create BigQuery Dataset + run: | + echo "Creating BigQuery dataset: ${{ env.BQ_DATASET }} in project ${{ env.GCP_PROJECT_ID }}" + bq mk --project_id=${{ env.GCP_PROJECT_ID }} ${{ env.BQ_DATASET }} + shell: bash + - name: Create PubSub Topic + run: | + echo "Creating PubSub topic: ${{ env.PUBSUB_TOPIC }} in project ${{ env.GCP_PROJECT_ID }}" + gcloud pubsub topics create --project=${{ env.GCP_PROJECT_ID }} ${{ env.PUBSUB_TOPIC }} + shell: bash + + # Run the Mobile Gaming example test using Gradle + - name: Run MobileGaming Java Dataflow Test + uses: ./.github/actions/gradle-command-self-hosted-action + with: + 
gradle-command: :runners:google-cloud-dataflow-java:runMobileGamingJavaDataflow + arguments: | + -Prepourl=${{ env.APACHE_REPO_URL }} \ + -Pver=${{ env.RELEASE_VERSION }} \ + -PgcpProject=${{ env.GCP_PROJECT_ID }} \ + -PgcsBucket=${{ env.GCS_BUCKET_NAME }} \ + -PbqDataset=${{ env.BQ_DATASET }} \ + -PpubsubTopic=${{ env.PUBSUB_TOPIC }} \ + + # Cleanup GCP resources (always run) + - name: Cleanup BigQuery Dataset + if: always() + run: | + echo "Deleting BigQuery dataset: ${{ env.BQ_DATASET }} in project ${{ env.GCP_PROJECT_ID }}" + bq rm --project_id=${{ env.GCP_PROJECT_ID }} -f -r ${{ env.BQ_DATASET }} || echo "Failed to delete BQ dataset ${{ env.BQ_DATASET }}, continuing..." + shell: bash + - name: Cleanup PubSub Topic + if: always() + run: | + echo "Deleting PubSub topic: ${{ env.PUBSUB_TOPIC }} in project ${{ env.GCP_PROJECT_ID }}" + gcloud pubsub topics delete --project=${{ env.GCP_PROJECT_ID }} ${{ env.PUBSUB_TOPIC }} --quiet || echo "Failed to delete PubSub topic ${{ env.PUBSUB_TOPIC }}, continuing..." 
+ shell: bash + + # Reporting (Optional: Keep if test results are generated) + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v4 + if: failure() # Upload only on failure + with: + name: JUnit Test Results (Java MobileGaming RC) + path: "**/build/reports/tests/" + retention-days: 7 + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() # Publish results regardless of status + with: + commit: '${{ env.RC_TAG }}' # Use RC tag for commit reference + files: '**/build/test-results/**/*.xml' + check_name: "Java MobileGaming RC Test Results (${{ env.RELEASE_VERSION }} RC${{ github.event.inputs.RC_NUM }})" + large_files: true diff --git a/.github/workflows/run_rc_validation_java_quickstart.yml b/.github/workflows/run_rc_validation_java_quickstart.yml new file mode 100644 index 000000000000..023839d5a3d7 --- /dev/null +++ b/.github/workflows/run_rc_validation_java_quickstart.yml @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Run Java QuickStart RC Validation + +on: + workflow_dispatch: + inputs: + RELEASE_VER: + description: 'Beam Release Version (e.g., 2.64.0)' + required: true + default: '2.xx.0' + RC_NUM: + description: 'Release Candidate number (e.g., 1)' + required: true + default: '1' + APACHE_CONTENTS_REPO: + description: 'Apache Staging Repository URL (e.g., https://repository.apache.org/content/repositories/orgapachebeam-1234)' + required: true + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.inputs.RELEASE_VER }}-${{ github.event.inputs.RC_NUM }}' + cancel-in-progress: true + +# Setting explicit permissions for the action +permissions: + actions: write + pull-requests: write # Needed for setup-action potentially + checks: write + contents: read # Needs read to checkout the code + deployments: read + id-token: write # Required for GCP Workload Identity Federation (if needed by setup) + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + APACHE_REPO_URL: ${{ github.event.inputs.APACHE_CONTENTS_REPO }} + RELEASE_VERSION: ${{ github.event.inputs.RELEASE_VER }} + RC_TAG: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}" + +jobs: + run_java_quickstart_rc_validation: + name: Run Java QuickStart RC Validation (${{ github.event.inputs.RELEASE_VER }} RC${{ github.event.inputs.RC_NUM }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 60 # Adjust timeout as needed + steps: + - name: Checkout code at RC tag + uses: actions/checkout@v4 + with: + ref: ${{ env.RC_TAG }} + + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + 
java-version: 11 + + - name: Run QuickStart Java Direct Runner + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:direct-java:runQuickstartJavaDirect + arguments: | + -Prepourl=${{ env.APACHE_REPO_URL }} \ + -Pver=${{ env.RELEASE_VERSION }} + + - name: Run QuickStart Java Flink Runner + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:1.19:runQuickstartJavaFlinkLocal + arguments: | + -Prepourl=${{ env.APACHE_REPO_URL }} \ + -Pver=${{ env.RELEASE_VERSION }} + + - name: Run QuickStart Java Spark Runner + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:runQuickstartJavaSpark + arguments: | + -Prepourl=${{ env.APACHE_REPO_URL }} \ + -Pver=${{ env.RELEASE_VERSION }} diff --git a/.github/workflows/run_rc_validation_python_mobile_gaming.yml b/.github/workflows/run_rc_validation_python_mobile_gaming.yml new file mode 100644 index 000000000000..dc24281a3684 --- /dev/null +++ b/.github/workflows/run_rc_validation_python_mobile_gaming.yml @@ -0,0 +1,630 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Run Python Mobile Gaming RC Validation + +on: + workflow_dispatch: + inputs: + RELEASE_VER: + description: 'Beam Release Version (e.g., 2.64.0)' + required: true + default: '2.64.0' + RC_NUM: + description: 'Release Candidate number (e.g., 1)' + required: true + default: '1' + APACHE_CONTENTS_REPO: + description: 'Apache Staging Repository URL for Java Injector (e.g., https://repository.apache.org/content/repositories/orgapachebeam-1234)' + required: true + CLEANUP_BQ_RESOURCES: + description: 'Whether to delete the BigQuery dataset after the test run (true/false)' + required: false + type: boolean + default: true + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.inputs.RELEASE_VER }}-${{ github.event.inputs.RC_NUM }}' + cancel-in-progress: true + +# Setting explicit permissions for the action +permissions: + actions: write + pull-requests: write # Needed for setup-action potentially + checks: write + contents: read # Needs read to checkout the code + deployments: read + id-token: write # Required for GCP Workload Identity Federation + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: # Workflow level env vars if needed, specific ones are below + GCP_PROJECT_ID: 'apache-beam-testing' + GCS_BUCKET: 'gs://rc-validation-migration-tests' + +jobs: + run_python_mobile_gaming_rc_validation: + name: Run Python Mobile Gaming RC Validation (${{ github.event.inputs.RELEASE_VER }} RC${{ github.event.inputs.RC_NUM }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 360 + env: # Job-level env vars inherit workflow level and define job-specific ones + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + RUN_ID_SUFFIX: ${{ 
github.run_id }}_${{ github.run_attempt }} + BQ_DATASET: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt }} + PUBSUB_TOPIC: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt }} + GCE_REGION: 'us-central1' + APACHE_REPO_URL: ${{ github.event.inputs.APACHE_CONTENTS_REPO }} + RELEASE_VERSION: ${{ github.event.inputs.RELEASE_VER }} + RC_NUM: ${{ github.event.inputs.RC_NUM }} + RC_TAG: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}" + PYTHON_VERSION: '3.9' + BEAM_PYTHON_SDK_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER }}.tar.gz + BEAM_SOURCE_ZIP: apache-beam-${{ github.event.inputs.RELEASE_VER }}-source-release.zip + APACHE_DIST_URL_BASE: https://dist.apache.org/repos/dist/dev/beam/${{ github.event.inputs.RELEASE_VER }} + GAME_STATS_WINDOW_DURATION: 20 + SUBMISSION_TIMEOUT_SECONDS: 120 # Timeout for the python submission script itself + # --- Define the validation function with enhanced debugging (FIXED QUOTING) --- + VALIDATE_TABLE_FUNC: | + validate_table() { + local table_name=$1 + echo "DEBUG: ===== Starting validate_table for table: $table_name =====" + # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are inherited) + if [[ -z "$GCP_PROJECT_ID" || -z "$BQ_DATASET" ]]; then + echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the environment." 
+ exit 1 + fi + + local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}" + local full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}" + local count="" + local exit_code=1 + local retries=10 + local delay=60 # Default seconds between retries + + # Allow overriding delay via second argument (optional) + if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then + delay=$2 + echo "DEBUG: Using custom retry delay: ${delay}s for table ${table_name}" + else + echo "DEBUG: Using default retry delay: ${delay}s for table ${table_name}" + fi + echo "DEBUG: Full table ID: ${full_table_id}, Max retries: ${retries}" + + for i in $(seq 1 $retries); do + echo "DEBUG: Starting attempt $i/$retries..." + local query_output + + echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID} --use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM \`${full_table_id}\`\"" + query_output=$(bq query --project_id=${GCP_PROJECT_ID} \ + --use_legacy_sql=false \ + --format=sparse \ + --max_rows=1 \ + "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1) + exit_code=$? + + echo "DEBUG: bq query exit code: $exit_code" + echo "DEBUG: bq query raw output: [$query_output]" + + if [ $exit_code -eq 0 ]; then + echo "DEBUG: bq query exited successfully (code 0)." + count=$(echo "$query_output" | tail -n 1 | tr -d '[:space:]') + echo "DEBUG: Processed count after removing whitespace (from last line): [$count]" + if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then + echo "DEBUG: Count [$count] is a positive integer. Validation successful for this attempt." + break # Success! Found non-zero rows + else + echo "DEBUG: Count [$count] is zero or not a positive integer." + if [[ "$count" == "0" ]]; then + echo "DEBUG: Explicit count of 0 received." + fi + fi + else + echo "DEBUG: bq query failed (exit code: $exit_code)." + echo "DEBUG: Checking table existence with bq show..." + if ! 
bq show --project_id=${GCP_PROJECT_ID} "${full_table_id_show}" > /dev/null 2>&1; then + echo "DEBUG: Table ${full_table_id_show} appears not to exist (bq show failed)." + else + echo "DEBUG: Table ${full_table_id_show} appears to exist (bq show succeeded), but query failed." + fi + fi + + if [ $i -lt $retries ]; then + echo "DEBUG: Validation condition not met on attempt $i. Retrying in $delay seconds..." + sleep $delay + else + echo "DEBUG: Final attempt ($i) failed." + fi + done + + echo "DEBUG: ===== Final validation check for table: $table_name =====" + if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then + echo "SUCCESS: Table ${table_name} has ${count} rows. Final validation OK." + echo "DEBUG: validate_table returning 0 (success)." + return 0 # Indicate success + else + echo "ERROR: Failed to get a non-zero row count for table ${table_name} after $retries retries (Last exit code: $exit_code, Last processed count: '$count')." + echo "DEBUG: validate_table returning 1 (failure)." 
+ return 1 # Indicate failure + fi + } + + steps: + - name: Checkout code at RC tag + uses: actions/checkout@v4 + with: + ref: ${{ env.RC_TAG }} + + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 11 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install Dependencies + run: | + sudo apt-get update --yes + sudo apt-get install -y wget unzip coreutils procps grep sed + shell: bash + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + + - name: Download RC Artifacts + run: | + echo "Downloading from ${{ env.APACHE_DIST_URL_BASE }}" + wget ${{ env.APACHE_DIST_URL_BASE }}/python/${{ env.BEAM_PYTHON_SDK_TAR_GZ }} + wget ${{ env.APACHE_DIST_URL_BASE }}/python/${{ env.BEAM_PYTHON_SDK_TAR_GZ }}.sha512 + wget ${{ env.APACHE_DIST_URL_BASE }}/${{ env.BEAM_SOURCE_ZIP }} + wget ${{ env.APACHE_DIST_URL_BASE }}/${{ env.BEAM_SOURCE_ZIP }}.sha512 + shell: bash + + - name: Verify Hashes + run: | + echo "Verifying sha512 checksums..." + sha512sum -c ${{ env.BEAM_PYTHON_SDK_TAR_GZ }}.sha512 + sha512sum -c ${{ env.BEAM_SOURCE_ZIP }}.sha512 + shell: bash + + - name: Setup Python Virtual Environment + run: | + echo "Setting up Python virtual environment..." + python -m venv beam_env + source beam_env/bin/activate + pip install --upgrade pip setuptools wheel build + echo "Virtual environment ready." + shell: bash + + - name: Build Python SDK from Source + run: | + echo "Building Python SDK sdist..." 
+ source beam_env/bin/activate + unzip ${{ env.BEAM_SOURCE_ZIP }} + mkdir -p beam-${{ env.RELEASE_VERSION }}/website/www/site/content/en/documentation/sdks + sudo mkdir -p /website/www/site/content/en/documentation/sdks + cd beam-${{ env.RELEASE_VERSION }}/sdks/python + python -m build --sdist + shell: bash + + - name: Install Python SDK + run: | + echo "Installing built Python SDK: apache_beam-${{ env.RELEASE_VERSION }}.tar.gz" + source beam_env/bin/activate + pip install beam-${{ env.RELEASE_VERSION }}/sdks/python/dist/apache_beam-${{ env.RELEASE_VERSION }}.tar.gz + pip install beam-${{ env.RELEASE_VERSION }}/sdks/python/dist/apache_beam-${{ env.RELEASE_VERSION }}.tar.gz[gcp] + echo "SDK installed." + pip freeze # Log installed packages + shell: bash + + # ================== GCP Resource Setup ================== + - name: Create BigQuery Dataset + run: | + echo "Creating BigQuery dataset: ${{ env.BQ_DATASET }} in project ${{ env.GCP_PROJECT_ID }}" + bq mk --project_id=${{ env.GCP_PROJECT_ID }} ${{ env.BQ_DATASET }} + shell: bash + + - name: Create GCS Bucket (if needed - reusing input bucket) + run: | + echo "Ensuring GCS Bucket exists: ${{ env.GCS_BUCKET }} in project ${{ env.GCP_PROJECT_ID }}" + gsutil mb -p ${{ env.GCP_PROJECT_ID }} ${{ env.GCS_BUCKET }} || echo "Bucket ${{ env.GCS_BUCKET }} likely already exists." 
+ shell: bash + + - name: Create PubSub Topic + run: | + echo "Creating PubSub topic: ${{ env.PUBSUB_TOPIC }} in project ${{ env.GCP_PROJECT_ID }}" + gcloud pubsub topics create --project=${{ env.GCP_PROJECT_ID }} ${{ env.PUBSUB_TOPIC }} + shell: bash + + # ================== Java Data Injector ================== + - name: Configure Maven Settings for Injector + run: | + mkdir -p ~/.m2 + cat < ~/.m2/settings.xml + + + + release-repo + + true + + + + Release ${{ env.RELEASE_VERSION }} RC${{ env.RC_NUM }} + Release ${{ env.RELEASE_VERSION }} RC${{ env.RC_NUM }} + ${{ env.APACHE_REPO_URL }} + + + + + + EOF + echo "Maven settings.xml configured for Java Injector." + shell: bash + + - name: Run Java Injector in Background + run: | + echo "Running Java Injector in Background..." + # Generate project from archetype + mvn archetype:generate \ + -DarchetypeGroupId=org.apache.beam \ + -DarchetypeArtifactId=beam-sdks-java-maven-archetypes-examples \ + -DarchetypeVersion=${{ env.RELEASE_VERSION }} \ + -DgroupId=org.example \ + -DartifactId=injector-temp \ + -Dversion="0.1" \ + -Dpackage=org.apache.beam.examples \ + -DinteractiveMode=false \ + -DarchetypeCatalog=internal \ + -Dmaven.wagon.http.retryHandler.count=3 \ + -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 + + cd injector-temp + # Compile and run, redirecting output to avoid polluting workflow logs + mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.complete.game.injector.Injector \ + -Dexec.args="${{ env.GCP_PROJECT_ID }} ${{ env.PUBSUB_TOPIC }} none" \ + -Dmaven.wagon.http.retryHandler.count=3 \ + -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 > ../injector_run.log 2>&1 & + + INJECTOR_PID=$! + echo "Java Injector started in background with PID: ${INJECTOR_PID}" + echo ${INJECTOR_PID} > ../injector.pid + + cd .. + # Give injector a moment to start fully + sleep 15 + echo "Checking if injector process $INJECTOR_PID is running..." 
+ if ps -p $INJECTOR_PID > /dev/null; then + echo "Injector process $INJECTOR_PID confirmed running." + else + echo "ERROR: Injector process $INJECTOR_PID failed to start or exited prematurely." + echo "--- Injector Log ---" + cat injector_run.log || echo "Injector log not found." + echo "--- End Injector Log ---" + exit 1 + fi + shell: bash + + # ================== Leaderboard Tests ================== + - name: Run Leaderboard (Direct Runner) in Background + run: | + echo "Running Leaderboard with DirectRunner in Background..." + source beam_env/bin/activate + python -m apache_beam.examples.complete.game.leader_board \ + --project=${{ env.GCP_PROJECT_ID }} \ + --topic projects/${{ env.GCP_PROJECT_ID }}/topics/${{ env.PUBSUB_TOPIC }} \ + --dataset ${{ env.BQ_DATASET }} & + + LB_DIRECT_PID=$! + echo "Leaderboard (Direct Runner) started in background with PID: ${LB_DIRECT_PID}" + echo ${LB_DIRECT_PID} > leaderboard_direct.pid + shell: bash + + - name: Validate Leaderboard Results (Direct Runner) + run: | + source beam_env/bin/activate + eval "$VALIDATE_TABLE_FUNC" + echo "Validating BigQuery results for Leaderboard (DirectRunner)..." + sleep 90 + validate_table "leader_board_users" || exit 1 + validate_table "leader_board_teams" || exit 1 + echo "Leaderboard (Direct Runner) BQ validation finished successfully." + shell: bash + + - name: Kill Leaderboard Direct Runner Process + if: always() + run: | + if [ -f leaderboard_direct.pid ]; then + LB_DIRECT_PID=$(cat leaderboard_direct.pid) + echo "Attempting to kill Leaderboard Direct Runner process with PID: $LB_DIRECT_PID" + kill -9 $LB_DIRECT_PID || echo "Leaderboard Direct Runner process $LB_DIRECT_PID already stopped or not found." + rm leaderboard_direct.pid + else + echo "leaderboard_direct.pid not found, cannot kill process." 
+ fi + shell: bash + + - name: Run Leaderboard (Dataflow Runner), Wait, Extract ID, Cleanup Submitter + id: submit_lb_df # Give step an ID to reference its outcome if needed + run: | + echo "Running Leaderboard with DataflowRunner in Background..." + source beam_env/bin/activate + python -m apache_beam.examples.complete.game.leader_board \ + --project=${{ env.GCP_PROJECT_ID }} \ + --region=${{ env.GCE_REGION }} \ + --topic projects/${{ env.GCP_PROJECT_ID }}/topics/${{ env.PUBSUB_TOPIC }} \ + --dataset ${{ env.BQ_DATASET }} \ + --runner DataflowRunner \ + --temp_location=${{ env.GCS_BUCKET }}/temp/leaderboard/ \ + --sdk_location=apache_beam-${{ env.RELEASE_VERSION }}.tar.gz \ + > leaderboard_dataflow_submit.log 2>&1 & + + LB_DF_PID=$! + echo "Leaderboard (Dataflow Runner) submission process started in background with PID: ${LB_DF_PID}" + echo ${LB_DF_PID} > leaderboard_dataflow_submit.pid + + echo "Waiting up to ${{ env.SUBMISSION_TIMEOUT_SECONDS }} seconds for Dataflow job submission process (PID: ${LB_DF_PID}) to potentially complete..." + sleep ${{ env.SUBMISSION_TIMEOUT_SECONDS }} + + echo "Proceeding with Job ID extraction..." + # Try extracting Job ID using common patterns + JOB_ID=$(grep -oP 'Dataflow Job ID: \K\S+' leaderboard_dataflow_submit.log || grep -oP "job_id='?\K[^' >]+" leaderboard_dataflow_submit.log || grep -oP "id: '?\"?\K[^'\" >]+" leaderboard_dataflow_submit.log | head -n 1) + + if [[ -n "$JOB_ID" ]]; then + echo "Extracted Leaderboard Dataflow Job ID: $JOB_ID" + echo "$JOB_ID" > leaderboard_dataflow_jobid.txt + else + echo "WARNING: Could not extract Leaderboard Dataflow Job ID after ${{ env.SUBMISSION_TIMEOUT_SECONDS }}s wait. Log content:" + echo "--- Leaderboard Dataflow submission log START ---" + cat leaderboard_dataflow_submit.log || echo "Log file not found." 
+ echo "--- Leaderboard Dataflow submission log END ---" + fi + + # Check if the submission process is still running and kill it if necessary + if [ -f leaderboard_dataflow_submit.pid ] && ps -p $LB_DF_PID > /dev/null; then + echo "Submission process (PID: $LB_DF_PID) is still running after ${{ env.SUBMISSION_TIMEOUT_SECONDS }}s. Attempting to kill it." + kill -9 $LB_DF_PID || echo "Failed to kill process $LB_DF_PID." + else + echo "Submission process (PID: $LB_DF_PID) has already finished or PID file is missing." + fi + # Clean up PID file regardless + if [ -f leaderboard_dataflow_submit.pid ]; then + rm leaderboard_dataflow_submit.pid + fi + + echo "Leaderboard (Dataflow Runner) submission step finished processing." + shell: bash + + - name: Validate Leaderboard Results (Dataflow Runner) + run: | + if [ ! -f leaderboard_dataflow_jobid.txt ]; then + echo "Skipping Leaderboard Dataflow validation as Job ID was not extracted." + exit 0 # Exit step successfully to allow cancellation/cleanup + fi + source beam_env/bin/activate + eval "$VALIDATE_TABLE_FUNC" + echo "Validating BigQuery results for Leaderboard (DataflowRunner)..." + sleep 240 + validate_table "leader_board_users" 15 || exit 1 # Use 15s retry delay + validate_table "leader_board_teams" 15 || exit 1 # Use 15s retry delay + echo "Leaderboard (Dataflow Runner) BQ validation finished successfully." + shell: bash + + - name: Cancel Leaderboard Dataflow Job + if: always() # Run even if validation failed, to attempt cleanup + run: | + if [ -f leaderboard_dataflow_jobid.txt ]; then + JOB_ID=$(cat leaderboard_dataflow_jobid.txt) + if [[ -n "$JOB_ID" ]]; then + echo "Attempting to cancel Leaderboard Dataflow job: $JOB_ID in region ${{ env.GCE_REGION }}" + gcloud dataflow jobs cancel "$JOB_ID" --region=${{ env.GCE_REGION }} --project=${{ env.GCP_PROJECT_ID }} || echo "Failed to cancel Leaderboard Dataflow job $JOB_ID (maybe it finished or was already cancelled)." 
+ else + echo "Leaderboard Dataflow Job ID file exists but is empty." + fi + # Keep the job ID file removal in the final cleanup section in case other steps need it? + # rm leaderboard_dataflow_jobid.txt # Or remove it here? Let's keep final cleanup consistent. + else + echo "leaderboard_dataflow_jobid.txt not found, cannot cancel job (it might have failed before ID extraction)." + fi + shell: bash + + # ================== GameStats Tests ================== + - name: Run GameStats (Direct Runner) in Background + run: | + echo "Running GameStats with DirectRunner in Background..." + source beam_env/bin/activate + python -m apache_beam.examples.complete.game.game_stats \ + --project=${{ env.GCP_PROJECT_ID }} \ + --topic projects/${{ env.GCP_PROJECT_ID }}/topics/${{ env.PUBSUB_TOPIC }} \ + --dataset ${{ env.BQ_DATASET }} \ + --fixed_window_duration ${{ env.GAME_STATS_WINDOW_DURATION }} & + + GS_DIRECT_PID=$! + echo "GameStats (Direct Runner) started in background with PID: ${GS_DIRECT_PID}" + echo ${GS_DIRECT_PID} > gamestats_direct.pid + shell: bash + + - name: Validate GameStats Results (Direct Runner) + run: | + source beam_env/bin/activate + eval "$VALIDATE_TABLE_FUNC" + echo "Validating BigQuery results for GameStats (DirectRunner)..." + echo "* Sleeping for 25mins" + sleep 25m + validate_table "game_stats_teams" || exit 1 + validate_table "game_stats_sessions" || exit 1 + echo "GameStats (Direct Runner) BQ validation finished successfully." + shell: bash + + - name: Kill GameStats Direct Runner Process + if: always() + run: | + if [ -f gamestats_direct.pid ]; then + GS_DIRECT_PID=$(cat gamestats_direct.pid) + echo "Attempting to kill GameStats Direct Runner process with PID: $GS_DIRECT_PID" + kill -9 $GS_DIRECT_PID || echo "GameStats Direct Runner process $GS_DIRECT_PID already stopped or not found." + rm gamestats_direct.pid + else + echo "gamestats_direct.pid not found, cannot kill process." 
+ fi + shell: bash + + - name: Run GameStats (Dataflow Runner), Wait, Extract ID, Cleanup Submitter + id: submit_gs_df + run: | + echo "Running GameStats with DataflowRunner in Background..." + source beam_env/bin/activate + python -m apache_beam.examples.complete.game.game_stats \ + --project=${{ env.GCP_PROJECT_ID }} \ + --region=${{ env.GCE_REGION }} \ + --topic projects/${{ env.GCP_PROJECT_ID }}/topics/${{ env.PUBSUB_TOPIC }} \ + --dataset ${{ env.BQ_DATASET }} \ + --runner DataflowRunner \ + --temp_location=${{ env.GCS_BUCKET }}/temp/gamestats/ \ + --sdk_location=apache_beam-${{ env.RELEASE_VERSION }}.tar.gz \ + --fixed_window_duration ${{ env.GAME_STATS_WINDOW_DURATION }} \ + > gamestats_dataflow_submit.log 2>&1 & + + GS_DF_PID=$! + echo "GameStats (Dataflow Runner) submission process started in background with PID: ${GS_DF_PID}" + echo ${GS_DF_PID} > gamestats_dataflow_submit.pid + + echo "Waiting up to ${{ env.SUBMISSION_TIMEOUT_SECONDS }} seconds for Dataflow job submission process (PID: ${GS_DF_PID}) to potentially complete..." + sleep ${{ env.SUBMISSION_TIMEOUT_SECONDS }} + + echo "Proceeding with Job ID extraction..." + # Try extracting Job ID using common patterns + JOB_ID=$(grep -oP 'Dataflow Job ID: \K\S+' gamestats_dataflow_submit.log || grep -oP "job_id='?\K[^' >]+" gamestats_dataflow_submit.log || grep -oP "id: '?\"?\K[^'\" >]+" gamestats_dataflow_submit.log | head -n 1) + + if [[ -n "$JOB_ID" ]]; then + echo "Extracted GameStats Dataflow Job ID: $JOB_ID" + echo "$JOB_ID" > gamestats_dataflow_jobid.txt + else + echo "WARNING: Could not extract GameStats Dataflow Job ID after ${{ env.SUBMISSION_TIMEOUT_SECONDS }}s wait. Log content:" + echo "--- GameStats Dataflow submission log START ---" + cat gamestats_dataflow_submit.log || echo "Log file not found." 
+ echo "--- GameStats Dataflow submission log END ---" + fi + + # Check if the submission process is still running and kill it if necessary + if [ -f gamestats_dataflow_submit.pid ] && ps -p $GS_DF_PID > /dev/null; then + echo "Submission process (PID: $GS_DF_PID) is still running after ${{ env.SUBMISSION_TIMEOUT_SECONDS }}s. Attempting to kill it." + kill -9 $GS_DF_PID || echo "Failed to kill process $GS_DF_PID." + else + echo "Submission process (PID: $GS_DF_PID) has already finished or PID file is missing." + fi + # Clean up PID file regardless + if [ -f gamestats_dataflow_submit.pid ]; then + rm gamestats_dataflow_submit.pid + fi + + echo "GameStats (Dataflow Runner) submission step finished processing." + shell: bash + + - name: Validate GameStats Results (Dataflow Runner) + run: | + if [ ! -f gamestats_dataflow_jobid.txt ]; then + echo "Skipping GameStats Dataflow validation as Job ID was not extracted." + exit 0 # Exit step successfully to allow cleanup + fi + source beam_env/bin/activate + eval "$VALIDATE_TABLE_FUNC" + echo "Validating BigQuery results for GameStats (DataflowRunner)..." + echo "* Sleeping for 25mins" + sleep 25m + validate_table "game_stats_teams" 15 || exit 1 # Use 15s retry delay + validate_table "game_stats_sessions" 15 || exit 1 # Use 15s retry delay + echo "GameStats (Dataflow Runner) BQ validation finished successfully." + shell: bash + + # ================== Cleanup ================== + # Kill background processes first + - name: Kill Java Injector Process + if: always() + run: | + if [ -f injector.pid ]; then + INJECTOR_PID=$(cat injector.pid) + echo "Attempting to kill Java Injector process with PID: $INJECTOR_PID" + kill $INJECTOR_PID || echo "Injector process $INJECTOR_PID may have already stopped or was not found." + sleep 5 + if ps -p $INJECTOR_PID > /dev/null; then + echo "Process $INJECTOR_PID still running, sending SIGKILL." + kill -9 $INJECTOR_PID || echo "Failed to SIGKILL process $INJECTOR_PID." 
+ else + echo "Process $INJECTOR_PID terminated or was not running." + fi + rm injector.pid + else + echo "injector.pid not found, cannot kill process." + fi + shell: bash + + # Cancel Remaining Dataflow job + - name: Cancel GameStats Dataflow Job + if: always() + run: | + if [ -f gamestats_dataflow_jobid.txt ]; then + JOB_ID=$(cat gamestats_dataflow_jobid.txt) + if [[ -n "$JOB_ID" ]]; then + echo "Attempting to cancel GameStats Dataflow job: $JOB_ID in region ${{ env.GCE_REGION }}" + gcloud dataflow jobs cancel "$JOB_ID" --region=${{ env.GCE_REGION }} --project=${{ env.GCP_PROJECT_ID }} || echo "Failed to cancel GameStats Dataflow job $JOB_ID (maybe it finished or was already cancelled)." + else + echo "GameStats Dataflow Job ID file exists but is empty." + fi + # Remove the file here after attempting cancel + rm gamestats_dataflow_jobid.txt + else + echo "gamestats_dataflow_jobid.txt not found, cannot cancel job (it might have failed before ID extraction)." + fi + shell: bash + + # Cleanup GCP resources + - name: Cleanup BigQuery Dataset + if: always() && github.event.inputs.CLEANUP_BQ_RESOURCES == 'true' + run: | + echo "Deleting BigQuery dataset: ${{ env.BQ_DATASET }} in project ${{ env.GCP_PROJECT_ID }}" + bq rm --project_id=${{ env.GCP_PROJECT_ID }} -f -r ${{ env.BQ_DATASET }} || echo "Failed to delete BQ dataset ${{ env.BQ_DATASET }}, continuing..." + shell: bash + + - name: Cleanup GCS Bucket Objects and Logs + if: always() + run: | + echo "Deleting objects in GCS Bucket: ${{ env.GCS_BUCKET }}/temp/" + gsutil -m rm -r "${{ env.GCS_BUCKET }}/temp/leaderboard/**" || echo "Failed to delete objects in GCS leaderboard temp folder." + gsutil -m rm -r "${{ env.GCS_BUCKET }}/temp/gamestats/**" || echo "Failed to delete objects in GCS gamestats temp folder." + echo "Removing local log and jobid files..." 
+ rm -f leaderboard_dataflow_submit.log gamestats_dataflow_submit.log injector_run.log + rm -f leaderboard_dataflow_jobid.txt # Remove Leaderboard jobid file here + # gamestats_dataflow_jobid.txt is removed in its cancellation step above + shell: bash + + - name: Cleanup PubSub Topic + if: always() + run: | + echo "Deleting PubSub topic: ${{ env.PUBSUB_TOPIC }} in project ${{ env.GCP_PROJECT_ID }}" + gcloud pubsub topics delete --project=${{ env.GCP_PROJECT_ID }} ${{ env.PUBSUB_TOPIC }} --quiet || echo "Failed to delete PubSub topic ${{ env.PUBSUB_TOPIC }}, continuing..." + shell: bash diff --git a/.test-infra/tools/refresh_looker_metrics.py b/.test-infra/tools/refresh_looker_metrics.py index 842fdd6ac103..4ebbf9744ade 100644 --- a/.test-infra/tools/refresh_looker_metrics.py +++ b/.test-infra/tools/refresh_looker_metrics.py @@ -34,6 +34,12 @@ ("33", ["21", "70", "116", "69", "115"]), # BigTableIO_Write ("34", ["22", "56", "96", "55", "95"]), # TextIO_Read ("35", ["23", "64", "110", "63", "109"]), # TextIO_Write + ("75", ["258", "259", "260", "261", "262"]), # TensorFlow MNIST + ("76", ["233", "234", "235", "236", "237"]), # PyTorch BERT base uncased + ("77", ["238", "239", "240", "241", "242"]), # PyTorch BERT large uncased + ("78", ["243", "244", "245", "246", "247"]), # PyTorch Resnet 101 + ("79", ["248", "249", "250", "251", "252"]), # PyTorch Resnet 152 + ("80", ["253", "254", "255", "256", "257"]), # PyTorch Resnet 152 Tesla T4 ] diff --git a/CHANGES.md b/CHANGES.md index b47ffc3da8a9..6f1a602ad1e0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -64,6 +64,7 @@ ## I/Os * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* Upgraded GoogleAdsAPI to v19 for GoogleAdsIO (Java) ([#34497](https://github.com/apache/beam/pull/34497)). Changed PTransform method from version-specified (`v17()`) to `current()` for better backward compatibility in the future. 
## New Features / Improvements @@ -80,6 +81,7 @@ ## Bugfixes * Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* Fixed read Beam rows from cross-lang transform (for example, ReadFromJdbc) involving negative 32-bit integers incorrectly decoded to large integers ([#34089](https://github.com/apache/beam/issues/34089)) ## Security Fixes * Fixed (CVE-YYYY-NNNN)[https://www.cve.org/CVERecord?id=CVE-YYYY-NNNN] (Java/Python/Go) ([#X](https://github.com/apache/beam/issues/X)). @@ -89,7 +91,7 @@ [comment]: # ( When updating known issues after release, make sure also update website blog in website/www/site/content/blog.) * ([#X](https://github.com/apache/beam/issues/X)). -# [2.64.0] - Ongoing Release +# [2.64.0] - 2025-03-31 ## Highlights @@ -108,8 +110,9 @@ * [Python] Support custom coders in Reshuffle ([#29908](https://github.com/apache/beam/issues/29908), [#33356](https://github.com/apache/beam/issues/33356)). * [Java] Upgrade SLF4J to 2.0.16. Update default Spark version to 3.5.0. ([#33574](https://github.com/apache/beam/pull/33574)) * [Java] Support for `--add-modules` JVM option is added through a new pipeline option `JdkAddRootModules`. This allows extending the module graph with optional modules such as SDK incubator modules. Sample usage: ` --jdkAddRootModules=jdk.incubator.vector` ([#30281](https://github.com/apache/beam/issues/30281)). -* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). * Managed API for [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/managed/Managed.html) and [Python](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.managed.html#module-apache_beam.transforms.managed) supports [key I/O connectors](https://beam.apache.org/documentation/io/connectors/) Iceberg, Kafka, and BigQuery. +* [YAML] Beam YAML UDFs (such as those used in MapToFields) can now have declared dependencies + (e.g. pypi packages for Python, or extra jars for Java). 
* Prism now supports event time triggers for most common cases. ([#31438](https://github.com/apache/beam/issues/31438)) * Prism does not yet support triggered side inputs, or triggers on merging windows (such as session windows). @@ -117,7 +120,7 @@ * [Python] Reshuffle now correctly respects user-specified type hints, fixing a previous bug where it might use FastPrimitivesCoder wrongly. This change could break pipelines with incorrect type hints in Reshuffle. If you have issues after upgrading, temporarily set update_compatibility_version to a previous Beam version to use the old behavior. The recommended solution is to fix the type hints in your code. ([#33932](https://github.com/apache/beam/pull/33932)) * [Java] SparkReceiver 2 has been moved to SparkReceiver 3 that supports Spark 3.x. ([#33574](https://github.com/apache/beam/pull/33574)) -* [Python] Correct parsing of `collections.abc.Sequence` type hints was added, which can lead to pipelines failing type hint checks that were previously passing erroneously. These issues will be most commonly seen trying to consume a PCollection with a `Sequence` type hint after a GroupByKey or a CoGroupByKey. ([#33999](https://github.com/apache/beam/pull/33999). +* [Python] Correct parsing of `collections.abc.Sequence` type hints was added, which can lead to pipelines failing type hint checks that were previously passing erroneously. These issues will be most commonly seen trying to consume a PCollection with a `Sequence` type hint after a GroupByKey or a CoGroupByKey. ([#33999](https://github.com/apache/beam/pull/33999)). 
## Bugfixes @@ -128,6 +131,11 @@ * Fixed checkpoint recovery and streaming behavior in Spark Classic and Portable runner's Flatten transform by replacing queueStream with SingleEmitInputDStream ([#34080](https://github.com/apache/beam/pull/34080), [#18144](https://github.com/apache/beam/issues/18144), [#20426](https://github.com/apache/beam/issues/20426)) * (Java) Fixed Read caching of UnboundedReader objects to effectively cache across multiple DoFns and avoid checkpointing unstarted reader. [#34146](https://github.com/apache/beam/pull/34146) [#33901](https://github.com/apache/beam/pull/33901) +## Known Issues + +* (Java) Current version of protobuf has a [bug](https://github.com/protocolbuffers/protobuf/issues/20599) leading to incompatibilities with clients using older versions of Protobuf ([example issue](https://github.com/GoogleCloudPlatform/DataflowTemplates/issues/2191)). This issue has been seen in SpannerIO in particular. Tracked in [#34452](https://github.com/GoogleCloudPlatform/DataflowTemplates/issues/34452). +* (Java) When constructing `SpannerConfig` for `SpannerIO`, calling `withHost` with a null or empty host will now result in a Null Pointer Exception (`java.lang.NullPointerException: Cannot invoke "java.lang.CharSequence.length()" because "this.text" is null`). See https://github.com/GoogleCloudPlatform/DataflowTemplates/issues/34489 for context. + # [2.63.0] - 2025-02-18 ## I/Os @@ -180,6 +188,10 @@ * Fixed the user mailing list address ([#26013](https://github.com/apache/beam/issues/26013)). * Fixed the contributing prerequisites link ([#33903](https://github.com/apache/beam/issues/33903)). +## Known Issues + +* (Java) Current version of protobuf has a [bug](https://github.com/protocolbuffers/protobuf/issues/20599) leading to incompatibilities with clients using older versions of Protobuf ([example issue](https://github.com/GoogleCloudPlatform/DataflowTemplates/issues/2191)). This issue has been seen in SpannerIO in particular. 
Tracked in [#34452](https://github.com/GoogleCloudPlatform/DataflowTemplates/issues/34452). + # [2.62.0] - 2025-01-21 ## I/Os diff --git a/LICENCE.cloudpickle b/LICENCE.cloudpickle new file mode 100644 index 000000000000..6f225c5a8b08 --- /dev/null +++ b/LICENCE.cloudpickle @@ -0,0 +1,29 @@ +Copyright (c) 2012-now, CloudPickle developers and contributors. +Copyright (c) 2012, Regents of the University of California. +Copyright (c) 2009 `PiCloud, Inc. `_. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the University of California, Berkeley nor the + names of its contributors may be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/build.gradle.kts b/build.gradle.kts index 664fb8a83d09..ccc0a95d96a4 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -44,6 +44,7 @@ tasks.rat { "**/package-list", "**/test.avsc", + "**/logical-types.avsc", "**/user.avsc", "**/test/resources/**/*.txt", "**/test/resources/**/*.csv", @@ -95,6 +96,10 @@ tasks.rat { // Ignore CPython LICENSE file "LICENSE.python", + // Ignore vendored cloudpickle files + "sdks/python/apache_beam/internal/cloudpickle/**", + "LICENCE.cloudpickle", + // Json doesn't support comments. "**/*.json", diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts index cd8aed6d3a67..d2f89cc62325 100644 --- a/buildSrc/build.gradle.kts +++ b/buildSrc/build.gradle.kts @@ -45,7 +45,7 @@ dependencies { implementation("com.github.spotbugs.snom:spotbugs-gradle-plugin:5.0.14") runtimeOnly("com.google.protobuf:protobuf-gradle-plugin:0.8.13") // Enable proto code generation - runtimeOnly("com.github.davidmc24.gradle.plugin:gradle-avro-plugin:1.9.1") // Enable Avro code generation + runtimeOnly("com.github.davidmc24.gradle.plugin:gradle-avro-plugin:1.9.1") // Enable Avro code generation. 
Version 1.1.0 is the last supporting avro 1.10.2 runtimeOnly("com.diffplug.spotless:spotless-plugin-gradle:5.6.1") // Enable a code formatting plugin runtimeOnly("gradle.plugin.com.dorongold.plugins:task-tree:1.5") // Adds a 'taskTree' task to print task dependency tree runtimeOnly("gradle.plugin.com.github.johnrengelman:shadow:7.1.1") // Enable shading Java dependencies diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 51ed48738b37..b71b81dabfc6 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -602,12 +602,12 @@ class BeamModulePlugin implements Plugin { def dbcp2_version = "2.9.0" def errorprone_version = "2.10.0" // [bomupgrader] determined by: com.google.api:gax, consistent with: google_cloud_platform_libraries_bom - def gax_version = "2.62.0" + def gax_version = "2.63.1" def google_ads_version = "33.0.0" def google_clients_version = "2.0.0" def google_cloud_bigdataoss_version = "2.2.26" // [bomupgrader] determined by: com.google.cloud:google-cloud-spanner, consistent with: google_cloud_platform_libraries_bom - def google_cloud_spanner_version = "6.88.0" + def google_cloud_spanner_version = "6.89.0" def google_code_gson_version = "2.10.1" def google_oauth_clients_version = "1.34.1" // [bomupgrader] determined by: io.grpc:grpc-netty, consistent with: google_cloud_platform_libraries_bom @@ -618,7 +618,7 @@ class BeamModulePlugin implements Plugin { def influxdb_version = "2.19" def httpclient_version = "4.5.13" def httpcore_version = "4.4.14" - def iceberg_bqms_catalog_version = "1.5.2-0.1.0" + def iceberg_bqms_catalog_version = "1.6.1-1.0.1-beta" def jackson_version = "2.15.4" def jaxb_api_version = "2.3.3" def jsr305_version = "3.0.2" @@ -627,10 +627,10 @@ class BeamModulePlugin implements Plugin { def log4j2_version = "2.20.0" def 
nemo_version = "0.1" // [bomupgrader] determined by: io.grpc:grpc-netty, consistent with: google_cloud_platform_libraries_bom - def netty_version = "4.1.110.Final" + def netty_version = "4.1.118.Final" def postgres_version = "42.2.16" // [bomupgrader] determined by: com.google.protobuf:protobuf-java, consistent with: google_cloud_platform_libraries_bom - def protobuf_version = "4.29.0" + def protobuf_version = "4.29.4" def qpid_jms_client_version = "0.61.0" def quickcheck_version = "1.0" def sbe_tool_version = "1.25.1" @@ -726,18 +726,16 @@ class BeamModulePlugin implements Plugin { gax_grpc : "com.google.api:gax-grpc", // google_cloud_platform_libraries_bom sets version gax_grpc_test : "com.google.api:gax-grpc:$gax_version:testlib", // google_cloud_platform_libraries_bom sets version gax_httpjson : "com.google.api:gax-httpjson", // google_cloud_platform_libraries_bom sets version - google_ads : "com.google.api-ads:google-ads:$google_ads_version", - google_ads_stubs : "com.google.api-ads:google-ads-stubs-v17:$google_ads_version", google_api_client : "com.google.api-client:google-api-client:$google_clients_version", // for the libraries using $google_clients_version below. 
google_api_client_gson : "com.google.api-client:google-api-client-gson:$google_clients_version", google_api_client_java6 : "com.google.api-client:google-api-client-java6:$google_clients_version", google_api_common : "com.google.api:api-common", // google_cloud_platform_libraries_bom sets version - google_api_services_bigquery : "com.google.apis:google-api-services-bigquery:v2-rev20250216-2.0.0", // [bomupgrader] sets version + google_api_services_bigquery : "com.google.apis:google-api-services-bigquery:v2-rev20250313-2.0.0", // [bomupgrader] sets version google_api_services_cloudresourcemanager : "com.google.apis:google-api-services-cloudresourcemanager:v1-rev20240310-2.0.0", // [bomupgrader] sets version google_api_services_dataflow : "com.google.apis:google-api-services-dataflow:v1b3-rev20250106-$google_clients_version", google_api_services_healthcare : "com.google.apis:google-api-services-healthcare:v1-rev20240130-$google_clients_version", google_api_services_pubsub : "com.google.apis:google-api-services-pubsub:v1-rev20220904-$google_clients_version", - google_api_services_storage : "com.google.apis:google-api-services-storage:v1-rev20241206-2.0.0", // [bomupgrader] sets version + google_api_services_storage : "com.google.apis:google-api-services-storage:v1-rev20250224-2.0.0", // [bomupgrader] sets version google_auth_library_credentials : "com.google.auth:google-auth-library-credentials", // google_cloud_platform_libraries_bom sets version google_auth_library_oauth2_http : "com.google.auth:google-auth-library-oauth2-http", // google_cloud_platform_libraries_bom sets version google_cloud_bigquery : "com.google.cloud:google-cloud-bigquery", // google_cloud_platform_libraries_bom sets version @@ -749,13 +747,13 @@ class BeamModulePlugin implements Plugin { google_cloud_core_grpc : "com.google.cloud:google-cloud-core-grpc", // google_cloud_platform_libraries_bom sets version google_cloud_datacatalog_v1beta1 : "com.google.cloud:google-cloud-datacatalog", // 
google_cloud_platform_libraries_bom sets version google_cloud_dataflow_java_proto_library_all: "com.google.cloud.dataflow:google-cloud-dataflow-java-proto-library-all:0.5.160304", - google_cloud_datastore_v1_proto_client : "com.google.cloud.datastore:datastore-v1-proto-client:2.26.4", // [bomupgrader] sets version + google_cloud_datastore_v1_proto_client : "com.google.cloud.datastore:datastore-v1-proto-client:2.27.1", // [bomupgrader] sets version google_cloud_firestore : "com.google.cloud:google-cloud-firestore", // google_cloud_platform_libraries_bom sets version google_cloud_pubsub : "com.google.cloud:google-cloud-pubsub", // google_cloud_platform_libraries_bom sets version google_cloud_pubsublite : "com.google.cloud:google-cloud-pubsublite", // google_cloud_platform_libraries_bom sets version // [bomupgrader] the BOM version is set by scripts/tools/bomupgrader.py. If update manually, also update // libraries-bom version on sdks/java/container/license_scripts/dep_urls_java.yaml - google_cloud_platform_libraries_bom : "com.google.cloud:libraries-bom:26.56.0", + google_cloud_platform_libraries_bom : "com.google.cloud:libraries-bom:26.57.0", google_cloud_secret_manager : "com.google.cloud:google-cloud-secretmanager", // google_cloud_platform_libraries_bom sets version google_cloud_spanner : "com.google.cloud:google-cloud-spanner", // google_cloud_platform_libraries_bom sets version google_cloud_spanner_test : "com.google.cloud:google-cloud-spanner:$google_cloud_spanner_version:tests", @@ -3033,12 +3031,6 @@ class BeamModulePlugin implements Plugin { def cleanPython = project.tasks.register('cleanPython') { doLast { def activate = "${project.ext.envdir}/bin/activate" - project.exec { - executable 'sh' - args '-c', "if [ -e ${activate} ]; then " + - ". 
${activate} && cd ${pythonRootDir} && pip install pyyaml jinja2 && python setup.py clean; " + - "fi" - } project.delete project.buildDir // Gradle build directory project.delete project.ext.envdir // virtualenv directory project.delete "$project.projectDir/target" // tox work directory diff --git a/contributor-docs/code-change-guide.md b/contributor-docs/code-change-guide.md index b489cb0e4cd8..593f9e71b27b 100644 --- a/contributor-docs/code-change-guide.md +++ b/contributor-docs/code-change-guide.md @@ -37,6 +37,8 @@ The guide contains the following sections: for Python development, running unit and integration tests, and running a pipeline with modified Beam code. +For instructions regarding testing code changes for Go SDK, please see the Go SDK's [README file](https://github.com/apache/beam/tree/master/sdks/go). + ## Repository structure The Apache Beam GitHub repository (Beam repo) is, for the most part, a "mono repo". diff --git a/contributor-docs/discussion-docs/2025.md b/contributor-docs/discussion-docs/2025.md index b3969b1e2b06..2207b7d7ab3c 100644 --- a/contributor-docs/discussion-docs/2025.md +++ b/contributor-docs/discussion-docs/2025.md @@ -20,4 +20,8 @@ limitations under the License. 
| 3 | Danny McCormick | [Beam Python & ML Dependency Extras](https://docs.google.com/document/d/1c84Gc-cZRCfrU8f7kWGsNR2o8oSRjCM-dGHO9KvPWPw) | 2025-01-27 15:33:36 | | 4 | Danny McCormick | [How vLLM Model Handler Works (Plus a Summary of Model Memory Management in Beam ML)](https://docs.google.com/document/d/1UB4umrtnp1Eg45fiUB3iLS7kPK3BE6pcf0YRDkA289Q) | 2025-01-31 11:56:59 | | 5 | Shunping Huang | [Improve Logging Dependencies in Beam Java SDK](https://docs.google.com/document/d/1IkbiM4m8D-aB3NYI1aErFZHt6M7BQ-8eCULh284Davs) | 2025-02-04 15:13:14 | -| 6 | Ahmed Abualsaud | [Iceberg Incremental Source design](https://s.apache.org/beam-iceberg-incremental-source) | 2025-03-03 14:52:42 | \ No newline at end of file +| 6 | Ahmed Abualsaud | [Iceberg Incremental Source design](https://s.apache.org/beam-iceberg-incremental-source) | 2025-03-03 14:52:42 | +| 7 | Kenneth Knowles | [[PUBLIC] Timers, Watermark Holds, Loops, Batch and Drain](https://s.apache.org/beam-timers-and-drain) | 2025-03-11 14:00:00 | +| 8 | Robert Bradshaw | [Apache Beam YAML testing](https://s.apache.org/beam-yaml-testing) | 2025-03-17 14:00:00 | +| 9 | Jack McCluskey | [[Design Doc] Generic Remote Model Handlers for RunInference](https://docs.google.com/document/d/17A_oHJ7s3ol4TGCUpKeYc6iozkcTwN_e4H_J3kRlgPM/edit?usp=sharing) | 2025-03-24 14:00:00 | +| 10 | Robert Bradshaw | [Beam YAML Unknown Schemas](https://s.apache.org/beam-yaml-unknown-schema) | 2025-03-26 14:00:00 | diff --git a/contributor-docs/release-guide.md b/contributor-docs/release-guide.md index 49b69fc7c8f8..61ea8f4aa6fd 100644 --- a/contributor-docs/release-guide.md +++ b/contributor-docs/release-guide.md @@ -1199,6 +1199,10 @@ This should happen automatically: [dev@ thread](https://lists.apache.org/thread. Use [reporter.apache.org](https://reporter.apache.org/addrelease.html?beam) to seed the information about the release into future project reports. +#### Post on LinkedIn + +Add a short post on LinkedIn promoting the release. 
+ ### Checklist to proceed to the next step - [ ] Maven artifacts released and indexed in the [Maven Central Repository](https://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.beam%22) @@ -1228,10 +1232,7 @@ __NOTE__: This can only be done from `@apache.org` email address. This email has ### Social media -Tweet, post on Facebook, LinkedIn, and other platforms. -Ask other contributors to do the same. - -Also, update [the Wikipedia article on Apache Beam](https://en.wikipedia.org/wiki/Apache_Beam). +Update [the Wikipedia article on Apache Beam](https://en.wikipedia.org/wiki/Apache_Beam). ### Checklist to declare the process completed diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java index d6d348f524b2..45c22b1f0d55 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java @@ -34,6 +34,7 @@ import com.google.cloud.bigquery.TableId; import com.google.cloud.bigquery.TableInfo; import com.google.cloud.bigquery.TableResult; +import com.google.cloud.bigquery.TimePartitioning; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -271,6 +272,102 @@ public synchronized TableId createTable( } } + /** + * Creates a table within the current dataset given a table name, schema and time partitioning + * properties. + * + *

This table will automatically expire 1 hour after creation if not cleaned up manually or by + * calling the {@link BigQueryResourceManager#cleanupAll()} method. + * + *

Note: Implementations may do dataset creation here, if one does not already exist. + * + * @param tableName The name of the table. + * @param schema A schema object that defines the table. + * @param timePartitioning A TimePartition object that defines time partitioning details. + * @return The TableId (reference) to the table + * @throws BigQueryResourceManagerException if there is an error creating the table in BigQuery. + */ + public synchronized TableId createTimePartitionedTable( + String tableName, Schema schema, TimePartitioning timePartitioning) + throws BigQueryResourceManagerException { + return createTimePartitionedTable( + tableName, schema, timePartitioning, System.currentTimeMillis() + 3600000); // 1h + } + + /** + * Creates a table within the current dataset given a table name and schema. + * + *

This table will automatically expire at the time specified by {@code expirationTime} if not + * cleaned up manually or by calling the {@link BigQueryResourceManager#cleanupAll()} method. + * + *

Note: Implementations may do dataset creation here, if one does not already exist. + * + * @param tableName The name of the table. + * @param schema A schema object that defines the table. + * @param timePartitioning A TimePartition object that defines time partitioning details. + * @param expirationTimeMillis Sets the time when this table expires, in milliseconds since the + * epoch. + * @return The TableId (reference) to the table + * @throws BigQueryResourceManagerException if there is an error creating the table in BigQuery. + */ + public synchronized TableId createTimePartitionedTable( + String tableName, Schema schema, TimePartitioning timePartitioning, Long expirationTimeMillis) + throws BigQueryResourceManagerException { + // Check table ID + BigQueryResourceManagerUtils.checkValidTableId(tableName); + + // Check schema + if (schema == null) { + throw new IllegalArgumentException("A valid schema must be provided to create a table."); + } + + // Check time partition details + if (timePartitioning == null) { + throw new IllegalArgumentException( + "A valid TimePartition object must be provided to create a time paritioned table. 
Use createTable instead to create non-partitioned tables."); + } + + // Create a default dataset if this resource manager has not already created one + if (dataset == null) { + createDataset(DEFAULT_DATASET_REGION); + } + checkHasDataset(); + + LOG.info( + "Creating time partitioned table using tableName '{}' on field '{}'.", + tableName, + timePartitioning.getField()); + + // Create the table if it does not already exist in the dataset + try { + TableId tableId = TableId.of(dataset.getDatasetId().getDataset(), tableName); + if (bigQuery.getTable(tableId) == null) { + StandardTableDefinition tableDefinition = + StandardTableDefinition.newBuilder() + .setSchema(schema) + .setTimePartitioning(timePartitioning) + .build(); + TableInfo tableInfo = + TableInfo.newBuilder(tableId, tableDefinition) + .setExpirationTime(expirationTimeMillis) + .build(); + bigQuery.create(tableInfo); + LOG.info( + "Successfully created table {}.{} partitioned on {}", + dataset.getDatasetId().getDataset(), + tableName, + timePartitioning.getField()); + + return tableId; + } else { + throw new IllegalStateException( + "Table " + tableId + " already exists for dataset " + datasetId + "."); + } + } catch (Exception e) { + throw new BigQueryResourceManagerException("Failed to create table.", e); + } + } + /** * Writes a given row into a table. 
This method requires {@link * BigQueryResourceManager#createTable(String, Schema)} to be called for the target table diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java index e9cd25238755..53987dbe2670 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManagerTest.java @@ -36,9 +36,11 @@ import com.google.cloud.bigquery.QueryJobConfiguration; import com.google.cloud.bigquery.Schema; import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.StandardTableDefinition; import com.google.cloud.bigquery.Table; import com.google.cloud.bigquery.TableId; import com.google.cloud.bigquery.TableInfo; +import com.google.cloud.bigquery.TimePartitioning; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; import org.junit.Before; @@ -47,6 +49,8 @@ import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import org.mockito.Answers; +import org.mockito.ArgumentCaptor; +import org.mockito.Captor; import org.mockito.Mock; import org.mockito.junit.MockitoJUnit; import org.mockito.junit.MockitoRule; @@ -62,6 +66,8 @@ public class BigQueryResourceManagerTest { private Schema schema; private RowToInsert rowToInsert; + private TimePartitioning timePartition; + private static final String PARTITION_FIELD = "time"; // name of column to use for partitioning private static final String TABLE_NAME = "table-name"; private static final String DATASET_ID = "dataset-id"; private static final String TEST_ID = "test-id"; @@ -70,10 +76,14 @@ public class BigQueryResourceManagerTest { private BigQueryResourceManager 
testManager; + @Captor private ArgumentCaptor tableCaptor; + @Before public void setUp() { schema = Schema.of(Field.of("name", StandardSQLTypeName.STRING)); rowToInsert = RowToInsert.of("1", ImmutableMap.of("name", "Jake")); + timePartition = + TimePartitioning.newBuilder(TimePartitioning.Type.HOUR).setField(PARTITION_FIELD).build(); testManager = new BigQueryResourceManager(TEST_ID, PROJECT_ID, bigQuery); } @@ -163,6 +173,75 @@ public void testCreateTableShouldWorkWhenBigQueryDoesNotThrowAnyError() { verify(bigQuery).create(any(DatasetInfo.class)); } + @Test + public void testCreateTimePartitionedTableShouldThrowErrorWhenTableNameIsNotValid() { + assertThrows( + IllegalArgumentException.class, + () -> testManager.createTimePartitionedTable("", schema, timePartition)); + } + + @Test + public void testCreateTimePartitionedTableShouldThrowErrorWhenSchemaIsNull() { + assertThrows( + IllegalArgumentException.class, + () -> testManager.createTimePartitionedTable(TABLE_NAME, null, timePartition)); + } + + @Test + public void testCreateTimePartitionedTableShouldThrowErrorWhenPartitionInfoIsNull() { + assertThrows( + IllegalArgumentException.class, + () -> testManager.createTimePartitionedTable(TABLE_NAME, schema, null)); + } + + @Test + public void testCreateTimePartitionedTableShouldCreateDatasetWhenDatasetDoesNotExist() { + when(bigQuery.create(any(DatasetInfo.class)).getDatasetId().getDataset()) + .thenReturn(DATASET_ID); + when(bigQuery.getTable(any())).thenReturn(null); + + testManager.createTimePartitionedTable(TABLE_NAME, schema, timePartition); + + verify(bigQuery).create(any(DatasetInfo.class)); + } + + @Test + public void testCreateTimePartitionedTableShouldThrowErrorWhenCreateFails() { + testManager.createDataset(DATASET_ID); + when(bigQuery.create(any(TableInfo.class))).thenThrow(BigQueryException.class); + + assertThrows( + BigQueryResourceManagerException.class, + () -> testManager.createTimePartitionedTable(TABLE_NAME, schema, timePartition)); + } + + @Test 
+ public void testCreateTimePartitionedTableShouldThrowErrorWhenTableExists() { + testManager.createDataset(DATASET_ID); + + when(bigQuery.getTable(any())).thenReturn(any()); + + assertThrows( + BigQueryResourceManagerException.class, + () -> testManager.createTimePartitionedTable(TABLE_NAME, schema, timePartition)); + } + + @Test + public void testCreateTimePartitionedTableShouldWorkWhenBigQueryDoesNotThrowAnyError() { + when(bigQuery.create(any(DatasetInfo.class)).getDatasetId().getDataset()) + .thenReturn(DATASET_ID); + when(bigQuery.getTable(any())).thenReturn(null); + + testManager.createTimePartitionedTable(TABLE_NAME, schema, timePartition); + + verify(bigQuery).create(any(TableInfo.class)); + verify(bigQuery).create(tableCaptor.capture()); + TableInfo capturedTableInfo = tableCaptor.getValue(); + StandardTableDefinition capturedTableDefinition = capturedTableInfo.getDefinition(); + TimePartitioning capturedTimePartitioning = capturedTableDefinition.getTimePartitioning(); + assertThat(capturedTimePartitioning).isEqualTo(timePartition); + } + @Test public void testWriteShouldThrowErrorWhenDatasetDoesNotExist() { assertThrows(IllegalStateException.class, () -> testManager.write(TABLE_NAME, rowToInsert)); diff --git a/release/src/main/scripts/set_version.sh b/release/src/main/scripts/set_version.sh index 082786c59461..73ca298c1331 100755 --- a/release/src/main/scripts/set_version.sh +++ b/release/src/main/scripts/set_version.sh @@ -25,7 +25,7 @@ set -e function usage() { - echo 'Usage: set_version.sh [--release] [--debug] [--git-add]' + echo 'Usage: set_version.sh [--release] [--debug] [--git-add] [--add-tag]' } IS_SNAPSHOT_VERSION=yes @@ -50,6 +50,11 @@ while [[ $# -gt 0 ]] ; do shift ;; + --add-tag) + shift + ADD_TAG="$1" + shift + ;; *) if [[ -z "$TARGET_VERSION" ]] ; then TARGET_VERSION="$1" @@ -74,6 +79,10 @@ if ! 
[[ ${TARGET_VERSION} =~ ([0-9]+\.[0-9]+\.[0-9]+) ]]; exit 1 fi +if [[ -n $ADD_TAG ]] ; then + git tag "$ADD_TAG" +fi + if [[ -z "$IS_SNAPSHOT_VERSION" ]] ; then # Fixing a release version sed -i -e "s/version=.*/version=$TARGET_VERSION/" gradle.properties diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/ExecutionStateTracker.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/ExecutionStateTracker.java index f70e9ac16f90..b2ff4b771fea 100644 --- a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/ExecutionStateTracker.java +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/ExecutionStateTracker.java @@ -167,6 +167,7 @@ public synchronized void reset() { millisSinceLastTransition = 0; transitionsAtLastSample = 0; nextLullReportMs = LULL_REPORT_MS; + nextBundleLullDurationReportMs = BUNDLE_LULL_REPORT_MS; } @VisibleForTesting @@ -258,7 +259,6 @@ public synchronized void deactivate() { } this.trackedThread = null; millisSinceBundleStart = 0; - nextBundleLullDurationReportMs = BUNDLE_LULL_REPORT_MS; } public ExecutionState getCurrentState() { diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle index 4c25737fe2f7..d4ce1aedf312 100644 --- a/runners/google-cloud-dataflow-java/build.gradle +++ b/runners/google-cloud-dataflow-java/build.gradle @@ -53,8 +53,8 @@ evaluationDependsOn(":sdks:java:container:java11") ext.dataflowLegacyEnvironmentMajorVersion = '8' ext.dataflowFnapiEnvironmentMajorVersion = '8' -ext.dataflowLegacyContainerVersion = 'beam-master-20250312' -ext.dataflowFnapiContainerVersion = 'beam-master-20250312' +ext.dataflowLegacyContainerVersion = 'beam-master-20250326' +ext.dataflowFnapiContainerVersion = 'beam-master-20250326' ext.dataflowContainerBaseRepository = 'gcr.io/cloud-dataflow/v1beta3' processResources { diff --git 
a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java index c41cdf647a69..6ff804590a85 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java @@ -201,6 +201,17 @@ * *

Please see Google Cloud * Dataflow Security and Permissions for more details. + * + *

DataflowRunner now supports creating job templates using the {@code --templateLocation} + * option. If this option is set, the runner will generate a template instead of running the + * pipeline immediately. + * + *

Example: + * + *

{@code
+ * --runner=DataflowRunner
+ * --templateLocation=gs://your-bucket/templates/my-template
+ * }
*/ @SuppressWarnings({ "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) @@ -595,6 +606,7 @@ protected DataflowRunner(DataflowPipelineOptions options) { private static class AlwaysCreateViaRead implements PTransformOverrideFactory, Create.Values> { + @Override public PTransformOverrideFactory.PTransformReplacement> getReplacementTransform( @@ -775,7 +787,7 @@ private List getOverrides(boolean streaming) { PTransformOverride.of( PTransformMatchers.requiresStableInputParDoMulti(), RequiresStableInputParDoOverrides.multiOutputOverrideFactory())); - */ + */ overridesBuilder .add( PTransformOverride.of( @@ -790,10 +802,13 @@ private List getOverrides(boolean streaming) { PTransformMatchers.classEqualTo(ParDo.SingleOutput.class), new PrimitiveParDoSingleFactory())); + boolean usesAtLeastOnceStreamingMode = + options.getDataflowServiceOptions() != null + && options.getDataflowServiceOptions().contains("streaming_mode_at_least_once"); overridesBuilder.add( PTransformOverride.of( PTransformMatchers.classEqualTo(RedistributeByKey.class), - new RedistributeByKeyOverrideFactory())); + new RedistributeByKeyOverrideFactory(usesAtLeastOnceStreamingMode))); if (streaming) { // For update compatibility, always use a Read for Create in streaming mode. @@ -949,7 +964,7 @@ public Map, ReplacementOutput> mapOutputs( The PCollectionView itself must have the same tag since that tag may have been embedded in serialized DoFns previously and cannot easily be rewired. The PCollection may differ, so we rewire it, even if the rewiring is a noop. 
- */ + */ return ReplacementOutputs.singleton(outputs, newOutput); } } @@ -1179,6 +1194,7 @@ private List getDefaultArtifacts() { @VisibleForTesting static boolean isMultiLanguagePipeline(Pipeline pipeline) { class IsMultiLanguageVisitor extends PipelineVisitor.Defaults { + private boolean isMultiLanguage = false; private void performMultiLanguageTest(Node node) { @@ -1656,6 +1672,7 @@ private static EnvironmentInfo getEnvironmentInfoFromEnvironmentId( @AutoValue abstract static class EnvironmentInfo { + static EnvironmentInfo create( String environmentId, String containerUrl, List capabilities) { return new AutoValue_DataflowRunner_EnvironmentInfo( @@ -1954,7 +1971,6 @@ void recordViewUsesNonDeterministicKeyCoder(PTransform ptransform) { // ================================================================================ // PubsubIO translations // ================================================================================ - private static class StreamingPubsubIOReadOverrideFactory implements PTransformOverrideFactory< PBegin, PCollection, PubsubUnboundedSource> { @@ -2113,6 +2129,7 @@ protected String getKindString() { } private static class StreamingPubsubSinkTranslators { + /** Rewrite {@link StreamingPubsubIOWrite} to the appropriate internal node. 
*/ static class StreamingPubsubIOWriteTranslator implements TransformTranslator { @@ -2166,9 +2183,9 @@ private static void translate( } // ================================================================================ - private static class SingleOutputExpandableTransformTranslator implements TransformTranslator { + @Override public void translate( External.SingleOutputExpandableTransform transform, TranslationContext context) { @@ -2186,6 +2203,7 @@ public void translate( private static class MultiOutputExpandableTransformTranslator implements TransformTranslator { + @Override public void translate( External.MultiOutputExpandableTransform transform, TranslationContext context) { @@ -2734,6 +2752,7 @@ static void verifyStateSupportForWindowingStrategy(WindowingStrategy strategy) { */ private static class DataflowPayloadTranslator implements TransformPayloadTranslator> { + @Override public String getUrn(PTransform transform) { return "dataflow_stub:" + transform.getClass().getName(); @@ -2758,6 +2777,7 @@ public RunnerApi.FunctionSpec translate( }) @AutoService(TransformPayloadTranslatorRegistrar.class) public static class DataflowTransformTranslator implements TransformPayloadTranslatorRegistrar { + @Override public Map, ? 
extends TransformPayloadTranslator> getTransformPayloadTranslators() { diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/RedistributeByKeyOverrideFactory.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/RedistributeByKeyOverrideFactory.java index 9dc8daf8d437..509b35b15390 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/RedistributeByKeyOverrideFactory.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/RedistributeByKeyOverrideFactory.java @@ -47,6 +47,12 @@ class RedistributeByKeyOverrideFactory extends SingleInputOutputOverrideFactory< PCollection>, PCollection>, RedistributeByKey> { + private final boolean usesAtLeastOnceStreamingMode; + + public RedistributeByKeyOverrideFactory(boolean usesAtLeastOnceStreamingMode) { + this.usesAtLeastOnceStreamingMode = usesAtLeastOnceStreamingMode; + } + @Override public PTransformReplacement>, PCollection>> getReplacementTransform( @@ -54,17 +60,24 @@ class RedistributeByKeyOverrideFactory transform) { return PTransformOverrideFactory.PTransformReplacement.of( PTransformReplacements.getSingletonMainInput(transform), - new DataflowRedistributeByKey<>(transform.getTransform())); + new DataflowRedistributeByKey<>(transform.getTransform(), usesAtLeastOnceStreamingMode)); } /** Specialized implementation of {@link RedistributeByKey} for Dataflow pipelines. 
*/ - private static class DataflowRedistributeByKey + public static class DataflowRedistributeByKey extends PTransform>, PCollection>> { private final RedistributeByKey originalTransform; + private final boolean usesAtLeastOnceStreamingMode; - private DataflowRedistributeByKey(RedistributeByKey originalTransform) { + private DataflowRedistributeByKey( + RedistributeByKey originalTransform, boolean usesAtLeastOnceStreamingMode) { this.originalTransform = originalTransform; + this.usesAtLeastOnceStreamingMode = usesAtLeastOnceStreamingMode; + } + + public boolean getAllowDuplicates() { + return this.usesAtLeastOnceStreamingMode || this.originalTransform.getAllowDuplicates(); } @Override @@ -84,7 +97,7 @@ public PCollection> expand(PCollection> input) { .apply("ReifyOriginalMetadata", Reify.windowsInValue()); PCollection>>> grouped; - if (originalTransform.getAllowDuplicates()) { + if (getAllowDuplicates()) { grouped = reified.apply(DataflowGroupByKey.createWithAllowDuplicates()); } else { grouped = reified.apply(DataflowGroupByKey.create()); diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java index 1c3782595337..57f927d73073 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java @@ -105,6 +105,19 @@ public interface DataflowPipelineOptions + "Must either be local or Cloud Storage.") String getTemplateLocation(); + /** + * Sets the Cloud Storage path where the Dataflow template will be stored. Required for creating + * Flex Templates or Classic Templates. + * + *

Example: + * + *

{@code
+   * DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+   * options.setTemplateLocation("gs://your-bucket/templates/my-template");
+   * }
+ * + * @param value Cloud Storage path for storing the Dataflow template. + */ void setTemplateLocation(String value); /** @@ -181,10 +194,8 @@ public interface DataflowPipelineOptions enum FlexResourceSchedulingGoal { /** No goal specified. */ UNSPECIFIED, - /** Optimize for lower execution time. */ SPEED_OPTIMIZED, - /** Optimize for lower cost. */ COST_OPTIMIZED, } @@ -198,6 +209,7 @@ enum FlexResourceSchedulingGoal { /** Returns a default staging location under {@link GcpOptions#getGcpTempLocation}. */ class StagingLocationFactory implements DefaultValueFactory { + private static final Logger LOG = LoggerFactory.getLogger(StagingLocationFactory.class); @Override diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java index 83cbfbafa79a..c9bd50da0a56 100644 --- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java +++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java @@ -101,6 +101,9 @@ import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.Pipeline.PipelineVisitor; import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.coders.VarIntCoder; import org.apache.beam.sdk.coders.VoidCoder; import org.apache.beam.sdk.extensions.gcp.auth.NoopCredentialFactory; import org.apache.beam.sdk.extensions.gcp.auth.TestCredential; @@ -144,6 +147,7 @@ import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.Redistribute; import org.apache.beam.sdk.transforms.SerializableFunctions; import org.apache.beam.sdk.transforms.SimpleFunction; import 
org.apache.beam.sdk.transforms.resourcehints.ResourceHints; @@ -2533,6 +2537,48 @@ public void visitPrimitiveTransform(@UnknownKeyFor @NonNull @Initialized Node no assertTrue(sawPubsubOverride.get()); } + @Test + public void testEnableAllowDuplicatesForRedistributeWithALO() throws IOException { + DataflowPipelineOptions options = buildPipelineOptions(); + options.setDataflowServiceOptions(ImmutableList.of("streaming_mode_at_least_once")); + Pipeline pipeline = Pipeline.create(options); + + ImmutableList> abitraryKVs = + ImmutableList.of( + KV.of("k1", 3), + KV.of("k5", Integer.MAX_VALUE), + KV.of("k5", Integer.MIN_VALUE), + KV.of("k2", 66), + KV.of("k1", 4), + KV.of("k2", -33), + KV.of("k3", 0)); + PCollection> input = + pipeline.apply( + Create.of(abitraryKVs).withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))); + // The allowDuplicates for Redistribute is false by default. + PCollection> output = input.apply(Redistribute.byKey()); + pipeline.run(); + + // The DataflowRedistributeByKey transform translated from Redistribute should have + // allowDuplicates set to true. 
+ AtomicBoolean redistributeAllowDuplicates = new AtomicBoolean(false); + pipeline.traverseTopologically( + new PipelineVisitor.Defaults() { + @Override + public CompositeBehavior enterCompositeTransform(Node node) { + if (node.getTransform() + instanceof RedistributeByKeyOverrideFactory.DataflowRedistributeByKey) { + RedistributeByKeyOverrideFactory.DataflowRedistributeByKey redistribute = + (RedistributeByKeyOverrideFactory.DataflowRedistributeByKey) + node.getTransform(); + redistributeAllowDuplicates.set(redistribute.getAllowDuplicates()); + } + return CompositeBehavior.ENTER_TRANSFORM; + } + }); + assertTrue(redistributeAllowDuplicates.get()); + } + static class TestExpansionServiceClientFactory implements ExpansionServiceClientFactory { ExpansionApi.ExpansionResponse response; diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowOutputCounter.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowOutputCounter.java index 81ef0a1c9d95..e4292f2252a2 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowOutputCounter.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/DataflowOutputCounter.java @@ -28,15 +28,12 @@ import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; /** - * A Dataflow-specific version of {@link ElementCounter}, which specifies the object counter name - * differently as PhysicalElementCount. Additionally, it counts element windows as ElementCount. + * A Dataflow-specific version of {@link ElementCounter}. It counts element windows as ElementCount. 
*/ @SuppressWarnings({ "nullness" // TODO(https://github.com/apache/beam/issues/20497) }) public class DataflowOutputCounter implements ElementCounter { - /** Number of physical element and multiple-window assignments that were serialized/processed. */ - private static final String OBJECT_COUNTER_NAME = "-PhysicalElementCount"; /** Number of logical element and single window pairs that were processed. */ private static final String ELEMENT_COUNTER_NAME = "-ElementCount"; @@ -57,7 +54,6 @@ public DataflowOutputCounter( NameContext nameContext) { objectAndByteCounter = new OutputObjectAndByteCounter(elementByteSizeObservable, counterFactory, nameContext); - objectAndByteCounter.countObject(outputName + OBJECT_COUNTER_NAME); objectAndByteCounter.countMeanByte(outputName + MEAN_BYTE_COUNTER_NAME); createElementCounter(counterFactory, outputName + ELEMENT_COUNTER_NAME); } @@ -86,11 +82,6 @@ static String getElementCounterName(String prefix) { return prefix + ELEMENT_COUNTER_NAME; } - @VisibleForTesting - static String getObjectCounterName(String prefix) { - return prefix + OBJECT_COUNTER_NAME; - } - @VisibleForTesting static String getMeanByteCounterName(String prefix) { return prefix + MEAN_BYTE_COUNTER_NAME; diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/PubsubSink.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/PubsubSink.java index 3d542da3a4b9..b4962422b37e 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/PubsubSink.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/PubsubSink.java @@ -161,17 +161,21 @@ public long add(WindowedValue data) throws IOException { "Expected output stream to be empty but had %s", stream.toByteString()); ByteString byteString = null; - if (formatFn != null) { - PubsubMessage formatted = 
formatFn.apply(data.getValue()); - Pubsub.PubsubMessage.Builder pubsubMessageBuilder = - Pubsub.PubsubMessage.newBuilder().setData(ByteString.copyFrom(formatted.getPayload())); - if (formatted.getAttributeMap() != null) { - pubsubMessageBuilder.putAllAttributes(formatted.getAttributeMap()); + try { + if (formatFn != null) { + PubsubMessage formatted = formatFn.apply(data.getValue()); + Pubsub.PubsubMessage.Builder pubsubMessageBuilder = + Pubsub.PubsubMessage.newBuilder() + .setData(ByteString.copyFrom(formatted.getPayload())); + if (formatted.getAttributeMap() != null) { + pubsubMessageBuilder.putAllAttributes(formatted.getAttributeMap()); + } + pubsubMessageBuilder.build().writeTo(stream); + } else { + coder.encode(data.getValue(), stream, Coder.Context.OUTER); } - pubsubMessageBuilder.build().writeTo(stream); - byteString = stream.toByteStringAndReset(); - } else { - coder.encode(data.getValue(), stream, Coder.Context.OUTER); + } finally { + // Use a final block to ensure the stream is reset even in the case of an exception. 
byteString = stream.toByteStringAndReset(); } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStream.java index 2e85ce233b3b..9c193ba073c1 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStream.java @@ -26,6 +26,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; import javax.annotation.concurrent.GuardedBy; import org.apache.beam.runners.dataflow.worker.windmill.client.grpc.observers.StreamObserverCancelledException; @@ -83,6 +84,7 @@ public abstract class AbstractWindmillStream implements Win private final String backendWorkerToken; private final ResettableThrowingStreamObserver requestObserver; private final StreamDebugMetrics debugMetrics; + private final AtomicBoolean isHealthCheckScheduled; @GuardedBy("this") protected boolean clientClosed; @@ -115,6 +117,7 @@ protected AbstractWindmillStream( this.clientClosed = false; this.isShutdown = false; this.started = false; + this.isHealthCheckScheduled = new AtomicBoolean(false); this.finishLatch = new CountDownLatch(1); this.logger = logger; this.requestObserver = @@ -236,13 +239,35 @@ protected final void executeSafely(Runnable runnable) { } } - public final synchronized void maybeSendHealthCheck(Instant lastSendThreshold) { - if (!clientClosed && debugMetrics.getLastSendTimeMs() < lastSendThreshold.getMillis()) { - try { - sendHealthCheck(); - } catch (Exception e) { - 
logger.debug("Received exception sending health check.", e); - } + /** + * Schedule an application level keep-alive health check to be sent on the stream. + * + * @implNote This is sent asynchronously via an executor to minimize blocking. Messages are sent + * serially. If we recently sent a message before we attempt to schedule the health check, the + * stream has been restarted/closed, there is a scheduled health check that hasn't completed + * or there was a more recent send by the time we enter the synchronized block, we skip the + * attempt to send the health check. + */ + public final void maybeScheduleHealthCheck(Instant lastSendThreshold) { + if (debugMetrics.getLastSendTimeMs() < lastSendThreshold.getMillis() + && isHealthCheckScheduled.compareAndSet(false, true)) { + // Don't block other streams when sending health check. + executeSafely( + () -> { + synchronized (this) { + try { + if (!clientClosed + && debugMetrics.getLastSendTimeMs() < lastSendThreshold.getMillis()) { + sendHealthCheck(); + } + } catch (Exception e) { + logger.debug("Received exception sending health check.", e); + } finally { + // Ready to send another health check after we attempt the scheduled health check. 
+ isHealthCheckScheduled.set(false); + } + } + }); } } @@ -261,11 +286,12 @@ public final void appendSummaryHtml(PrintWriter writer) { .ifPresent( metrics -> writer.format( - ", %d restarts, last restart reason [ %s ] at [%s], %d errors", + ", %d restarts, last restart reason [ %s ] at [%s], %d errors, isHealthCheckScheduled=[%s]", metrics.restartCount(), metrics.lastRestartReason(), metrics.lastRestartTime().orElse(null), - metrics.errorCount())); + metrics.errorCount(), + isHealthCheckScheduled.get())); if (summaryMetrics.isClientClosed()) { writer.write(", client closed"); diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GetWorkTimingInfosTracker.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GetWorkTimingInfosTracker.java index b065636b766c..33bace25b2d2 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GetWorkTimingInfosTracker.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GetWorkTimingInfosTracker.java @@ -22,6 +22,7 @@ import java.util.HashMap; import java.util.Map; import javax.annotation.Nullable; +import javax.annotation.concurrent.NotThreadSafe; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkStreamTimingInfo; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkStreamTimingInfo.Event; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.LatencyAttribution; @@ -33,6 +34,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +@NotThreadSafe final class GetWorkTimingInfosTracker { private static final Logger LOG = LoggerFactory.getLogger(GetWorkTimingInfosTracker.class); @@ -46,8 +48,8 @@ final class GetWorkTimingInfosTracker { this.aggregatedGetWorkStreamLatencies = new 
EnumMap<>(State.class); this.clock = clock; this.workItemCreationEndTime = Instant.EPOCH; - workItemLastChunkReceivedByWorkerTime = Instant.EPOCH; - workItemCreationLatency = null; + this.workItemLastChunkReceivedByWorkerTime = Instant.EPOCH; + this.workItemCreationLatency = null; } void addTimingInfo(Collection infos) { @@ -69,8 +71,9 @@ void addTimingInfo(Collection infos) { // Record the difference between starting to get work and the first chunk being sent as the // work creation time. + @Nullable Instant workItemCreationStart = getWorkStreamTimings.get(Event.GET_WORK_CREATION_START); - Instant workItemCreationEnd = getWorkStreamTimings.get(Event.GET_WORK_CREATION_END); + @Nullable Instant workItemCreationEnd = getWorkStreamTimings.get(Event.GET_WORK_CREATION_END); if (workItemCreationStart != null && workItemCreationEnd != null && workItemCreationLatency == null) { @@ -90,39 +93,41 @@ void addTimingInfo(Collection infos) { Instant receivedByDispatcherTiming = getWorkStreamTimings.get(Event.GET_WORK_RECEIVED_BY_DISPATCHER); if (workItemCreationEnd != null && receivedByDispatcherTiming != null) { - Duration newDuration = new Duration(workItemCreationEnd, receivedByDispatcherTiming); - aggregatedGetWorkStreamLatencies.compute( + trackTimeInState( State.GET_WORK_IN_TRANSIT_TO_DISPATCHER, - (stateKey, duration) -> { - if (duration == null) { - return new SumAndMaxDurations(newDuration, newDuration); - } - duration.max = newDuration.isLongerThan(duration.max) ? newDuration : duration.max; - duration.sum = duration.sum.plus(newDuration); - return duration; - }); + new Duration(workItemCreationEnd, receivedByDispatcherTiming)); } - // Record the latency of each chunk between send on dispatcher and arrival on worker. + // Record the latency of each chunk between send on dispatcher or windmill worker and arrival on + // the user worker. 
+ @Nullable Instant forwardedByDispatcherTiming = getWorkStreamTimings.get(Event.GET_WORK_FORWARDED_BY_DISPATCHER); Instant now = Instant.ofEpochMilli(clock.getMillis()); if (forwardedByDispatcherTiming != null && now.isAfter(forwardedByDispatcherTiming)) { - Duration newDuration = new Duration(forwardedByDispatcherTiming, now); - aggregatedGetWorkStreamLatencies.compute( - State.GET_WORK_IN_TRANSIT_TO_USER_WORKER, - (stateKey, duration) -> { - if (duration == null) { - return new SumAndMaxDurations(newDuration, newDuration); - } - duration.max = newDuration.isLongerThan(duration.max) ? newDuration : duration.max; - duration.sum = duration.sum.plus(newDuration); - return duration; - }); + trackTimeInState( + State.GET_WORK_IN_TRANSIT_TO_USER_WORKER, new Duration(forwardedByDispatcherTiming, now)); + } else if (workItemCreationEnd != null && now.isAfter(workItemCreationEnd)) { + trackTimeInState( + State.GET_WORK_IN_TRANSIT_TO_USER_WORKER, new Duration(workItemCreationEnd, now)); } + workItemLastChunkReceivedByWorkerTime = now; } + private void trackTimeInState(LatencyAttribution.State state, Duration newDuration) { + aggregatedGetWorkStreamLatencies.compute( + state, + (stateKey, duration) -> { + if (duration == null) { + return new SumAndMaxDurations(newDuration, newDuration); + } + duration.max = newDuration.isLongerThan(duration.max) ? 
newDuration : duration.max; + duration.sum = duration.sum.plus(newDuration); + return duration; + }); + } + ImmutableList getLatencyAttributions() { if (workItemCreationLatency == null && aggregatedGetWorkStreamLatencies.isEmpty()) { return ImmutableList.of(); diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcCommitWorkStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcCommitWorkStream.java index 04c6a1a07f51..68d4294d615f 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcCommitWorkStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcCommitWorkStream.java @@ -150,7 +150,7 @@ protected boolean hasPendingRequests() { } @Override - public void sendHealthCheck() throws WindmillStreamShutdownException { + protected void sendHealthCheck() throws WindmillStreamShutdownException { if (hasPendingRequests()) { StreamingCommitWorkRequest.Builder builder = StreamingCommitWorkRequest.newBuilder(); builder.addCommitChunkBuilder().setRequestId(HEARTBEAT_REQUEST_ID); diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcDirectGetWorkStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcDirectGetWorkStream.java index 4aafaa9dd43c..cab182297cf7 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcDirectGetWorkStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcDirectGetWorkStream.java @@ -238,7 +238,7 @@ public void 
appendSpecificHtml(PrintWriter writer) { } @Override - public void sendHealthCheck() throws WindmillStreamShutdownException { + protected void sendHealthCheck() throws WindmillStreamShutdownException { trySend(HEALTH_CHECK_REQUEST); } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcGetDataStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcGetDataStream.java index 1b7f65fa3890..7bfd8c4d92d1 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcGetDataStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcGetDataStream.java @@ -296,7 +296,7 @@ public void onHeartbeatResponse(List resp } @Override - public void sendHealthCheck() throws WindmillStreamShutdownException { + protected void sendHealthCheck() throws WindmillStreamShutdownException { if (hasPendingRequests()) { trySend(HEALTH_CHECK_REQUEST); } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcGetWorkStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcGetWorkStream.java index b36d35fdc5cd..6d217a63f74d 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcGetWorkStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcGetWorkStream.java @@ -166,7 +166,7 @@ public void appendSpecificHtml(PrintWriter writer) { } @Override - public void sendHealthCheck() throws WindmillStreamShutdownException { + protected void sendHealthCheck() throws 
WindmillStreamShutdownException { trySend(HEALTH_CHECK); } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcWindmillStreamFactory.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcWindmillStreamFactory.java index b0172cdbd6ff..5d35fc5aedf4 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcWindmillStreamFactory.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcWindmillStreamFactory.java @@ -155,7 +155,7 @@ public void run() { Instant reportThreshold = Instant.now().minus(Duration.millis(healthCheckIntervalMillis)); for (AbstractWindmillStream stream : streamFactory.streamRegistry) { - stream.maybeSendHealthCheck(reportThreshold); + stream.maybeScheduleHealthCheck(reportThreshold); } } }, diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/IntrinsicMapTaskExecutorFactoryTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/IntrinsicMapTaskExecutorFactoryTest.java index 68f5075e4662..1d83594bb3f6 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/IntrinsicMapTaskExecutorFactoryTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/IntrinsicMapTaskExecutorFactoryTest.java @@ -20,7 +20,6 @@ import static org.apache.beam.runners.dataflow.util.Structs.addString; import static org.apache.beam.runners.dataflow.worker.DataflowOutputCounter.getElementCounterName; import static org.apache.beam.runners.dataflow.worker.DataflowOutputCounter.getMeanByteCounterName; -import static 
org.apache.beam.runners.dataflow.worker.DataflowOutputCounter.getObjectCounterName; import static org.apache.beam.runners.dataflow.worker.counters.CounterName.named; import static org.apache.beam.sdk.util.SerializableUtils.serializeToByteArray; import static org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString; @@ -242,8 +241,6 @@ private static void verifyOutputCounters( for (String outputName : outputNames) { verify(updateExtractor) .longSum(eq(named(getElementCounterName(outputName))), anyBoolean(), anyLong()); - verify(updateExtractor) - .longSum(eq(named(getObjectCounterName(outputName))), anyBoolean(), anyLong()); verify(updateExtractor) .longMean( eq(named(getMeanByteCounterName(outputName))), diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/PubsubSinkTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/PubsubSinkTest.java index fdeecb8d96e5..a03f7bc24a6d 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/PubsubSinkTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/PubsubSinkTest.java @@ -18,14 +18,21 @@ package org.apache.beam.runners.dataflow.worker; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; import static org.mockito.Mockito.when; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.beam.runners.dataflow.util.CloudObject; import org.apache.beam.runners.dataflow.util.PropertyNames; import org.apache.beam.runners.dataflow.worker.util.common.worker.Sink; import org.apache.beam.runners.dataflow.worker.windmill.Windmill; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderException; import 
org.apache.beam.sdk.coders.StringUtf8Coder; import org.apache.beam.sdk.transforms.windowing.IntervalWindow; import org.apache.beam.sdk.util.WindowedValue; @@ -117,4 +124,56 @@ public void testBasic() throws Exception { public void testEmptyParseFn() throws Exception { testWriteWith(""); } + + private static class ErrorCoder extends Coder { + @Override + public void encode(String value, OutputStream outStream) throws CoderException, IOException { + outStream.write(1); + throw new CoderException("encode error"); + } + + @Override + public String decode(InputStream inStream) throws IOException { + throw new CoderException("decode error"); + } + + @Override + public List> getCoderArguments() { + return null; + } + + @Override + public void verifyDeterministic() {} + } + + // Regression test that the PubsubSink properly resets internal state on encoding exceptions to + // prevent precondition failures on further output. + @Test + public void testExceptionAfterEncoding() throws Exception { + Map spec = new HashMap<>(); + spec.put(PropertyNames.OBJECT_TYPE_NAME, ""); + spec.put(PropertyNames.PUBSUB_TOPIC, "topic"); + spec.put(PropertyNames.PUBSUB_TIMESTAMP_ATTRIBUTE, "ts"); + spec.put(PropertyNames.PUBSUB_ID_ATTRIBUTE, "id"); + CloudObject cloudSinkSpec = CloudObject.fromSpec(spec); + PubsubSink.Factory factory = new PubsubSink.Factory(); + PubsubSink sink = + (PubsubSink) + factory.create( + cloudSinkSpec, + WindowedValue.getFullCoder(new ErrorCoder(), IntervalWindow.getCoder()), + null, + mockContext, + null); + + Sink.SinkWriter> writer = sink.writer(); + assertThrows( + "encode error", + CoderException.class, + () -> writer.add(WindowedValue.timestampedValueInGlobalWindow("e0", new Instant(0)))); + assertThrows( + "encode error", + CoderException.class, + () -> writer.add(WindowedValue.timestampedValueInGlobalWindow("e0", new Instant(0)))); + } } diff --git 
a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/MapTaskExecutorTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/MapTaskExecutorTest.java index 7e01505500d8..2eeaa06eb5eb 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/MapTaskExecutorTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/util/common/worker/MapTaskExecutorTest.java @@ -329,6 +329,7 @@ public void start() throws Exception { } }); + assertEquals(TimeUnit.MINUTES.toMillis(10), stateTracker.getNextBundleLullDurationReportMs()); try (MapTaskExecutor executor = new MapTaskExecutor(operations, counterSet, stateTracker)) { // Call execute so that we run all the counters executor.execute(); @@ -343,7 +344,6 @@ public void start() throws Exception { context3.metricsContainer().getUpdates().counterUpdates(), contains(metricUpdate("TestMetric", "MetricCounter", o3, 3L))); assertEquals(0, stateTracker.getMillisSinceBundleStart()); - assertEquals(TimeUnit.MINUTES.toMillis(10), stateTracker.getNextBundleLullDurationReportMs()); } } diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStreamTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStreamTest.java index 90690f4f69f2..bc96689c4d3b 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStreamTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStreamTest.java @@ -23,7 +23,6 @@ import java.io.PrintWriter; import java.util.Set; import 
java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -36,10 +35,13 @@ import org.apache.beam.vendor.grpc.v1p69p0.io.grpc.stub.CallStreamObserver; import org.apache.beam.vendor.grpc.v1p69p0.io.grpc.stub.StreamObserver; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.Uninterruptibles; +import org.joda.time.Duration; +import org.joda.time.Instant; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import org.slf4j.Logger; import org.slf4j.LoggerFactory; @RunWith(JUnit4.class) @@ -61,61 +63,83 @@ private TestStream newStream( @Test public void testShutdown_notBlockedBySend() throws InterruptedException, ExecutionException { - CountDownLatch sendBlocker = new CountDownLatch(1); + TestCallStreamObserver callStreamObserver = TestCallStreamObserver.notReady(); Function, StreamObserver> clientFactory = - ignored -> - new CallStreamObserver() { - @Override - public void onNext(Integer integer) { - try { - sendBlocker.await(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } + ignored -> callStreamObserver; - @Override - public void onError(Throwable throwable) {} + TestStream testStream = newStream(clientFactory); + testStream.start(); + ExecutorService sendExecutor = Executors.newSingleThreadExecutor(); + Future sendFuture = + sendExecutor.submit( + () -> { + // Send a few times to trigger blocking in the CallStreamObserver. + testStream.testSend(); + testStream.testSend(); + return assertThrows(WindmillStreamShutdownException.class, testStream::testSend); + }); - @Override - public void onCompleted() {} + // Wait for 1 send since it always goes through, the rest may buffer. 
+ callStreamObserver.waitForSends(1); - @Override - public boolean isReady() { - return false; - } + testStream.shutdown(); - @Override - public void setOnReadyHandler(Runnable runnable) {} + assertThat(sendFuture.get()).isInstanceOf(WindmillStreamShutdownException.class); + } - @Override - public void disableAutoInboundFlowControl() {} + @Test + public void testMaybeScheduleHealthCheck() { + TestCallStreamObserver callStreamObserver = TestCallStreamObserver.create(); + Function, StreamObserver> clientFactory = + ignored -> callStreamObserver; - @Override - public void request(int i) {} + TestStream testStream = newStream(clientFactory); + testStream.start(); + Instant reportingThreshold = Instant.now().minus(Duration.millis(1)); - @Override - public void setMessageCompression(boolean b) {} - }; + testStream.maybeScheduleHealthCheck(reportingThreshold); + testStream.waitForHealthChecks(1); + assertThat(testStream.numHealthChecks.get()).isEqualTo(1); + testStream.shutdown(); + } + + @Test + public void testMaybeSendHealthCheck_doesNotSendIfLastScheduleLessThanThreshold() { + TestCallStreamObserver callStreamObserver = TestCallStreamObserver.create(); + Function, StreamObserver> clientFactory = + ignored -> callStreamObserver; TestStream testStream = newStream(clientFactory); testStream.start(); - ExecutorService sendExecutor = Executors.newSingleThreadExecutor(); - Future sendFuture = - sendExecutor.submit( - () -> - assertThrows(WindmillStreamShutdownException.class, () -> testStream.testSend(1))); - testStream.shutdown(); - // Sleep a bit to give sendExecutor time to execute the send(). + try { + testStream.trySend(1); + } catch (WindmillStreamShutdownException e) { + throw new RuntimeException(e); + } + + // Set a really long reporting threshold. + Instant reportingThreshold = Instant.now().minus(Duration.standardHours(1)); + + // Should not send health checks since we just sent the above message. 
+ testStream.maybeScheduleHealthCheck(reportingThreshold); + Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + testStream.maybeScheduleHealthCheck(reportingThreshold); Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); - sendBlocker.countDown(); - assertThat(sendFuture.get()).isInstanceOf(WindmillStreamShutdownException.class); + callStreamObserver.waitForSends(1); + // Sleep just to ensure an async health check doesn't show up + Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + + assertThat(testStream.numHealthChecks.get()).isEqualTo(0); + testStream.shutdown(); } private static class TestStream extends AbstractWindmillStream { + private static final Logger LOG = LoggerFactory.getLogger(AbstractWindmillStreamTest.class); + private final AtomicInteger numStarts = new AtomicInteger(); + private final AtomicInteger numHealthChecks = new AtomicInteger(); private TestStream( Function, StreamObserver> clientFactory, @@ -148,14 +172,26 @@ protected boolean hasPendingRequests() { @Override protected void startThrottleTimer() {} - public void testSend(Integer i) - throws ResettableThrowingStreamObserver.StreamClosedException, - WindmillStreamShutdownException { - trySend(i); + private void testSend() throws WindmillStreamShutdownException { + trySend(1); } @Override - protected void sendHealthCheck() {} + protected void sendHealthCheck() { + numHealthChecks.incrementAndGet(); + } + + private void waitForHealthChecks(int expectedHealthChecks) { + int waitedMillis = 0; + while (numHealthChecks.get() < expectedHealthChecks) { + LOG.info( + "Waited for {}ms for {} health checks. 
Current health check count is {}.", + waitedMillis, + numHealthChecks.get(), + expectedHealthChecks); + Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + } + } @Override protected void appendSpecificHtml(PrintWriter writer) {} @@ -163,4 +199,60 @@ protected void appendSpecificHtml(PrintWriter writer) {} @Override protected void shutdownInternal() {} } + + private static class TestCallStreamObserver extends CallStreamObserver { + private static final Logger LOG = LoggerFactory.getLogger(AbstractWindmillStreamTest.class); + private final AtomicInteger numSends = new AtomicInteger(); + private final boolean isReady; + + private TestCallStreamObserver(boolean isReady) { + this.isReady = isReady; + } + + private static TestCallStreamObserver create() { + return new TestCallStreamObserver(true); + } + + private static TestCallStreamObserver notReady() { + return new TestCallStreamObserver(false); + } + + @Override + public void onNext(Integer integer) { + numSends.incrementAndGet(); + } + + private void waitForSends(int expectedSends) { + int millisWaited = 0; + while (numSends.get() < expectedSends) { + LOG.info( + "Waited {}ms for {} sends, current sends: {}", millisWaited, expectedSends, numSends); + Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + millisWaited += 100; + } + } + + @Override + public void onError(Throwable throwable) {} + + @Override + public void onCompleted() {} + + @Override + public boolean isReady() { + return isReady; + } + + @Override + public void setOnReadyHandler(Runnable runnable) {} + + @Override + public void disableAutoInboundFlowControl() {} + + @Override + public void request(int i) {} + + @Override + public void setMessageCompression(boolean b) {} + } } diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GetWorkTimingInfosTrackerTest.java 
b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GetWorkTimingInfosTrackerTest.java new file mode 100644 index 000000000000..8145396e190e --- /dev/null +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GetWorkTimingInfosTrackerTest.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.runners.dataflow.worker.windmill.client.grpc; + +import static java.util.stream.Collectors.toMap; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class GetWorkTimingInfosTrackerTest { + + @Test + public void testGetWorkTimingInfosTracker_calculatesTransitToUserWorkerTimeFromWindmillWorker() { + GetWorkTimingInfosTracker tracker = new GetWorkTimingInfosTracker(() -> 50); + List infos = new ArrayList<>(); + for (int i = 0; i <= 3; i++) { + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_CREATION_START) + .setTimestampUsec(0) + .build()); + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_CREATION_END) + .setTimestampUsec(10000) + .build()); + tracker.addTimingInfo(infos); + infos.clear(); + } + // durations for each chunk: + // GET_WORK_IN_WINDMILL_WORKER: 10, 10, 10, 10 + // GET_WORK_IN_TRANSIT_TO_USER_WORKER: 34, 33, 32, 31 -> sum to 130 + ImmutableList attributions = tracker.getLatencyAttributions(); + assertEquals(2, attributions.size()); + Map latencies = + attributions.stream() + .collect(toMap(Windmill.LatencyAttribution::getState, Function.identity())); + + assertEquals( + 10L, + latencies + .get(Windmill.LatencyAttribution.State.GET_WORK_IN_WINDMILL_WORKER) + .getTotalDurationMillis()); + + assertEquals( + // Elapsed time from 10 -> 50. 
+ 40, + latencies + .get(Windmill.LatencyAttribution.State.GET_WORK_IN_TRANSIT_TO_USER_WORKER) + .getTotalDurationMillis()); + } + + @Test + public void testGetWorkTimingInfosTracker_calculatesTransitToUserWorkerTimeFromDispatcher() { + GetWorkTimingInfosTracker tracker = new GetWorkTimingInfosTracker(() -> 50); + List infos = new ArrayList<>(); + for (int i = 0; i <= 3; i++) { + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_CREATION_START) + .setTimestampUsec(0) + .build()); + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_CREATION_END) + .setTimestampUsec(10000) + .build()); + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_RECEIVED_BY_DISPATCHER) + .setTimestampUsec((i + 11) * 1000) + .build()); + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_FORWARDED_BY_DISPATCHER) + .setTimestampUsec((i + 16) * 1000) + .build()); + tracker.addTimingInfo(infos); + infos.clear(); + } + // durations for each chunk: + // GET_WORK_IN_WINDMILL_WORKER: 10, 10, 10, 10 + // GET_WORK_IN_TRANSIT_TO_DISPATCHER: 1, 2, 3, 4 -> sum to 10 + // GET_WORK_IN_TRANSIT_TO_USER_WORKER: 34, 33, 32, 31 -> sum to 130 + Map latencies = new HashMap<>(); + ImmutableList attributions = tracker.getLatencyAttributions(); + assertEquals(3, attributions.size()); + for (Windmill.LatencyAttribution attribution : attributions) { + latencies.put(attribution.getState(), attribution); + } + assertEquals( + 10L, + latencies + .get(Windmill.LatencyAttribution.State.GET_WORK_IN_WINDMILL_WORKER) + .getTotalDurationMillis()); + // elapsed time from 10 -> 50; + long elapsedTime = 40; + // sumDurations: 1 + 2 + 3 + 4 + 34 + 33 + 32 + 31; + long sumDurations = 140; + assertEquals( + Math.min(4, (long) (elapsedTime * (10.0 / 
sumDurations))), + latencies + .get(Windmill.LatencyAttribution.State.GET_WORK_IN_TRANSIT_TO_DISPATCHER) + .getTotalDurationMillis()); + assertEquals( + Math.min(34, (long) (elapsedTime * (130.0 / sumDurations))), + latencies + .get(Windmill.LatencyAttribution.State.GET_WORK_IN_TRANSIT_TO_USER_WORKER) + .getTotalDurationMillis()); + } + + @Test + public void testGetWorkTimingInfosTracker_clockSkew() { + int skewMicros = 50 * 1000; + GetWorkTimingInfosTracker tracker = new GetWorkTimingInfosTracker(() -> 50); + List infos = new ArrayList<>(); + for (int i = 0; i <= 3; i++) { + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_CREATION_START) + .setTimestampUsec(skewMicros) + .build()); + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_CREATION_END) + .setTimestampUsec(10000 + skewMicros) + .build()); + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_RECEIVED_BY_DISPATCHER) + .setTimestampUsec((i + 11) * 1000 + skewMicros) + .build()); + infos.add( + Windmill.GetWorkStreamTimingInfo.newBuilder() + .setEvent(Windmill.GetWorkStreamTimingInfo.Event.GET_WORK_FORWARDED_BY_DISPATCHER) + .setTimestampUsec((i + 16) * 1000 + skewMicros) + .build()); + tracker.addTimingInfo(infos); + infos.clear(); + } + // durations for each chunk: + // GET_WORK_IN_WINDMILL_WORKER: 10, 10, 10, 10 + // GET_WORK_IN_TRANSIT_TO_DISPATCHER: 1, 2, 3, 4 -> sum to 10 + // GET_WORK_IN_TRANSIT_TO_USER_WORKER: not observed due to skew + Map latencies = new HashMap<>(); + ImmutableList attributions = tracker.getLatencyAttributions(); + assertEquals(2, attributions.size()); + for (Windmill.LatencyAttribution attribution : attributions) { + latencies.put(attribution.getState(), attribution); + } + assertEquals( + 10L, + latencies + .get(Windmill.LatencyAttribution.State.GET_WORK_IN_WINDMILL_WORKER) 
+ .getTotalDurationMillis()); + assertEquals( + 4L, + latencies + .get(Windmill.LatencyAttribution.State.GET_WORK_IN_TRANSIT_TO_DISPATCHER) + .getTotalDurationMillis()); + assertNull(latencies.get(Windmill.LatencyAttribution.State.GET_WORK_IN_TRANSIT_TO_USER_WORKER)); + } +} diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcWindmillServerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcWindmillServerTest.java index 471227cf87f3..bdb482ba768a 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcWindmillServerTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcWindmillServerTest.java @@ -48,8 +48,6 @@ import org.apache.beam.runners.dataflow.worker.windmill.Windmill.ComputationHeartbeatRequest; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.ComputationWorkItemMetadata; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkRequest; -import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkStreamTimingInfo; -import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkStreamTimingInfo.Event; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalData; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataId; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest; @@ -58,7 +56,6 @@ import org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataRequest; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataResponse; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.LatencyAttribution; -import 
org.apache.beam.runners.dataflow.worker.windmill.Windmill.LatencyAttribution.State; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.StreamingCommitRequestChunk; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.StreamingCommitResponse; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.StreamingCommitWorkRequest; @@ -1356,102 +1353,6 @@ public void onCompleted() { assertTrue(stream.awaitTermination(30, TimeUnit.SECONDS)); } - @Test - public void testGetWorkTimingInfosTracker() throws Exception { - GetWorkTimingInfosTracker tracker = new GetWorkTimingInfosTracker(() -> 50); - List infos = new ArrayList<>(); - for (int i = 0; i <= 3; i++) { - infos.add( - GetWorkStreamTimingInfo.newBuilder() - .setEvent(Event.GET_WORK_CREATION_START) - .setTimestampUsec(0) - .build()); - infos.add( - GetWorkStreamTimingInfo.newBuilder() - .setEvent(Event.GET_WORK_CREATION_END) - .setTimestampUsec(10000) - .build()); - infos.add( - GetWorkStreamTimingInfo.newBuilder() - .setEvent(Event.GET_WORK_RECEIVED_BY_DISPATCHER) - .setTimestampUsec((i + 11) * 1000) - .build()); - infos.add( - GetWorkStreamTimingInfo.newBuilder() - .setEvent(Event.GET_WORK_FORWARDED_BY_DISPATCHER) - .setTimestampUsec((i + 16) * 1000) - .build()); - tracker.addTimingInfo(infos); - infos.clear(); - } - // durations for each chunk: - // GET_WORK_IN_WINDMILL_WORKER: 10, 10, 10, 10 - // GET_WORK_IN_TRANSIT_TO_DISPATCHER: 1, 2, 3, 4 -> sum to 10 - // GET_WORK_IN_TRANSIT_TO_USER_WORKER: 34, 33, 32, 31 -> sum to 130 - Map latencies = new HashMap<>(); - ImmutableList attributions = tracker.getLatencyAttributions(); - assertEquals(3, attributions.size()); - for (LatencyAttribution attribution : attributions) { - latencies.put(attribution.getState(), attribution); - } - assertEquals(10L, latencies.get(State.GET_WORK_IN_WINDMILL_WORKER).getTotalDurationMillis()); - // elapsed time from 10 -> 50; - long elapsedTime = 40; - // sumDurations: 1 + 2 + 3 + 4 + 34 + 33 + 32 + 31; - long 
sumDurations = 140; - assertEquals( - Math.min(4, (long) (elapsedTime * (10.0 / sumDurations))), - latencies.get(State.GET_WORK_IN_TRANSIT_TO_DISPATCHER).getTotalDurationMillis()); - assertEquals( - Math.min(34, (long) (elapsedTime * (130.0 / sumDurations))), - latencies.get(State.GET_WORK_IN_TRANSIT_TO_USER_WORKER).getTotalDurationMillis()); - } - - @Test - public void testGetWorkTimingInfosTracker_ClockSkew() throws Exception { - int skewMicros = 50 * 1000; - GetWorkTimingInfosTracker tracker = new GetWorkTimingInfosTracker(() -> 50); - List infos = new ArrayList<>(); - for (int i = 0; i <= 3; i++) { - infos.add( - GetWorkStreamTimingInfo.newBuilder() - .setEvent(Event.GET_WORK_CREATION_START) - .setTimestampUsec(skewMicros) - .build()); - infos.add( - GetWorkStreamTimingInfo.newBuilder() - .setEvent(Event.GET_WORK_CREATION_END) - .setTimestampUsec(10000 + skewMicros) - .build()); - infos.add( - GetWorkStreamTimingInfo.newBuilder() - .setEvent(Event.GET_WORK_RECEIVED_BY_DISPATCHER) - .setTimestampUsec((i + 11) * 1000 + skewMicros) - .build()); - infos.add( - GetWorkStreamTimingInfo.newBuilder() - .setEvent(Event.GET_WORK_FORWARDED_BY_DISPATCHER) - .setTimestampUsec((i + 16) * 1000 + skewMicros) - .build()); - tracker.addTimingInfo(infos); - infos.clear(); - } - // durations for each chunk: - // GET_WORK_IN_WINDMILL_WORKER: 10, 10, 10, 10 - // GET_WORK_IN_TRANSIT_TO_DISPATCHER: 1, 2, 3, 4 -> sum to 10 - // GET_WORK_IN_TRANSIT_TO_USER_WORKER: not observed due to skew - Map latencies = new HashMap<>(); - ImmutableList attributions = tracker.getLatencyAttributions(); - assertEquals(2, attributions.size()); - for (LatencyAttribution attribution : attributions) { - latencies.put(attribution.getState(), attribution); - } - assertEquals(10L, latencies.get(State.GET_WORK_IN_WINDMILL_WORKER).getTotalDurationMillis()); - assertEquals( - 4L, latencies.get(State.GET_WORK_IN_TRANSIT_TO_DISPATCHER).getTotalDurationMillis()); - 
assertNull(latencies.get(State.GET_WORK_IN_TRANSIT_TO_USER_WORKER)); - } - class ResponseErrorInjector { private final Stream stream; diff --git a/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill.proto b/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill.proto index c58edab0fb47..77401be4ac77 100644 --- a/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill.proto +++ b/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill.proto @@ -455,6 +455,7 @@ message ComputationWorkItems { optional int64 input_data_watermark = 3 [default = -0x8000000000000000]; optional int64 dependent_realtime_input_watermark = 4 [default = -0x8000000000000000]; + optional bool drain_mode = 6; } //////////////////////////////////////////////////////////////////////////////// @@ -802,6 +803,7 @@ message ComputationWorkItemMetadata { optional int64 input_data_watermark = 2 [default = -0x8000000000000000]; optional int64 dependent_realtime_input_watermark = 3 [default = -0x8000000000000000]; + optional bool drain_mode = 5; } message StreamingGetDataRequest { @@ -938,6 +940,24 @@ message WorkerMetadataResponse { reserved 4; } + +// Client-side settings for gRPC flow control set on the user worker when +// constructing the channels and stubs. +message UserWorkerGrpcFlowControlSettings { + // If true, the user worker will use gRPCs automatic flow control for + // windmill RPCs. + optional bool enable_auto_flow_control = 1 [default = false]; + + // The flow control window size for windmill RPCs. If + // enable_auto_flow_control is true, this is the initial window size and may + // be resized by the gRPC framework. Default and minimum is 10MiB. + optional int32 flow_control_window_bytes = 2 [default = 10485760]; + + // Specifies how many bytes must be queued before the call is considered not + // ready to send more messages. 
+ optional int32 on_ready_threshold_bytes = 3; +} + // Settings to control runtime behavior of the java runner v1 user worker. message UserWorkerRunnerV1Settings { // If true, use separate channels for each windmill RPC. @@ -945,6 +965,8 @@ message UserWorkerRunnerV1Settings { // If true, use separate streaming RPC for windmill heartbeats and state reads. optional bool use_separate_windmill_heartbeat_streams = 2 [default = true]; + + optional UserWorkerGrpcFlowControlSettings flow_control_settings = 3; } service WindmillAppliance { diff --git a/scripts/ci/pr-bot/package-lock.json b/scripts/ci/pr-bot/package-lock.json index 7cb764a43795..51968bb8ead7 100644 --- a/scripts/ci/pr-bot/package-lock.json +++ b/scripts/ci/pr-bot/package-lock.json @@ -16,7 +16,7 @@ "devDependencies": { "@types/mocha": "^9.1.0", "@types/node": "^16.11.7", - "mocha": "^9.1.3", + "mocha": "^11.1.0", "prettier": "^2.5.1", "typescript": "4.2.4" } @@ -53,6 +53,24 @@ "resolved": "https://registry.npmjs.org/@actions/io/-/io-1.1.1.tgz", "integrity": "sha512-Qi4JoKXjmE0O67wAOH6y0n26QXhMKMFo7GD/4IXNVcrtLjUlGjGuVys6pQgwF3ArfGTQu0XpqaNr0YhED2RaRA==" }, + "node_modules/@isaacs/cliui": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", + "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/@octokit/auth-token": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-2.5.0.tgz", @@ -173,6 +191,17 @@ "@octokit/openapi-types": "^11.2.0" } }, + "node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "resolved": 
"https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", + "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "dev": true, + "license": "MIT", + "optional": true, + "engines": { + "node": ">=14" + } + }, "node_modules/@types/mocha": { "version": "9.1.0", "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-9.1.0.tgz", @@ -185,28 +214,27 @@ "integrity": "sha512-NrTwfD7L1RTc2qrHQD4RTTy4p0CO2LatKBEKEds3CaVuhoM/+DJzmWZl5f+ikR8cm8F5mfJxK+9rQq07gRiSjQ==", "dev": true }, - "node_modules/@ungap/promise-all-settled": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz", - "integrity": "sha512-sL/cEvJWAnClXw0wHk85/2L0G6Sj8UB0Ctc1TEMbKSsmpRosqhwj9gWgFRZSrBr2f9tiXISwNhCPmlfqUqyb9Q==", - "dev": true - }, "node_modules/ansi-colors": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", - "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", + "integrity": "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==", "dev": true, + "license": "MIT", "engines": { "node": ">=6" } }, "node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", + "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==", "dev": true, + "license": "MIT", "engines": { - "node": ">=8" + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" } 
}, "node_modules/ansi-styles": { @@ -246,7 +274,8 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/before-after-hook": { "version": "2.2.2", @@ -263,13 +292,13 @@ } }, "node_modules/brace-expansion": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", "dev": true, + "license": "MIT", "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" + "balanced-match": "^1.0.0" } }, "node_modules/braces": { @@ -358,14 +387,81 @@ } }, "node_modules/cliui": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", - "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", "dev": true, + "license": "ISC", "dependencies": { "string-width": "^4.2.0", - "strip-ansi": "^6.0.0", + "strip-ansi": "^6.0.1", "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/cliui/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": 
"MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/cliui/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, + "node_modules/cliui/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/cliui/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/cliui/node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" } }, "node_modules/color-convert": { @@ -386,19 +482,29 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "dev": true }, - "node_modules/concat-map": { - "version": "0.0.1", - "resolved": 
"https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", - "dev": true + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } }, "node_modules/debug": { - "version": "4.3.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.3.tgz", - "integrity": "sha512-/zxw5+vh1Tfv+4Qn7a5nsbcJKPaSvCDhojn6FEl9vupwK2VCSDtEiEtqr8DFtzYFOdz63LBkxec7DYuc2jon6Q==", + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", + "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", "dev": true, + "license": "MIT", "dependencies": { - "ms": "2.1.2" + "ms": "^2.1.3" }, "engines": { "node": ">=6.0" @@ -409,12 +515,6 @@ } } }, - "node_modules/debug/node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", - "dev": true - }, "node_modules/decamelize": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-4.0.0.tgz", @@ -433,25 +533,35 @@ "integrity": "sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==" }, "node_modules/diff": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/diff/-/diff-5.0.0.tgz", - "integrity": "sha512-/VTCrvm5Z0JGty/BWHljh+BAiw3IK+2j87NGMu8Nwc/f48WoDAC395uomO9ZD117ZOBaHmkX1oyLvkVM/aIT3w==", + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.0.tgz", + "integrity": 
"sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==", "dev": true, + "license": "BSD-3-Clause", "engines": { "node": ">=0.3.1" } }, + "node_modules/eastasianwidth": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", + "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", + "dev": true, + "license": "MIT" + }, "node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", + "dev": true, + "license": "MIT" }, "node_modules/escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", "dev": true, + "license": "MIT", "engines": { "node": ">=6" } @@ -505,11 +615,22 @@ "flat": "cli.js" } }, - "node_modules/fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", - "dev": true + "node_modules/foreground-child": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", + "integrity": 
"sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", + "dev": true, + "license": "ISC", + "dependencies": { + "cross-spawn": "^7.0.6", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } }, "node_modules/fsevents": { "version": "2.3.2", @@ -530,25 +651,27 @@ "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", "dev": true, + "license": "ISC", "engines": { "node": "6.* || 8.* || >= 10.*" } }, "node_modules/glob": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.0.tgz", - "integrity": "sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==", + "version": "10.4.5", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", + "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", "dev": true, + "license": "ISC", "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.0.4", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" }, - "engines": { - "node": "*" + "bin": { + "glob": "dist/esm/bin.mjs" }, "funding": { "url": "https://github.com/sponsors/isaacs" @@ -566,13 +689,20 @@ "node": ">= 6" } }, - "node_modules/growl": { - "version": "1.10.5", - "resolved": "https://registry.npmjs.org/growl/-/growl-1.10.5.tgz", - "integrity": "sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==", + "node_modules/glob/node_modules/minimatch": { + "version": "9.0.5", + "resolved": 
"https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, "engines": { - "node": ">=4.x" + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" } }, "node_modules/has-flag": { @@ -593,22 +723,6 @@ "he": "bin/he" } }, - "node_modules/inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", - "dev": true, - "dependencies": { - "once": "^1.3.0", - "wrappy": "1" - } - }, - "node_modules/inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "dev": true - }, "node_modules/is-binary-path": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", @@ -635,6 +749,7 @@ "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", "dev": true, + "license": "MIT", "engines": { "node": ">=8" } @@ -692,8 +807,25 @@ "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", - "dev": true + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, + "node_modules/jackspeak": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", + "integrity": 
"sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "optionalDependencies": { + "@pkgjs/parseargs": "^0.11.0" + } }, "node_modules/js-yaml": { "version": "4.1.0", @@ -737,59 +869,70 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "dev": true, + "license": "ISC" + }, "node_modules/minimatch": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", - "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", "dev": true, + "license": "ISC", "dependencies": { - "brace-expansion": "^1.1.7" + "brace-expansion": "^2.0.1" }, "engines": { - "node": "*" + "node": ">=10" + } + }, + "node_modules/minipass": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", + "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=16 || 14 >=14.17" } }, "node_modules/mocha": { - "version": "9.2.0", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-9.2.0.tgz", - "integrity": "sha512-kNn7E8g2SzVcq0a77dkphPsDSN7P+iYkqE0ZsGCYWRsoiKjOt+NvXfaagik8vuDa6W5Zw3qxe8Jfpt5qKf+6/Q==", - "dev": true, - "dependencies": { - "@ungap/promise-all-settled": "1.1.2", - 
"ansi-colors": "4.1.1", - "browser-stdout": "1.3.1", - "chokidar": "3.5.3", - "debug": "4.3.3", - "diff": "5.0.0", - "escape-string-regexp": "4.0.0", - "find-up": "5.0.0", - "glob": "7.2.0", - "growl": "1.10.5", - "he": "1.2.0", - "js-yaml": "4.1.0", - "log-symbols": "4.1.0", - "minimatch": "3.0.4", - "ms": "2.1.3", - "nanoid": "3.2.0", - "serialize-javascript": "6.0.0", - "strip-json-comments": "3.1.1", - "supports-color": "8.1.1", - "which": "2.0.2", - "workerpool": "6.2.0", - "yargs": "16.2.0", - "yargs-parser": "20.2.4", - "yargs-unparser": "2.0.0" + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-11.1.0.tgz", + "integrity": "sha512-8uJR5RTC2NgpY3GrYcgpZrsEd9zKbPDpob1RezyR2upGHRQtHWofmzTMzTMSV6dru3tj5Ukt0+Vnq1qhFEEwAg==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-colors": "^4.1.3", + "browser-stdout": "^1.3.1", + "chokidar": "^3.5.3", + "debug": "^4.3.5", + "diff": "^5.2.0", + "escape-string-regexp": "^4.0.0", + "find-up": "^5.0.0", + "glob": "^10.4.5", + "he": "^1.2.0", + "js-yaml": "^4.1.0", + "log-symbols": "^4.1.0", + "minimatch": "^5.1.6", + "ms": "^2.1.3", + "serialize-javascript": "^6.0.2", + "strip-json-comments": "^3.1.1", + "supports-color": "^8.1.1", + "workerpool": "^6.5.1", + "yargs": "^17.7.2", + "yargs-parser": "^21.1.1", + "yargs-unparser": "^2.0.0" }, "bin": { "_mocha": "bin/_mocha", - "mocha": "bin/mocha" + "mocha": "bin/mocha.js" }, "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/mochajs" + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, "node_modules/ms": { @@ -798,18 +941,6 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true }, - "node_modules/nanoid": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.2.0.tgz", - "integrity": 
"sha512-fmsZYa9lpn69Ad5eDn7FMcnnSR+8R34W9qJEijxYhTbfOWzr22n1QxCMzXLK+ODyW2973V3Fux959iQoUxzUIA==", - "dev": true, - "bin": { - "nanoid": "bin/nanoid.cjs" - }, - "engines": { - "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" - } - }, "node_modules/node-fetch": { "version": "2.6.7", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", @@ -876,6 +1007,13 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/package-json-from-dist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", + "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", + "dev": true, + "license": "BlueOak-1.0.0" + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -885,13 +1023,31 @@ "node": ">=8" } }, - "node_modules/path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", "dev": true, + "license": "MIT", "engines": { - "node": ">=0.10.0" + "node": ">=8" + } + }, + "node_modules/path-scurry": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", + "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^10.2.0", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" + }, + "engines": { + "node": ">=16 || 14 >=14.18" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" } }, "node_modules/picomatch": 
{ @@ -923,6 +1079,7 @@ "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", "dev": true, + "license": "MIT", "dependencies": { "safe-buffer": "^5.1.0" } @@ -942,8 +1099,9 @@ "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", "dev": true, + "license": "MIT", "engines": { "node": ">=0.10.0" } @@ -966,22 +1124,80 @@ "type": "consulting", "url": "https://feross.org/support" } - ] + ], + "license": "MIT" }, "node_modules/serialize-javascript": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.0.tgz", - "integrity": "sha512-Qr3TosvguFt8ePWqsvRfrKyQXIiW+nGbYpy8XK24NQHE83caxWt+mIymTT19DGFbNWNLfEwsrkSmN64lVWB9ag==", + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz", + "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", "dev": true, + "license": "BSD-3-Clause", "dependencies": { "randombytes": "^2.1.0" } }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": 
"sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/string-width": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", + "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/string-width-cjs": { + "name": "string-width", "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", "dev": true, + "license": "MIT", "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", @@ -991,11 +1207,59 @@ "node": ">=8" } }, + "node_modules/string-width-cjs/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": 
"https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, + "node_modules/string-width-cjs/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/strip-ansi-cjs": { + "name": "strip-ansi", "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", "dev": true, + "license": "MIT", "dependencies": { "ansi-regex": "^5.0.1" }, @@ -1003,6 +1267,16 @@ "node": ">=8" } }, + "node_modules/strip-ansi-cjs/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/strip-json-comments": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", @@ -1092,6 +1366,7 @@ "resolved": 
"https://registry.npmjs.org/which/-/which-2.0.2.tgz", "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", "dev": true, + "license": "ISC", "dependencies": { "isexe": "^2.0.0" }, @@ -1103,16 +1378,37 @@ } }, "node_modules/workerpool": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.2.0.tgz", - "integrity": "sha512-Rsk5qQHJ9eowMH28Jwhe8HEbmdYDX4lwoMWshiCXugjtHqMD9ZbiqSDLxcsfdqsETPzVUtX5s1Z5kStiIM6l4A==", - "dev": true + "version": "6.5.1", + "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.5.1.tgz", + "integrity": "sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA==", + "dev": true, + "license": "Apache-2.0" }, "node_modules/wrap-ansi": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", + "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs": { + "name": "wrap-ansi", "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", "dev": true, + "license": "MIT", "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", @@ -1125,6 +1421,64 @@ "url": "https://github.com/chalk/wrap-ansi?sponsor=1" } }, + "node_modules/wrap-ansi-cjs/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + 
"dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, + "node_modules/wrap-ansi-cjs/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi/node_modules/ansi-styles": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", + "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", @@ -1135,35 +1489,38 @@ "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", "dev": 
true, + "license": "ISC", "engines": { "node": ">=10" } }, "node_modules/yargs": { - "version": "16.2.0", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", - "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", "dev": true, + "license": "MIT", "dependencies": { - "cliui": "^7.0.2", + "cliui": "^8.0.1", "escalade": "^3.1.1", "get-caller-file": "^2.0.5", "require-directory": "^2.1.1", - "string-width": "^4.2.0", + "string-width": "^4.2.3", "y18n": "^5.0.5", - "yargs-parser": "^20.2.2" + "yargs-parser": "^21.1.1" }, "engines": { - "node": ">=10" + "node": ">=12" } }, "node_modules/yargs-parser": { - "version": "20.2.4", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.4.tgz", - "integrity": "sha512-WOkpgNhPTlE73h4VFAFsOnomJVaovO8VqLDzy5saChRBFQFBoMYirowyW+Q9HB4HFF4Z7VZTiG3iSzJJA29yRA==", + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", "dev": true, + "license": "ISC", "engines": { - "node": ">=10" + "node": ">=12" } }, "node_modules/yargs-unparser": { @@ -1181,6 +1538,51 @@ "node": ">=10" } }, + "node_modules/yargs/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/yargs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": 
"sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, + "node_modules/yargs/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/yargs/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/yocto-queue": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", @@ -1227,6 +1629,20 @@ "resolved": "https://registry.npmjs.org/@actions/io/-/io-1.1.1.tgz", "integrity": "sha512-Qi4JoKXjmE0O67wAOH6y0n26QXhMKMFo7GD/4IXNVcrtLjUlGjGuVys6pQgwF3ArfGTQu0XpqaNr0YhED2RaRA==" }, + "@isaacs/cliui": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", + "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", + "dev": true, + "requires": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + } + }, "@octokit/auth-token": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-2.5.0.tgz", @@ -1339,6 +1755,13 @@ "@octokit/openapi-types": "^11.2.0" 
} }, + "@pkgjs/parseargs": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", + "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "dev": true, + "optional": true + }, "@types/mocha": { "version": "9.1.0", "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-9.1.0.tgz", @@ -1351,22 +1774,16 @@ "integrity": "sha512-NrTwfD7L1RTc2qrHQD4RTTy4p0CO2LatKBEKEds3CaVuhoM/+DJzmWZl5f+ikR8cm8F5mfJxK+9rQq07gRiSjQ==", "dev": true }, - "@ungap/promise-all-settled": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz", - "integrity": "sha512-sL/cEvJWAnClXw0wHk85/2L0G6Sj8UB0Ctc1TEMbKSsmpRosqhwj9gWgFRZSrBr2f9tiXISwNhCPmlfqUqyb9Q==", - "dev": true - }, "ansi-colors": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", - "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", + "integrity": "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==", "dev": true }, "ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", + "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==", "dev": true }, "ansi-styles": { @@ -1411,13 +1828,12 @@ "dev": true }, "brace-expansion": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": 
"sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", "dev": true, "requires": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" + "balanced-match": "^1.0.0" } }, "braces": { @@ -1479,14 +1895,59 @@ } }, "cliui": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", - "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", "dev": true, "requires": { "string-width": "^4.2.0", - "strip-ansi": "^6.0.0", + "strip-ansi": "^6.0.1", "wrap-ansi": "^7.0.0" + }, + "dependencies": { + "ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true + }, + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + } + }, + "strip-ansi": { + "version": "6.0.1", + "resolved": 
"https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "requires": { + "ansi-regex": "^5.0.1" + } + }, + "wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "requires": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + } + } } }, "color-convert": { @@ -1504,27 +1965,24 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "dev": true }, - "concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", - "dev": true + "cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "requires": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + } }, "debug": { - "version": "4.3.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.3.tgz", - "integrity": "sha512-/zxw5+vh1Tfv+4Qn7a5nsbcJKPaSvCDhojn6FEl9vupwK2VCSDtEiEtqr8DFtzYFOdz63LBkxec7DYuc2jon6Q==", + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", + "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", "dev": true, "requires": { - "ms": "2.1.2" - }, - "dependencies": { - "ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", - 
"dev": true - } + "ms": "^2.1.3" } }, "decamelize": { @@ -1539,21 +1997,27 @@ "integrity": "sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==" }, "diff": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/diff/-/diff-5.0.0.tgz", - "integrity": "sha512-/VTCrvm5Z0JGty/BWHljh+BAiw3IK+2j87NGMu8Nwc/f48WoDAC395uomO9ZD117ZOBaHmkX1oyLvkVM/aIT3w==", + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.0.tgz", + "integrity": "sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==", + "dev": true + }, + "eastasianwidth": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", + "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "dev": true }, "emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", "dev": true }, "escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", "dev": true }, "escape-string-regexp": { @@ -1587,11 +2051,15 @@ "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==", "dev": true }, - "fs.realpath": { - 
"version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", - "dev": true + "foreground-child": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", + "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", + "dev": true, + "requires": { + "cross-spawn": "^7.0.6", + "signal-exit": "^4.0.1" + } }, "fsevents": { "version": "2.3.2", @@ -1607,17 +2075,28 @@ "dev": true }, "glob": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.0.tgz", - "integrity": "sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==", + "version": "10.4.5", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", + "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", "dev": true, "requires": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.0.4", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" + }, + "dependencies": { + "minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dev": true, + "requires": { + "brace-expansion": "^2.0.1" + } + } } }, "glob-parent": { @@ -1629,12 +2108,6 @@ "is-glob": "^4.0.1" } }, - "growl": { - "version": "1.10.5", - "resolved": "https://registry.npmjs.org/growl/-/growl-1.10.5.tgz", - "integrity": "sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==", - "dev": true - }, "has-flag": { "version": 
"4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -1647,22 +2120,6 @@ "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", "dev": true }, - "inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", - "dev": true, - "requires": { - "once": "^1.3.0", - "wrappy": "1" - } - }, - "inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "dev": true - }, "is-binary-path": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", @@ -1719,9 +2176,19 @@ "isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", "dev": true }, + "jackspeak": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", + "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", + "dev": true, + "requires": { + "@isaacs/cliui": "^8.0.2", + "@pkgjs/parseargs": "^0.11.0" + } + }, "js-yaml": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", @@ -1749,45 +2216,53 @@ "is-unicode-supported": "^0.1.0" } }, + "lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "dev": true + }, "minimatch": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", 
- "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", "dev": true, "requires": { - "brace-expansion": "^1.1.7" + "brace-expansion": "^2.0.1" } }, + "minipass": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", + "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "dev": true + }, "mocha": { - "version": "9.2.0", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-9.2.0.tgz", - "integrity": "sha512-kNn7E8g2SzVcq0a77dkphPsDSN7P+iYkqE0ZsGCYWRsoiKjOt+NvXfaagik8vuDa6W5Zw3qxe8Jfpt5qKf+6/Q==", + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-11.1.0.tgz", + "integrity": "sha512-8uJR5RTC2NgpY3GrYcgpZrsEd9zKbPDpob1RezyR2upGHRQtHWofmzTMzTMSV6dru3tj5Ukt0+Vnq1qhFEEwAg==", "dev": true, "requires": { - "@ungap/promise-all-settled": "1.1.2", - "ansi-colors": "4.1.1", - "browser-stdout": "1.3.1", - "chokidar": "3.5.3", - "debug": "4.3.3", - "diff": "5.0.0", - "escape-string-regexp": "4.0.0", - "find-up": "5.0.0", - "glob": "7.2.0", - "growl": "1.10.5", - "he": "1.2.0", - "js-yaml": "4.1.0", - "log-symbols": "4.1.0", - "minimatch": "3.0.4", - "ms": "2.1.3", - "nanoid": "3.2.0", - "serialize-javascript": "6.0.0", - "strip-json-comments": "3.1.1", - "supports-color": "8.1.1", - "which": "2.0.2", - "workerpool": "6.2.0", - "yargs": "16.2.0", - "yargs-parser": "20.2.4", - "yargs-unparser": "2.0.0" + "ansi-colors": "^4.1.3", + "browser-stdout": "^1.3.1", + "chokidar": "^3.5.3", + "debug": "^4.3.5", + "diff": "^5.2.0", + "escape-string-regexp": "^4.0.0", + "find-up": "^5.0.0", + "glob": "^10.4.5", + "he": "^1.2.0", + "js-yaml": "^4.1.0", + "log-symbols": "^4.1.0", + "minimatch": 
"^5.1.6", + "ms": "^2.1.3", + "serialize-javascript": "^6.0.2", + "strip-json-comments": "^3.1.1", + "supports-color": "^8.1.1", + "workerpool": "^6.5.1", + "yargs": "^17.7.2", + "yargs-parser": "^21.1.1", + "yargs-unparser": "^2.0.0" } }, "ms": { @@ -1796,12 +2271,6 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true }, - "nanoid": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.2.0.tgz", - "integrity": "sha512-fmsZYa9lpn69Ad5eDn7FMcnnSR+8R34W9qJEijxYhTbfOWzr22n1QxCMzXLK+ODyW2973V3Fux959iQoUxzUIA==", - "dev": true - }, "node-fetch": { "version": "2.6.7", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", @@ -1842,18 +2311,34 @@ "p-limit": "^3.0.2" } }, + "package-json-from-dist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", + "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", + "dev": true + }, "path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", "dev": true }, - "path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", "dev": true }, + "path-scurry": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", + "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", + 
"dev": true, + "requires": { + "lru-cache": "^10.2.0", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" + } + }, "picomatch": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", @@ -1887,7 +2372,7 @@ "require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", "dev": true }, "safe-buffer": { @@ -1897,16 +2382,48 @@ "dev": true }, "serialize-javascript": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.0.tgz", - "integrity": "sha512-Qr3TosvguFt8ePWqsvRfrKyQXIiW+nGbYpy8XK24NQHE83caxWt+mIymTT19DGFbNWNLfEwsrkSmN64lVWB9ag==", + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz", + "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", "dev": true, "requires": { "randombytes": "^2.1.0" } }, + "shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "requires": { + "shebang-regex": "^3.0.0" + } + }, + "shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true + }, + "signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "dev": true + }, 
"string-width": { - "version": "4.2.3", + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", + "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", + "dev": true, + "requires": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + } + }, + "string-width-cjs": { + "version": "npm:string-width@4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", "dev": true, @@ -1914,15 +2431,55 @@ "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" + }, + "dependencies": { + "ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true + }, + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "requires": { + "ansi-regex": "^5.0.1" + } + } } }, "strip-ansi": { - "version": "6.0.1", + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dev": true, + "requires": { + "ansi-regex": "^6.0.1" + } + }, + "strip-ansi-cjs": { + "version": "npm:strip-ansi@6.0.1", "resolved": 
"https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", "dev": true, "requires": { "ansi-regex": "^5.0.1" + }, + "dependencies": { + "ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true + } } }, "strip-json-comments": { @@ -1994,13 +2551,32 @@ } }, "workerpool": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.2.0.tgz", - "integrity": "sha512-Rsk5qQHJ9eowMH28Jwhe8HEbmdYDX4lwoMWshiCXugjtHqMD9ZbiqSDLxcsfdqsETPzVUtX5s1Z5kStiIM6l4A==", + "version": "6.5.1", + "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.5.1.tgz", + "integrity": "sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA==", "dev": true }, "wrap-ansi": { - "version": "7.0.0", + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", + "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", + "dev": true, + "requires": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "dependencies": { + "ansi-styles": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", + "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", + "dev": true + } + } + }, + "wrap-ansi-cjs": { + "version": "npm:wrap-ansi@7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", "dev": true, @@ -2008,6 +2584,40 @@ "ansi-styles": "^4.0.0", "string-width": "^4.1.0", 
"strip-ansi": "^6.0.0" + }, + "dependencies": { + "ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true + }, + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + } + }, + "strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "requires": { + "ansi-regex": "^5.0.1" + } + } } }, "wrappy": { @@ -2022,24 +2632,58 @@ "dev": true }, "yargs": { - "version": "16.2.0", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", - "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", "dev": true, "requires": { - "cliui": "^7.0.2", + "cliui": "^8.0.1", "escalade": "^3.1.1", "get-caller-file": "^2.0.5", "require-directory": "^2.1.1", - "string-width": "^4.2.0", + "string-width": "^4.2.3", "y18n": "^5.0.5", - "yargs-parser": "^20.2.2" + "yargs-parser": "^21.1.1" + }, + 
"dependencies": { + "ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true + }, + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + } + }, + "strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "requires": { + "ansi-regex": "^5.0.1" + } + } } }, "yargs-parser": { - "version": "20.2.4", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.4.tgz", - "integrity": "sha512-WOkpgNhPTlE73h4VFAFsOnomJVaovO8VqLDzy5saChRBFQFBoMYirowyW+Q9HB4HFF4Z7VZTiG3iSzJJA29yRA==", + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", "dev": true }, "yargs-unparser": { diff --git a/scripts/ci/pr-bot/package.json b/scripts/ci/pr-bot/package.json index 2ae28ff2f704..69b20c0ff6d5 100644 --- a/scripts/ci/pr-bot/package.json +++ b/scripts/ci/pr-bot/package.json @@ -22,7 +22,7 @@ "devDependencies": { "@types/mocha": "^9.1.0", "@types/node": "^16.11.7", - "mocha": "^9.1.3", + 
"mocha": "^11.1.0", "typescript": "4.2.4", "prettier": "^2.5.1" } diff --git a/sdks/go.mod b/sdks/go.mod index cd3d34595bc8..da0439711bc4 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -22,25 +22,25 @@ module github.com/apache/beam/sdks/v2 go 1.23.0 -toolchain go1.23.7 +toolchain go1.24.1 require ( cloud.google.com/go/bigquery v1.66.2 cloud.google.com/go/bigtable v1.35.0 cloud.google.com/go/datastore v1.20.0 cloud.google.com/go/profiler v0.4.2 - cloud.google.com/go/pubsub v1.47.0 + cloud.google.com/go/pubsub v1.48.0 cloud.google.com/go/spanner v1.76.1 cloud.google.com/go/storage v1.51.0 github.com/aws/aws-sdk-go-v2 v1.36.3 - github.com/aws/aws-sdk-go-v2/config v1.29.6 - github.com/aws/aws-sdk-go-v2/credentials v1.17.59 - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.62 - github.com/aws/aws-sdk-go-v2/service/s3 v1.77.0 - github.com/aws/smithy-go v1.22.2 + github.com/aws/aws-sdk-go-v2/config v1.29.10 + github.com/aws/aws-sdk-go-v2/credentials v1.17.64 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.67 + github.com/aws/aws-sdk-go-v2/service/s3 v1.78.2 + github.com/aws/smithy-go v1.22.3 github.com/docker/go-connections v0.5.0 github.com/dustin/go-humanize v1.0.1 - github.com/go-sql-driver/mysql v1.9.0 + github.com/go-sql-driver/mysql v1.9.1 github.com/google/go-cmp v0.7.0 github.com/google/uuid v1.6.0 github.com/johannesboyne/gofakes3 v0.0.0-20250106100439-5c39aecd6999 @@ -55,15 +55,15 @@ require ( github.com/xitongsys/parquet-go v1.6.2 github.com/xitongsys/parquet-go-source v0.0.0-20241021075129-b732d2ac9c9b go.mongodb.org/mongo-driver v1.17.3 - golang.org/x/net v0.37.0 + golang.org/x/net v0.38.0 golang.org/x/oauth2 v0.28.0 golang.org/x/sync v0.12.0 golang.org/x/sys v0.31.0 golang.org/x/text v0.23.0 - google.golang.org/api v0.227.0 + google.golang.org/api v0.228.0 google.golang.org/genproto v0.0.0-20250303144028-a0af3efb3deb google.golang.org/grpc v1.71.0 - google.golang.org/protobuf v1.36.5 + google.golang.org/protobuf v1.36.6 gopkg.in/yaml.v2 v2.4.0 
gopkg.in/yaml.v3 v3.0.1 ) @@ -78,7 +78,7 @@ require ( require ( cel.dev/expr v0.19.2 // indirect cloud.google.com/go/auth v0.15.0 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/monitoring v1.24.0 // indirect dario.cat/mergo v1.0.1 // indirect filippo.io/edwards25519 v1.1.0 // indirect @@ -130,7 +130,7 @@ require ( ) require ( - cloud.google.com/go v0.118.3 // indirect + cloud.google.com/go v0.119.0 // indirect cloud.google.com/go/compute/metadata v0.6.0 // indirect cloud.google.com/go/iam v1.4.1 // indirect cloud.google.com/go/longrunning v0.6.5 // indirect @@ -139,19 +139,19 @@ require ( github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect github.com/apache/thrift v0.21.0 // indirect github.com/aws/aws-sdk-go v1.55.5 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.9 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.28 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.32 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.32 // indirect - github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.32 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.6.0 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.13 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.13 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.24.15 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.14 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.33.14 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 // indirect + 
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.25.2 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.29.2 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.33.17 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 // indirect diff --git a/sdks/go.sum b/sdks/go.sum index 5eb99336a788..02bb1241a7f0 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -40,8 +40,8 @@ cloud.google.com/go v0.104.0/go.mod h1:OO6xxXdJyvuJPcEPBLN9BJPD+jep5G1+2U5B5gkRY cloud.google.com/go v0.105.0/go.mod h1:PrLgOJNe5nfE9UMxKxgXj4mD3voiP+YQ6gdt6KMFOKM= cloud.google.com/go v0.107.0/go.mod h1:wpc2eNrD7hXUTy8EKS10jkxpZBjASrORK7goS+3YX2I= cloud.google.com/go v0.110.0/go.mod h1:SJnCLqQ0FCFGSZMUNUf84MV3Aia54kn7pi8st7tMzaY= -cloud.google.com/go v0.118.3 h1:jsypSnrE/w4mJysioGdMBg4MiW/hHx/sArFpaBWHdME= -cloud.google.com/go v0.118.3/go.mod h1:Lhs3YLnBlwJ4KA6nuObNMZ/fCbOQBPuWKPoE0Wa/9Vc= +cloud.google.com/go v0.119.0 h1:tw7OjErMzJKbbjaEHkrt60KQrK5Wus/boCZ7tm5/RNE= +cloud.google.com/go v0.119.0/go.mod h1:fwB8QLzTcNevxqi8dcpR+hoMIs3jBherGS9VUBDAW08= cloud.google.com/go/accessapproval v1.4.0/go.mod h1:zybIuC3KpDOvotz59lFe5qxRZx6C75OtwbisN56xYB4= cloud.google.com/go/accessapproval v1.5.0/go.mod h1:HFy3tuiGvMdcd/u+Cu5b9NkO1pEICJ46IR82PoUdplw= cloud.google.com/go/accessapproval v1.6.0/go.mod 
h1:R0EiYnwV5fsRFiKZkPHr6mwyk2wxUJ30nL4j2pcFY2E= @@ -105,8 +105,8 @@ cloud.google.com/go/assuredworkloads v1.9.0/go.mod h1:kFuI1P78bplYtT77Tb1hi0FMxM cloud.google.com/go/assuredworkloads v1.10.0/go.mod h1:kwdUQuXcedVdsIaKgKTp9t0UJkE5+PAVNhdQm4ZVq2E= cloud.google.com/go/auth v0.15.0 h1:Ly0u4aA5vG/fsSsxu98qCQBemXtAtJf+95z9HK+cxps= cloud.google.com/go/auth v0.15.0/go.mod h1:WJDGqZ1o9E9wKIL+IwStfyn/+s59zl4Bi+1KQNVXLZ8= -cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M= -cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc= +cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= +cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= cloud.google.com/go/automl v1.5.0/go.mod h1:34EjfoFGMZ5sgJ9EoLsRtdPSNZLcfflJR39VbVNS2M0= cloud.google.com/go/automl v1.6.0/go.mod h1:ugf8a6Fx+zP0D59WLhqgTDsQI9w07o64uf/Is3Nh5p8= cloud.google.com/go/automl v1.7.0/go.mod h1:RL9MYCCsJEOmt0Wf3z9uzG0a7adTT1fe+aObgSpkCt8= @@ -460,8 +460,8 @@ cloud.google.com/go/pubsub v1.26.0/go.mod h1:QgBH3U/jdJy/ftjPhTkyXNj543Tin1pRYcd cloud.google.com/go/pubsub v1.27.1/go.mod h1:hQN39ymbV9geqBnfQq6Xf63yNhUAhv9CZhzp5O6qsW0= cloud.google.com/go/pubsub v1.28.0/go.mod h1:vuXFpwaVoIPQMGXqRyUQigu/AX1S3IWugR9xznmcXX8= cloud.google.com/go/pubsub v1.30.0/go.mod h1:qWi1OPS0B+b5L+Sg6Gmc9zD1Y+HaM0MdUr7LsupY1P4= -cloud.google.com/go/pubsub v1.47.0 h1:Ou2Qu4INnf7ykrFjGv2ntFOjVo8Nloh/+OffF4mUu9w= -cloud.google.com/go/pubsub v1.47.0/go.mod h1:LaENesmga+2u0nDtLkIOILskxsfvn/BXX9Ak1NFxOs8= +cloud.google.com/go/pubsub v1.48.0 h1:ntFpQVrr10Wj/GXSOpxGmexGynldv/bFp25H0jy8aOs= +cloud.google.com/go/pubsub v1.48.0/go.mod h1:AAtyjyIT/+zaY1ERKFJbefOvkUxRDNp3nD6TdfdqUZk= cloud.google.com/go/pubsublite v1.5.0/go.mod h1:xapqNQ1CuLfGi23Yda/9l4bBCKz/wC3KIJ5gKcxveZg= cloud.google.com/go/pubsublite v1.6.0/go.mod h1:1eFCS0U11xlOuMFV/0iBqw3zP12kddMeCbj/F3FSj9k= 
cloud.google.com/go/pubsublite v1.7.0/go.mod h1:8hVMwRXfDfvGm3fahVbtDbiLePT3gpoiJYJY+vxWxVM= @@ -751,79 +751,79 @@ github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38y github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.1/go.mod h1:n8Bs1ElDD2wJ9kCRTczA83gYbBmjSwZp3umc6zF4EeM= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.1/go.mod h1:t8PYl/6LzdAqsU4/9tz28V/kU+asFePvpOMkdul0gEQ= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.9 h1:VZPDrbzdsU1ZxhyWrvROqLY0nxFWgMCAzhn/nYz3X48= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.9/go.mod h1:3XkePX5dSaxveLAYY7nsbsZZrKxCyEuE5pM4ziFxyGg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 h1:zAybnyUQXIZ5mok5Jqwlf58/TFE7uvd3IAsa1aF9cXs= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10/go.mod h1:qqvMj6gHLR/EXWZw4ZbqlPbQUyenf4h82UQUlKc+l14= github.com/aws/aws-sdk-go-v2/config v1.15.3/go.mod h1:9YL3v07Xc/ohTsxFXzan9ZpFpdTOFl4X65BAKYaz8jg= github.com/aws/aws-sdk-go-v2/config v1.25.3/go.mod h1:tAByZy03nH5jcq0vZmkcVoo6tRzRHEwSFx3QW4NmDw8= -github.com/aws/aws-sdk-go-v2/config v1.29.6 h1:fqgqEKK5HaZVWLQoLiC9Q+xDlSp+1LYidp6ybGE2OGg= -github.com/aws/aws-sdk-go-v2/config v1.29.6/go.mod h1:Ft+WLODzDQmCTHDvqAH1JfC2xxbZ0MxpZAcJqmE1LTQ= +github.com/aws/aws-sdk-go-v2/config v1.29.10 h1:yNjgjiGBp4GgaJrGythyBXg2wAs+Im9fSWIUwvi1CAc= +github.com/aws/aws-sdk-go-v2/config v1.29.10/go.mod h1:A0mbLXSdtob/2t59n1X0iMkPQ5d+YzYZB4rwu7SZ7aA= github.com/aws/aws-sdk-go-v2/credentials v1.11.2/go.mod h1:j8YsY9TXTm31k4eFhspiQicfXPLZ0gYXA50i4gxPE8g= github.com/aws/aws-sdk-go-v2/credentials v1.16.2/go.mod h1:sDdvGhXrSVT5yzBDR7qXz+rhbpiMpUYfF3vJ01QSdrc= -github.com/aws/aws-sdk-go-v2/credentials v1.17.59 h1:9btwmrt//Q6JcSdgJOLI98sdr5p7tssS9yAsGe8aKP4= -github.com/aws/aws-sdk-go-v2/credentials v1.17.59/go.mod h1:NM8fM6ovI3zak23UISdWidyZuI1ghNe2xjzUZAyT+08= 
+github.com/aws/aws-sdk-go-v2/credentials v1.17.64 h1:NH4RAQJEXBDQDUudTqMNHdyyEVa5CvMn0tQicqv48jo= +github.com/aws/aws-sdk-go-v2/credentials v1.17.64/go.mod h1:tUoJfj79lzEcalHDbyNkpnZZTRg/2ayYOK/iYnRfPbo= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.3/go.mod h1:uk1vhHHERfSVCUnqSqz8O48LBYDSC+k6brng09jcMOk= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.4/go.mod h1:t4i+yGHMCcUNIX1x7YVYa6bH/Do7civ5I6cG/6PMfyA= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.28 h1:KwsodFKVQTlI5EyhRSugALzsV6mG/SGrdjlMXSZSdso= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.28/go.mod h1:EY3APf9MzygVhKuPXAc5H+MkGb8k/DOSQjWS0LgkKqI= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 h1:x793wxmUWVDhshP8WW2mlnXuFrO4cOd3HLBroh1paFw= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30/go.mod h1:Jpne2tDnYiFascUEs2AWHJL9Yp7A5ZVy3TNyxaAjD6M= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.3/go.mod h1:0dHuD2HZZSiwfJSy1FO5bX1hQ1TxVV1QXXjpn3XUE44= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.14.0/go.mod h1:UcgIwJ9KHquYxs6Q5skC9qXjhYMK+JASDYcXQ4X7JZE= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.62 h1:qzLOdXzKUuMGDzEAzpEz3QHYy5510nEZCzWI4EBaxZw= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.62/go.mod h1:hezn6jOdr8sbGMCJmqJF/WOVK9h9H7EXsmu20zXG2m8= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.67 h1:V5KBNdfgTNFd8aLQDXKgHtDbiX5Z0AbH6HibzDx2CWU= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.67/go.mod h1:yut3GOtsk0hs3wnkOnpSmy+l+TxGC86/faMixuNiQLA= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.9/go.mod h1:AnVH5pvai0pAF4lXRq0bmhbes1u9R8wTE+g+183bZNM= github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.3/go.mod h1:7sGSz1JCKHWWBHq98m6sMtWQikmYPpxjqOydDemiVoM= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.32 h1:BjUcr3X3K0wZPGFg2bxOWW3VPN8rkE3/61zhP+IHviA= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.32/go.mod h1:80+OGC/bgzzFFTUmcuwD0lb4YutwQeKLFpmt6hoWapU= 
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 h1:ZK5jHhnrioRkUNOc+hOgQKlUL5JeC3S6JgLxtQ+Rm0Q= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34/go.mod h1:p4VfIceZokChbA9FzMbRGz5OV+lekcVtHlPKEO0gSZY= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.3/go.mod h1:ssOhaLpRlh88H3UmEcsBoVKq309quMvm3Ds8e9d4eJM= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.3/go.mod h1:ify42Rb7nKeDDPkFjKn7q1bPscVPu/+gmHH8d2c+anU= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.32 h1:m1GeXHVMJsRsUAqG6HjZWx9dj7F5TR+cF1bjyfYyBd4= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.32/go.mod h1:IitoQxGfaKdVLNg0hD8/DXmAqNy0H4K2H2Sf91ti8sI= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 h1:SZwFm17ZUNNg5Np0ioo/gq8Mn6u9w19Mri8DnJ15Jf0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34/go.mod h1:dFZsC0BLo346mvKQLWmoJxT+Sjp+qcVR1tRVHQGOH9Q= github.com/aws/aws-sdk-go-v2/internal/ini v1.3.10/go.mod h1:8DcYQcz0+ZJaSxANlHIsbbi6S+zMwjwdDqwW3r9AzaE= github.com/aws/aws-sdk-go-v2/internal/ini v1.7.1/go.mod h1:6fQQgfuGmw8Al/3M2IgIllycxV7ZW7WCdVSqfBeUiCY= -github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 h1:Pg9URiobXy85kgFev3og2CuOZ8JZUBENF+dcgWBaYNk= -github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.3/go.mod h1:5yzAuE9i2RkVAttBl8yxZgQr5OCq4D5yDnG7j9x2L0U= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.32 h1:OIHj/nAhVzIXGzbAE+4XmZ8FPvro3THr6NlqErJc3wY= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.32/go.mod h1:LiBEsDo34OJXqdDlRGsilhlIiXR7DL+6Cx2f4p1EgzI= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 h1:ZNTqv4nIdE/DiBfUUfXcLZ/Spcuz+RjeziUtNJackkM= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34/go.mod 
h1:zf7Vcd1ViW7cPqYWEHLHJkS50X0JS2IKz9Cgaj6ugrs= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.1/go.mod h1:GeUru+8VzrTXV/83XyMJ80KpH8xO89VPoUileyNQ+tc= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.1/go.mod h1:l9ymW25HOqymeU2m1gbUQ3rUIsTwKs8gYHXkqDQUhiI= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 h1:D4oz8/CzT9bAEYtVhSBmFj2dNOtaHOtMKc2vHBwYizA= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2/go.mod h1:Za3IHqTQ+yNcRHxu1OFucBh0ACZT4j4VQFF0BqpZcLY= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.3/go.mod h1:Seb8KNmD6kVTjwRjVEgOT5hPin6sq+v4C2ycJQDwuH8= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.3/go.mod h1:R+/S1O4TYpcktbVwddeOYg+uwUfLhADP2S/x4QwsCTM= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.6.0 h1:kT2WeWcFySdYpPgyqJMSUE7781Qucjtn6wBvrgm9P+M= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.6.0/go.mod h1:WYH1ABybY7JK9TITPnk6ZlP7gQB8psI4c9qDmMsnLSA= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.0 h1:lguz0bmOoGzozP9XfRJR1QIayEYo+2vP/No3OfLF0pU= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.0/go.mod h1:iu6FSzgt+M2/x3Dk8zhycdIcHjEFb36IS8HVUVFoMg0= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.3/go.mod h1:wlY6SVjuwvh3TVRpTqdy4I1JpBFLX4UGeKZdWntaocw= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.10.3/go.mod h1:Owv1I59vaghv1Ax8zz8ELY8DN7/Y0rGS+WWAmjgi950= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.13 h1:SYVGSFQHlchIcy6e7x12bsrxClCXSP5et8cqVhL8cuw= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.13/go.mod h1:kizuDaLX37bG5WZaoxGPQR/LNFXpxp0vsUnqfkWXfNE= 
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 h1:dM9/92u2F1JbDaGooxTq18wmmFzbJRfXfVfy96/1CXM= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15/go.mod h1:SwFBy2vjtA0vZbjjaFtfN045boopadnoVPhu4Fv66vY= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.3/go.mod h1:Bm/v2IaN6rZ+Op7zX+bOUMdL4fsrYZiD0dsjLhNKwZc= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.3/go.mod h1:KZgs2ny8HsxRIRbDwgvJcHHBZPOzQr/+NtGwnP+w2ec= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.13 h1:OBsrtam3rk8NfBEq7OLOMm5HtQ9Yyw32X4UQMya/wjw= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.13/go.mod h1:3U4gFA5pmoCOja7aq4nSaIAGbaOHv2Yl2ug018cmC+Q= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 h1:moLQUoVq91LiqT1nbvzDukyqAlCv89ZmwaHw/ZFlFZg= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15/go.mod h1:ZH34PJUc8ApjBIfgQCFvkWcUDBtl/WTD+uiYHjd8igA= github.com/aws/aws-sdk-go-v2/service/kms v1.16.3/go.mod h1:QuiHPBqlOFCi4LqdSskYYAWpQlx3PKmohy+rE2F+o5g= github.com/aws/aws-sdk-go-v2/service/s3 v1.26.3/go.mod h1:g1qvDuRsJY+XghsV6zg00Z4KJ7DtFFCx8fJD2a491Ak= github.com/aws/aws-sdk-go-v2/service/s3 v1.43.0/go.mod h1:NXRKkiRF+erX2hnybnVU660cYT5/KChRD4iUgJ97cI8= -github.com/aws/aws-sdk-go-v2/service/s3 v1.77.0 h1:RCOi1rDmLqOICym/6UeS2cqKED4T4m966w2rl1HfL+g= -github.com/aws/aws-sdk-go-v2/service/s3 v1.77.0/go.mod h1:VC4EKSHqT3nzOcU955VWHMGsQ+w67wfAUBSjC8NOo8U= +github.com/aws/aws-sdk-go-v2/service/s3 v1.78.2 h1:jIiopHEV22b4yQP2q36Y0OmwLbsxNWdWwfZRR5QRRO4= +github.com/aws/aws-sdk-go-v2/service/s3 v1.78.2/go.mod h1:U5SNqwhXB3Xe6F47kXvWihPl/ilGaEDe8HD/50Z9wxc= github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.15.4/go.mod h1:PJc8s+lxyU8rrre0/4a0pn2wgwiDvOEzoOjcJUBr67o= github.com/aws/aws-sdk-go-v2/service/sns v1.17.4/go.mod h1:kElt+uCcXxcqFyc+bQqZPFD9DME/eC6oHBXvFzQ9Bcw= github.com/aws/aws-sdk-go-v2/service/sqs v1.18.3/go.mod h1:skmQo0UPvsjsuYYSYMVmrPc1HWCbHUJyrCEp+ZaLzqM= 
github.com/aws/aws-sdk-go-v2/service/ssm v1.24.1/go.mod h1:NR/xoKjdbRJ+qx0pMR4mI+N/H1I1ynHwXnO6FowXJc0= github.com/aws/aws-sdk-go-v2/service/sso v1.11.3/go.mod h1:7UQ/e69kU7LDPtY40OyoHYgRmgfGM4mgsLYtcObdveU= github.com/aws/aws-sdk-go-v2/service/sso v1.17.2/go.mod h1:/pE21vno3q1h4bbhUOEi+6Zu/aT26UK2WKkDXd+TssQ= -github.com/aws/aws-sdk-go-v2/service/sso v1.24.15 h1:/eE3DogBjYlvlbhd2ssWyeuovWunHLxfgw3s/OJa4GQ= -github.com/aws/aws-sdk-go-v2/service/sso v1.24.15/go.mod h1:2PCJYpi7EKeA5SkStAmZlF6fi0uUABuhtF8ILHjGc3Y= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.2 h1:pdgODsAhGo4dvzC3JAG5Ce0PX8kWXrTZGx+jxADD+5E= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.2/go.mod h1:qs4a9T5EMLl/Cajiw2TcbNt2UNo/Hqlyp+GiuG4CFDI= github.com/aws/aws-sdk-go-v2/service/ssooidc v1.20.0/go.mod h1:dWqm5G767qwKPuayKfzm4rjzFmVjiBFbOJrpSPnAMDs= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.14 h1:M/zwXiL2iXUrHputuXgmO94TVNmcenPHxgLXLutodKE= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.14/go.mod h1:RVwIw3y/IqxC2YEXSIkAzRDdEU1iRabDPaYjpGCbCGQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.29.2 h1:wK8O+j2dOolmpNVY1EWIbLgxrGCHJKVPm08Hv/u80M8= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.29.2/go.mod h1:MlYRNmYu/fGPoxBQVvBYr9nyr948aY/WLUvwBMBJubs= github.com/aws/aws-sdk-go-v2/service/sts v1.16.3/go.mod h1:bfBj0iVmsUyUg4weDB4NxktD9rDGeKSVWnjTnwbx9b8= github.com/aws/aws-sdk-go-v2/service/sts v1.25.3/go.mod h1:4EqRHDCKP78hq3zOnmFXu5k0j4bXbRFfCh/zQ6KnEfQ= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.14 h1:TzeR06UCMUq+KA3bDkujxK1GVGy+G8qQN/QVYzGLkQE= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.14/go.mod h1:dspXf/oYWGWo6DEvj98wpaTeqt5+DMidZD0A9BYTizc= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.17 h1:PZV5W8yk4OtH1JAuhV2PXwwO9v5G5Aoj+eMCn4T+1Kc= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.17/go.mod h1:cQnB8CUnxbMU82JvlqjKR2HBOm3fe9pWorWBza6MBJ4= github.com/aws/smithy-go v1.11.2/go.mod h1:3xHYmszWVx2c0kIwQeEVf9uSm4fYZt67FBJnwub1bgM= github.com/aws/smithy-go v1.17.0/go.mod 
h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= -github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ= -github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= +github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k= +github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/bobg/gcsobj v0.1.2/go.mod h1:vS49EQ1A1Ib8FgrL58C8xXYZyOCR2TgzAdopy6/ipa8= github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= @@ -966,8 +966,8 @@ github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GO github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= -github.com/go-sql-driver/mysql v1.9.0 h1:Y0zIbQXhQKmQgTp44Y1dp3wTXcn804QoTptLZT1vtvo= -github.com/go-sql-driver/mysql v1.9.0/go.mod h1:pDetrLJeA3oMujJuvXc8RJoasr589B6A9fwzD3QMrqw= +github.com/go-sql-driver/mysql v1.9.1 h1:FrjNGn/BsJQjVRuSa8CBrM5BWA9BWoXXat3KrtSb/iI= +github.com/go-sql-driver/mysql v1.9.1/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= @@ -1674,8 +1674,8 @@ golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.10.0/go.mod 
h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c= -golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= +golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= +golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -2035,8 +2035,8 @@ google.golang.org/api v0.108.0/go.mod h1:2Ts0XTHNVWxypznxWOYUeI4g3WdP9Pk2Qk58+a/ google.golang.org/api v0.110.0/go.mod h1:7FC4Vvx1Mooxh8C5HWjzZHcavuS2f6pmJpZx60ca7iI= google.golang.org/api v0.111.0/go.mod h1:qtFHvU9mhgTJegR31csQ+rwxyUTHOKFqCKWp1J0fdw0= google.golang.org/api v0.114.0/go.mod h1:ifYI2ZsFK6/uGddGfAD5BMxlnkBqCmqHSDUVi45N5Yg= -google.golang.org/api v0.227.0 h1:QvIHF9IuyG6d6ReE+BNd11kIB8hZvjN8Z5xY5t21zYc= -google.golang.org/api v0.227.0/go.mod h1:EIpaG6MbTgQarWF5xJvX0eOJPK9n/5D4Bynb9j2HXvQ= +google.golang.org/api v0.228.0 h1:X2DJ/uoWGnY5obVjewbp8icSL5U4FzuCfy9OjbLSnLs= +google.golang.org/api v0.228.0/go.mod h1:wNvRS1Pbe8r4+IfBIniV8fwCpGwTrYa+kMUDiC5z5a4= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -2263,8 +2263,8 @@ google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.29.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.30.0/go.mod 
h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= -google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/sdks/go/README.md b/sdks/go/README.md index bcfba2742590..4d6a50472578 100644 --- a/sdks/go/README.md +++ b/sdks/go/README.md @@ -106,8 +106,19 @@ sport: 3 Crown'd: 1 ``` +Note that, when running at Beam HEAD, the Dataflow runner will try to use a non-existent container `gcr.io/cloud-dataflow/v1beta3/beam_go_sdk:.dev`. +To address this, you need to push your own SDK harness container image to a repository (for example, Docker Hub or Google Artifact Registry) and specify that as the +`` parameter above. +For example, running the following from Beam HEAD, will make the container availble at the location `/beam_go_sdk`. + +```bash +$ ./gradlew :sdks:go:container:docker -Pdocker-repository-root= + +$ docker push /beam_go_sdk +``` + See [BUILD.md](./BUILD.md) for how to build Go code in general. See -[container documentation](https://beam.apache.org/documentation/runtime/environments/#building-container-images) for how to build and push the Go SDK harness container image. +[container documentation](https://beam.apache.org/documentation/runtime/environments/#building-container-images) for more details on how to build and push the Go SDK harness container image. 
## Issues diff --git a/sdks/go/pkg/beam/core/runtime/graphx/translate.go b/sdks/go/pkg/beam/core/runtime/graphx/translate.go index ae38e96ebf99..3bbb6c70dcf5 100644 --- a/sdks/go/pkg/beam/core/runtime/graphx/translate.go +++ b/sdks/go/pkg/beam/core/runtime/graphx/translate.go @@ -1069,6 +1069,8 @@ func (m *marshaller) expandReshuffle(edge NamedEdge) (string, error) { if _, err := m.makeNode(gbkOut, gbkCoderID, outNode); err != nil { return handleErr(err) } + // Use the same windowing for gbk output as postReify + m.pcollections[gbkOut].WindowingStrategyId = m.pcollections[postReify].WindowingStrategyId gbkID := fmt.Sprintf("%v_gbk", id) gbk := &pipepb.PTransform{ diff --git a/sdks/go/test/integration/integration.go b/sdks/go/test/integration/integration.go index de782daa2d5d..88db6a5b6c3b 100644 --- a/sdks/go/test/integration/integration.go +++ b/sdks/go/test/integration/integration.go @@ -313,6 +313,10 @@ var dataflowFilters = []string{ // Timers "TestTimers_ProcessingTime_Infinity", // Uses test stream. "TestTimers_ProcessingTime_Bounded", // Dataflow ignores processing time timers in batch. + // Sometimes the Dataflow worker is killed by OOM before the heap dump code in boot.go is reached, + // so no dump file is created. 
+ // TODO: https://github.com/apache/beam/issues/34498 + "TestOomParDo", } // CheckFilters checks if an integration test is filtered to be skipped, either diff --git a/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml b/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml index 01c997441479..af384ff19c09 100644 --- a/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml +++ b/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml @@ -62,8 +62,7 @@ - - + diff --git a/sdks/java/container/license_scripts/dep_urls_java.yaml b/sdks/java/container/license_scripts/dep_urls_java.yaml index 6a22ecc5a8e0..b33b81d6db36 100644 --- a/sdks/java/container/license_scripts/dep_urls_java.yaml +++ b/sdks/java/container/license_scripts/dep_urls_java.yaml @@ -46,7 +46,7 @@ jaxen: '1.1.6': type: "3-Clause BSD" libraries-bom: - '26.56.0': + '26.57.0': license: "https://raw.githubusercontent.com/GoogleCloudPlatform/cloud-opensource-java/master/LICENSE" type: "Apache License 2.0" paranamer: diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/RowCoderGenerator.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/RowCoderGenerator.java index 7a1b16d7e91f..63a796141d5f 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/RowCoderGenerator.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/RowCoderGenerator.java @@ -199,7 +199,6 @@ static void clearRowCoderCache() { @SuppressWarnings("unchecked") public static Coder generate(Schema schema) { - String stackTrace = getStackTrace(); UUID uuid = Preconditions.checkNotNull(schema.getUUID()); // Avoid using computeIfAbsent which may cause issues with nested schemas. 
synchronized (cacheLock) { @@ -268,6 +267,7 @@ public static Coder generate(Schema schema) { | InvocationTargetException e) { throw new RuntimeException("Unable to generate coder for schema " + schema, e); } + String stackTrace = getStackTrace(); GENERATED_CODERS.put(uuid, new WithStackTrace<>(rowCoder, stackTrace)); LOG.debug( "Created row coder for uuid {} with encoding positions {} at {}", diff --git a/sdks/java/extensions/avro/build.gradle b/sdks/java/extensions/avro/build.gradle index 6631779e609c..3d22befaf4d6 100644 --- a/sdks/java/extensions/avro/build.gradle +++ b/sdks/java/extensions/avro/build.gradle @@ -67,6 +67,7 @@ dependencies { implementation library.java.error_prone_annotations implementation library.java.avro implementation library.java.joda_time + implementation library.java.commons_lang3 testImplementation(project(path: ":sdks:java:core", configuration: "shadowTest")) { // Exclude Avro dependencies from "core" since Avro support moved to this extension exclude group: "org.apache.avro", module: "avro" @@ -143,9 +144,11 @@ avroVersions.each { k, v -> main = "org.apache.avro.tool.Main" args = [ "compile", + "-bigDecimal", // Use BigDecimal for logical type decimal, similarly to what gradle-avro-plugin does "schema", "src/test/avro/org/apache/beam/sdk/extensions/avro/io/user.avsc", "src/test/avro/org/apache/beam/sdk/extensions/avro/schemas/test.avsc", + "src/test/avro/org/apache/beam/sdk/extensions/avro/schemas/logicaltypes/logical-types.avsc", "build/generated/sources/avro$k/test/java" ] } @@ -161,4 +164,4 @@ static def createTaskNames(Map prefixMap, String suffix) { return prefixMap.keySet().stream() .map { version -> "avroVersion${version}${suffix}" } .collect(Collectors.toList()) -} \ No newline at end of file +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtils.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtils.java index 
da7daf605d89..460bfaec4a36 100644 --- a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtils.java +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtils.java @@ -34,6 +34,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -49,6 +50,7 @@ import net.bytebuddy.implementation.bytecode.member.MethodInvocation; import net.bytebuddy.matcher.ElementMatchers; import org.apache.avro.AvroRuntimeException; +import org.apache.avro.Conversion; import org.apache.avro.Conversions; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; @@ -61,6 +63,7 @@ import org.apache.avro.reflect.AvroName; import org.apache.avro.reflect.ReflectData; import org.apache.avro.specific.SpecificRecord; +import org.apache.avro.specific.SpecificRecordBase; import org.apache.avro.util.Utf8; import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.extensions.avro.schemas.AvroRecordSchema; @@ -97,6 +100,7 @@ import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps; +import org.apache.commons.lang3.reflect.FieldUtils; import org.checkerframework.checker.nullness.qual.EnsuresNonNullIf; import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.Nullable; @@ -154,6 +158,15 @@ public class AvroUtils { new ForLoadedType(ReadableInstant.class); private static final ForLoadedType JODA_INSTANT = new ForLoadedType(Instant.class); + private static final GenericData GENERIC_DATA_WITH_DEFAULT_CONVERSIONS; + + static { + GENERIC_DATA_WITH_DEFAULT_CONVERSIONS = new GenericData(); + 
addLogicalTypeConversions(GENERIC_DATA_WITH_DEFAULT_CONVERSIONS); + GENERIC_DATA_WITH_DEFAULT_CONVERSIONS.addLogicalTypeConversion( + new Conversions.DecimalConversion()); + } + // contains workarounds for third-party methods that accept nullable arguments but lack proper // annotations private static class NullnessCheckerWorkarounds { @@ -552,23 +565,43 @@ public static org.apache.avro.Schema toAvroSchema(Schema beamSchema) { * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during * conversion. If Schema is not provided, one is inferred from the AVRO schema. */ - public static Row toBeamRowStrict(GenericRecord record, @Nullable Schema schema) { + public static Row toBeamRowStrict( + GenericRecord record, @Nullable Schema schema, @Nullable GenericData genericData) { if (schema == null) { schema = toBeamSchema(record.getSchema()); } + if (genericData == null) { + if (record instanceof SpecificRecordBase) { + // in case of SpecificRecord, the MODEL$ GenericData already has registered the specific + // conversions + genericData = getGenericData((SpecificRecordBase) record); + } else { + genericData = GENERIC_DATA_WITH_DEFAULT_CONVERSIONS; + } + } + Row.Builder builder = Row.withSchema(schema); org.apache.avro.Schema avroSchema = record.getSchema(); for (Field field : schema.getFields()) { Object value = record.get(field.getName()); org.apache.avro.Schema fieldAvroSchema = avroSchema.getField(field.getName()).schema(); - builder.addValue(convertAvroFieldStrict(value, fieldAvroSchema, field.getType())); + builder.addValue( + convertAvroFieldStrict(value, fieldAvroSchema, field.getType(), genericData)); } return builder.build(); } + /** + * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during + * conversion. If Schema is not provided, one is inferred from the AVRO schema. 
+ */ + public static Row toBeamRowStrict(GenericRecord record, @Nullable Schema schema) { + return toBeamRowStrict(record, schema, null); + } + /** * Convert from a Beam Row to an AVRO GenericRecord. The Avro Schema is inferred from the Beam * schema on the row. @@ -1323,6 +1356,67 @@ private static org.apache.avro.Schema getFieldSchema( } } + private static Object convertLogicalType( + @PolyNull Object value, + @Nonnull org.apache.avro.Schema avroSchema, + @Nonnull FieldType fieldType, + @Nonnull GenericData genericData) { + TypeWithNullability type = new TypeWithNullability(avroSchema); + LogicalType logicalType = LogicalTypes.fromSchema(type.type); + if (logicalType == null) { + return null; + } + + Object rawType = value; + + Conversion conversion = genericData.getConversionByClass(value.getClass(), logicalType); + Class convertedType = null; + if (conversion != null) { + convertedType = conversion.getConvertedType(); + if (convertedType.isInstance(value)) { + rawType = Conversions.convertToRawType(value, avroSchema, logicalType, conversion); + } + } + + // switch on string name because some LogicalType classes are not available in all versions of + // Avro + switch (logicalType.getName()) { + case "date": + return convertDateStrict( + checkRawType(Integer.class, value, logicalType, rawType, conversion, convertedType), + fieldType); + case "time-millis": + return checkRawType(Integer.class, value, logicalType, rawType, conversion, convertedType); + case "time-micros": + case "timestamp-micros": + case "local-timestamp-millis": + case "local-timestamp-micros": + return checkRawType(Long.class, value, logicalType, rawType, conversion, convertedType); + case "timestamp-millis": + return convertDateTimeStrict( + checkRawType(Long.class, value, logicalType, rawType, conversion, convertedType), + fieldType); + case "decimal": + { + if (rawType instanceof GenericFixed) { + // Decimal can be backed by ByteBuffer or GenericFixed. 
in case of GenericFixed, we + // convert it to ByteBuffer here + rawType = ByteBuffer.wrap(((GenericFixed) rawType).bytes()); + } + ByteBuffer byteBuffer = + checkRawType( + ByteBuffer.class, value, logicalType, rawType, conversion, convertedType); + Conversion decimalConversion = new Conversions.DecimalConversion(); + BigDecimal bigDecimal = + decimalConversion.fromBytes(byteBuffer.duplicate(), type.type, logicalType); + return convertDecimal(bigDecimal, fieldType); + } + case "uuid": + return UUID.fromString(rawType.toString()).toString(); + } + return null; + } + /** * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during * conversion. @@ -1330,44 +1424,26 @@ private static org.apache.avro.Schema getFieldSchema( * @param value {@link GenericRecord} or any nested value * @param avroSchema schema for value * @param fieldType target beam field type + * @param genericData {@link GenericData} instance to use for conversions * @return value converted for {@link Row} */ - @SuppressWarnings("unchecked") public static @PolyNull Object convertAvroFieldStrict( @PolyNull Object value, @Nonnull org.apache.avro.Schema avroSchema, - @Nonnull FieldType fieldType) { + @Nonnull FieldType fieldType, + @Nonnull GenericData genericData) { + if (value == null) { return null; } + Object convertedLogicalType = convertLogicalType(value, avroSchema, fieldType, genericData); - TypeWithNullability type = new TypeWithNullability(avroSchema); - LogicalType logicalType = LogicalTypes.fromSchema(type.type); - if (logicalType != null) { - if (logicalType instanceof LogicalTypes.Decimal) { - ByteBuffer byteBuffer = (ByteBuffer) value; - BigDecimal bigDecimal = - new Conversions.DecimalConversion() - .fromBytes(byteBuffer.duplicate(), type.type, logicalType); - return convertDecimal(bigDecimal, fieldType); - } else if (logicalType instanceof LogicalTypes.TimestampMillis) { - if (value instanceof ReadableInstant) { - return 
convertDateTimeStrict(((ReadableInstant) value).getMillis(), fieldType); - } else { - return convertDateTimeStrict((Long) value, fieldType); - } - } else if (logicalType instanceof LogicalTypes.Date) { - if (value instanceof ReadableInstant) { - int epochDays = Days.daysBetween(Instant.EPOCH, (ReadableInstant) value).getDays(); - return convertDateStrict(epochDays, fieldType); - } else if (value instanceof java.time.LocalDate) { - return convertDateStrict((int) ((java.time.LocalDate) value).toEpochDay(), fieldType); - } else { - return convertDateStrict((Integer) value, fieldType); - } - } + if (convertedLogicalType != null) { + return convertedLogicalType; } + TypeWithNullability type = new TypeWithNullability(avroSchema); + switch (type.type.getType()) { case FIXED: return convertFixedStrict((GenericFixed) value, fieldType); @@ -1402,14 +1478,15 @@ private static org.apache.avro.Schema getFieldSchema( return convertEnumStrict(value, fieldType); case ARRAY: - return convertArrayStrict((List) value, type.type.getElementType(), fieldType); + return convertArrayStrict( + (List) value, type.type.getElementType(), fieldType, genericData); case MAP: return convertMapStrict( - (Map) value, type.type.getValueType(), fieldType); + (Map) value, type.type.getValueType(), fieldType, genericData); case UNION: - return convertUnionStrict(value, type.type, fieldType); + return convertUnionStrict(value, type.type, fieldType, genericData); case NULL: throw new IllegalArgumentException("Can't convert 'null' to non-nullable field"); @@ -1419,6 +1496,24 @@ private static org.apache.avro.Schema getFieldSchema( } } + /** + * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during + * conversion. 
+ * + * @param value {@link GenericRecord} or any nested value + * @param avroSchema schema for value + * @param fieldType target beam field type + * @return value converted for {@link Row} + */ + @SuppressWarnings("unchecked") + public static @PolyNull Object convertAvroFieldStrict( + @PolyNull Object value, + @Nonnull org.apache.avro.Schema avroSchema, + @Nonnull FieldType fieldType) { + return convertAvroFieldStrict( + value, avroSchema, fieldType, GENERIC_DATA_WITH_DEFAULT_CONVERSIONS); + } + private static Object convertRecordStrict(GenericRecord record, FieldType fieldType) { checkTypeName(fieldType.getTypeName(), TypeName.ROW, "record"); return toBeamRowStrict(record, fieldType.getRowSchema()); @@ -1495,7 +1590,10 @@ private static Object convertEnumStrict(Object value, FieldType fieldType) { } private static Object convertUnionStrict( - Object value, org.apache.avro.Schema unionAvroSchema, FieldType fieldType) { + Object value, + org.apache.avro.Schema unionAvroSchema, + FieldType fieldType, + GenericData genericData) { checkTypeName(fieldType.getTypeName(), TypeName.LOGICAL_TYPE, "oneOfType"); checkArgument( checkNotNull(fieldType.getLogicalType()).getIdentifier().equals(OneOfType.IDENTIFIER)); @@ -1503,19 +1601,24 @@ private static Object convertUnionStrict( int fieldNumber = GenericData.get().resolveUnion(unionAvroSchema, value); FieldType baseFieldType = oneOfType.getOneOfSchema().getField(fieldNumber).getType(); Object convertedValue = - convertAvroFieldStrict(value, unionAvroSchema.getTypes().get(fieldNumber), baseFieldType); + convertAvroFieldStrict( + value, unionAvroSchema.getTypes().get(fieldNumber), baseFieldType, genericData); return oneOfType.createValue(fieldNumber, convertedValue); } private static Object convertArrayStrict( - List values, org.apache.avro.Schema elemAvroSchema, FieldType fieldType) { + List values, + org.apache.avro.Schema elemAvroSchema, + FieldType fieldType, + GenericData genericData) { 
checkTypeName(fieldType.getTypeName(), TypeName.ARRAY, "array"); List ret = new ArrayList<>(values.size()); FieldType elemFieldType = fieldType.getCollectionElementType(); for (Object value : values) { - ret.add(convertAvroFieldStrict(value, elemAvroSchema, checkNotNull(elemFieldType))); + ret.add( + convertAvroFieldStrict(value, elemAvroSchema, checkNotNull(elemFieldType), genericData)); } return ret; @@ -1524,7 +1627,8 @@ private static Object convertArrayStrict( private static Object convertMapStrict( Map values, org.apache.avro.Schema valueAvroSchema, - FieldType fieldType) { + FieldType fieldType, + GenericData genericData) { checkTypeName(fieldType.getTypeName(), TypeName.MAP, "map"); FieldType mapKeyType = checkNotNull(fieldType.getMapKeyType()); FieldType mapValueType = checkNotNull(fieldType.getMapValueType()); @@ -1539,7 +1643,7 @@ private static Object convertMapStrict( for (Map.Entry value : values.entrySet()) { ret.put( convertStringStrict(value.getKey(), mapKeyType), - convertAvroFieldStrict(value.getValue(), valueAvroSchema, mapValueType)); + convertAvroFieldStrict(value.getValue(), valueAvroSchema, mapValueType, genericData)); } return ret; @@ -1563,4 +1667,46 @@ private static org.apache.avro.Schema buildHiveLogicalTypeSchema( hiveLogicalType, size); return new org.apache.avro.Schema.Parser().parse(schemaJson); } + + static GenericData getGenericData(SpecificRecordBase record) { + try { + return record.getSpecificData(); + } catch (NoSuchMethodError e) { + try { + // SpecificRecordBase.getSpecificData() was not available in avro 182 + return (GenericData) FieldUtils.readStaticField(record.getClass(), "MODEL$", true); + } catch (IllegalAccessException ex) { + throw new IllegalArgumentException( + "Unable to access MODEL$ field in SpecificRecordBase class", ex); + } + } + } + + private static T checkRawType( + Class desiredRawType, + Object value, + LogicalType logicalType, + Object rawType, + Conversion conversion, + Class convertedType) { + String 
msg = + String.format( + "Value %s of class %s is not a supported type for logical type %s (%s). " + + "Underlying avro built-in raw type should be instance of %s. " + + "However it is instance of %s and has value %s ." + + "Generic data has conversion %s, convertedType %s", + value, + value.getClass(), + logicalType.getName(), + logicalType, + desiredRawType, + rawType.getClass(), + rawType, + conversion, + convertedType); + if (!desiredRawType.isInstance(rawType)) { + throw new IllegalArgumentException(msg); + } + return (T) rawType; + } } diff --git a/sdks/java/extensions/avro/src/test/avro/org/apache/beam/sdk/extensions/avro/schemas/logicaltypes/logical-types.avsc b/sdks/java/extensions/avro/src/test/avro/org/apache/beam/sdk/extensions/avro/schemas/logicaltypes/logical-types.avsc new file mode 100644 index 000000000000..1fddfde068ff --- /dev/null +++ b/sdks/java/extensions/avro/src/test/avro/org/apache/beam/sdk/extensions/avro/schemas/logicaltypes/logical-types.avsc @@ -0,0 +1,123 @@ +{ + "type": "record", + "name": "LogicalTypesExample", + "namespace": "org.apache.beam.sdk.extensions.avro.schemas.logicaltypes", + "fields": [ + { + "name": "dateField", + "type": { + "type": "int", + "logicalType": "date" + } + }, + { + "name": "timeMillisField", + "type": { + "type": "int", + "logicalType": "time-millis" + } + }, + { + "name": "timeMicrosField", + "type": { + "type": "long", + "logicalType": "time-micros" + } + }, + { + "name": "timestampMillisField", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + } + }, + { + "name": "timestampMicrosField", + "type": { + "type": "long", + "logicalType": "timestamp-micros" + } + }, + { + "name": "localTimestampMillisField", + "type": { + "type": "long", + "logicalType": "local-timestamp-millis" + } + }, + { + "name": "localTimestampMicrosField", + "type": { + "type": "long", + "logicalType": "local-timestamp-micros" + } + }, + { + "name": "decimalSmall", + "type": { + "type": "bytes", + "logicalType": 
"decimal", + "precision": 5, + "scale": 2 + } + }, + { + "name": "decimalMedium", + "type": { + "type": "bytes", + "logicalType": "decimal", + "precision": 10, + "scale": 4 + } + }, + { + "name": "decimalLarge", + "type": { + "type": "bytes", + "logicalType": "decimal", + "precision": 20, + "scale": 6 + } + }, + { + "name": "fixedDecimalSmall", + "type": { + "type": "fixed", + "size": 4, + "logicalType": "decimal", + "precision": 6, + "scale": 2, + "name": "fixedDecimalSmall" + } + }, + { + "name": "fixedDecimalMedium", + "type": { + "type": "fixed", + "size": 8, + "logicalType": "decimal", + "precision": 14, + "scale": 4, + "name": "fixedDecimalMedium" + } + }, + { + "name": "fixedDecimalLarge", + "type": { + "type": "fixed", + "size": 12, + "logicalType": "decimal", + "precision": 22, + "scale": 6, + "name": "fixedDecimalLarge" + } + }, + { + "name": "uuidField", + "type": { + "type": "string", + "logicalType": "uuid" + } + } + ] +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtilsTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtilsTest.java index 85781c4b8d0e..7cda1e9dba5a 100644 --- a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtilsTest.java +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtilsTest.java @@ -27,20 +27,26 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; import java.sql.JDBCType; +import java.time.temporal.ChronoUnit; +import java.time.temporal.TemporalUnit; import java.util.List; import java.util.Map; +import java.util.UUID; import org.apache.avro.Conversions; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema.Type; +import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; import 
org.apache.avro.reflect.ReflectData; +import org.apache.avro.specific.SpecificRecordBase; import org.apache.avro.util.Utf8; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; import org.apache.beam.sdk.extensions.avro.io.AvroGeneratedUser; import org.apache.beam.sdk.extensions.avro.io.AvroGeneratedUserFactory; +import org.apache.beam.sdk.extensions.avro.schemas.logicaltypes.LogicalTypesExample; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.Schema.Field; import org.apache.beam.sdk.schemas.Schema.FieldType; @@ -60,6 +66,7 @@ import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.Nullable; import org.joda.time.DateTime; +import org.joda.time.DateTimeFieldType; import org.joda.time.DateTimeZone; import org.joda.time.Days; import org.joda.time.Instant; @@ -115,6 +122,157 @@ public void supportsAnyAvroSchema( } } + @Test + public void supportsAllLogicalTypes() { + + BigDecimal bigDecimalPrecision5Scale2 = new BigDecimal("123.45"); + BigDecimal bigDecimalPrecision10Scale4 = new BigDecimal("12345.6789"); + BigDecimal bigDecimalPrecision20Scale6 = new BigDecimal("1234567.123456"); + UUID uuid = java.util.UUID.fromString("aa5961a8-a14a-4e8c-91a9-e5d3f35389e8"); + + long timestampMicros = 1739543415001000L; + long timeMicros = 52215000500L; + + DateTime dateTime = new DateTime(2025, 2, 17, 0, 0, 0, DateTimeZone.UTC); + + SpecificRecordBase genericRecord = + getSpecificRecordWithLogicalTypes( + dateTime, + timeMicros, + timestampMicros, + bigDecimalPrecision5Scale2, + bigDecimalPrecision10Scale4, + bigDecimalPrecision20Scale6, + uuid); + + Row expected = + getRowWithLogicalTypes( + dateTime, + timeMicros, + timestampMicros, + bigDecimalPrecision5Scale2, + bigDecimalPrecision10Scale4, + bigDecimalPrecision20Scale6, + uuid); + + GenericData genericData; + switch (VERSION_AVRO) { + case "1.8.2": + // SpecificRecords generated with 1.8.2 have no 
registered conversions. Still this is a + // supported case, as the user can pass a GenericData with the appropriate conversions to + // AvroUtils.toBeamRowStrict. + // Basically GenericRecords can contain objects of any type, as long as the user provides + // the appropriate conversions. + genericData = new GenericData(); + genericData.addLogicalTypeConversion(new AvroJodaTimeConversions.DateConversion()); + genericData.addLogicalTypeConversion(new AvroJodaTimeConversions.TimeConversion()); + genericData.addLogicalTypeConversion(new AvroJodaTimeConversions.TimestampConversion()); + genericData.addLogicalTypeConversion(new Conversions.DecimalConversion()); + break; + case "1.9.2": + // SpecificRecords generated with 1.9.2 have some registered conversions, but not all. We + // can add the missing ones manually. + genericData = AvroUtils.getGenericData(genericRecord); + genericData.addLogicalTypeConversion(new AvroJavaTimeConversions.TimeMicrosConversion()); + genericData.addLogicalTypeConversion( + new AvroJavaTimeConversions.TimestampMicrosConversion()); + break; + default: + // SpecificRecords generated with 1.10.0+ have all conversions registered. Passing null to + // toBeamRowStrict ensures that the GenericData of the record is used as is. 
+ genericData = null; + } + + Row actual = AvroUtils.toBeamRowStrict(genericRecord, null, genericData); + + assertEquals(expected, actual); + } + + private static Row getRowWithLogicalTypes( + DateTime dateTime, + long timeMicros, + long timestampMicros, + BigDecimal bigDecimalPrecision5Scale2, + BigDecimal bigDecimalPrecision10Scale4, + BigDecimal bigDecimalPrecision20Scale6, + UUID uuid) { + return Row.withSchema(AvroUtils.toBeamSchema(LogicalTypesExample.getClassSchema())) + .withFieldValue("dateField", dateTime) + .withFieldValue("timeMillisField", (int) (timeMicros / 1000)) + .withFieldValue("timeMicrosField", timeMicros) + .withFieldValue("timestampMillisField", jodaInstant(timestampMicros)) + .withFieldValue("timestampMicrosField", timestampMicros) + .withFieldValue("localTimestampMillisField", timestampMicros / 1000) + .withFieldValue("localTimestampMicrosField", timestampMicros) + .withFieldValue("decimalSmall", bigDecimalPrecision5Scale2) + .withFieldValue("decimalMedium", bigDecimalPrecision10Scale4) + .withFieldValue("decimalLarge", bigDecimalPrecision20Scale6) + .withFieldValue("fixedDecimalSmall", bigDecimalPrecision5Scale2) + .withFieldValue("fixedDecimalMedium", bigDecimalPrecision10Scale4) + .withFieldValue("fixedDecimalLarge", bigDecimalPrecision20Scale6) + .withFieldValue("uuidField", uuid.toString()) + .build(); + } + + private static LogicalTypesExample getSpecificRecordWithLogicalTypes( + org.joda.time.DateTime dateTime, + long timeMicros, + long timestampMicros, + BigDecimal bigDecimalPrecision5Scale2, + BigDecimal bigDecimalPrecision10Scale4, + BigDecimal bigDecimalPrecision20Scale6, + UUID uuid) { + + java.time.LocalDate localDate = + java.time.LocalDate.of( + dateTime.get(DateTimeFieldType.year()), + dateTime.get(DateTimeFieldType.monthOfYear()), + dateTime.get(DateTimeFieldType.dayOfMonth())); + LogicalTypesExample r = new LogicalTypesExample(); + + if (VERSION_AVRO.equals("1.8.2")) { + // Avro 1.8.2 does not support java.time, must use 
joda time + r.put("dateField", dateTime.toLocalDate()); + r.put("timeMillisField", jodaLocalTime(timeMicros)); + r.put("timeMicrosField", timeMicros); + r.put("timestampMillisField", jodaInstant(timestampMicros).toDateTime()); + r.put("timestampMicrosField", timestampMicros); + } else { + r.put("dateField", localDate); + r.put("timeMillisField", javaLocalTime(timeMicros, ChronoUnit.MILLIS)); + r.put("timeMicrosField", javaLocalTime(timeMicros, ChronoUnit.MICROS)); + r.put("timestampMillisField", javaInstant(timestampMicros, ChronoUnit.MILLIS)); + r.put("timestampMicrosField", javaInstant(timestampMicros, ChronoUnit.MICROS)); + } + if (VERSION_AVRO.equals("1.8.2") || VERSION_AVRO.equals("1.9.2")) { + // local-timestamp-millis and local-timestamp-micros only in 1.10.0+ + r.put("localTimestampMillisField", timestampMicros / 1000); + r.put("localTimestampMicrosField", timestampMicros); + } else { + r.put( + "localTimestampMillisField", javaLocalDateTimeAtUtc(timestampMicros, ChronoUnit.MILLIS)); + r.put( + "localTimestampMicrosField", javaLocalDateTimeAtUtc(timestampMicros, ChronoUnit.MICROS)); + } + + r.put("decimalSmall", bigDecimalPrecision5Scale2); + r.put("decimalMedium", bigDecimalPrecision10Scale4); + r.put("decimalLarge", bigDecimalPrecision20Scale6); + r.put("fixedDecimalSmall", bigDecimalPrecision5Scale2); + r.put("fixedDecimalMedium", bigDecimalPrecision10Scale4); + r.put("fixedDecimalLarge", bigDecimalPrecision20Scale6); + + try { + r.put("uuidField", uuid.toString()); + } catch (ClassCastException e) { + // the avro tools version used by gradle-avro-plugin is more recent and uses UUID, while the + // ones used for backward compatibility tests (1.8.2, 1.9.2 and 1.10.2) use CharSequence + r.put("uuidField", uuid); + } + + return r; + } + @Property(trials = 1000) @SuppressWarnings("unchecked") public void avroToBeamRoundTrip( @@ -356,6 +514,28 @@ private static GenericRecord getGenericRecord() { .build(); } + private static java.time.Instant javaInstant(long 
micros, TemporalUnit temporalUnit) { + return java.time.Instant.ofEpochSecond(micros / 1000000, micros * 1000 % 1000000000) + .truncatedTo(temporalUnit); + } + + private static java.time.LocalDateTime javaLocalDateTimeAtUtc( + long micros, TemporalUnit temporalUnit) { + return javaInstant(micros, temporalUnit).atOffset(java.time.ZoneOffset.UTC).toLocalDateTime(); + } + + private static org.joda.time.Instant jodaInstant(long micros) { + return org.joda.time.Instant.ofEpochMilli(micros / 1000); + } + + private static java.time.LocalTime javaLocalTime(long micros, TemporalUnit temporalUnit) { + return java.time.LocalTime.ofNanoOfDay(micros * 1000).truncatedTo(temporalUnit); + } + + private static org.joda.time.LocalTime jodaLocalTime(long micros) { + return org.joda.time.LocalTime.fromMillisOfDay(micros / 1000); + } + @Test public void testFromAvroSchema() { assertEquals(getBeamSchema(), AvroUtils.toBeamSchema(getAvroSchema())); diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java index 458f4ad21382..9e126ca9bb35 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java @@ -53,6 +53,7 @@ import org.checkerframework.checker.nullness.qual.Nullable; import org.joda.time.DateTimeUtils.MillisProvider; import org.joda.time.Duration; +import org.joda.time.Instant; import org.joda.time.format.PeriodFormatter; import org.joda.time.format.PeriodFormatterBuilder; import org.slf4j.Logger; @@ -266,8 +267,10 @@ public class ExecutionStateTracker implements BundleProgressReporter { private final AtomicReference<@Nullable String> processBundleId; // Read by multiple threads, written by the bundle processing thread lazily. 
private final AtomicReference<@Nullable Thread> trackedThread; + // Read by multiple threads, written by start. + private final AtomicLong startTimeMillis; // Read by multiple threads, read and written by the ExecutionStateSampler thread lazily. - private final AtomicLong lastTransitionTime; + private final AtomicLong lastTransitionTimeMillis; // Used to throttle lull logging. private long lastLullReport; // Read and written by the bundle processing thread frequently. @@ -291,7 +294,8 @@ private ExecutionStateTracker() { this.metricsContainerRegistry = new MetricsContainerStepMap(); this.executionStates = new ArrayList<>(); this.trackedThread = new AtomicReference<>(); - this.lastTransitionTime = new AtomicLong(); + this.startTimeMillis = new AtomicLong(); + this.lastTransitionTimeMillis = new AtomicLong(); this.numTransitionsLazy = new AtomicLong(); this.currentStateLazy = new AtomicReference<>(); this.processBundleId = new AtomicReference<>(); @@ -349,10 +353,10 @@ private void takeSample(long currentTimeMillis, long millisSinceLastSample) { long transitionsAtThisSample = numTransitionsLazy.get(); if (transitionsAtThisSample != transitionsAtLastSample) { - lastTransitionTime.lazySet(currentTimeMillis); + lastTransitionTimeMillis.lazySet(currentTimeMillis); transitionsAtLastSample = transitionsAtThisSample; } else { - long lullTimeMs = currentTimeMillis - lastTransitionTime.get(); + long lullTimeMs = currentTimeMillis - lastTransitionTimeMillis.get(); if (lullTimeMs > MAX_LULL_TIME_MS) { if (lullTimeMs < lastLullReport // This must be a new report. || lullTimeMs > 1.2 * lastLullReport // Exponential backoff. 
@@ -399,20 +403,23 @@ private void takeSample(long currentTimeMillis, long millisSinceLastSample) { if (thread == null) { return null; } - long lastTransitionTimeMs = lastTransitionTime.get(); + long startTimeMillisSnapshot = startTimeMillis.get(); + long lastTransitionTimeMillisSnapshot = lastTransitionTimeMillis.get(); // We are actively processing a bundle but may have not yet entered into a state. ExecutionStateImpl current = currentStateLazy.get(); + @Nullable String id = null; + @Nullable String name = null; if (current != null) { - return ExecutionStateTrackerStatus.create( - current.ptransformId, - current.ptransformUniqueName, - thread, - lastTransitionTimeMs, - processBundleId.get()); - } else { - return ExecutionStateTrackerStatus.create( - null, null, thread, lastTransitionTimeMs, processBundleId.get()); + id = current.ptransformId; + name = current.ptransformUniqueName; } + return ExecutionStateTrackerStatus.create( + id, + name, + thread, + Instant.ofEpochMilli(startTimeMillisSnapshot), + Instant.ofEpochMilli(lastTransitionTimeMillisSnapshot), + processBundleId.get()); } /** Returns the ptransform id of the currently executing thread. 
*/ @@ -525,7 +532,9 @@ public boolean error() { public void start(String processBundleId) { BeamFnLoggingMDC.setStateTracker(this); this.processBundleId.lazySet(processBundleId); - this.lastTransitionTime.lazySet(clock.getMillis()); + long nowMillis = clock.getMillis(); + this.startTimeMillis.lazySet(nowMillis); + this.lastTransitionTimeMillis.lazySet(nowMillis); this.trackedThread.lazySet(Thread.currentThread()); synchronized (activeStateTrackers) { activeStateTrackers.add(this); @@ -561,9 +570,10 @@ public void reset() { } this.processBundleId.lazySet(null); this.trackedThread.lazySet(null); + this.startTimeMillis.lazySet(0); this.numTransitions = 0; this.numTransitionsLazy.lazySet(0); - this.lastTransitionTime.lazySet(0); + this.lastTransitionTimeMillis.lazySet(0); this.metricsContainerRegistry.reset(); this.inErrorState = false; BeamFnLoggingMDC.setStateTracker(null); @@ -576,10 +586,16 @@ public static ExecutionStateTrackerStatus create( @Nullable String ptransformId, @Nullable String ptransformUniqueName, Thread trackedThread, - long lastTransitionTimeMs, + Instant startTime, + Instant lastTransitionTime, @Nullable String processBundleId) { return new AutoValue_ExecutionStateSampler_ExecutionStateTrackerStatus( - ptransformId, ptransformUniqueName, trackedThread, lastTransitionTimeMs, processBundleId); + ptransformId, + ptransformUniqueName, + trackedThread, + startTime, + lastTransitionTime, + processBundleId); } public abstract @Nullable String getPTransformId(); @@ -588,7 +604,9 @@ public static ExecutionStateTrackerStatus create( public abstract Thread getTrackedThread(); - public abstract long getLastTransitionTimeMillis(); + public abstract Instant getStartTime(); + + public abstract Instant getLastTransitionTime(); public abstract @Nullable String getProcessBundleId(); } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/status/BeamFnStatusClient.java 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/status/BeamFnStatusClient.java index 9a12a63c81d4..0b35a2c7d5b1 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/status/BeamFnStatusClient.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/status/BeamFnStatusClient.java @@ -41,8 +41,10 @@ import org.apache.beam.vendor.grpc.v1p69p0.io.grpc.ManagedChannel; import org.apache.beam.vendor.grpc.v1p69p0.io.grpc.stub.StreamObserver; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Streams; import org.checkerframework.checker.nullness.qual.Nullable; import org.joda.time.DateTimeUtils; +import org.joda.time.Duration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -175,7 +177,10 @@ String getCacheStats() { static class BundleState { final String instruction; final String trackedThreadName; - final long timeSinceTransition; + + final Duration timeSinceStart; + + final Duration timeSinceTransition; public String getInstruction() { return instruction; @@ -185,13 +190,22 @@ public String getTrackedThreadName() { return trackedThreadName; } - public long getTimeSinceTransition() { + public Duration getTimeSinceStart() { + return timeSinceStart; + } + + public Duration getTimeSinceTransition() { return timeSinceTransition; } - public BundleState(String instruction, String trackedThreadName, long timeSinceTransition) { + public BundleState( + String instruction, + String trackedThreadName, + Duration timeSinceStart, + Duration timeSinceTransition) { this.instruction = instruction; this.trackedThreadName = trackedThreadName; + this.timeSinceStart = timeSinceStart; this.timeSinceTransition = timeSinceTransition; } } @@ -200,10 +214,12 @@ public BundleState(String instruction, String trackedThreadName, long timeSinceT String getActiveProcessBundleState() { StringJoiner activeBundlesState = new 
StringJoiner("\n"); activeBundlesState.add("========== ACTIVE PROCESSING BUNDLES =========="); + if (processBundleCache.getActiveBundleProcessors().isEmpty()) { activeBundlesState.add("No active processing bundles."); } else { List bundleStates = new ArrayList<>(); + long nowMillis = DateTimeUtils.currentTimeMillis(); processBundleCache.getActiveBundleProcessors().entrySet().stream() .forEach( instructionAndBundleProcessor -> { @@ -215,24 +231,46 @@ String getActiveProcessBundleState() { new BundleState( instructionAndBundleProcessor.getKey(), executionStateTrackerStatus.getTrackedThread().getName(), - DateTimeUtils.currentTimeMillis() - - executionStateTrackerStatus.getLastTransitionTimeMillis())); + Duration.millis( + nowMillis - executionStateTrackerStatus.getStartTime().getMillis()), + Duration.millis( + nowMillis + - executionStateTrackerStatus + .getLastTransitionTime() + .getMillis()))); } }); - bundleStates.stream() - // reverse sort active bundle by time since last transition. - .sorted(Comparator.comparing(BundleState::getTimeSinceTransition).reversed()) - .limit(10) // only keep top 10 + activeBundlesState.add( + String.format("%d total bundles, showing selected slowest", bundleStates.size())); + // Keep the 10 oldest bundles and the 10 bundles that have been in their current step the + // longest. This will help debugging bundles that are taking a long time but changing steps + // frequently as well as steps that are stuck processing. + Streams.concat( + bundleStates.stream() + // reverse sort active bundle by time since bundle start. + .sorted(Comparator.comparing(BundleState::getTimeSinceStart).reversed()) + .limit(10), // only keep top 10, + bundleStates.stream() + // reverse sort active bundle by time since last transition. 
+ .sorted(Comparator.comparing(BundleState::getTimeSinceTransition).reversed()) + .limit(10) // only keep top 10 + ) + .sorted(Comparator.comparing(BundleState::getTimeSinceStart).reversed()) + .distinct() .forEachOrdered( bundleState -> { activeBundlesState.add( String.format("---- Instruction %s ----", bundleState.getInstruction())); activeBundlesState.add( String.format("Tracked thread: %s", bundleState.getTrackedThreadName())); + activeBundlesState.add( + String.format( + "Time since start: %.2f seconds", + bundleState.getTimeSinceStart().getMillis() / 1000.0)); activeBundlesState.add( String.format( "Time since transition: %.2f seconds%n", - bundleState.getTimeSinceTransition() / 1000.0)); + bundleState.getTimeSinceTransition().getMillis() / 1000.0)); }); } return activeBundlesState.toString(); diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java index 4938d9eec56c..b5a860704a8b 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java @@ -170,13 +170,15 @@ public Long answer(InvocationOnMock invocation) throws Throwable { assertEquals("ptransformIdName2", activeBundleStatus2.getPTransformUniqueName()); assertEquals(Thread.currentThread(), activeBundleStatus1.getTrackedThread()); assertEquals(Thread.currentThread(), activeBundleStatus2.getTrackedThread()); + assertThat(activeBundleStatus1.getStartTime().getMillis(), equalTo(1L)); + assertThat(activeBundleStatus2.getStartTime().getMillis(), equalTo(1L)); assertThat( - activeBundleStatus1.getLastTransitionTimeMillis(), + activeBundleStatus1.getLastTransitionTime().getMillis(), // Because we are using lazySet, we aren't guaranteed to see the latest value // but we should definitely be seeing a value that 
isn't zero equalTo(1L)); assertThat( - activeBundleStatus2.getLastTransitionTimeMillis(), + activeBundleStatus2.getLastTransitionTime().getMillis(), // Internal implementation has this be equal to the second value we return (2 * 100L) equalTo(1L)); @@ -197,11 +199,11 @@ public Long answer(InvocationOnMock invocation) throws Throwable { assertEquals(Thread.currentThread(), activeStateStatus1.getTrackedThread()); assertEquals(Thread.currentThread(), activeStateStatus2.getTrackedThread()); assertThat( - activeStateStatus1.getLastTransitionTimeMillis(), - greaterThan(activeBundleStatus1.getLastTransitionTimeMillis())); + activeStateStatus1.getLastTransitionTime(), + greaterThan(activeBundleStatus1.getLastTransitionTime())); assertThat( - activeStateStatus2.getLastTransitionTimeMillis(), - greaterThan(activeBundleStatus2.getLastTransitionTimeMillis())); + activeStateStatus2.getLastTransitionTime(), + greaterThan(activeBundleStatus2.getLastTransitionTime())); // Validate intermediate monitoring data Map intermediateResults1 = new HashMap<>(); @@ -242,12 +244,14 @@ public Long answer(InvocationOnMock invocation) throws Throwable { assertNull(inactiveStateStatus2.getPTransformUniqueName()); assertEquals(Thread.currentThread(), inactiveStateStatus1.getTrackedThread()); assertEquals(Thread.currentThread(), inactiveStateStatus2.getTrackedThread()); + assertEquals(inactiveStateStatus1.getStartTime(), activeStateStatus1.getStartTime()); + assertEquals(inactiveStateStatus2.getStartTime(), activeStateStatus2.getStartTime()); assertThat( - inactiveStateStatus1.getLastTransitionTimeMillis(), - greaterThan(activeStateStatus1.getLastTransitionTimeMillis())); + inactiveStateStatus1.getLastTransitionTime(), + greaterThan(activeStateStatus1.getLastTransitionTime())); assertThat( - inactiveStateStatus2.getLastTransitionTimeMillis(), - greaterThan(activeStateStatus1.getLastTransitionTimeMillis())); + inactiveStateStatus2.getLastTransitionTime(), + 
greaterThan(activeStateStatus1.getLastTransitionTime())); // Validate the final monitoring data Map finalResults1 = new HashMap<>(); diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/status/BeamFnStatusClientTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/status/BeamFnStatusClientTest.java index 63c9891b7c25..1a9492e1054b 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/status/BeamFnStatusClientTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/status/BeamFnStatusClientTest.java @@ -51,6 +51,7 @@ import org.apache.beam.vendor.grpc.v1p69p0.io.grpc.inprocess.InProcessServerBuilder; import org.apache.beam.vendor.grpc.v1p69p0.io.grpc.stub.StreamObserver; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.Uninterruptibles; +import org.joda.time.Instant; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -68,14 +69,19 @@ public void testActiveBundleState() { ProcessBundleHandler handler = mock(ProcessBundleHandler.class); BundleProcessorCache processorCache = mock(BundleProcessorCache.class); Map bundleProcessorMap = new HashMap<>(); - for (int i = 0; i < 11; i++) { + for (int i = 0; i < 20; i++) { BundleProcessor processor = mock(BundleProcessor.class); ExecutionStateTracker executionStateTracker = mock(ExecutionStateTracker.class); when(processor.getStateTracker()).thenReturn(executionStateTracker); when(executionStateTracker.getStatus()) .thenReturn( ExecutionStateTrackerStatus.create( - "ptransformId", "ptransformIdName", Thread.currentThread(), i * 1000, null)); + "ptransformId", + "ptransformIdName", + Thread.currentThread(), + Instant.ofEpochMilli(((i + 7) % 20) * 100), + Instant.ofEpochMilli(i * 1000), + null)); String instruction = Integer.toString(i); when(processorCache.find(instruction)).thenReturn(processor); bundleProcessorMap.put(instruction, processor); @@ -95,11 +101,17 @@ public void 
testActiveBundleState() { joiner.add(client.getActiveProcessBundleState()); String actualState = joiner.toString(); + // The page should only contain the 10 oldest and 10 longest transitions. + // That is 0-9 for last transition and 0-2 + 13-19 for start time. List expectedInstructions = new ArrayList<>(); + for (int i = 13; i < 20; i++) { + expectedInstructions.add(String.format("Instruction %d", i)); + } for (int i = 0; i < 10; i++) { expectedInstructions.add(String.format("Instruction %d", i)); } assertThat(actualState, stringContainsInOrder(expectedInstructions)); + assertThat(actualState, containsString("20 total bundles")); assertThat(actualState, not(containsString("Instruction 10"))); } diff --git a/sdks/java/io/google-ads/build.gradle b/sdks/java/io/google-ads/build.gradle index 037fcbf28a6a..0796c412a65f 100644 --- a/sdks/java/io/google-ads/build.gradle +++ b/sdks/java/io/google-ads/build.gradle @@ -23,6 +23,8 @@ description = "Apache Beam :: SDKs :: Java :: IO :: Google Ads" ext.summary = "IO to read from Google Ads" dependencies { + implementation enforcedPlatform(library.java.google_cloud_platform_libraries_bom) + implementation project(path: ":sdks:java:core", configuration: "shadow") implementation project(path: ":sdks:java:extensions:google-cloud-platform-core") implementation library.java.jackson_annotations @@ -31,8 +33,8 @@ dependencies { implementation library.java.google_auth_library_oauth2_http implementation library.java.protobuf_java implementation library.java.protobuf_java_util - implementation library.java.google_ads - implementation library.java.google_ads_stubs + implementation "com.google.api-ads:google-ads:36.0.0" + implementation "com.google.api-ads:google-ads-stubs-v19:36.0.0" implementation library.java.joda_time implementation library.java.vendored_guava_32_1_2_jre testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") diff --git 
a/sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsIO.java b/sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsIO.java index c36e75667e7c..d90c7751e6b7 100644 --- a/sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsIO.java +++ b/sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsIO.java @@ -17,17 +17,84 @@ */ package org.apache.beam.sdk.io.googleads; +import com.google.protobuf.Message; +import java.io.Serializable; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.values.PCollection; +import org.checkerframework.checker.nullness.qual.Nullable; + /** * {@link GoogleAdsIO} provides an API for reading from the Google Ads API over supported * versions of the Google Ads client libraries. * - * @see GoogleAdsV17 + * @see GoogleAdsV19 */ -public class GoogleAdsIO { - private GoogleAdsIO() {} +public abstract class GoogleAdsIO { + + @SuppressWarnings( + "TypeParameterUnusedInFormals") // for source code backward compatible when underlying API + // version changed + public abstract , PCollection>> T read(); + + @SuppressWarnings( + "TypeParameterUnusedInFormals") // for source code backward compatible when underlying API + // version changed + public abstract < + T extends + PTransform, PCollection>> + T readAll(); + + public static GoogleAdsV19 current() { + return GoogleAdsV19.INSTANCE; + } + + /** + * Implement this interface to create a {@link RateLimitPolicy}. This should be used to limit all + * traffic sent to the Google Ads API for a pair of developer token and customer ID and any other + * relevant attributes for the specific Google Ads API service being called. + */ + public interface RateLimitPolicyFactory extends Serializable { + RateLimitPolicy getRateLimitPolicy(); + } + + /** + * This interface can be used to implement custom client-side rate limiting policies. 
Custom + * policies should follow best practices for interacting with the Google Ads API. + * + * @see Best + * Practices in the Google Ads documentation + */ + public interface RateLimitPolicy { + /** + * Called before a request is sent. + * + * @param developerToken The developer token used for the request. + * @param customerId The customer ID specified on the request. + * @param request Any Google Ads API request. + * @throws InterruptedException + */ + void onBeforeRequest(@Nullable String developerToken, String customerId, Message request) + throws InterruptedException; + + /** + * Called after a request succeeds. + * + * @param developerToken The developer token used for the request. + * @param customerId The customer ID specified on the request. + * @param request Any Google Ads API request. + */ + void onSuccess(@Nullable String developerToken, String customerId, Message request); - public static GoogleAdsV17 v17() { - return GoogleAdsV17.INSTANCE; + /** + * Called after a request fails with a retryable error. + * + * @param developerToken The developer token used for the request. + * @param customerId The customer ID specified on the request. + * @param request Any Google Ads API request. + * @param error A retryable error. 
+ */ + void onError( + @Nullable String developerToken, String customerId, Message request, GoogleAdsErrorT error); } } diff --git a/sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsV17.java b/sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsV19.java similarity index 83% rename from sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsV17.java rename to sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsV19.java index df6c09b5f705..11f11ea5f8c7 100644 --- a/sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsV17.java +++ b/sdks/java/io/google-ads/src/main/java/org/apache/beam/sdk/io/googleads/GoogleAdsV19.java @@ -22,20 +22,19 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; import com.google.ads.googleads.lib.GoogleAdsClient; -import com.google.ads.googleads.v17.errors.GoogleAdsError; -import com.google.ads.googleads.v17.errors.GoogleAdsException; -import com.google.ads.googleads.v17.errors.GoogleAdsFailure; -import com.google.ads.googleads.v17.errors.InternalErrorEnum; -import com.google.ads.googleads.v17.errors.QuotaErrorEnum; -import com.google.ads.googleads.v17.services.GoogleAdsRow; -import com.google.ads.googleads.v17.services.GoogleAdsServiceClient; -import com.google.ads.googleads.v17.services.SearchGoogleAdsStreamRequest; -import com.google.ads.googleads.v17.services.SearchGoogleAdsStreamResponse; +import com.google.ads.googleads.v19.errors.GoogleAdsError; +import com.google.ads.googleads.v19.errors.GoogleAdsException; +import com.google.ads.googleads.v19.errors.GoogleAdsFailure; +import com.google.ads.googleads.v19.errors.InternalErrorEnum; +import com.google.ads.googleads.v19.errors.QuotaErrorEnum; +import com.google.ads.googleads.v19.services.GoogleAdsRow; +import com.google.ads.googleads.v19.services.GoogleAdsServiceClient; +import 
com.google.ads.googleads.v19.services.SearchGoogleAdsStreamRequest; +import com.google.ads.googleads.v19.services.SearchGoogleAdsStreamResponse; import com.google.auto.value.AutoValue; import com.google.protobuf.Message; import com.google.protobuf.util.Durations; import java.io.IOException; -import java.io.Serializable; import java.util.Optional; import java.util.concurrent.TimeUnit; import org.apache.beam.sdk.options.PipelineOptions; @@ -58,7 +57,10 @@ import org.joda.time.Duration; /** - * {@link GoogleAdsV17} provides an API to read Google Ads API v17 reports. + * {@link GoogleAdsV19} provides an API to read Google Ads API v19 reports. + * + *

No backward compatibility guaranteed. Do not use directly. Use {@link GoogleAdsIO#current()} + * to access GoogleAdsIO instead. * *

The Google Ads API does not use service account credentials in the same way as Google Cloud * Platform APIs do. Service account credentials are typically only used to delegate (using @@ -77,15 +79,15 @@ * --googleAdsDeveloperToken=your-developer-token * * - *

Use {@link GoogleAdsV17#read()} to read either a bounded or unbounded {@link PCollection} of + *

Use {@link GoogleAdsV19#read()} to read either a bounded or unbounded {@link PCollection} of * {@link GoogleAdsRow} from a single Google Ads Query * Language query using {@link Read#withQuery(String)} and a {@link PCollection} of customer - * IDs. Alternatively, use {@link GoogleAdsV17#readAll()} to read either a bounded or unbounded + * IDs. Alternatively, use {@link GoogleAdsV19#readAll()} to read either a bounded or unbounded * {@link PCollection} of {@link GoogleAdsRow} from a {@link PCollection} of {@link * SearchGoogleAdsStreamRequest} potentially containing many different queries. * - *

For example, using {@link GoogleAdsV17#read()}: + *

For example, using {@link GoogleAdsV19#read()}: * *

{@code
  * Pipeline p = Pipeline.create();
@@ -93,7 +95,7 @@
  *     p.apply(Create.of(Long.toString(1234567890L)));
  * PCollection rows =
  *     customerIds.apply(
- *         GoogleAdsIO.v17()
+ *         GoogleAdsIO.current()
  *             .read()
  *             .withRateLimitPolicy(MY_RATE_LIMIT_POLICY)
  *             .withQuery(
@@ -105,7 +107,7 @@
  * p.run();
  * }
* - *

Alternatively, using {@link GoogleAdsV17#readAll()} to execute requests from a {@link + *

Alternatively, using {@link GoogleAdsV19#readAll()} to execute requests from a {@link * PCollection} of {@link SearchGoogleAdsStreamRequest}: * *

{@code
@@ -124,13 +126,13 @@
  *                             + "FROM campaign")
  *                     .build())));
  * PCollection rows =
- *     requests.apply(GoogleAdsIO.v17().readAll().withRateLimitPolicy(MY_RATE_LIMIT_POLICY));
+ *     requests.apply(GoogleAdsIO.current().readAll().withRateLimitPolicy(MY_RATE_LIMIT_POLICY));
  * p.run();
  * }
* *

Client-side rate limiting

* - * On construction of a {@link GoogleAdsV17#read()} or {@link GoogleAdsV17#readAll()} transform a + * On construction of a {@link GoogleAdsV19#read()} or {@link GoogleAdsV19#readAll()} transform a * rate limiting policy must be specified to stay well under the assigned quota for the Google Ads * API. The Google Ads API enforces global rate limits from the developer token down to the customer * ID and depending on the access level of the developer token a limit on the total number of @@ -154,24 +156,26 @@ * Functionality and Rate * sheet & non-compliance fees in the Google Ads API documentation for more details. * - * @see GoogleAdsIO#v17() + * @see GoogleAdsIO#current() * @see GoogleAdsOptions * @see Best * Practices in the Google Ads documentation */ -public class GoogleAdsV17 { - static final GoogleAdsV17 INSTANCE = new GoogleAdsV17(); +public class GoogleAdsV19 extends GoogleAdsIO { + static final GoogleAdsV19 INSTANCE = new GoogleAdsV19(); - private GoogleAdsV17() {} + private GoogleAdsV19() {} + @Override public Read read() { - return new AutoValue_GoogleAdsV17_Read.Builder() + return new AutoValue_GoogleAdsV19_Read.Builder() .setGoogleAdsClientFactory(DefaultGoogleAdsClientFactory.getInstance()) .build(); } + @Override public ReadAll readAll() { - return new AutoValue_GoogleAdsV17_ReadAll.Builder() + return new AutoValue_GoogleAdsV19_ReadAll.Builder() .setGoogleAdsClientFactory(DefaultGoogleAdsClientFactory.getInstance()) .build(); } @@ -180,7 +184,7 @@ public ReadAll readAll() { * A {@link PTransform} that reads the results of a Google Ads query as {@link GoogleAdsRow} * objects. 
* - * @see GoogleAdsIO#v17() + * @see GoogleAdsIO#current() * @see #readAll() */ @AutoValue @@ -194,7 +198,7 @@ public abstract static class Read abstract GoogleAdsClientFactory getGoogleAdsClientFactory(); - abstract @Nullable RateLimitPolicyFactory getRateLimitPolicyFactory(); + abstract @Nullable RateLimitPolicyFactory getRateLimitPolicyFactory(); abstract Builder toBuilder(); @@ -208,7 +212,8 @@ abstract static class Builder { abstract Builder setGoogleAdsClientFactory(GoogleAdsClientFactory googleAdsClientFactory); - abstract Builder setRateLimitPolicyFactory(RateLimitPolicyFactory rateLimitPolicyFactory); + abstract Builder setRateLimitPolicyFactory( + RateLimitPolicyFactory rateLimitPolicyFactory); abstract Read build(); } @@ -282,7 +287,7 @@ public Read withGoogleAdsClientFactory(GoogleAdsClientFactory googleAdsClientFac * @return A new {@link Read} transform with the specified rate limit policy factory. * @see GoogleAdsClient */ - public Read withRateLimitPolicy(RateLimitPolicyFactory rateLimitPolicyFactory) { + public Read withRateLimitPolicy(RateLimitPolicyFactory rateLimitPolicyFactory) { checkArgumentNotNull(rateLimitPolicyFactory, "rateLimitPolicyFactory cannot be null"); return toBuilder().setRateLimitPolicyFactory(rateLimitPolicyFactory).build(); @@ -291,7 +296,7 @@ public Read withRateLimitPolicy(RateLimitPolicyFactory rateLimitPolicyFactory) { @Override public PCollection expand(PCollection input) { String query = getQuery(); - RateLimitPolicyFactory rateLimitPolicyFactory = getRateLimitPolicyFactory(); + RateLimitPolicyFactory rateLimitPolicyFactory = getRateLimitPolicyFactory(); checkArgumentNotNull(query, "withQuery() is required"); checkArgumentNotNull(rateLimitPolicyFactory, "withRateLimitPolicy() is required"); @@ -325,7 +330,7 @@ public void populateDisplayData(DisplayData.Builder builder) { * A {@link PTransform} that reads the results of many {@link SearchGoogleAdsStreamRequest} * objects as {@link GoogleAdsRow} objects. 
* * - * @see GoogleAdsIO#v17() + * @see GoogleAdsIO#current() * @see #readAll() */ @AutoValue @@ -337,7 +342,7 @@ public abstract static class ReadAll abstract GoogleAdsClientFactory getGoogleAdsClientFactory(); - abstract @Nullable RateLimitPolicyFactory getRateLimitPolicyFactory(); + abstract @Nullable RateLimitPolicyFactory getRateLimitPolicyFactory(); abstract Builder toBuilder(); @@ -349,7 +354,8 @@ abstract static class Builder { abstract Builder setGoogleAdsClientFactory(GoogleAdsClientFactory googleAdsClientFactory); - abstract Builder setRateLimitPolicyFactory(RateLimitPolicyFactory rateLimitPolicyFactory); + abstract Builder setRateLimitPolicyFactory( + RateLimitPolicyFactory rateLimitPolicyFactory); abstract ReadAll build(); } @@ -408,7 +414,8 @@ public ReadAll withGoogleAdsClientFactory(GoogleAdsClientFactory googleAdsClient * @return A new {@link ReadAll} transform with the specified rate limit policy factory. * @see GoogleAdsClient */ - public ReadAll withRateLimitPolicy(RateLimitPolicyFactory rateLimitPolicyFactory) { + public ReadAll withRateLimitPolicy( + RateLimitPolicyFactory rateLimitPolicyFactory) { checkArgumentNotNull(rateLimitPolicyFactory, "rateLimitPolicyFactory cannot be null"); return toBuilder().setRateLimitPolicyFactory(rateLimitPolicyFactory).build(); @@ -443,13 +450,13 @@ static class ReadAllFn extends DoFn @VisibleForTesting static Sleeper sleeper = Sleeper.DEFAULT; - private final GoogleAdsV17.ReadAll spec; + private final GoogleAdsV19.ReadAll spec; private transient @Nullable GoogleAdsClient googleAdsClient; private transient @Nullable GoogleAdsServiceClient googleAdsServiceClient; - private transient @Nullable RateLimitPolicy rateLimitPolicy; + private transient @Nullable RateLimitPolicy rateLimitPolicy; - ReadAllFn(GoogleAdsV17.ReadAll spec) { + ReadAllFn(GoogleAdsV19.ReadAll spec) { this.spec = spec; } @@ -463,8 +470,8 @@ public void setup(PipelineOptions options) { .newGoogleAdsClient( adsOptions, spec.getDeveloperToken(), 
null, spec.getLoginCustomerId()); final GoogleAdsServiceClient googleAdsServiceClient = - googleAdsClient.getVersion17().createGoogleAdsServiceClient(); - final RateLimitPolicy rateLimitPolicy = + googleAdsClient.getVersion19().createGoogleAdsServiceClient(); + final RateLimitPolicy rateLimitPolicy = checkStateNotNull(spec.getRateLimitPolicyFactory()).getRateLimitPolicy(); this.googleAdsClient = googleAdsClient; @@ -477,7 +484,7 @@ public void setup(PipelineOptions options) { public void processElement(ProcessContext c) throws IOException, InterruptedException { final GoogleAdsClient googleAdsClient = this.googleAdsClient; final GoogleAdsServiceClient googleAdsServiceClient = this.googleAdsServiceClient; - final RateLimitPolicy rateLimitPolicy = this.rateLimitPolicy; + final RateLimitPolicy rateLimitPolicy = this.rateLimitPolicy; BackOff backoff = BACKOFF.backoff(); BackOff nextBackoff = backoff; @@ -565,81 +572,32 @@ private Optional findFirstRetryableError(GoogleAdsFailure e) { } } - /** - * Implement this interface to create a {@link RateLimitPolicy}. This should be used to limit all - * traffic sent to the Google Ads API for a pair of developer token and customer ID and any other - * relevant attributes for the specific Google Ads API service being called. - */ - public interface RateLimitPolicyFactory extends Serializable { - RateLimitPolicy getRateLimitPolicy(); - } - - /** - * This interface can be used to implement custom client-side rate limiting policies. Custom - * policies should follow best practices for interacting with the Google Ads API. - * - * @see Best - * Practices in the Google Ads documentation - */ - public interface RateLimitPolicy { - /** - * Called before a request is sent. - * - * @param developerToken The developer token used for the request. - * @param customerId The customer ID specified on the request. - * @param request Any Google Ads API request. 
- * @throws InterruptedException - */ - void onBeforeRequest(@Nullable String developerToken, String customerId, Message request) - throws InterruptedException; - - /** - * Called after a request succeeds. - * - * @param developerToken The developer token used for the request. - * @param customerId The customer ID specified on the request. - * @param request Any Google Ads API request. - */ - void onSuccess(@Nullable String developerToken, String customerId, Message request); - - /** - * Called after a request fails with a retryable error. - * - * @param developerToken The developer token used for the request. - * @param customerId The customer ID specified on the request. - * @param request Any Google Ads API request. - * @param error A retryable error. - */ - void onError( - @Nullable String developerToken, String customerId, Message request, GoogleAdsError error); - } - /** * This rate limit policy wraps a {@link RateLimiter} and can be used in low volume and * development use cases as a client-side rate limiting policy. This policy does not enforce a * global (per pipeline or otherwise) rate limit to requests and should not be used in deployments * where the Google Ads API quota is shared between multiple applications. * - *

This policy can be used to limit requests across all {@link GoogleAdsV17.Read} or {@link - * GoogleAdsV17.ReadAll} transforms by defining and using a {@link - * GoogleAdsV17.RateLimitPolicyFactory} which holds a shared static {@link - * GoogleAdsV17.SimpleRateLimitPolicy}. Note that the desired rate must be divided by the expected + *

This policy can be used to limit requests across all {@link GoogleAdsV19.Read} or {@link + * GoogleAdsV19.ReadAll} transforms by defining and using a {@link + * GoogleAdsV19.RateLimitPolicyFactory} which holds a shared static {@link + * GoogleAdsV19.SimpleRateLimitPolicy}. Note that the desired rate must be divided by the expected * maximum number of workers for the pipeline, otherwise the pipeline may exceed the desired rate * after an upscaling event. * *

{@code
-   * public class SimpleRateLimitPolicyFactory implements GoogleAdsV17.RateLimitPolicyFactory {
-   *   private static final GoogleAdsV17.RateLimitPolicy POLICY =
-   *       new GoogleAdsV17.SimpleRateLimitPolicy(1.0 / 1000.0);
+   * public class SimpleRateLimitPolicyFactory implements GoogleAdsIO.RateLimitPolicyFactory {
+   *   private static final GoogleAdsIO.RateLimitPolicy POLICY =
+   *       new GoogleAdsV19.SimpleRateLimitPolicy(1.0 / 1000.0);
    *
    *   @Override
-   *   public GoogleAdsV17.RateLimitPolicy getRateLimitPolicy() {
+   *   public GoogleAdsIO.RateLimitPolicy getRateLimitPolicy() {
    *     return POLICY;
    *   }
    * }
    * }
*/ - public static class SimpleRateLimitPolicy implements RateLimitPolicy { + public static class SimpleRateLimitPolicy implements RateLimitPolicy { private final RateLimiter rateLimiter; public SimpleRateLimitPolicy(double permitsPerSecond) { diff --git a/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/DummyRateLimitPolicy.java b/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/DummyRateLimitPolicy.java index b958f28f83b0..56ca204db4f4 100644 --- a/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/DummyRateLimitPolicy.java +++ b/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/DummyRateLimitPolicy.java @@ -17,11 +17,11 @@ */ package org.apache.beam.sdk.io.googleads; -import com.google.ads.googleads.v17.errors.GoogleAdsError; +import com.google.ads.googleads.v19.errors.GoogleAdsError; import com.google.protobuf.Message; import org.checkerframework.checker.nullness.qual.Nullable; -public class DummyRateLimitPolicy implements GoogleAdsV17.RateLimitPolicy { +public class DummyRateLimitPolicy implements GoogleAdsV19.RateLimitPolicy { @Override public void onBeforeRequest(@Nullable String developerToken, String customerId, Message request) throws InterruptedException {} diff --git a/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/GoogleAdsV17Test.java b/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/GoogleAdsIOTest.java similarity index 87% rename from sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/GoogleAdsV17Test.java rename to sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/GoogleAdsIOTest.java index f73ab10e0824..4804918bed6c 100644 --- a/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/GoogleAdsV17Test.java +++ b/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/GoogleAdsIOTest.java @@ -20,18 +20,18 @@ import static org.mockito.Mockito.any; 
import static org.mockito.Mockito.when; -import com.google.ads.googleads.v17.errors.AuthenticationErrorEnum.AuthenticationError; -import com.google.ads.googleads.v17.errors.ErrorCode; -import com.google.ads.googleads.v17.errors.ErrorDetails; -import com.google.ads.googleads.v17.errors.GoogleAdsError; -import com.google.ads.googleads.v17.errors.GoogleAdsException; -import com.google.ads.googleads.v17.errors.GoogleAdsFailure; -import com.google.ads.googleads.v17.errors.InternalErrorEnum.InternalError; -import com.google.ads.googleads.v17.errors.QuotaErrorDetails; -import com.google.ads.googleads.v17.errors.QuotaErrorEnum.QuotaError; -import com.google.ads.googleads.v17.services.GoogleAdsRow; -import com.google.ads.googleads.v17.services.SearchGoogleAdsStreamRequest; -import com.google.ads.googleads.v17.services.SearchGoogleAdsStreamResponse; +import com.google.ads.googleads.v19.errors.AuthenticationErrorEnum.AuthenticationError; +import com.google.ads.googleads.v19.errors.ErrorCode; +import com.google.ads.googleads.v19.errors.ErrorDetails; +import com.google.ads.googleads.v19.errors.GoogleAdsError; +import com.google.ads.googleads.v19.errors.GoogleAdsException; +import com.google.ads.googleads.v19.errors.GoogleAdsFailure; +import com.google.ads.googleads.v19.errors.InternalErrorEnum.InternalError; +import com.google.ads.googleads.v19.errors.QuotaErrorDetails; +import com.google.ads.googleads.v19.errors.QuotaErrorEnum.QuotaError; +import com.google.ads.googleads.v19.services.GoogleAdsRow; +import com.google.ads.googleads.v19.services.SearchGoogleAdsStreamRequest; +import com.google.ads.googleads.v19.services.SearchGoogleAdsStreamResponse; import com.google.api.gax.grpc.GrpcStatusCode; import com.google.api.gax.rpc.ApiException; import com.google.protobuf.Duration; @@ -41,7 +41,7 @@ import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.Pipeline.PipelineExecutionException; import org.apache.beam.sdk.extensions.gcp.auth.NoopCredentialFactory; -import 
org.apache.beam.sdk.io.googleads.GoogleAdsV17.RateLimitPolicyFactory; +import org.apache.beam.sdk.io.googleads.GoogleAdsIO.RateLimitPolicyFactory; import org.apache.beam.sdk.testing.NeedsRunner; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; @@ -61,8 +61,9 @@ import org.mockito.junit.MockitoJUnitRunner; @RunWith(Enclosed.class) -public class GoogleAdsV17Test { - static final RateLimitPolicyFactory TEST_POLICY_FACTORY = () -> new DummyRateLimitPolicy(); +public class GoogleAdsIOTest { + static final RateLimitPolicyFactory TEST_POLICY_FACTORY = + () -> new DummyRateLimitPolicy(); @RunWith(JUnit4.class) public static class ConstructionTests { @@ -73,7 +74,7 @@ public void testReadAllExpandWithDeveloperTokenFromBuilder() { pipeline .apply(Create.empty(new TypeDescriptor() {})) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .readAll() .withRateLimitPolicy(TEST_POLICY_FACTORY) .withDeveloperToken("abc")); @@ -84,7 +85,7 @@ public void testReadAllExpandWithDeveloperTokenFromOptions() { pipeline.getOptions().as(GoogleAdsOptions.class).setGoogleAdsDeveloperToken("abc"); pipeline .apply(Create.empty(new TypeDescriptor() {})) - .apply(GoogleAdsIO.v17().readAll().withRateLimitPolicy(TEST_POLICY_FACTORY)); + .apply(GoogleAdsIO.current().readAll().withRateLimitPolicy(TEST_POLICY_FACTORY)); } @Test @@ -93,7 +94,7 @@ public void testReadAllExpandWithDeveloperTokenFromOptionsAndBuilder() { pipeline .apply(Create.empty(new TypeDescriptor() {})) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .readAll() .withRateLimitPolicy(TEST_POLICY_FACTORY) .withDeveloperToken(null)); @@ -107,7 +108,7 @@ public void testReadAllExpandWithoutDeveloperToken() throws Exception { () -> pipeline .apply(Create.empty(new TypeDescriptor() {})) - .apply(GoogleAdsIO.v17().readAll().withRateLimitPolicy(TEST_POLICY_FACTORY))); + .apply(GoogleAdsIO.current().readAll().withRateLimitPolicy(TEST_POLICY_FACTORY))); } @Test @@ -118,7 +119,7 @@ public void 
testReadAllExpandWithoutRateLimitPolicy() throws Exception { () -> pipeline .apply(Create.empty(new TypeDescriptor() {})) - .apply(GoogleAdsIO.v17().readAll().withDeveloperToken("abc"))); + .apply(GoogleAdsIO.current().readAll().withDeveloperToken("abc"))); } @Test @@ -130,7 +131,7 @@ public void testReadAllExpandWithoutValidGoogleAdsClientFactory() throws Excepti pipeline .apply(Create.empty(new TypeDescriptor() {})) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .readAll() .withRateLimitPolicy(TEST_POLICY_FACTORY) .withGoogleAdsClientFactory(null))); @@ -144,7 +145,7 @@ public void testReadAllExpandWithoutValidRateLimitPolicy() throws Exception { () -> pipeline .apply(Create.empty(new TypeDescriptor() {})) - .apply(GoogleAdsIO.v17().readAll().withRateLimitPolicy(null))); + .apply(GoogleAdsIO.current().readAll().withRateLimitPolicy(null))); } @Test @@ -152,7 +153,7 @@ public void testReadExpandWithDeveloperTokenFromBuilder() { pipeline .apply(Create.empty(TypeDescriptors.strings())) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withRateLimitPolicy(TEST_POLICY_FACTORY) .withDeveloperToken("abc") @@ -161,7 +162,10 @@ public void testReadExpandWithDeveloperTokenFromBuilder() { pipeline .apply(Create.empty(TypeDescriptors.strings())) .apply( - GoogleAdsIO.v17().read().withRateLimitPolicy(TEST_POLICY_FACTORY).withQuery("GAQL")); + GoogleAdsIO.current() + .read() + .withRateLimitPolicy(TEST_POLICY_FACTORY) + .withQuery("GAQL")); } @Test @@ -170,7 +174,10 @@ public void testReadExpandWithDeveloperTokenFromOptions() { pipeline .apply(Create.empty(TypeDescriptors.strings())) .apply( - GoogleAdsIO.v17().read().withRateLimitPolicy(TEST_POLICY_FACTORY).withQuery("GAQL")); + GoogleAdsIO.current() + .read() + .withRateLimitPolicy(TEST_POLICY_FACTORY) + .withQuery("GAQL")); } @Test @@ -179,7 +186,7 @@ public void testReadExpandWithDeveloperTokenFromOptionsAndBuilder() { pipeline .apply(Create.empty(TypeDescriptors.strings())) .apply( - GoogleAdsIO.v17() + 
GoogleAdsIO.current() .read() .withRateLimitPolicy(TEST_POLICY_FACTORY) .withDeveloperToken(null) @@ -195,7 +202,7 @@ public void testReadExpandWithoutDeveloperToken() throws Exception { pipeline .apply(Create.empty(TypeDescriptors.strings())) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withRateLimitPolicy(TEST_POLICY_FACTORY) .withQuery("GAQL"))); @@ -209,7 +216,7 @@ public void testReadExpandWithoutQuery() throws Exception { () -> pipeline .apply(Create.empty(TypeDescriptors.strings())) - .apply(GoogleAdsIO.v17().read().withRateLimitPolicy(TEST_POLICY_FACTORY))); + .apply(GoogleAdsIO.current().read().withRateLimitPolicy(TEST_POLICY_FACTORY))); } @Test @@ -220,7 +227,7 @@ public void testReadExpandWithoutRateLimitPolicy() throws Exception { () -> pipeline .apply(Create.empty(TypeDescriptors.strings())) - .apply(GoogleAdsIO.v17().read().withDeveloperToken("abc").withQuery("GAQL"))); + .apply(GoogleAdsIO.current().read().withDeveloperToken("abc").withQuery("GAQL"))); } @Test @@ -232,7 +239,7 @@ public void testReadExpandWithoutValidGoogleAdsClientFactory() throws Exception pipeline .apply(Create.empty(TypeDescriptors.strings())) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withRateLimitPolicy(TEST_POLICY_FACTORY) .withQuery("GAQL") @@ -248,7 +255,7 @@ public void testReadExpandWithoutValidQuery() throws Exception { pipeline .apply(Create.empty(TypeDescriptors.strings())) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withRateLimitPolicy(TEST_POLICY_FACTORY) .withQuery(null))); @@ -260,7 +267,7 @@ public void testReadExpandWithoutValidQuery() throws Exception { pipeline .apply(Create.empty(TypeDescriptors.strings())) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withRateLimitPolicy(TEST_POLICY_FACTORY) .withQuery(""))); @@ -274,7 +281,7 @@ public void testReadExpandWithoutValidRateLimitPolicy() throws Exception { () -> pipeline .apply(Create.empty(TypeDescriptors.strings())) - 
.apply(GoogleAdsIO.v17().read().withQuery("GAQL").withRateLimitPolicy(null))); + .apply(GoogleAdsIO.current().read().withQuery("GAQL").withRateLimitPolicy(null))); } } @@ -286,15 +293,15 @@ public static class ExecutionTests { public void init() { GoogleAdsOptions options = pipeline.getOptions().as(GoogleAdsOptions.class); options.setGoogleAdsCredentialFactoryClass(NoopCredentialFactory.class); - synchronized (GoogleAdsV17.ReadAll.ReadAllFn.class) { - GoogleAdsV17.ReadAll.ReadAllFn.sleeper = (long millis) -> {}; + synchronized (GoogleAdsV19.ReadAll.ReadAllFn.class) { + GoogleAdsV19.ReadAll.ReadAllFn.sleeper = (long millis) -> {}; } } @Test @Category(NeedsRunner.class) public void testRead() { - when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V17 + when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V19 .searchStreamCallable() .call(any(SearchGoogleAdsStreamRequest.class)) .iterator()) @@ -309,7 +316,7 @@ public void testRead() { pipeline .apply(Create.of("123")) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withGoogleAdsClientFactory(new MockGoogleAdsClientFactory()) .withRateLimitPolicy(TEST_POLICY_FACTORY) @@ -323,7 +330,7 @@ public void testRead() { @Test @Category(NeedsRunner.class) public void testReadWithFailureFromMaxRetriesExceeded() throws Exception { - when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V17 + when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V19 .searchStreamCallable() .call(any(SearchGoogleAdsStreamRequest.class))) .thenThrow( @@ -341,7 +348,7 @@ public void testReadWithFailureFromMaxRetriesExceeded() throws Exception { pipeline .apply(Create.of("123")) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withGoogleAdsClientFactory(new MockGoogleAdsClientFactory()) .withRateLimitPolicy(TEST_POLICY_FACTORY) @@ -362,7 +369,7 @@ public void testReadWithFailureFromMaxRetriesExceeded() throws Exception { @Test @Category(NeedsRunner.class) public void testReadWithFailureFromNonRetryableError() 
throws Exception { - when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V17 + when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V19 .searchStreamCallable() .call(any(SearchGoogleAdsStreamRequest.class))) .thenThrow( @@ -381,7 +388,7 @@ public void testReadWithFailureFromNonRetryableError() throws Exception { pipeline .apply(Create.of("123")) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withGoogleAdsClientFactory(new MockGoogleAdsClientFactory()) .withRateLimitPolicy(TEST_POLICY_FACTORY) @@ -395,7 +402,7 @@ public void testReadWithFailureFromNonRetryableError() throws Exception { pipeline::run); Assert.assertEquals(IOException.class, exception.getCause().getClass()); Assert.assertEquals( - "com.google.ads.googleads.v17.errors.GoogleAdsException: errors {\n" + "com.google.ads.googleads.v19.errors.GoogleAdsException: errors {\n" + " error_code {\n" + " authentication_error: OAUTH_TOKEN_REVOKED\n" + " }\n" @@ -406,7 +413,7 @@ public void testReadWithFailureFromNonRetryableError() throws Exception { @Test @Category(NeedsRunner.class) public void testReadWithRecoveryFromInternalError() throws Exception { - when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V17 + when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V19 .searchStreamCallable() .call(any(SearchGoogleAdsStreamRequest.class)) .iterator()) @@ -443,7 +450,7 @@ public void testReadWithRecoveryFromInternalError() throws Exception { pipeline .apply(Create.of("123")) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withGoogleAdsClientFactory(new MockGoogleAdsClientFactory()) .withRateLimitPolicy(TEST_POLICY_FACTORY) @@ -457,7 +464,7 @@ public void testReadWithRecoveryFromInternalError() throws Exception { @Test @Category(NeedsRunner.class) public void testReadWithRecoveryFromQuotaErrorWithRetryDelay() throws Exception { - when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V17 + when(MockGoogleAdsClientFactory.GOOGLE_ADS_SERVICE_STUB_V19 .searchStreamCallable() 
.call(any(SearchGoogleAdsStreamRequest.class)) .iterator()) @@ -505,7 +512,7 @@ public void testReadWithRecoveryFromQuotaErrorWithRetryDelay() throws Exception pipeline .apply(Create.of("123")) .apply( - GoogleAdsIO.v17() + GoogleAdsIO.current() .read() .withGoogleAdsClientFactory(new MockGoogleAdsClientFactory()) .withRateLimitPolicy(TEST_POLICY_FACTORY) diff --git a/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/MockGoogleAdsClientFactory.java b/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/MockGoogleAdsClientFactory.java index 4ed1f87788f5..6ff6ef92e9e0 100644 --- a/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/MockGoogleAdsClientFactory.java +++ b/sdks/java/io/google-ads/src/test/java/org/apache/beam/sdk/io/googleads/MockGoogleAdsClientFactory.java @@ -23,12 +23,12 @@ import static org.mockito.Mockito.withSettings; import com.google.ads.googleads.lib.GoogleAdsClient; -import com.google.ads.googleads.v17.services.GoogleAdsServiceClient; -import com.google.ads.googleads.v17.services.stub.GoogleAdsServiceStub; +import com.google.ads.googleads.v19.services.GoogleAdsServiceClient; +import com.google.ads.googleads.v19.services.stub.GoogleAdsServiceStub; import org.checkerframework.checker.nullness.qual.Nullable; class MockGoogleAdsClientFactory implements GoogleAdsClientFactory { - static final GoogleAdsServiceStub GOOGLE_ADS_SERVICE_STUB_V17 = + static final GoogleAdsServiceStub GOOGLE_ADS_SERVICE_STUB_V19 = mock(GoogleAdsServiceStub.class, withSettings().defaultAnswer(RETURNS_DEEP_STUBS)); @Override @@ -39,8 +39,8 @@ public GoogleAdsClient newGoogleAdsClient( @Nullable Long loginCustomerId) { GoogleAdsClient mockGoogleAdsClient = mock(GoogleAdsClient.class, withSettings().defaultAnswer(RETURNS_DEEP_STUBS)); - when(mockGoogleAdsClient.getVersion17().createGoogleAdsServiceClient()) - .thenReturn(GoogleAdsServiceClient.create(GOOGLE_ADS_SERVICE_STUB_V17)); + 
when(mockGoogleAdsClient.getVersion19().createGoogleAdsServiceClient()) + .thenReturn(GoogleAdsServiceClient.create(GOOGLE_ADS_SERVICE_STUB_V19)); return mockGoogleAdsClient; } } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerAccessor.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerAccessor.java index ffdf0b099432..cc3caed44c1c 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerAccessor.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerAccessor.java @@ -220,7 +220,10 @@ static SpannerOptions buildSpannerOptions(SpannerConfig spannerConfig) { } ValueProvider host = spannerConfig.getHost(); if (host != null) { - builder.setHost(host.get()); + String hostValue = host.get(); + if (hostValue != null && !hostValue.trim().isEmpty()) { + builder.setHost(host.get()); + } } ValueProvider emulatorHost = spannerConfig.getEmulatorHost(); if (emulatorHost != null) { diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerSchema.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerSchema.java index 666cde876eeb..ebe5815d5684 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerSchema.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerSchema.java @@ -23,6 +23,8 @@ import java.io.Serializable; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableListMultimap; @@ 
-207,10 +209,17 @@ private static Type parseSpannerType(String spannerType, Dialect dialect) { } if (spannerType.startsWith("ARRAY")) { // Substring "ARRAY" - String spannerArrayType = - originalSpannerType.substring(6, originalSpannerType.length() - 1); - Type itemType = parseSpannerType(spannerArrayType, dialect); - return Type.array(itemType); + Pattern pattern = Pattern.compile("ARRAY<([^>(]+)>"); + Matcher matcher = pattern.matcher(originalSpannerType); + + if (matcher.find()) { + String spannerArrayType = matcher.group(1).trim(); + Type itemType = parseSpannerType(spannerArrayType, dialect); + return Type.array(itemType); + } else { + // Handle the case where the regex doesn't match (invalid ARRAY type) + throw new IllegalArgumentException("Invalid ARRAY type: " + originalSpannerType); + } } if (spannerType.startsWith("PROTO")) { // Substring "PROTO" diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerSchemaTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerSchemaTest.java index b82a1d4fbddd..14f3dbdd4ed9 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerSchemaTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerSchemaTest.java @@ -42,10 +42,12 @@ public void testSingleTable() throws Exception { .addColumn("test", "enumVal", "ENUM") .addColumn("test", "tokens", "TOKENLIST") .addColumn("test", "uuidCol", "UUID") + .addColumn("test", "arrayVal", "ARRAY(vector_length=>256)") + .addColumn("test", "arrayValue", "ARRAY") .build(); assertEquals(1, schema.getTables().size()); - assertEquals(8, schema.getColumns("test").size()); + assertEquals(10, schema.getColumns("test").size()); assertEquals(1, schema.getKeyParts("test").size()); assertEquals(Type.json(), schema.getColumns("test").get(3).getType()); assertEquals( @@ -54,6 +56,8 @@ public void 
testSingleTable() throws Exception { Type.protoEnum("customer.app.TestEnum"), schema.getColumns("test").get(5).getType()); assertEquals(Type.bytes(), schema.getColumns("test").get(6).getType()); assertEquals(Type.string(), schema.getColumns("test").get(7).getType()); + assertEquals(Type.array(Type.float32()), schema.getColumns("test").get(8).getType()); + assertEquals(Type.array(Type.float32()), schema.getColumns("test").get(9).getType()); } @Test diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcReadSchemaTransformProvider.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcReadSchemaTransformProvider.java index b4765f0392c1..b136b3e3e057 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcReadSchemaTransformProvider.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcReadSchemaTransformProvider.java @@ -33,6 +33,7 @@ import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider; import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider; import org.apache.beam.sdk.values.PCollectionRowTuple; +import org.apache.beam.sdk.values.Row; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; import org.checkerframework.checker.initialization.qual.Initialized; import org.checkerframework.checker.nullness.qual.NonNull; @@ -213,21 +214,52 @@ protected JdbcIO.DataSourceConfiguration dataSourceConfiguration() { @Override public PCollectionRowTuple expand(PCollectionRowTuple input) { - String query = config.getReadQuery(); - if (query == null) { - query = String.format("SELECT * FROM %s", config.getLocation()); + config.validate(); + // If we define a partition column, we follow a different route. 
+ @Nullable String partitionColumn = config.getPartitionColumn(); + @Nullable String location = config.getLocation(); + if (partitionColumn != null) { + JdbcIO.ReadWithPartitions readRowsWithParitions = + JdbcIO.readWithPartitions() + .withDataSourceConfiguration(dataSourceConfiguration()) + .withTable(location) + .withPartitionColumn(partitionColumn) + .withRowOutput(); + + @Nullable Integer partitions = config.getNumPartitions(); + if (partitions != null) { + readRowsWithParitions = readRowsWithParitions.withNumPartitions(partitions); + } + + @Nullable Integer fetchSize = config.getFetchSize(); + if (fetchSize != null && fetchSize > 0) { + readRowsWithParitions = readRowsWithParitions.withFetchSize(fetchSize); + } + + @Nullable Boolean disableAutoCommit = config.getDisableAutoCommit(); + if (disableAutoCommit != null) { + readRowsWithParitions = readRowsWithParitions.withDisableAutoCommit(disableAutoCommit); + } + return PCollectionRowTuple.of("output", input.getPipeline().apply(readRowsWithParitions)); + } + @Nullable String readQuery = config.getReadQuery(); + if (readQuery == null) { + readQuery = String.format("SELECT * FROM %s", location); } JdbcIO.ReadRows readRows = - JdbcIO.readRows().withDataSourceConfiguration(dataSourceConfiguration()).withQuery(query); - Integer fetchSize = config.getFetchSize(); + JdbcIO.readRows() + .withDataSourceConfiguration(dataSourceConfiguration()) + .withQuery(readQuery); + + @Nullable Integer fetchSize = config.getFetchSize(); if (fetchSize != null && fetchSize > 0) { readRows = readRows.withFetchSize(fetchSize); } - Boolean outputParallelization = config.getOutputParallelization(); + @Nullable Boolean outputParallelization = config.getOutputParallelization(); if (outputParallelization != null) { readRows = readRows.withOutputParallelization(outputParallelization); } - Boolean disableAutoCommit = config.getDisableAutoCommit(); + @Nullable Boolean disableAutoCommit = config.getDisableAutoCommit(); if (disableAutoCommit != 
null) { readRows = readRows.withDisableAutoCommit(disableAutoCommit); } @@ -294,6 +326,14 @@ public abstract static class JdbcReadSchemaTransformConfiguration implements Ser @Nullable public abstract String getLocation(); + @SchemaFieldDescription("Name of a column of numeric type that will be used for partitioning.") + @Nullable + public abstract String getPartitionColumn(); + + @SchemaFieldDescription("The number of partitions") + @Nullable + public abstract Integer getNumPartitions(); + @SchemaFieldDescription( "Whether to reshuffle the resulting PCollection so results are distributed to all workers.") @Nullable @@ -340,13 +380,20 @@ public void validate(String jdbcType) throws IllegalArgumentException { boolean readQueryPresent = (getReadQuery() != null && !"".equals(getReadQuery())); boolean locationPresent = (getLocation() != null && !"".equals(getLocation())); + boolean partitionColumnPresent = + (getPartitionColumn() != null && !"".equals(getPartitionColumn())); + // If you specify a readQuery, it is to be used instead of a table. if (readQueryPresent && locationPresent) { throw new IllegalArgumentException("Query and Table are mutually exclusive configurations"); } if (!readQueryPresent && !locationPresent) { throw new IllegalArgumentException("Either Query or Table must be specified."); } + // Reading with partitions only supports table argument. 
+ if (partitionColumnPresent && !locationPresent) { + throw new IllegalArgumentException("Table must be specified to read with partitions."); + } } public static Builder builder() { @@ -368,6 +415,10 @@ public abstract static class Builder { public abstract Builder setLocation(String value); + public abstract Builder setPartitionColumn(String value); + + public abstract Builder setNumPartitions(Integer value); + public abstract Builder setReadQuery(String value); public abstract Builder setConnectionProperties(String value); diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcUtil.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcUtil.java index 7a0558e6ca92..3cd379e10e7a 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcUtil.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcUtil.java @@ -112,7 +112,8 @@ static void registerJdbcDriver(Map jdbcType) { /** Utility method to save jar files locally in the worker. 
*/ static URL[] saveFilesLocally(String driverJars) { - List listOfJarPaths = Splitter.on(',').trimResults().splitToList(driverJars); + List listOfJarPaths = + Splitter.on(',').trimResults().omitEmptyStrings().splitToList(driverJars); final String destRoot = Files.createTempDir().getAbsolutePath(); List driverJarUrls = new ArrayList<>(); diff --git a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcReadSchemaTransformProviderTest.java b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcReadSchemaTransformProviderTest.java index ca7690ac9a08..0837ea686ddf 100644 --- a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcReadSchemaTransformProviderTest.java +++ b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcReadSchemaTransformProviderTest.java @@ -122,6 +122,17 @@ public void testInvalidReadSchemaOptions() { .build() .validate(); }); + assertThrows( + IllegalArgumentException.class, + () -> { + JdbcReadSchemaTransformProvider.JdbcReadSchemaTransformConfiguration.builder() + .setDriverClassName("ClassName") + .setJdbcUrl("JdbcUrl") + .setReadQuery("Query") + .setPartitionColumn("Id") + .build() + .validate(); + }); } @Test @@ -188,6 +199,62 @@ public void testReadWithJdbcTypeSpecified() { pipeline.run(); } + @Test + public void testReadWithPartitions() { + JdbcReadSchemaTransformProvider provider = null; + for (SchemaTransformProvider p : ServiceLoader.load(SchemaTransformProvider.class)) { + if (p instanceof JdbcReadSchemaTransformProvider) { + provider = (JdbcReadSchemaTransformProvider) p; + break; + } + } + assertNotNull(provider); + + PCollection output = + PCollectionRowTuple.empty(pipeline) + .apply( + provider.from( + JdbcReadSchemaTransformProvider.JdbcReadSchemaTransformConfiguration.builder() + .setDriverClassName(DATA_SOURCE_CONFIGURATION.getDriverClassName().get()) + .setJdbcUrl(DATA_SOURCE_CONFIGURATION.getUrl().get()) + .setLocation(READ_TABLE_NAME) + .setPartitionColumn("Id") + 
.setNumPartitions(6) + .build())) + .get("output"); + Long expected = Long.valueOf(EXPECTED_ROW_COUNT); + PAssert.that(output.apply(Count.globally())).containsInAnyOrder(expected); + pipeline.run(); + } + + @Test + public void testReadWithPartitionsWithJdbcTypeSpecified() { + JdbcReadSchemaTransformProvider provider = null; + for (SchemaTransformProvider p : ServiceLoader.load(SchemaTransformProvider.class)) { + if (p instanceof JdbcReadSchemaTransformProvider) { + provider = (JdbcReadSchemaTransformProvider) p; + break; + } + } + assertNotNull(provider); + + PCollection output = + PCollectionRowTuple.empty(pipeline) + .apply( + provider.from( + JdbcReadSchemaTransformProvider.JdbcReadSchemaTransformConfiguration.builder() + .setJdbcUrl(DATA_SOURCE_CONFIGURATION.getUrl().get()) + .setJdbcType("derby") + .setLocation(READ_TABLE_NAME) + .setPartitionColumn("Id") + .setNumPartitions(6) + .build())) + .get("output"); + Long expected = Long.valueOf(EXPECTED_ROW_COUNT); + PAssert.that(output.apply(Count.globally())).containsInAnyOrder(expected); + pipeline.run(); + } + /** Create test data that is consistent with that generated by TestRow. 
*/ private static void addInitialData(DataSource dataSource, String tableName) throws SQLException { try (Connection connection = dataSource.getConnection()) { diff --git a/sdks/java/io/kafka/build.gradle b/sdks/java/io/kafka/build.gradle index 14d28fff4d10..6e9b5aec0932 100644 --- a/sdks/java/io/kafka/build.gradle +++ b/sdks/java/io/kafka/build.gradle @@ -52,7 +52,6 @@ dependencies { provided library.java.jackson_dataformat_csv permitUnusedDeclared library.java.jackson_dataformat_csv implementation project(path: ":sdks:java:core", configuration: "shadow") - implementation project(path: ":runners:core-java") implementation project(path: ":model:pipeline", configuration: "shadow") implementation project(":sdks:java:extensions:avro") implementation project(":sdks:java:extensions:protobuf") @@ -67,6 +66,7 @@ dependencies { compileOnly library.java.kafka } testImplementation library.java.kafka_clients + testImplementation project(path: ":runners:core-java") implementation library.java.slf4j_api implementation library.java.joda_time implementation library.java.jackson_annotations diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformConfiguration.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformConfiguration.java index 693c1371f78c..aadc70915f4c 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformConfiguration.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformConfiguration.java @@ -43,7 +43,7 @@ public abstract class KafkaReadSchemaTransformConfiguration { public static final Set VALID_START_OFFSET_VALUES = Sets.newHashSet("earliest", "latest"); - public static final String VALID_FORMATS_STR = "RAW,AVRO,JSON,PROTO"; + public static final String VALID_FORMATS_STR = "RAW,STRING,AVRO,JSON,PROTO"; public static final Set VALID_DATA_FORMATS = Sets.newHashSet(VALID_FORMATS_STR.split(",")); diff 
--git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformProvider.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformProvider.java index a3fd1d8c3fd7..f66d7591fb8e 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformProvider.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaReadSchemaTransformProvider.java @@ -27,6 +27,7 @@ import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; @@ -103,6 +104,17 @@ public Row apply(byte[] input) { }; } + public static SerializableFunction getRawStringToRowFunction(Schema stringSchema) { + return new SimpleFunction() { + @Override + public Row apply(byte[] input) { + return Row.withSchema(stringSchema) + .addValue(new String(input, StandardCharsets.UTF_8)) + .build(); + } + }; + } + @Override public String identifier() { return getUrn(ExternalTransforms.ManagedTransforms.Urns.KAFKA_READ); @@ -193,6 +205,9 @@ public PCollectionRowTuple expand(PCollectionRowTuple input) { if ("RAW".equals(format)) { beamSchema = Schema.builder().addField("payload", Schema.FieldType.BYTES).build(); valueMapper = getRawBytesToRowFunction(beamSchema); + } else if ("STRING".equals(format)) { + beamSchema = Schema.builder().addField("payload", Schema.FieldType.STRING).build(); + valueMapper = getRawStringToRowFunction(beamSchema); } else if ("PROTO".equals(format)) { String fileDescriptorPath = configuration.getFileDescriptorPath(); String messageName = checkArgumentNotNull(configuration.getMessageName()); diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaSinkMetrics.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaSinkMetrics.java index 
c1753f646c3e..f6456e8d3438 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaSinkMetrics.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaSinkMetrics.java @@ -17,7 +17,6 @@ */ package org.apache.beam.sdk.io.kafka; -import org.apache.beam.runners.core.metrics.MonitoringInfoConstants; import org.apache.beam.sdk.metrics.DelegatingGauge; import org.apache.beam.sdk.metrics.DelegatingHistogram; import org.apache.beam.sdk.metrics.Gauge; @@ -26,6 +25,7 @@ import org.apache.beam.sdk.metrics.MetricName; import org.apache.beam.sdk.util.HistogramData; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; +import org.checkerframework.checker.nullness.qual.Nullable; /** * Helper class to create per worker metrics for Kafka Sink stages. @@ -71,7 +71,7 @@ public static Histogram createRPCLatencyHistogram(RpcMethod method, String topic nameBuilder.addLabel(RPC_METHOD, method.toString()); nameBuilder.addLabel(TOPIC_LABEL, topic); - nameBuilder.addMetricLabel(MonitoringInfoConstants.Labels.PER_WORKER_METRIC, "true"); + nameBuilder.addMetricLabel("PER_WORKER_METRIC", "true"); MetricName metricName = nameBuilder.build(METRICS_NAMESPACE); HistogramData.BucketType buckets = HistogramData.ExponentialBuckets.of(1, 17); @@ -89,7 +89,7 @@ public static Histogram createRPCLatencyHistogram(RpcMethod method, String topic public static Gauge createBacklogGauge(MetricName name) { // TODO(#34195): Unify metrics collection path. // Currently KafkaSink metrics only supports aggregated per worker metrics. 
- Preconditions.checkState(MonitoringInfoConstants.isPerWorkerMetric(name)); + Preconditions.checkState(isPerWorkerMetric(name)); return new DelegatingGauge(name, false); } @@ -107,7 +107,7 @@ public static MetricName getMetricGaugeName(String topic, int partitionId) { LabeledMetricNameUtils.MetricNameBuilder.baseNameBuilder(ESTIMATED_BACKLOG_SIZE); nameBuilder.addLabel(PARTITION_ID, String.valueOf(partitionId)); nameBuilder.addLabel(TOPIC_LABEL, topic); - nameBuilder.addMetricLabel(MonitoringInfoConstants.Labels.PER_WORKER_METRIC, "true"); + nameBuilder.addMetricLabel("PER_WORKER_METRIC", "true"); return nameBuilder.build(METRICS_NAMESPACE); } @@ -126,4 +126,12 @@ static KafkaMetrics kafkaMetrics() { public static void setSupportKafkaMetrics(boolean supportKafkaMetrics) { KafkaSinkMetrics.supportKafkaMetrics = supportKafkaMetrics; } + + private static boolean isPerWorkerMetric(MetricName metricName) { + @Nullable String value = metricName.getLabels().get("PER_WORKER_METRIC"); + if (value != null && value.equals("true")) { + return true; + } + return false; + } } diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaSinkMetricsTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaSinkMetricsTest.java index 17fc116b3a8e..4da22b954ddc 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaSinkMetricsTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaSinkMetricsTest.java @@ -19,7 +19,9 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertTrue; +import org.apache.beam.runners.core.metrics.MonitoringInfoConstants; import org.apache.beam.sdk.metrics.Gauge; import org.apache.beam.sdk.metrics.Histogram; import org.apache.beam.sdk.metrics.MetricName; @@ -43,6 +45,11 @@ public void testCreatingHistogram() throws Exception { "RpcLatency*rpc_method:POLL;topic_name:topic1;", 
ImmutableMap.of("PER_WORKER_METRIC", "true")); assertThat(histogram.getName(), equalTo(histogramName)); + assertTrue( + histogram + .getName() + .getLabels() + .containsKey(MonitoringInfoConstants.Labels.PER_WORKER_METRIC)); } @Test @@ -58,5 +65,7 @@ public void testCreatingBacklogGauge() throws Exception { ImmutableMap.of("PER_WORKER_METRIC", "true")); assertThat(gauge.getName(), equalTo(gaugeName)); + assertTrue( + gauge.getName().getLabels().containsKey(MonitoringInfoConstants.Labels.PER_WORKER_METRIC)); } } diff --git a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakeIO.java b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakeIO.java index 2b5dd18b4dae..5350e0e2c1fe 100644 --- a/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakeIO.java +++ b/sdks/java/io/snowflake/src/main/java/org/apache/beam/sdk/io/snowflake/SnowflakeIO.java @@ -1176,7 +1176,10 @@ public void processElement(ProcessContext context) { for (Object o : context.element()) { if (o instanceof String) { String field = (String) o; - field = field.replace("'", "''"); + field = field.replace("\\", "\\\\"); + if (!this.quotationMark.isEmpty()) { + field = field.replace(this.quotationMark, "\\" + this.quotationMark); + } field = quoteNonEmptyField(field); csvItems.add(field); diff --git a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SnowflakeIOWriteTest.java b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SnowflakeIOWriteTest.java index cc36567dca6d..496037388875 100644 --- a/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SnowflakeIOWriteTest.java +++ b/sdks/java/io/snowflake/src/test/java/org/apache/beam/sdk/io/snowflake/test/unit/write/SnowflakeIOWriteTest.java @@ -208,7 +208,7 @@ public void writeToExternalWithDoubleQuotation() throws SnowflakeSQLException { List actualData = 
FakeSnowflakeDatabase.getElements(FAKE_TABLE); List escapedTestData = testDataInStrings.stream() - .map(e -> e.replace("'", "''")) + .map(e -> e.replace("\"", "\\\"")) .map(e -> e.isEmpty() ? "" : String.format("\"%s\"", e)) .collect(Collectors.toList()); assertTrue(TestUtils.areListsEqual(escapedTestData, actualData)); @@ -233,8 +233,7 @@ public void writeToExternalWithBlankQuotation() throws SnowflakeSQLException { List actualData = FakeSnowflakeDatabase.getElements(FAKE_TABLE); - List escapedTestData = - testDataInStrings.stream().map(e -> e.replace("'", "''")).collect(Collectors.toList()); - assertTrue(TestUtils.areListsEqual(escapedTestData, actualData)); + // no escape for blank quotation + assertTrue(TestUtils.areListsEqual(testDataInStrings, actualData)); } } diff --git a/sdks/python/apache_beam/coders/coder_impl.pxd b/sdks/python/apache_beam/coders/coder_impl.pxd index 8a28499555c1..27cffe7b62df 100644 --- a/sdks/python/apache_beam/coders/coder_impl.pxd +++ b/sdks/python/apache_beam/coders/coder_impl.pxd @@ -130,6 +130,11 @@ cdef class VarIntCoderImpl(StreamCoderImpl): cpdef bytes encode(self, value) +cdef class VarInt32CoderImpl(StreamCoderImpl): + @cython.locals(ivalue=libc.stdint.int32_t) + cpdef bytes encode(self, value) + + cdef class SingletonCoderImpl(CoderImpl): cdef object _value diff --git a/sdks/python/apache_beam/coders/coder_impl.py b/sdks/python/apache_beam/coders/coder_impl.py index 5dff35052901..49cbbdd17e69 100644 --- a/sdks/python/apache_beam/coders/coder_impl.py +++ b/sdks/python/apache_beam/coders/coder_impl.py @@ -974,6 +974,37 @@ def estimate_size(self, value, nested=False): return get_varint_size(value) +class VarInt32CoderImpl(StreamCoderImpl): + """For internal use only; no backwards-compatibility guarantees. 
+ + A coder for int32 objects.""" + def encode_to_stream(self, value, out, nested): + # type: (int, create_OutputStream, bool) -> None + out.write_var_int32(value) + + def decode_from_stream(self, in_stream, nested): + # type: (create_InputStream, bool) -> int + return in_stream.read_var_int32() + + def encode(self, value): + ivalue = value # type cast + if 0 <= ivalue < len(small_ints): + return small_ints[ivalue] + return StreamCoderImpl.encode(self, value) + + def decode(self, encoded): + if len(encoded) == 1: + i = ord(encoded) + if 0 <= i < 128: + return i + return StreamCoderImpl.decode(self, encoded) + + def estimate_size(self, value, nested=False): + # type: (Any, bool) -> int + # Note that VarInts are encoded the same way regardless of nesting. + return get_varint_size(int(value) & 0xFFFFFFFF) + + class SingletonCoderImpl(CoderImpl): """For internal use only; no backwards-compatibility guarantees. diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index e6250532aef1..cb23e3967e33 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -629,7 +629,7 @@ def __repr__(self): class VarIntCoder(FastCoder): - """Variable-length integer coder.""" + """Variable-length integer coder matches Java SDK's VarLongCoder.""" def _create_impl(self): return coder_impl.VarIntCoderImpl() @@ -650,6 +650,25 @@ def __hash__(self): Coder.register_structured_urn(common_urns.coders.VARINT.urn, VarIntCoder) +class VarInt32Coder(FastCoder): + """Variable-length integer coder matches Java SDK's VarIntCoder.""" + def _create_impl(self): + return coder_impl.VarInt32CoderImpl() + + def is_deterministic(self): + # type: () -> bool + return True + + def to_type_hint(self): + return int + + def __eq__(self, other): + return type(self) == type(other) + + def __hash__(self): + return hash(type(self)) + + class BigEndianShortCoder(FastCoder): """A coder used for big-endian int16 values.""" def 
_create_impl(self): diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py index f3381cdb1d69..bed93cbc5545 100644 --- a/sdks/python/apache_beam/coders/coders_test_common.py +++ b/sdks/python/apache_beam/coders/coders_test_common.py @@ -318,6 +318,20 @@ def test_varint_coder(self): for k in range(0, int(math.log(MAX_64_BIT_INT))) ]) + def test_varint32_coder(self): + # Small ints. + self.check_coder(coders.VarInt32Coder(), *range(-10, 10)) + # Multi-byte encoding starts at 128 + self.check_coder(coders.VarInt32Coder(), *range(120, 140)) + # Large values + MAX_32_BIT_INT = 0x7fffffff + self.check_coder( + coders.VarIntCoder(), + *[ + int(math.pow(-1, k) * math.exp(k)) + for k in range(0, int(math.log(MAX_32_BIT_INT))) + ]) + def test_float_coder(self): self.check_coder( coders.FloatCoder(), *[float(0.1 * x) for x in range(-100, 100)]) diff --git a/sdks/python/apache_beam/coders/row_coder.py b/sdks/python/apache_beam/coders/row_coder.py index e93abbc887fb..dc473b1d6d7d 100644 --- a/sdks/python/apache_beam/coders/row_coder.py +++ b/sdks/python/apache_beam/coders/row_coder.py @@ -33,6 +33,7 @@ from apache_beam.coders.coders import SinglePrecisionFloatCoder from apache_beam.coders.coders import StrUtf8Coder from apache_beam.coders.coders import TimestampCoder +from apache_beam.coders.coders import VarInt32Coder from apache_beam.coders.coders import VarIntCoder from apache_beam.portability import common_urns from apache_beam.portability.api import schema_pb2 @@ -142,8 +143,10 @@ def _coder_from_type(field_type): def _nonnull_coder_from_type(field_type): type_info = field_type.WhichOneof("type_info") if type_info == "atomic_type": - if field_type.atomic_type in (schema_pb2.INT32, schema_pb2.INT64): + if field_type.atomic_type == schema_pb2.INT64: return VarIntCoder() + elif field_type.atomic_type == schema_pb2.INT32: + return VarInt32Coder() if field_type.atomic_type == schema_pb2.INT16: return 
BigEndianShortCoder() elif field_type.atomic_type == schema_pb2.FLOAT: diff --git a/sdks/python/apache_beam/coders/row_coder_test.py b/sdks/python/apache_beam/coders/row_coder_test.py index 6ac982835cb3..4d47bca3e2b2 100644 --- a/sdks/python/apache_beam/coders/row_coder_test.py +++ b/sdks/python/apache_beam/coders/row_coder_test.py @@ -203,6 +203,29 @@ def test_create_row_coder_from_schema(self): for test_case in self.PEOPLE: self.assertEqual(test_case, coder.decode(coder.encode(test_case))) + def test_row_coder_negative_varint(self): + schema = schema_pb2.Schema( + id="negative", + fields=[ + schema_pb2.Field( + name="i64", + type=schema_pb2.FieldType(atomic_type=schema_pb2.INT64)), + schema_pb2.Field( + name="i32", + type=schema_pb2.FieldType(atomic_type=schema_pb2.INT32)) + ]) + coder = RowCoder(schema) + Negative = typing.NamedTuple( + "Negative", [ + ("i64", np.int64), + ("i32", np.int32), + ]) + test_cases = [ + Negative(-1, -1023), Negative(-1023, -1), Negative(-2**63, -2**31) + ] + for test_case in test_cases: + self.assertEqual(test_case, coder.decode(coder.encode(test_case))) + @unittest.skip( "https://github.com/apache/beam/issues/19696 - Overflow behavior in " "VarIntCoder is currently inconsistent") diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py index b08ad8e9a37f..fb4aa50f233d 100644 --- a/sdks/python/apache_beam/coders/slow_stream.py +++ b/sdks/python/apache_beam/coders/slow_stream.py @@ -58,6 +58,9 @@ def write_var_int64(self, v: int) -> None: if not v: break + def write_var_int32(self, v: int) -> None: + self.write_var_int64(int(v) & 0xFFFFFFFF) + def write_bigendian_int64(self, v): self.write(struct.pack('>q', v)) @@ -156,6 +159,10 @@ def read_var_int64(self): result -= 1 << 64 return result + def read_var_int32(self): + v = self.read_var_int64() + return struct.unpack('q', self.read(8))[0] diff --git a/sdks/python/apache_beam/coders/stream.pxd b/sdks/python/apache_beam/coders/stream.pxd 
index 97d66aa089a4..24ce637f0420 100644 --- a/sdks/python/apache_beam/coders/stream.pxd +++ b/sdks/python/apache_beam/coders/stream.pxd @@ -26,6 +26,7 @@ cdef class OutputStream(object): cpdef write(self, bytes b, bint nested=*) cpdef write_byte(self, unsigned char val) cpdef write_var_int64(self, libc.stdint.int64_t v) + cpdef write_var_int32(self, libc.stdint.int64_t v) cpdef write_bigendian_int64(self, libc.stdint.int64_t signed_v) cpdef write_bigendian_uint64(self, libc.stdint.uint64_t signed_v) cpdef write_bigendian_int32(self, libc.stdint.int32_t signed_v) @@ -43,6 +44,8 @@ cdef class ByteCountingOutputStream(OutputStream): cdef size_t count cpdef write(self, bytes b, bint nested=*) + cpdef write_var_int64(self, libc.stdint.int64_t val) + cpdef write_var_int32(self, libc.stdint.int64_t val) cpdef write_byte(self, unsigned char val) cpdef write_bigendian_int64(self, libc.stdint.int64_t val) cpdef write_bigendian_uint64(self, libc.stdint.uint64_t val) @@ -61,6 +64,7 @@ cdef class InputStream(object): cpdef bytes read(self, size_t len) cpdef long read_byte(self) except? -1 cpdef libc.stdint.int64_t read_var_int64(self) except? -1 + cpdef libc.stdint.int32_t read_var_int32(self) except? -1 cpdef libc.stdint.int64_t read_bigendian_int64(self) except? -1 cpdef libc.stdint.uint64_t read_bigendian_uint64(self) except? -1 cpdef libc.stdint.int32_t read_bigendian_int32(self) except? 
-1 diff --git a/sdks/python/apache_beam/coders/stream.pyx b/sdks/python/apache_beam/coders/stream.pyx index 3977660f68b0..dbc671d7f1a8 100644 --- a/sdks/python/apache_beam/coders/stream.pyx +++ b/sdks/python/apache_beam/coders/stream.pyx @@ -73,6 +73,12 @@ cdef class OutputStream(object): if not v: break + cpdef write_var_int32(self, libc.stdint.int64_t signed_v): + """Encode an int using variable-length encoding to a stream.""" + # for backward compatibility, input type is int64_t thus tolerates overflow + cdef libc.stdint.int64_t v = signed_v & 0xFFFFFFFF + self.write_var_int64(v) + cpdef write_bigendian_int64(self, libc.stdint.int64_t signed_v): self.write_bigendian_uint64(signed_v) @@ -91,7 +97,7 @@ cdef class OutputStream(object): cpdef write_bigendian_int32(self, libc.stdint.int32_t signed_v): cdef libc.stdint.uint32_t v = signed_v - if self.buffer_size < self.pos + 4: + if self.buffer_size < self.pos + 4: self.extend(4) self.data[self.pos ] = (v >> 24) self.data[self.pos + 1] = (v >> 16) @@ -151,6 +157,12 @@ cdef class ByteCountingOutputStream(OutputStream): cpdef write_var_int64(self, libc.stdint.int64_t signed_v): self.count += get_varint_size(signed_v) + cpdef write_var_int32(self, libc.stdint.int64_t signed_v): + if signed_v < 0: + self.count += 5 + else: + self.count += get_varint_size(signed_v) + cpdef write_byte(self, unsigned char _): self.count += 1 @@ -225,6 +237,11 @@ cdef class InputStream(object): return result + cpdef libc.stdint.int32_t read_var_int32(self) except? -1: + """Decode a variable-length encoded int32 from a stream.""" + cdef libc.stdint.int64_t v = self.read_var_int64() + return (v); + cpdef libc.stdint.int64_t read_bigendian_int64(self) except? 
-1: return self.read_bigendian_uint64() diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py index 57662056b2a0..1e8b2ac11627 100644 --- a/sdks/python/apache_beam/coders/stream_test.py +++ b/sdks/python/apache_beam/coders/stream_test.py @@ -92,6 +92,27 @@ def test_medium_var_int64(self): def test_large_var_int64(self): self.run_read_write_var_int64([0, 2**63 - 1, -2**63, 2**63 - 3]) + def run_read_write_var_int32(self, values): + out_s = self.OutputStream() + for v in values: + out_s.write_var_int32(v) + in_s = self.InputStream(out_s.get()) + for v in values: + self.assertEqual(v, in_s.read_var_int32()) + + def test_small_var_int32(self): + self.run_read_write_var_int32(range(-10, 30)) + + def test_medium_var_int32(self): + base = -1.7 + self.run_read_write_var_int32([ + int(base**pow) + for pow in range(1, int(31 * math.log(2) / math.log(-base))) + ]) + + def test_large_var_int32(self): + self.run_read_write_var_int32([0, 2**31 - 1, -2**31, 2**31 - 3]) + def test_read_write_double(self): values = 0, 1, -1, 1e100, 1.0 / 3, math.pi, float('inf') out_s = self.OutputStream() diff --git a/sdks/python/apache_beam/internal/cloudpickle/__init__.py b/sdks/python/apache_beam/internal/cloudpickle/__init__.py new file mode 100644 index 000000000000..3768a936efd3 --- /dev/null +++ b/sdks/python/apache_beam/internal/cloudpickle/__init__.py @@ -0,0 +1,18 @@ +from . 
import cloudpickle +from .cloudpickle import * # noqa + +__doc__ = cloudpickle.__doc__ + +__version__ = "3.1.1" + +__all__ = [ # noqa + "__version__", + "Pickler", + "CloudPickler", + "dumps", + "loads", + "dump", + "load", + "register_pickle_by_value", + "unregister_pickle_by_value", +] diff --git a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py new file mode 100644 index 000000000000..48980526cf18 --- /dev/null +++ b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py @@ -0,0 +1,1537 @@ +"""Pickler class to extend the standard pickle.Pickler functionality + +The main objective is to make it natural to perform distributed computing on +clusters (such as PySpark, Dask, Ray...) with interactively defined code +(functions, classes, ...) written in notebooks or console. + +In particular this pickler adds the following features: +- serialize interactively-defined or locally-defined functions, classes, + enums, typevars, lambdas and nested functions to compiled byte code; +- deal with some other non-serializable objects in an ad-hoc manner where + applicable. + +This pickler is therefore meant to be used for the communication between short +lived Python processes running the same version of Python and libraries. In +particular, it is not meant to be used for long term storage of Python objects. + +It does not include an unpickler, as standard Python unpickling suffices. + +This module was extracted from the `cloud` package, developed by `PiCloud, Inc. +`_. + +Copyright (c) 2012-now, CloudPickle developers and contributors. +Copyright (c) 2012, Regents of the University of California. +Copyright (c) 2009 `PiCloud, Inc. `_. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the University of California, Berkeley nor the + names of its contributors may be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+""" + +# mypy: ignore-errors +# pylint: skip-file + +import _collections_abc +from collections import ChainMap, OrderedDict +import abc +import builtins +import copyreg +import dataclasses +import dis +from enum import Enum +import io +import itertools +import logging +import opcode +import pickle +from pickle import _getattribute as _pickle_getattribute +import platform +import struct +import sys +import threading +import types +import typing +import uuid +import warnings +import weakref + +# The following import is required to be imported in the cloudpickle +# namespace to be able to load pickle files generated with older versions of +# cloudpickle. See: tests/test_backward_compat.py +from types import CellType # noqa: F401 + +# cloudpickle is meant for inter process communication: we expect all +# communicating processes to run the same Python version hence we favor +# communication speed over compatibility: +DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL + +# Names of modules whose resources should be treated as dynamic. +_PICKLE_BY_VALUE_MODULES = set() + +# Track the provenance of reconstructed dynamic classes to make it possible to +# reconstruct instances from the matching singleton class definition when +# appropriate and preserve the usual "isinstance" semantics of Python objects. 
+_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() +_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() +_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock() + +PYPY = platform.python_implementation() == "PyPy" + +builtin_code_type = None +if PYPY: + # builtin-code objects only exist in pypy + builtin_code_type = type(float.__new__.__code__) + +_extract_code_globals_cache = weakref.WeakKeyDictionary() + + +def _get_or_create_tracker_id(class_def): + with _DYNAMIC_CLASS_TRACKER_LOCK: + class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) + if class_tracker_id is None: + class_tracker_id = uuid.uuid4().hex + _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id + _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def + return class_tracker_id + + +def _lookup_class_or_track(class_tracker_id, class_def): + if class_tracker_id is not None: + with _DYNAMIC_CLASS_TRACKER_LOCK: + class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault( + class_tracker_id, class_def) + _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id + return class_def + + +def register_pickle_by_value(module): + """Register a module to make its functions and classes picklable by value. + + By default, functions and classes that are attributes of an importable + module are to be pickled by reference, that is relying on re-importing + the attribute from the module at load time. + + If `register_pickle_by_value(module)` is called, all its functions and + classes are subsequently to be pickled by value, meaning that they can + be loaded in Python processes where the module is not importable. + + This is especially useful when developing a module in a distributed + execution environment: restarting the client Python process with the new + source code is enough: there is no need to re-install the new version + of the module on all the worker nodes nor to restart the workers. + + Note: this feature is considered experimental. 
See the cloudpickle + README.md file for more details and limitations. + """ + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {str(module)} instead") + # In the future, cloudpickle may need a way to access any module registered + # for pickling by value in order to introspect relative imports inside + # functions pickled by value. (see + # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633). + # This access can be ensured by checking that module is present in + # sys.modules at registering time and assuming that it will still be in + # there when accessed during pickling. Another alternative would be to + # store a weakref to the module. Even though cloudpickle does not implement + # this introspection yet, in order to avoid a possible breaking change + # later, we still enforce the presence of module inside sys.modules. + if module.__name__ not in sys.modules: + raise ValueError( + f"{module} was not imported correctly, have you used an " + "`import` statement to access it?") + _PICKLE_BY_VALUE_MODULES.add(module.__name__) + + +def unregister_pickle_by_value(module): + """Unregister that the input module should be pickled by value.""" + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {str(module)} instead") + if module.__name__ not in _PICKLE_BY_VALUE_MODULES: + raise ValueError(f"{module} is not registered for pickle by value") + else: + _PICKLE_BY_VALUE_MODULES.remove(module.__name__) + + +def list_registry_pickle_by_value(): + return _PICKLE_BY_VALUE_MODULES.copy() + + +def _is_registered_pickle_by_value(module): + module_name = module.__name__ + if module_name in _PICKLE_BY_VALUE_MODULES: + return True + while True: + parent_name = module_name.rsplit(".", 1)[0] + if parent_name == module_name: + break + if parent_name in _PICKLE_BY_VALUE_MODULES: + return True + module_name = parent_name + return False + + +if 
sys.version_info >= (3, 14): + + def _getattribute(obj, name): + return _pickle_getattribute(obj, name.split('.')) +else: + + def _getattribute(obj, name): + return _pickle_getattribute(obj, name)[0] + + +def _whichmodule(obj, name): + """Find the module an object belongs to. + + This function differs from ``pickle.whichmodule`` in two ways: + - it does not mangle the cases where obj's module is __main__ and obj was + not found in any module. + - Errors arising during module introspection are ignored, as those errors + are considered unwanted side effects. + """ + module_name = getattr(obj, "__module__", None) + + if module_name is not None: + return module_name + # Protect the iteration by using a copy of sys.modules against dynamic + # modules that trigger imports of other modules upon calls to getattr or + # other threads importing at the same time. + for module_name, module in sys.modules.copy().items(): + # Some modules such as coverage can inject non-module objects inside + # sys.modules + if (module_name == "__main__" or module_name == "__mp_main__" or + module is None or not isinstance(module, types.ModuleType)): + continue + try: + if _getattribute(module, name) is obj: + return module_name + except Exception: + pass + return None + + +def _should_pickle_by_reference(obj, name=None): + """Test whether an function or a class should be pickled by reference + + Pickling by reference means by that the object (typically a function or a + class) is an attribute of a module that is assumed to be importable in the + target Python environment. Loading will therefore rely on importing the + module and then calling `getattr` on it to access the function or class. + + Pickling by reference is the only option to pickle functions and classes + in the standard library. 
In cloudpickle the alternative option is to + pickle by value (for instance for interactively or locally defined + functions and classes or for attributes of modules that have been + explicitly registered to be pickled by value. + """ + if isinstance(obj, types.FunctionType) or issubclass(type(obj), type): + module_and_name = _lookup_module_and_qualname(obj, name=name) + if module_and_name is None: + return False + module, name = module_and_name + return not _is_registered_pickle_by_value(module) + + elif isinstance(obj, types.ModuleType): + # We assume that sys.modules is primarily used as a cache mechanism for + # the Python import machinery. Checking if a module has been added in + # is sys.modules therefore a cheap and simple heuristic to tell us + # whether we can assume that a given module could be imported by name + # in another Python process. + if _is_registered_pickle_by_value(obj): + return False + return obj.__name__ in sys.modules + else: + raise TypeError( + "cannot check importability of {} instances".format(type(obj).__name__)) + + +def _lookup_module_and_qualname(obj, name=None): + if name is None: + name = getattr(obj, "__qualname__", None) + if name is None: # pragma: no cover + # This used to be needed for Python 2.7 support but is probably not + # needed anymore. However we keep the __name__ introspection in case + # users of cloudpickle rely on this old behavior for unknown reasons. + name = getattr(obj, "__name__", None) + + module_name = _whichmodule(obj, name) + + if module_name is None: + # In this case, obj.__module__ is None AND obj was not found in any + # imported module. obj is thus treated as dynamic. + return None + + if module_name == "__main__": + return None + + # Note: if module_name is in sys.modules, the corresponding module is + # assumed importable at unpickling time. 
See #357 + module = sys.modules.get(module_name, None) + if module is None: + # The main reason why obj's module would not be imported is that this + # module has been dynamically created, using for example + # types.ModuleType. The other possibility is that module was removed + # from sys.modules after obj was created/imported. But this case is not + # supported, as the standard pickle does not support it either. + return None + + try: + obj2 = _getattribute(module, name) + except AttributeError: + # obj was not found inside the module it points to + return None + if obj2 is not obj: + return None + return module, name + + +def _extract_code_globals(co): + """Find all globals names read or written to by codeblock co.""" + out_names = _extract_code_globals_cache.get(co) + if out_names is None: + # We use a dict with None values instead of a set to get a + # deterministic order and avoid introducing non-deterministic pickle + # bytes as a results. + out_names = {name: None for name in _walk_global_ops(co)} + + # Declaring a function inside another one using the "def ..." syntax + # generates a constant code object corresponding to the one of the + # nested function's As the nested function may itself need global + # variables, we need to introspect its code, extract its globals, (look + # for code object in it's co_consts attribute..) and add the result to + # code_globals + if co.co_consts: + for const in co.co_consts: + if isinstance(const, types.CodeType): + out_names.update(_extract_code_globals(const)) + + _extract_code_globals_cache[co] = out_names + + return out_names + + +def _find_imported_submodules(code, top_level_dependencies): + """Find currently imported submodules used by a function. + + Submodules used by a function need to be detected and referenced for the + function to work correctly at depickling time. 
Because submodules can be + referenced as attribute of their parent package (``package.submodule``), we + need a special introspection technique that does not rely on GLOBAL-related + opcodes to find references of them in a code object. + + Example: + ``` + import concurrent.futures + import cloudpickle + def func(): + x = concurrent.futures.ThreadPoolExecutor + if __name__ == '__main__': + cloudpickle.dumps(func) + ``` + The globals extracted by cloudpickle in the function's state include the + concurrent package, but not its submodule (here, concurrent.futures), which + is the module used by func. Find_imported_submodules will detect the usage + of concurrent.futures. Saving this module alongside with func will ensure + that calling func once depickled does not fail due to concurrent.futures + not being imported + """ + + subimports = [] + # check if any known dependency is an imported package + for x in top_level_dependencies: + if (isinstance(x, types.ModuleType) and hasattr(x, "__package__") and + x.__package__): + # check if the package has any currently loaded sub-imports + prefix = x.__name__ + "." + # A concurrent thread could mutate sys.modules, + # make sure we iterate over a copy to avoid exceptions + for name in list(sys.modules): + # Older versions of pytest will add a "None" module to + # sys.modules. 
+ if name is not None and name.startswith(prefix): + # check whether the function can address the sub-module + tokens = set(name[len(prefix):].split(".")) + if not tokens - set(code.co_names): + subimports.append(sys.modules[name]) + return subimports + + +# relevant opcodes +STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"] +DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"] +LOAD_GLOBAL = opcode.opmap["LOAD_GLOBAL"] +GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL) +HAVE_ARGUMENT = dis.HAVE_ARGUMENT +EXTENDED_ARG = dis.EXTENDED_ARG + +_BUILTIN_TYPE_NAMES = {} +for k, v in types.__dict__.items(): + if type(v) is type: + _BUILTIN_TYPE_NAMES[v] = k + + +def _builtin_type(name): + if name == "ClassType": # pragma: no cover + # Backward compat to load pickle files generated with cloudpickle + # < 1.3 even if loading pickle files from older versions is not + # officially supported. + return type + return getattr(types, name) + + +def _walk_global_ops(code): + """Yield referenced name for global-referencing instructions in code.""" + for instr in dis.get_instructions(code): + op = instr.opcode + if op in GLOBAL_OPS: + yield instr.argval + + +def _extract_class_dict(cls): + """Retrieve a copy of the dict of a class without the inherited method.""" + # Hack to circumvent non-predictable memoization caused by string interning. + # See the inline comment in _class_setstate for details. + clsdict = {"".join(k): cls.__dict__[k] for k in sorted(cls.__dict__)} + + if len(cls.__bases__) == 1: + inherited_dict = cls.__bases__[0].__dict__ + else: + inherited_dict = {} + for base in reversed(cls.__bases__): + inherited_dict.update(base.__dict__) + to_remove = [] + for name, value in clsdict.items(): + try: + base_value = inherited_dict[name] + if value is base_value: + to_remove.append(name) + except KeyError: + pass + for name in to_remove: + clsdict.pop(name) + return clsdict + + +def is_tornado_coroutine(func): + """Return whether `func` is a Tornado coroutine function. 
+ + Running coroutines are not supported. + """ + warnings.warn( + "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be " + "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function " + "directly instead.", + category=DeprecationWarning, + ) + if "tornado.gen" not in sys.modules: + return False + gen = sys.modules["tornado.gen"] + if not hasattr(gen, "is_coroutine_function"): + # Tornado version is too old + return False + return gen.is_coroutine_function(func) + + +def subimport(name): + # We cannot do simply: `return __import__(name)`: Indeed, if ``name`` is + # the name of a submodule, __import__ will return the top-level root module + # of this submodule. For instance, __import__('os.path') returns the `os` + # module. + __import__(name) + return sys.modules[name] + + +def dynamic_subimport(name, vars): + mod = types.ModuleType(name) + mod.__dict__.update(vars) + mod.__dict__["__builtins__"] = builtins.__dict__ + return mod + + +def _get_cell_contents(cell): + try: + return cell.cell_contents + except ValueError: + # Handle empty cells explicitly with a sentinel value. + return _empty_cell_value + + +def instance(cls): + """Create a new instance of a class. + + Parameters + ---------- + cls : type + The class to create an instance of. + + Returns + ------- + instance : cls + A new instance of ``cls``. + """ + return cls() + + +@instance +class _empty_cell_value: + """Sentinel for empty closures.""" + @classmethod + def __reduce__(cls): + return cls.__name__ + + +def _make_function(code, globals, name, argdefs, closure): + # Setting __builtins__ in globals is needed for nogil CPython. 
+ globals["__builtins__"] = __builtins__ + return types.FunctionType(code, globals, name, argdefs, closure) + + +def _make_empty_cell(): + if False: + # trick the compiler into creating an empty cell in our lambda + cell = None + raise AssertionError("this route should not be executed") + + return (lambda: cell).__closure__[0] + + +def _make_cell(value=_empty_cell_value): + cell = _make_empty_cell() + if value is not _empty_cell_value: + cell.cell_contents = value + return cell + + +def _make_skeleton_class( + type_constructor, name, bases, type_kwargs, class_tracker_id, extra): + """Build dynamic class with an empty __dict__ to be filled once memoized + + If class_tracker_id is not None, try to lookup an existing class definition + matching that id. If none is found, track a newly reconstructed class + definition under that id so that other instances stemming from the same + class id will also reuse this class definition. + + The "extra" variable is meant to be a dict (or None) that can be used for + forward compatibility shall the need arise. + """ + # We need to intern the keys of the type_kwargs dict to avoid having + # different pickles for the same dynamic class depending on whether it was + # dynamically created or reconstructed from a pickled stream. + type_kwargs = {sys.intern(k): v for k, v in type_kwargs.items()} + + skeleton_class = types.new_class( + name, + bases, {"metaclass": type_constructor}, + lambda ns: ns.update(type_kwargs)) + + return _lookup_class_or_track(class_tracker_id, skeleton_class) + + +def _make_skeleton_enum( + bases, name, qualname, members, module, class_tracker_id, extra): + """Build dynamic enum with an empty __dict__ to be filled once memoized + + The creation of the enum class is inspired by the code of + EnumMeta._create_. + + If class_tracker_id is not None, try to lookup an existing enum definition + matching that id. 
If none is found, track a newly reconstructed enum + definition under that id so that other instances stemming from the same + class id will also reuse this enum definition. + + The "extra" variable is meant to be a dict (or None) that can be used for + forward compatibility shall the need arise. + """ + # enums always inherit from their base Enum class at the last position in + # the list of base classes: + enum_base = bases[-1] + metacls = enum_base.__class__ + classdict = metacls.__prepare__(name, bases) + + for member_name, member_value in members.items(): + classdict[member_name] = member_value + enum_class = metacls.__new__(metacls, name, bases, classdict) + enum_class.__module__ = module + enum_class.__qualname__ = qualname + + return _lookup_class_or_track(class_tracker_id, enum_class) + + +def _make_typevar( + name, bound, constraints, covariant, contravariant, class_tracker_id): + tv = typing.TypeVar( + name, + *constraints, + bound=bound, + covariant=covariant, + contravariant=contravariant, + ) + return _lookup_class_or_track(class_tracker_id, tv) + + +def _decompose_typevar(obj): + return ( + obj.__name__, + obj.__bound__, + obj.__constraints__, + obj.__covariant__, + obj.__contravariant__, + _get_or_create_tracker_id(obj), + ) + + +def _typevar_reduce(obj): + # TypeVar instances require the module information hence why we + # are not using the _should_pickle_by_reference directly + module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) + + if module_and_name is None: + return (_make_typevar, _decompose_typevar(obj)) + elif _is_registered_pickle_by_value(module_and_name[0]): + return (_make_typevar, _decompose_typevar(obj)) + + return (getattr, module_and_name) + + +def _get_bases(typ): + if "__orig_bases__" in getattr(typ, "__dict__", {}): + # For generic types (see PEP 560) + # Note that simply checking `hasattr(typ, '__orig_bases__')` is not + # correct. 
Subclasses of a fully-parameterized generic class does not + # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')` + # will return True because it's defined in the base class. + bases_attr = "__orig_bases__" + else: + # For regular class objects + bases_attr = "__bases__" + return getattr(typ, bases_attr) + + +def _make_dict_keys(obj, is_ordered=False): + if is_ordered: + return OrderedDict.fromkeys(obj).keys() + else: + return dict.fromkeys(obj).keys() + + +def _make_dict_values(obj, is_ordered=False): + if is_ordered: + return OrderedDict((i, _) for i, _ in enumerate(obj)).values() + else: + return {i: _ for i, _ in enumerate(obj)}.values() + + +def _make_dict_items(obj, is_ordered=False): + if is_ordered: + return OrderedDict(obj).items() + else: + return obj.items() + + +# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS +# ------------------------------------------------- + + +def _class_getnewargs(obj): + type_kwargs = {} + if "__module__" in obj.__dict__: + type_kwargs["__module__"] = obj.__module__ + + __dict__ = obj.__dict__.get("__dict__", None) + if isinstance(__dict__, property): + type_kwargs["__dict__"] = __dict__ + + return ( + type(obj), + obj.__name__, + _get_bases(obj), + type_kwargs, + _get_or_create_tracker_id(obj), + None, + ) + + +def _enum_getnewargs(obj): + members = {e.name: e.value for e in obj} + return ( + obj.__bases__, + obj.__name__, + obj.__qualname__, + members, + obj.__module__, + _get_or_create_tracker_id(obj), + None, + ) + + +# COLLECTION OF OBJECTS RECONSTRUCTORS +# ------------------------------------ +def _file_reconstructor(retval): + return retval + + +# COLLECTION OF OBJECTS STATE GETTERS +# ----------------------------------- + + +def _function_getstate(func): + # - Put func's dynamic attributes (stored in func.__dict__) in state. These + # attributes will be restored at unpickling time using + # f.__dict__.update(state) + # - Put func's members into slotstate. 
Such attributes will be restored at + # unpickling time by iterating over slotstate and calling setattr(func, + # slotname, slotvalue) + slotstate = { + # Hack to circumvent non-predictable memoization caused by string interning. + # See the inline comment in _class_setstate for details. + "__name__": "".join(func.__name__), + "__qualname__": "".join(func.__qualname__), + "__annotations__": func.__annotations__, + "__kwdefaults__": func.__kwdefaults__, + "__defaults__": func.__defaults__, + "__module__": func.__module__, + "__doc__": func.__doc__, + "__closure__": func.__closure__, + } + + f_globals_ref = _extract_code_globals(func.__code__) + f_globals = { + k: func.__globals__[k] + for k in f_globals_ref if k in func.__globals__ + } + + if func.__closure__ is not None: + closure_values = list(map(_get_cell_contents, func.__closure__)) + else: + closure_values = () + + # Extract currently-imported submodules used by func. Storing these modules + # in a smoke _cloudpickle_subimports attribute of the object's state will + # trigger the side effect of importing these modules at unpickling time + # (which is necessary for func to work correctly once depickled) + slotstate["_cloudpickle_submodules"] = _find_imported_submodules( + func.__code__, itertools.chain(f_globals.values(), closure_values)) + slotstate["__globals__"] = f_globals + + # Hack to circumvent non-predictable memoization caused by string interning. + # See the inline comment in _class_setstate for details. + state = {"".join(k): v for k, v in func.__dict__.items()} + return state, slotstate + + +def _class_getstate(obj): + clsdict = _extract_class_dict(obj) + clsdict.pop("__weakref__", None) + + if issubclass(type(obj), abc.ABCMeta): + # If obj is an instance of an ABCMeta subclass, don't pickle the + # cache/negative caches populated during isinstance/issubclass + # checks, but pickle the list of registered subclasses of obj. 
+ clsdict.pop("_abc_cache", None) + clsdict.pop("_abc_negative_cache", None) + clsdict.pop("_abc_negative_cache_version", None) + registry = clsdict.pop("_abc_registry", None) + if registry is None: + # The abc caches and registered subclasses of a + # class are bundled into the single _abc_impl attribute + clsdict.pop("_abc_impl", None) + (registry, _, _, _) = abc._get_dump(obj) + + clsdict["_abc_impl"] = [ + subclass_weakref() for subclass_weakref in registry + ] + else: + # In the above if clause, registry is a set of weakrefs -- in + # this case, registry is a WeakSet + clsdict["_abc_impl"] = [type_ for type_ in registry] + + if "__slots__" in clsdict: + # pickle string length optimization: member descriptors of obj are + # created automatically from obj's __slots__ attribute, no need to + # save them in obj's state + if isinstance(obj.__slots__, str): + clsdict.pop(obj.__slots__) + else: + for k in obj.__slots__: + clsdict.pop(k, None) + + clsdict.pop("__dict__", None) # unpicklable property object + + return (clsdict, {}) + + +def _enum_getstate(obj): + clsdict, slotstate = _class_getstate(obj) + + members = {e.name: e.value for e in obj} + # Cleanup the clsdict that will be passed to _make_skeleton_enum: + # Those attributes are already handled by the metaclass. + for attrname in [ + "_generate_next_value_", + "_member_names_", + "_member_map_", + "_member_type_", + "_value2member_map_", + ]: + clsdict.pop(attrname, None) + for member in members: + clsdict.pop(member) + # Special handling of Enum subclasses + return clsdict, slotstate + + +# COLLECTIONS OF OBJECTS REDUCERS +# ------------------------------- +# A reducer is a function taking a single argument (obj), and that returns a +# tuple with all the necessary data to re-construct obj. Apart from a few +# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to +# correctly pickle an object. 
+# While many built-in objects (Exceptions objects, instances of the "object" +# class, etc), are shipped with their own built-in reducer (invoked using +# obj.__reduce__), some do not. The following methods were created to "fill +# these holes". + + +def _code_reduce(obj): + """code object reducer.""" + # If you are not sure about the order of arguments, take a look at help + # of the specific type from types, for example: + # >>> from types import CodeType + # >>> help(CodeType) + + # Hack to circumvent non-predictable memoization caused by string interning. + # See the inline comment in _class_setstate for details. + co_name = "".join(obj.co_name) + + # Create shallow copies of these tuple to make cloudpickle payload deterministic. + # When creating a code object during load, copies of these four tuples are + # created, while in the main process, these tuples can be shared. + # By always creating copies, we make sure the resulting payload is deterministic. + co_names = tuple(name for name in obj.co_names) + co_varnames = tuple(name for name in obj.co_varnames) + co_freevars = tuple(name for name in obj.co_freevars) + co_cellvars = tuple(name for name in obj.co_cellvars) + if hasattr(obj, "co_exceptiontable"): + # Python 3.11 and later: there are some new attributes + # related to the enhanced exceptions. + args = ( + obj.co_argcount, + obj.co_posonlyargcount, + obj.co_kwonlyargcount, + obj.co_nlocals, + obj.co_stacksize, + obj.co_flags, + obj.co_code, + obj.co_consts, + co_names, + co_varnames, + obj.co_filename, + co_name, + obj.co_qualname, + obj.co_firstlineno, + obj.co_linetable, + obj.co_exceptiontable, + co_freevars, + co_cellvars, + ) + elif hasattr(obj, "co_linetable"): + # Python 3.10 and later: obj.co_lnotab is deprecated and constructor + # expects obj.co_linetable instead. 
+ args = ( + obj.co_argcount, + obj.co_posonlyargcount, + obj.co_kwonlyargcount, + obj.co_nlocals, + obj.co_stacksize, + obj.co_flags, + obj.co_code, + obj.co_consts, + co_names, + co_varnames, + obj.co_filename, + co_name, + obj.co_firstlineno, + obj.co_linetable, + co_freevars, + co_cellvars, + ) + elif hasattr(obj, "co_nmeta"): # pragma: no cover + # "nogil" Python: modified attributes from 3.9 + args = ( + obj.co_argcount, + obj.co_posonlyargcount, + obj.co_kwonlyargcount, + obj.co_nlocals, + obj.co_framesize, + obj.co_ndefaultargs, + obj.co_nmeta, + obj.co_flags, + obj.co_code, + obj.co_consts, + co_varnames, + obj.co_filename, + co_name, + obj.co_firstlineno, + obj.co_lnotab, + obj.co_exc_handlers, + obj.co_jump_table, + co_freevars, + co_cellvars, + obj.co_free2reg, + obj.co_cell2reg, + ) + else: + # Backward compat for 3.8 and 3.9 + args = ( + obj.co_argcount, + obj.co_posonlyargcount, + obj.co_kwonlyargcount, + obj.co_nlocals, + obj.co_stacksize, + obj.co_flags, + obj.co_code, + obj.co_consts, + co_names, + co_varnames, + obj.co_filename, + co_name, + obj.co_firstlineno, + obj.co_lnotab, + co_freevars, + co_cellvars, + ) + return types.CodeType, args + + +def _cell_reduce(obj): + """Cell (containing values of a function's free variables) reducer.""" + try: + obj.cell_contents + except ValueError: # cell is empty + return _make_empty_cell, () + else: + return _make_cell, (obj.cell_contents, ) + + +def _classmethod_reduce(obj): + orig_func = obj.__func__ + return type(obj), (orig_func, ) + + +def _file_reduce(obj): + """Save a file.""" + import io + + if not hasattr(obj, "name") or not hasattr(obj, "mode"): + raise pickle.PicklingError( + "Cannot pickle files that do not map to an actual file") + if obj is sys.stdout: + return getattr, (sys, "stdout") + if obj is sys.stderr: + return getattr, (sys, "stderr") + if obj is sys.stdin: + raise pickle.PicklingError("Cannot pickle standard input") + if obj.closed: + raise pickle.PicklingError("Cannot pickle closed 
files") + if hasattr(obj, "isatty") and obj.isatty(): + raise pickle.PicklingError("Cannot pickle files that map to tty objects") + if "r" not in obj.mode and "+" not in obj.mode: + raise pickle.PicklingError( + "Cannot pickle files that are not opened for reading: %s" % obj.mode) + + name = obj.name + + retval = io.StringIO() + + try: + # Read the whole file + curloc = obj.tell() + obj.seek(0) + contents = obj.read() + obj.seek(curloc) + except OSError as e: + raise pickle.PicklingError( + "Cannot pickle file %s as it cannot be read" % name) from e + retval.write(contents) + retval.seek(curloc) + + retval.name = name + return _file_reconstructor, (retval, ) + + +def _getset_descriptor_reduce(obj): + return getattr, (obj.__objclass__, obj.__name__) + + +def _mappingproxy_reduce(obj): + return types.MappingProxyType, (dict(obj), ) + + +def _memoryview_reduce(obj): + return bytes, (obj.tobytes(), ) + + +def _module_reduce(obj): + if _should_pickle_by_reference(obj): + return subimport, (obj.__name__, ) + else: + # Some external libraries can populate the "__builtins__" entry of a + # module's `__dict__` with unpicklable objects (see #316). For that + # reason, we do not attempt to pickle the "__builtins__" entry, and + # restore a default value for it at unpickling time. + state = obj.__dict__.copy() + state.pop("__builtins__", None) + return dynamic_subimport, (obj.__name__, state) + + +def _method_reduce(obj): + return (types.MethodType, (obj.__func__, obj.__self__)) + + +def _logger_reduce(obj): + return logging.getLogger, (obj.name, ) + + +def _root_logger_reduce(obj): + return logging.getLogger, () + + +def _property_reduce(obj): + return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__) + + +def _weakset_reduce(obj): + return weakref.WeakSet, (list(obj), ) + + +def _dynamic_class_reduce(obj): + """Save a class that can't be referenced as a module attribute. 
+ + This method is used to serialize classes that are defined inside + functions, or that otherwise can't be serialized as attribute lookups + from importable modules. + """ + if Enum is not None and issubclass(obj, Enum): + return ( + _make_skeleton_enum, + _enum_getnewargs(obj), + _enum_getstate(obj), + None, + None, + _class_setstate, + ) + else: + return ( + _make_skeleton_class, + _class_getnewargs(obj), + _class_getstate(obj), + None, + None, + _class_setstate, + ) + + +def _class_reduce(obj): + """Select the reducer depending on the dynamic nature of the class obj.""" + if obj is type(None): # noqa + return type, (None, ) + elif obj is type(Ellipsis): + return type, (Ellipsis, ) + elif obj is type(NotImplemented): + return type, (NotImplemented, ) + elif obj in _BUILTIN_TYPE_NAMES: + return _builtin_type, (_BUILTIN_TYPE_NAMES[obj], ) + elif not _should_pickle_by_reference(obj): + return _dynamic_class_reduce(obj) + return NotImplemented + + +def _dict_keys_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_keys, (list(obj), ) + + +def _dict_values_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_values, (list(obj), ) + + +def _dict_items_reduce(obj): + return _make_dict_items, (dict(obj), ) + + +def _odict_keys_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_keys, (list(obj), True) + + +def _odict_values_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_values, (list(obj), True) + + +def _odict_items_reduce(obj): + return _make_dict_items, (dict(obj), 
True) + + +def _dataclass_field_base_reduce(obj): + return _get_dataclass_field_type_sentinel, (obj.name, ) + + +# COLLECTIONS OF OBJECTS STATE SETTERS +# ------------------------------------ +# state setters are called at unpickling time, once the object is created and +# it has to be updated to how it was at unpickling time. + + +def _function_setstate(obj, state): + """Update the state of a dynamic function. + + As __closure__ and __globals__ are readonly attributes of a function, we + cannot rely on the native setstate routine of pickle.load_build, that calls + setattr on items of the slotstate. Instead, we have to modify them inplace. + """ + state, slotstate = state + obj.__dict__.update(state) + + obj_globals = slotstate.pop("__globals__") + obj_closure = slotstate.pop("__closure__") + # _cloudpickle_subimports is a set of submodules that must be loaded for + # the pickled function to work correctly at unpickling time. Now that these + # submodules are depickled (hence imported), they can be removed from the + # object's state (the object state only served as a reference holder to + # these submodules) + slotstate.pop("_cloudpickle_submodules") + + obj.__globals__.update(obj_globals) + obj.__globals__["__builtins__"] = __builtins__ + + if obj_closure is not None: + for i, cell in enumerate(obj_closure): + try: + value = cell.cell_contents + except ValueError: # cell is empty + continue + obj.__closure__[i].cell_contents = value + + for k, v in slotstate.items(): + setattr(obj, k, v) + + +def _class_setstate(obj, state): + state, slotstate = state + registry = None + for attrname, attr in state.items(): + if attrname == "_abc_impl": + registry = attr + else: + # Note: setting attribute names on a class automatically triggers their + # interning in CPython: + # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957 + # + # This means that to get deterministic pickling for a dynamic class that + # was initially defined in a different Python 
process, the pickler + # needs to ensure that dynamic class and function attribute names are + # systematically copied into a non-interned version to avoid + # unpredictable pickle payloads. + # + # Indeed the Pickler's memoizer relies on physical object identity to break + # cycles in the reference graph of the object being serialized. + setattr(obj, attrname, attr) + + if sys.version_info >= (3, 13) and "__firstlineno__" in state: + # Set the Python 3.13+ only __firstlineno__ attribute one more time, as it + # will be automatically deleted by the `setattr(obj, attrname, attr)` call + # above when `attrname` is "__firstlineno__". We assume that preserving this + # information might be important for some users and that it not stale in the + # context of cloudpickle usage, hence legitimate to propagate. Furthermore it + # is necessary to do so to keep deterministic chained pickling as tested in + # test_deterministic_str_interning_for_chained_dynamic_class_pickling. + obj.__firstlineno__ = state["__firstlineno__"] + + if registry is not None: + for subclass in registry: + obj.register(subclass) + + return obj + + +# COLLECTION OF DATACLASS UTILITIES +# --------------------------------- +# There are some internal sentinel values whose identity must be preserved when +# unpickling dataclass fields. Each sentinel value has a unique name that we can +# use to retrieve its identity at unpickling time. 
+ +_DATACLASSE_FIELD_TYPE_SENTINELS = { + dataclasses._FIELD.name: dataclasses._FIELD, + dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR, + dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR, +} + + +def _get_dataclass_field_type_sentinel(name): + return _DATACLASSE_FIELD_TYPE_SENTINELS[name] + + +class Pickler(pickle.Pickler): + # set of reducers defined and used by cloudpickle (private) + _dispatch_table = {} + _dispatch_table[classmethod] = _classmethod_reduce + _dispatch_table[io.TextIOWrapper] = _file_reduce + _dispatch_table[logging.Logger] = _logger_reduce + _dispatch_table[logging.RootLogger] = _root_logger_reduce + _dispatch_table[memoryview] = _memoryview_reduce + _dispatch_table[property] = _property_reduce + _dispatch_table[staticmethod] = _classmethod_reduce + _dispatch_table[CellType] = _cell_reduce + _dispatch_table[types.CodeType] = _code_reduce + _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce + _dispatch_table[types.ModuleType] = _module_reduce + _dispatch_table[types.MethodType] = _method_reduce + _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce + _dispatch_table[weakref.WeakSet] = _weakset_reduce + _dispatch_table[typing.TypeVar] = _typevar_reduce + _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce + _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce + _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce + _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce + _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce + _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce + _dispatch_table[abc.abstractmethod] = _classmethod_reduce + _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce + _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce + _dispatch_table[abc.abstractproperty] = _property_reduce + _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce + + 
dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) + + # function reducers are defined as instance methods of cloudpickle.Pickler + # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref) + def _dynamic_function_reduce(self, func): + """Reduce a function that is not pickleable via attribute lookup.""" + newargs = self._function_getnewargs(func) + state = _function_getstate(func) + return (_make_function, newargs, state, None, None, _function_setstate) + + def _function_reduce(self, obj): + """Reducer for function objects. + + If obj is a top-level attribute of a file-backed module, this reducer + returns NotImplemented, making the cloudpickle.Pickler fall back to + traditional pickle.Pickler routines to save obj. Otherwise, it reduces + obj using a custom cloudpickle reducer designed specifically to handle + dynamic functions. + """ + if _should_pickle_by_reference(obj): + return NotImplemented + else: + return self._dynamic_function_reduce(obj) + + def _function_getnewargs(self, func): + code = func.__code__ + + # base_globals represents the future global namespace of func at + # unpickling time. Looking it up and storing it in + # cloudpickle.Pickler.globals_ref allow functions sharing the same + # globals at pickling time to also share them once unpickled, at one + # condition: since globals_ref is an attribute of a cloudpickle.Pickler + # instance, and that a new cloudpickle.Pickler is created each time + # cloudpickle.dump or cloudpickle.dumps is called, functions also need + # to be saved within the same invocation of + # cloudpickle.dump/cloudpickle.dumps (for example: + # cloudpickle.dumps([f1, f2])). There is no such limitation when using + # cloudpickle.Pickler.dump, as long as the multiple invocations are + # bound to the same cloudpickle.Pickler instance. 
+ base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) + + if base_globals == {}: + # Add module attributes used to resolve relative imports + # instructions inside func. + for k in ["__package__", "__name__", "__path__", "__file__"]: + if k in func.__globals__: + base_globals[k] = func.__globals__[k] + + # Do not bind the free variables before the function is created to + # avoid infinite recursion. + if func.__closure__ is None: + closure = None + else: + closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars))) + + return code, base_globals, None, None, closure + + def dump(self, obj): + try: + return super().dump(obj) + except RuntimeError as e: + if len(e.args) > 0 and "recursion" in e.args[0]: + msg = "Could not pickle object as excessively deep recursion required." + raise pickle.PicklingError(msg) from e + else: + raise + + def __init__(self, file, protocol=None, buffer_callback=None): + if protocol is None: + protocol = DEFAULT_PROTOCOL + super().__init__(file, protocol=protocol, buffer_callback=buffer_callback) + # map functions __globals__ attribute ids, to ensure that functions + # sharing the same global namespace at pickling time also share + # their global namespace at unpickling time. + self.globals_ref = {} + self.proto = int(protocol) + + if not PYPY: + # pickle.Pickler is the C implementation of the CPython pickler and + # therefore we rely on reduce_override method to customize the pickler + # behavior. + + # `cloudpickle.Pickler.dispatch` is only left for backward + # compatibility - note that when using protocol 5, + # `cloudpickle.Pickler.dispatch` is not an extension of + # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler` + # subclasses the C-implemented `pickle.Pickler`, which does not expose + # a `dispatch` attribute. 
Earlier versions of `cloudpickle.Pickler` + # used `cloudpickle.Pickler.dispatch` as a class-level attribute + # storing all reducers implemented by cloudpickle, but the attribute + # name was not a great choice given because it would collide with a + # similarly named attribute in the pure-Python `pickle._Pickler` + # implementation in the standard library. + dispatch = dispatch_table + + # Implementation of the reducer_override callback, in order to + # efficiently serialize dynamic functions and classes by subclassing + # the C-implemented `pickle.Pickler`. + # TODO: decorrelate reducer_override (which is tied to CPython's + # implementation - would it make sense to backport it to pypy? - and + # pickle's protocol 5 which is implementation agnostic. Currently, the + # availability of both notions coincide on CPython's pickle, but it may + # not be the case anymore when pypy implements protocol 5. + + def reducer_override(self, obj): + """Type-agnostic reducing callback for function and classes. + + For performance reasons, subclasses of the C `pickle.Pickler` class + cannot register custom reducers for functions and classes in the + dispatch_table attribute. Reducers for such types must instead + implemented via the special `reducer_override` method. + + Note that this method will be called for any object except a few + builtin-types (int, lists, dicts etc.), which differs from reducers + in the Pickler's dispatch_table, each of them being invoked for + objects of a specific type only. + + This property comes in handy for classes: although most classes are + instances of the ``type`` metaclass, some of them can be instances + of other custom metaclasses (such as enum.EnumMeta for example). In + particular, the metaclass will likely not be known in advance, and + thus cannot be special-cased using an entry in the dispatch_table. 
+ reducer_override, among other things, allows us to register a + reducer that will be called for any class, independently of its + type. + + Notes: + + * reducer_override has the priority over dispatch_table-registered + reducers. + * reducer_override can be used to fix other limitations of + cloudpickle for other types that suffered from type-specific + reducers, such as Exceptions. See + https://github.com/cloudpipe/cloudpickle/issues/248 + """ + t = type(obj) + try: + is_anyclass = issubclass(t, type) + except TypeError: # t is not a class (old Boost; see SF #502085) + is_anyclass = False + + if is_anyclass: + return _class_reduce(obj) + elif isinstance(obj, types.FunctionType): + return self._function_reduce(obj) + else: + # fallback to save_global, including the Pickler's + # dispatch_table + return NotImplemented + + else: + # When reducer_override is not available, hack the pure-Python + # Pickler's types.FunctionType and type savers. Note: the type saver + # must override Pickler.save_global, because pickle.py contains a + # hard-coded call to save_global when pickling meta-classes. + dispatch = pickle.Pickler.dispatch.copy() + + def _save_reduce_pickle5( + self, + func, + args, + state=None, + listitems=None, + dictitems=None, + state_setter=None, + obj=None, + ): + save = self.save + write = self.write + self.save_reduce( + func, + args, + state=None, + listitems=listitems, + dictitems=dictitems, + obj=obj, + ) + # backport of the Python 3.8 state_setter pickle operations + save(state_setter) + save(obj) # simple BINGET opcode as obj is already memoized. + save(state) + write(pickle.TUPLE2) + # Trigger a state_setter(obj, state) function call. + write(pickle.REDUCE) + # The purpose of state_setter is to carry-out an + # inplace modification of obj. We do not care about what the + # method might return, so its output is eventually removed from + # the stack. 
+ write(pickle.POP) + + def save_global(self, obj, name=None, pack=struct.pack): + """Main dispatch method. + + The name of this method is somewhat misleading: all types get + dispatched here. + """ + if obj is type(None): # noqa + return self.save_reduce(type, (None, ), obj=obj) + elif obj is type(Ellipsis): + return self.save_reduce(type, (Ellipsis, ), obj=obj) + elif obj is type(NotImplemented): + return self.save_reduce(type, (NotImplemented, ), obj=obj) + elif obj in _BUILTIN_TYPE_NAMES: + return self.save_reduce( + _builtin_type, (_BUILTIN_TYPE_NAMES[obj], ), obj=obj) + + if name is not None: + super().save_global(obj, name=name) + elif not _should_pickle_by_reference(obj, name=name): + self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) + else: + super().save_global(obj, name=name) + + dispatch[type] = save_global + + def save_function(self, obj, name=None): + """Registered with the dispatch to handle all function types. + + Determines what kind of function obj is (e.g. lambda, defined at + interactive prompt, etc) and handles the pickling appropriately. + """ + if _should_pickle_by_reference(obj, name=name): + return super().save_global(obj, name=name) + elif PYPY and isinstance(obj.__code__, builtin_code_type): + return self.save_pypy_builtin_func(obj) + else: + return self._save_reduce_pickle5( + *self._dynamic_function_reduce(obj), obj=obj) + + def save_pypy_builtin_func(self, obj): + """Save pypy equivalent of builtin functions. + + PyPy does not have the concept of builtin-functions. Instead, + builtin-functions are simple function instances, but with a + builtin-code attribute. + Most of the time, builtin functions should be pickled by attribute. + But PyPy has flaky support for __qualname__, so some builtin + functions such as float.__new__ will be classified as dynamic. For + this reason only, we created this special routine. 
Because + builtin-functions are not expected to have closure or globals, + there is no additional hack (compared the one already implemented + in pickle) to protect ourselves from reference cycles. A simple + (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note + also that PyPy improved their support for __qualname__ in v3.6, so + this routing should be removed when cloudpickle supports only PyPy + 3.6 and later. + """ + rv = ( + types.FunctionType, + (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__), + obj.__dict__, + ) + self.save_reduce(*rv, obj=obj) + + dispatch[types.FunctionType] = save_function + + +# Shorthands similar to pickle.dump/pickle.dumps + + +def dump(obj, file, protocol=None, buffer_callback=None): + """Serialize obj as bytes streamed into file + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python (although this is not always + guaranteed to work because cloudpickle relies on some internal + implementation details that can change from one Python version to the + next). + """ + Pickler(file, protocol=protocol, buffer_callback=buffer_callback).dump(obj) + + +def dumps(obj, protocol=None, buffer_callback=None): + """Serialize obj as a string of bytes allocated in memory + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python (although this is not always + guaranteed to work because cloudpickle relies on some internal + implementation details that can change from one Python version to the + next). 
+ """ + with io.BytesIO() as file: + cp = Pickler(file, protocol=protocol, buffer_callback=buffer_callback) + cp.dump(obj) + return file.getvalue() + + +# Include pickles unloading functions in this namespace for convenience. +load, loads = pickle.load, pickle.loads + +# Backward compat alias. +CloudPickler = Pickler diff --git a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle_fast.py b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle_fast.py new file mode 100644 index 000000000000..fb11b4878c31 --- /dev/null +++ b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle_fast.py @@ -0,0 +1,14 @@ +"""Compatibility module. + +It can be necessary to load files generated by previous versions of cloudpickle +that rely on symbols being defined under the `cloudpickle.cloudpickle_fast` +namespace. + +See: tests/test_backward_compat.py +""" + +from . import cloudpickle + + +def __getattr__(name): + return getattr(cloudpickle, name) diff --git a/sdks/python/apache_beam/internal/cloudpickle_pickler.py b/sdks/python/apache_beam/internal/cloudpickle_pickler.py index 83cdac4b5f33..ecb9b91de870 100644 --- a/sdks/python/apache_beam/internal/cloudpickle_pickler.py +++ b/sdks/python/apache_beam/internal/cloudpickle_pickler.py @@ -30,20 +30,76 @@ import base64 import bz2 import io +import sys import threading import zlib -import cloudpickle +from apache_beam.internal.cloudpickle import cloudpickle try: from absl import flags except (ImportError, ModuleNotFoundError): pass + +def _get_proto_enum_descriptor_class(): + try: + from google.protobuf.internal import api_implementation + except ImportError: + return None + + implementation_type = api_implementation.Type() + + if implementation_type == 'upb': + try: + from google._upb._message import EnumDescriptor + return EnumDescriptor + except ImportError: + pass + elif implementation_type == 'cpp': + try: + from google.protobuf.pyext._message import EnumDescriptor + return EnumDescriptor + except ImportError: + pass + 
elif implementation_type == 'python': + try: + from google.protobuf.internal.python_message import EnumDescriptor + return EnumDescriptor + except ImportError: + pass + + return None + + +EnumDescriptor = _get_proto_enum_descriptor_class() + # Pickling, especially unpickling, causes broken module imports on Python 3 # if executed concurrently, see: BEAM-8651, http://bugs.python.org/issue38884. _pickle_lock = threading.RLock() RLOCK_TYPE = type(_pickle_lock) +LOCK_TYPE = type(threading.Lock()) + + +def _reconstruct_enum_descriptor(full_name): + for _, module in sys.modules.items(): + if not hasattr(module, 'DESCRIPTOR'): + continue + + for _, attr_value in vars(module).items(): + if not hasattr(attr_value, 'DESCRIPTOR'): + continue + + if hasattr(attr_value.DESCRIPTOR, 'enum_types_by_name'): + for (_, enum_desc) in attr_value.DESCRIPTOR.enum_types_by_name.items(): + if enum_desc.full_name == full_name: + return enum_desc + raise ImportError(f'Could not find enum descriptor: {full_name}') + + +def _pickle_enum_descriptor(obj): + full_name = obj.full_name + return _reconstruct_enum_descriptor, (full_name, ) def dumps(o, enable_trace=True, use_zlib=False) -> bytes: @@ -59,6 +115,12 @@ def dumps(o, enable_trace=True, use_zlib=False) -> bytes: pickler.dispatch_table[RLOCK_TYPE] = _pickle_rlock except NameError: pass + try: + pickler.dispatch_table[LOCK_TYPE] = _lock_reducer + except NameError: + pass + if EnumDescriptor is not None: + pickler.dispatch_table[EnumDescriptor] = _pickle_enum_descriptor pickler.dump(o) s = file.getvalue() @@ -106,6 +168,10 @@ def _pickle_rlock(obj): return RLOCK_TYPE, tuple([]) +def _lock_reducer(obj): + return threading.Lock, tuple([]) + + def dump_session(file_path): # It is possible to dump session with cloudpickle. However, since references # are saved it should not be necessary. 
See https://s.apache.org/beam-picklers diff --git a/sdks/python/apache_beam/internal/cloudpickle_pickler_test.py b/sdks/python/apache_beam/internal/cloudpickle_pickler_test.py index 8ae93d53fd1d..597c62907e90 100644 --- a/sdks/python/apache_beam/internal/cloudpickle_pickler_test.py +++ b/sdks/python/apache_beam/internal/cloudpickle_pickler_test.py @@ -26,12 +26,21 @@ from apache_beam.internal import module_test from apache_beam.internal.cloudpickle_pickler import dumps from apache_beam.internal.cloudpickle_pickler import loads +from apache_beam.portability.api import beam_runner_api_pb2 class PicklerTest(unittest.TestCase): NO_MAPPINGPROXYTYPE = not hasattr(types, "MappingProxyType") + def test_pickle_enum_descriptor(self): + TimeDomain = beam_runner_api_pb2.TimeDomain.Enum + + def fn(): + return TimeDomain.EVENT_TIME + + self.assertEqual(fn(), loads(dumps(fn))()) + def test_basics(self): self.assertEqual([1, 'a', ('z', )], loads(dumps([1, 'a', ('z', )]))) fun = lambda x: 'xyz-%s' % x @@ -97,6 +106,12 @@ def test_pickle_rlock(self): self.assertIsInstance(loads(dumps(rlock_instance)), rlock_type) + def test_pickle_lock(self): + lock_instance = threading.Lock() + lock_type = type(lock_instance) + + self.assertIsInstance(loads(dumps(lock_instance)), lock_type) + @unittest.skipIf(NO_MAPPINGPROXYTYPE, 'test if MappingProxyType introduced') def test_dump_and_load_mapping_proxy(self): self.assertEqual( diff --git a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py index 01d868950c03..ca6fa3d711ca 100644 --- a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py +++ b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py @@ -61,10 +61,10 @@ JdbcTestRow = typing.NamedTuple( "JdbcTestRow", - [("f_id", int), ("f_float", float), ("f_char", str), ("f_varchar", str), - ("f_bytes", bytes), ("f_varbytes", bytes), ("f_timestamp", Timestamp), - ("f_decimal", Decimal), ("f_date", datetime.date), - 
("f_time", datetime.time)], + [("f_id", int), ("f_id_long", int), ("f_float", float), ("f_char", str), + ("f_varchar", str), ("f_bytes", bytes), ("f_varbytes", bytes), + ("f_timestamp", Timestamp), ("f_decimal", Decimal), + ("f_date", datetime.date), ("f_time", datetime.time)], ) coders.registry.register_coder(JdbcTestRow, coders.RowCoder) @@ -72,6 +72,7 @@ "CustomSchemaRow", [ ("renamed_id", int), + ("renamed_id_long", int), ("renamed_float", float), ("renamed_char", str), ("renamed_varchar", str), @@ -184,7 +185,7 @@ def create_test_table(self, connection, table_name, database): connection.execute( sqlalchemy.text( f"CREATE TABLE IF NOT EXISTS {table_name}" + - "(f_id INTEGER, f_float DOUBLE PRECISION, " + + "(f_id INTEGER, f_id_long BIGINT, f_float DOUBLE PRECISION, " + "f_char CHAR(10), f_varchar VARCHAR(10), " + f"f_bytes {binary_type[0]}, f_varbytes {binary_type[1]}, " + "f_timestamp TIMESTAMP(3), f_decimal DECIMAL(10, 2), " + @@ -193,7 +194,8 @@ def create_test_table(self, connection, table_name, database): def generate_test_data(self, count): return [ JdbcTestRow( - i, + i - 3, + i - 3, i + 0.1, f'Test{i}', f'Test{i}', @@ -225,6 +227,7 @@ def test_xlang_jdbc_write_read(self, database): expected_rows.append( JdbcTestRow( + row.f_id, row.f_id, row.f_float, f_char, @@ -310,6 +313,7 @@ def test_xlang_jdbc_read_with_explicit_schema(self, database): expected_rows.append( CustomSchemaRow( + row.f_id, row.f_id, row.f_float, f_char, @@ -324,6 +328,7 @@ def test_xlang_jdbc_read_with_explicit_schema(self, database): def custom_row_equals(expected, actual): return ( expected.renamed_id == actual.renamed_id and + expected.renamed_id_long == actual.renamed_id_long and expected.renamed_float == actual.renamed_float and expected.renamed_char.rstrip() == actual.renamed_char.rstrip() and expected.renamed_varchar == actual.renamed_varchar and @@ -390,7 +395,7 @@ def test_xlang_jdbc_custom_statements(self, database): SimpleRow(2, "Item2", 20.75), SimpleRow(3, "Item3", 30.25), 
SimpleRow(4, "Item4", 40.0), - SimpleRow(5, "Item5", 50.5) + SimpleRow(-5, "Item5", 50.5) ] config = self.jdbc_configs[database] diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py index 9f60b5af6726..5b6507542932 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery.py +++ b/sdks/python/apache_beam/io/gcp/bigquery.py @@ -238,7 +238,7 @@ def compute_table_name(row): Chaining of operations after WriteToBigQuery -------------------------------------------- -WritToBigQuery returns an object with several PCollections that consist of +WriteToBigQuery returns an object with several PCollections that consist of metadata about the write operations. These are useful to inspect the write operation and follow with the results:: diff --git a/sdks/python/apache_beam/io/textio.py b/sdks/python/apache_beam/io/textio.py index 0d7803bcabb1..d817463cfef6 100644 --- a/sdks/python/apache_beam/io/textio.py +++ b/sdks/python/apache_beam/io/textio.py @@ -20,6 +20,7 @@ # pytype: skip-file import logging +import os from functools import partial from typing import TYPE_CHECKING from typing import Any @@ -781,6 +782,13 @@ def __init__( """ super().__init__(**kwargs) + if file_pattern: + try: + if not os.path.dirname(file_pattern): + file_pattern = os.path.join('.', file_pattern) + except TypeError: + pass + self._source = self._source_class( file_pattern, min_bundle_size, diff --git a/sdks/python/apache_beam/io/textio_test.py b/sdks/python/apache_beam/io/textio_test.py index d1bfdf6bfd35..d8454f9dfbd6 100644 --- a/sdks/python/apache_beam/io/textio_test.py +++ b/sdks/python/apache_beam/io/textio_test.py @@ -23,6 +23,7 @@ import gzip import logging import os +import platform import shutil import tempfile import unittest @@ -42,6 +43,7 @@ from apache_beam.io.textio import ReadFromText from apache_beam.io.textio import ReadFromTextWithFilename from apache_beam.io.textio import WriteToText +from apache_beam.options.pipeline_options import 
PipelineOptions from apache_beam.testing.test_pipeline import TestPipeline from apache_beam.testing.test_utils import TempDir from apache_beam.testing.util import assert_that @@ -197,6 +199,23 @@ def test_read_single_file(self): assert len(expected_data) == TextSourceTest.DEFAULT_NUM_RECORDS self._run_read_test(file_name, expected_data) + @unittest.skipIf(platform.system() == 'Windows', 'Skipping on Windows') + def test_read_from_text_file_pattern_with_dot_slash(self): + cwd = os.getcwd() + expected = ['abc', 'de'] + with TempDir() as temp_dir: + temp_dir.create_temp_file(suffix='.txt', lines=[b'a', b'b', b'c']) + temp_dir.create_temp_file(suffix='.txt', lines=[b'd', b'e']) + + os.chdir(temp_dir.get_path()) + with TestPipeline() as p: + dot_slash = p | 'ReadDotSlash' >> ReadFromText('./*.txt') + no_dot_slash = p | 'ReadNoSlash' >> ReadFromText('*.txt') + + assert_that(dot_slash, equal_to(expected)) + assert_that(no_dot_slash, equal_to(expected)) + os.chdir(cwd) + def test_read_single_file_smaller_than_default_buffer(self): file_name, expected_data = write_data(TextSourceTest.DEFAULT_NUM_RECORDS) self._run_read_test( @@ -1442,6 +1461,20 @@ def test_read_escaped_escapechar_after_splitting_many(self): source_test_utils.assert_sources_equal_reference_source( reference_source_info, sources_info) + def test_read_from_text_with_value_provider(self): + class UserDefinedOptions(PipelineOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_value_provider_argument( + '--file_pattern', + help='This keyword argument is a value provider', + default='some value') + + options = UserDefinedOptions(['--file_pattern', 'abc']) + with self.assertRaises(OSError): + with TestPipeline(options=options) as pipeline: + _ = pipeline | 'Read' >> ReadFromText(options.file_pattern) + class TextSinkTest(unittest.TestCase): def setUp(self): diff --git a/sdks/python/apache_beam/io/tfrecordio.py b/sdks/python/apache_beam/io/tfrecordio.py index da7ab0954339..b911c64a1348 100644 
--- a/sdks/python/apache_beam/io/tfrecordio.py +++ b/sdks/python/apache_beam/io/tfrecordio.py @@ -41,7 +41,8 @@ def _default_crc32c_fn(value): - """Calculates crc32c of a bytes object using either snappy or crcmod.""" + """Calculates crc32c of a bytes object using + either snappy or google-crc32c or crcmod.""" if not _default_crc32c_fn.fn: try: @@ -55,11 +56,20 @@ def _default_crc32c_fn(value): except ImportError: pass + if not _default_crc32c_fn.fn: + try: + import google_crc32c # pylint: disable=import-error + + if getattr(google_crc32c, 'value', None): + _default_crc32c_fn.fn = google_crc32c.value # pylint: disable=protected-access + except ImportError: + pass + if not _default_crc32c_fn.fn: _LOGGER.warning( - 'Couldn\'t find python-snappy<0.7 so the implementation of ' - '_TFRecordUtil._masked_crc32c is not as fast as it could ' - 'be.') + 'Couldn\'t find python-snappy or google-crc32c so the ' + 'implementation of _TFRecordUtil._masked_crc32c is not as fast ' + 'as it could be.') _default_crc32c_fn.fn = crcmod.predefined.mkPredefinedCrcFun('crc-32c') return _default_crc32c_fn.fn(value) diff --git a/sdks/python/apache_beam/ml/anomaly/specifiable_test.py b/sdks/python/apache_beam/ml/anomaly/specifiable_test.py index 4492cbbe4104..8ededd2deff2 100644 --- a/sdks/python/apache_beam/ml/anomaly/specifiable_test.py +++ b/sdks/python/apache_beam/ml/anomaly/specifiable_test.py @@ -25,6 +25,7 @@ from parameterized import parameterized +from apache_beam.internal.cloudpickle import cloudpickle from apache_beam.ml.anomaly.specifiable import _FALLBACK_SUBSPACE from apache_beam.ml.anomaly.specifiable import _KNOWN_SPECIFIABLE from apache_beam.ml.anomaly.specifiable import Spec @@ -353,7 +354,6 @@ def test_on_pickle(self): self.assertEqual(FooForPickle.counter, 1) self.assertEqual(new_foo_2.__dict__, foo.__dict__) - import cloudpickle FooForPickle.counter = 0 foo = FooForPickle(456) self.assertEqual(FooForPickle.counter, 0) diff --git 
a/sdks/python/apache_beam/ml/inference/base.py b/sdks/python/apache_beam/ml/inference/base.py index e4c3e4cab5e0..117a73de1b9a 100644 --- a/sdks/python/apache_beam/ml/inference/base.py +++ b/sdks/python/apache_beam/ml/inference/base.py @@ -27,6 +27,7 @@ collection, sharing model between threads, and batching elements. """ +import functools import logging import os import pickle @@ -34,6 +35,8 @@ import threading import time import uuid +from abc import ABC +from abc import abstractmethod from collections import OrderedDict from collections import defaultdict from copy import deepcopy @@ -56,7 +59,10 @@ from typing import Union import apache_beam as beam +from apache_beam.io.components.adaptive_throttler import AdaptiveThrottler +from apache_beam.metrics.metric import Metrics from apache_beam.utils import multi_process_shared +from apache_beam.utils import retry from apache_beam.utils import shared try: @@ -67,6 +73,7 @@ _NANOSECOND_TO_MILLISECOND = 1_000_000 _NANOSECOND_TO_MICROSECOND = 1_000 +_MILLISECOND_TO_SECOND = 1_000 ModelT = TypeVar('ModelT') ExampleT = TypeVar('ExampleT') @@ -339,6 +346,139 @@ def should_garbage_collect_on_timeout(self) -> bool: return self.share_model_across_processes() +class RemoteModelHandler(ABC, ModelHandler[ExampleT, PredictionT, ModelT]): + """Has the ability to call a model at a remote endpoint.""" + def __init__( + self, + namespace: str = '', + num_retries: int = 5, + throttle_delay_secs: int = 5, + retry_filter: Callable[[Exception], bool] = lambda x: True, + *, + window_ms: int = 1 * _MILLISECOND_TO_SECOND, + bucket_ms: int = 1 * _MILLISECOND_TO_SECOND, + overload_ratio: float = 2): + """Initializes metrics tracking + an AdaptiveThrottler class for enabling + client-side throttling for remote calls to an inference service. + See https://s.apache.org/beam-client-side-throttling for more details + on the configuration of the throttling and retry + mechanics. 
+ + Args: + namespace: the metrics and logging namespace + num_retries: the maximum number of times to retry a request on retriable + errors before failing + throttle_delay_secs: the amount of time to throttle when the client-side + elects to throttle + retry_filter: a function accepting an exception as an argument and + returning a boolean. On a true return, the run_inference call will + be retried. Defaults to always retrying. + window_ms: length of history to consider, in ms, to set throttling. + bucket_ms: granularity of time buckets that we store data in, in ms. + overload_ratio: the target ratio between requests sent and successful + requests. This is "K" in the formula in + https://landing.google.com/sre/book/chapters/handling-overload.html. + """ + # Configure AdaptiveThrottler and throttling metrics for client-side + # throttling behavior. + self.throttled_secs = Metrics.counter( + namespace, "cumulativeThrottlingSeconds") + self.throttler = AdaptiveThrottler( + window_ms=window_ms, bucket_ms=bucket_ms, overload_ratio=overload_ratio) + self.logger = logging.getLogger(namespace) + + self.num_retries = num_retries + self.throttle_delay_secs = throttle_delay_secs + self.retry_filter = retry_filter + + def __init_subclass__(cls): + if cls.load_model is not RemoteModelHandler.load_model: + raise Exception( + "Cannot override RemoteModelHandler.load_model, ", + "implement create_client instead.") + if cls.run_inference is not RemoteModelHandler.run_inference: + raise Exception( + "Cannot override RemoteModelHandler.run_inference, ", + "implement request instead.") + + @abstractmethod + def create_client(self) -> ModelT: + """Creates the client that is used to make the remote inference request + in request(). All relevant arguments should be passed to __init__(). 
+ """ + raise NotImplementedError(type(self)) + + def load_model(self) -> ModelT: + return self.create_client() + + def retry_on_exception(func): + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + return retry.with_exponential_backoff( + num_retries=self.num_retries, + retry_filter=self.retry_filter)(func)(self, *args, **kwargs) + + return wrapper + + @retry_on_exception + def run_inference( + self, + batch: Sequence[ExampleT], + model: ModelT, + inference_args: Optional[Dict[str, Any]] = None) -> Iterable[PredictionT]: + """Runs inferences on a batch of examples. Calls a remote model for + predictions and will retry if a retryable exception is raised. + + Args: + batch: A sequence of examples or features. + model: The model used to make inferences. + inference_args: Extra arguments for models whose inference call requires + extra parameters. + + Returns: + An Iterable of Predictions. + """ + while self.throttler.throttle_request(time.time() * _MILLISECOND_TO_SECOND): + self.logger.info( + "Delaying request for %d seconds due to previous failures", + self.throttle_delay_secs) + time.sleep(self.throttle_delay_secs) + self.throttled_secs.inc(self.throttle_delay_secs) + + try: + req_time = time.time() + predictions = self.request(batch, model, inference_args) + self.throttler.successful_request(req_time * _MILLISECOND_TO_SECOND) + return predictions + except Exception as e: + self.logger.error("exception raised as part of request, got %s", e) + raise + + @abstractmethod + def request( + self, + batch: Sequence[ExampleT], + model: ModelT, + inference_args: Optional[Dict[str, Any]] = None) -> Iterable[PredictionT]: + """Makes a request to a remote inference service and returns the response. + Should raise an exception of some kind if there is an error to enable the + retry and client-side throttling logic to work. Returns an iterable of the + desired prediction type. 
This method should return the values directly, as + handling return values as a generator can prevent the retry logic from + functioning correctly. + + Args: + batch: A sequence of examples or features. + model: The model used to make inferences. + inference_args: Extra arguments for models whose inference call requires + extra parameters. + + Returns: + An Iterable of Predictions. + """ + raise NotImplementedError(type(self)) + + class _ModelManager: """ A class for efficiently managing copies of multiple models. Will load a diff --git a/sdks/python/apache_beam/ml/inference/base_test.py b/sdks/python/apache_beam/ml/inference/base_test.py index 31f02c9c61c5..b1dfded99432 100644 --- a/sdks/python/apache_beam/ml/inference/base_test.py +++ b/sdks/python/apache_beam/ml/inference/base_test.py @@ -1870,5 +1870,188 @@ def test_model_status_provides_valid_garbage_collection(self): self.assertEqual(0, len(tags)) +def _always_retry(e: Exception) -> bool: + return True + + +class FakeRemoteModelHandler(base.RemoteModelHandler[int, int, FakeModel]): + def __init__( + self, + clock=None, + min_batch_size=1, + max_batch_size=9999, + retry_filter=_always_retry, + **kwargs): + self._fake_clock = clock + self._min_batch_size = min_batch_size + self._max_batch_size = max_batch_size + self._env_vars = kwargs.get('env_vars', {}) + self._multi_process_shared = multi_process_shared + super().__init__( + namespace='FakeRemoteModelHandler', retry_filter=retry_filter) + + def create_client(self): + return FakeModel() + + def request(self, batch, model, inference_args=None) -> Iterable[int]: + responses = [] + for example in batch: + responses.append(model.predict(example)) + return responses + + def batch_elements_kwargs(self): + return { + 'min_batch_size': self._min_batch_size, + 'max_batch_size': self._max_batch_size + } + + +class FakeAlwaysFailsRemoteModelHandler(base.RemoteModelHandler[int, + int, + FakeModel]): + def __init__( + self, + clock=None, + min_batch_size=1, + 
max_batch_size=9999, + retry_filter=_always_retry, + **kwargs): + self._fake_clock = clock + self._min_batch_size = min_batch_size + self._max_batch_size = max_batch_size + self._env_vars = kwargs.get('env_vars', {}) + super().__init__( + namespace='FakeRemoteModelHandler', + retry_filter=retry_filter, + num_retries=2, + throttle_delay_secs=1) + + def create_client(self): + return FakeModel() + + def request(self, batch, model, inference_args=None) -> Iterable[int]: + raise Exception + + def batch_elements_kwargs(self): + return { + 'min_batch_size': self._min_batch_size, + 'max_batch_size': self._max_batch_size + } + + +class FakeFailsOnceRemoteModelHandler(base.RemoteModelHandler[int, + int, + FakeModel]): + def __init__( + self, + clock=None, + min_batch_size=1, + max_batch_size=9999, + retry_filter=_always_retry, + **kwargs): + self._fake_clock = clock + self._min_batch_size = min_batch_size + self._max_batch_size = max_batch_size + self._env_vars = kwargs.get('env_vars', {}) + self._should_fail = True + super().__init__( + namespace='FakeRemoteModelHandler', + retry_filter=retry_filter, + num_retries=2, + throttle_delay_secs=1) + + def create_client(self): + return FakeModel() + + def request(self, batch, model, inference_args=None) -> Iterable[int]: + if self._should_fail: + self._should_fail = False + raise Exception + else: + self._should_fail = True + responses = [] + for example in batch: + responses.append(model.predict(example)) + return responses + + def batch_elements_kwargs(self): + return { + 'min_batch_size': self._min_batch_size, + 'max_batch_size': self._max_batch_size + } + + +class RunInferenceRemoteTest(unittest.TestCase): + def test_normal_model_execution(self): + with TestPipeline() as pipeline: + examples = [1, 5, 3, 10] + expected = [example + 1 for example in examples] + pcoll = pipeline | 'start' >> beam.Create(examples) + actual = pcoll | base.RunInference(FakeRemoteModelHandler()) + assert_that(actual, equal_to(expected), 
label='assert:inferences') + + def test_repeated_requests_fail(self): + test_pipeline = TestPipeline() + with self.assertRaises(Exception): + _ = ( + test_pipeline + | beam.Create([1, 2, 3, 4]) + | base.RunInference(FakeAlwaysFailsRemoteModelHandler())) + test_pipeline.run() + + def test_works_on_retry(self): + with TestPipeline() as pipeline: + examples = [1, 5, 3, 10] + expected = [example + 1 for example in examples] + pcoll = pipeline | 'start' >> beam.Create(examples) + actual = pcoll | base.RunInference(FakeFailsOnceRemoteModelHandler()) + assert_that(actual, equal_to(expected), label='assert:inferences') + + def test_exception_on_load_model_override(self): + with self.assertRaises(Exception): + + class _(base.RemoteModelHandler[int, int, FakeModel]): + def __init__(self, clock=None, retry_filter=_always_retry, **kwargs): + self._fake_clock = clock + self._min_batch_size = 1 + self._max_batch_size = 1 + self._env_vars = kwargs.get('env_vars', {}) + super().__init__( + namespace='FakeRemoteModelHandler', retry_filter=retry_filter) + + def load_model(self): + return FakeModel() + + def request(self, batch, model, inference_args=None) -> Iterable[int]: + responses = [] + for example in batch: + responses.append(model.predict(example)) + return responses + + def test_exception_on_run_inference_override(self): + with self.assertRaises(Exception): + + class _(base.RemoteModelHandler[int, int, FakeModel]): + def __init__(self, clock=None, retry_filter=_always_retry, **kwargs): + self._fake_clock = clock + self._min_batch_size = 1 + self._max_batch_size = 1 + self._env_vars = kwargs.get('env_vars', {}) + super().__init__( + namespace='FakeRemoteModelHandler', retry_filter=retry_filter) + + def create_client(self): + return FakeModel() + + def run_inference(self, + batch, + model, + inference_args=None) -> Iterable[int]: + responses = [] + for example in batch: + responses.append(model.predict(example)) + return responses + + if __name__ == '__main__': unittest.main() 
diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt index e0a5c704de4f..bc2113b5395f 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/tensorflow_tests_requirements.txt @@ -20,4 +20,4 @@ tensorflow>=2.12.0 tensorflow_hub>=0.10.0 Pillow>=9.0.0 typing-extensions>=4.8.0 - +google-cloud-monitoring>=2.27.0 diff --git a/sdks/python/apache_beam/ml/inference/torch_tests_requirements.txt b/sdks/python/apache_beam/ml/inference/torch_tests_requirements.txt index 790f015f9b29..df6273038f1a 100644 --- a/sdks/python/apache_beam/ml/inference/torch_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/inference/torch_tests_requirements.txt @@ -19,3 +19,4 @@ torch>=1.7.1 torchvision>=0.8.2 pillow>=8.0.0 transformers>=4.18.0 +google-cloud-monitoring>=2.27.0 \ No newline at end of file diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py index 21eb6e64df1f..fb370d8d6f3b 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/names.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py @@ -34,6 +34,6 @@ # Unreleased sdks use container image tag specified below. # Update this tag whenever there is a change that # requires changes to SDK harness container or SDK harness launcher. 
-BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20250311' +BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20250401' DATAFLOW_CONTAINER_IMAGE_REPOSITORY = 'gcr.io/cloud-dataflow/v1beta3' diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/yarn.lock b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/yarn.lock index 135bface1ac8..bcd70a0d378b 100644 --- a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/yarn.lock +++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/yarn.lock @@ -233,11 +233,11 @@ "@babel/helper-plugin-utils" "^7.8.0" "@babel/runtime@^7.1.2": - version "7.10.5" - resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.10.5.tgz#303d8bd440ecd5a491eae6117fd3367698674c5c" - integrity sha512-otddXKhdNn7d0ptoFRHtMLa8LqDxLYwTjB4nYgM1yy5N6gU/MUf8zqyyLltCH3yAVitBzmwK4us+DD0l/MauAg== + version "7.26.10" + resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.26.10.tgz#a07b4d8fa27af131a633d7b3524db803eb4764c2" + integrity sha512-2WJMeRQPHKSPemqk/awGrAiuFfzBmOIPXKizAsVhWH9YJqLZ0H+HS4c8loHGgW6utJ3E/ejXQUsiGaQy2NZ9Fw== dependencies: - regenerator-runtime "^0.13.4" + regenerator-runtime "^0.14.0" "@babel/template@^7.10.4", "@babel/template@^7.3.3": version "7.10.4" @@ -7802,10 +7802,10 @@ regenerator-runtime@^0.11.0: resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.11.1.tgz#be05ad7f9bf7d22e056f9726cee5017fbf19e2e9" integrity sha512-MguG95oij0fC3QV3URf4V2SDYGJhJnJGqvIIgdECeODCT98wSWDAJ94SSuVpYQUoTcGUIL6L4yNB7j1DFFHSBg== -regenerator-runtime@^0.13.4: - version "0.13.5" - resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.5.tgz#d878a1d094b4306d10b9096484b33ebd55e26697" - integrity sha512-ZS5w8CpKFinUzOwW3c83oPeVXoNsrLsaCoLtJvAClH135j/R77RuymhiSErhm2lKcwSCIpmvIWSbDkIfAqKQlA== +regenerator-runtime@^0.14.0: + version "0.14.1" + 
resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz#356ade10263f685dda125100cd862c1db895327f" + integrity sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw== regex-not@^1.0.0, regex-not@^1.0.2: version "1.0.2" diff --git a/sdks/python/apache_beam/testing/benchmarks/inference/pytorch_image_classification_benchmarks.py b/sdks/python/apache_beam/testing/benchmarks/inference/pytorch_image_classification_benchmarks.py index 514c9d672850..a90c268ed538 100644 --- a/sdks/python/apache_beam/testing/benchmarks/inference/pytorch_image_classification_benchmarks.py +++ b/sdks/python/apache_beam/testing/benchmarks/inference/pytorch_image_classification_benchmarks.py @@ -19,19 +19,22 @@ import logging from apache_beam.examples.inference import pytorch_image_classification -from apache_beam.testing.load_tests.load_test import LoadTest +from apache_beam.testing.load_tests.dataflow_cost_benchmark import DataflowCostBenchmark from torchvision import models _PERF_TEST_MODELS = ['resnet50', 'resnet101', 'resnet152'] _PRETRAINED_MODEL_MODULE = 'torchvision.models' -class PytorchVisionBenchmarkTest(LoadTest): +class PytorchVisionBenchmarkTest(DataflowCostBenchmark): def __init__(self): # TODO (https://github.com/apache/beam/issues/23008) # make get_namespace() method in RunInference static self.metrics_namespace = 'BeamML_PyTorch' - super().__init__(metrics_namespace=self.metrics_namespace) + super().__init__( + metrics_namespace=self.metrics_namespace, + pcollection='PyTorchRunInference/BeamML_RunInference_Postprocess-0.out0' + ) def test(self): pretrained_model_name = self.pipeline.get_option('pretrained_model_name') diff --git a/sdks/python/apache_beam/testing/benchmarks/inference/pytorch_language_modeling_benchmarks.py b/sdks/python/apache_beam/testing/benchmarks/inference/pytorch_language_modeling_benchmarks.py index 1d6ecb2bd438..282a7a4e35fe 100644 --- 
a/sdks/python/apache_beam/testing/benchmarks/inference/pytorch_language_modeling_benchmarks.py +++ b/sdks/python/apache_beam/testing/benchmarks/inference/pytorch_language_modeling_benchmarks.py @@ -19,10 +19,10 @@ import logging from apache_beam.examples.inference import pytorch_language_modeling -from apache_beam.testing.load_tests.load_test import LoadTest +from apache_beam.testing.load_tests.dataflow_cost_benchmark import DataflowCostBenchmark -class PytorchLanguageModelingBenchmarkTest(LoadTest): +class PytorchLanguageModelingBenchmarkTest(DataflowCostBenchmark): def __init__(self): # TODO (https://github.com/apache/beam/issues/23008): # make get_namespace() method in RunInference static diff --git a/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py b/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py index 223b973e5fbe..89750a3a1bd6 100644 --- a/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py +++ b/sdks/python/apache_beam/testing/benchmarks/inference/tensorflow_mnist_classification_cost_benchmark.py @@ -24,7 +24,7 @@ class TensorflowMNISTClassificationCostBenchmark(DataflowCostBenchmark): def __init__(self): - super().__init__() + super().__init__(pcollection='PostProcessOutputs.out0') def test(self): extra_opts = {} diff --git a/sdks/python/apache_beam/testing/benchmarks/wordcount/requirements.txt b/sdks/python/apache_beam/testing/benchmarks/wordcount/requirements.txt new file mode 100644 index 000000000000..19c4367ea3af --- /dev/null +++ b/sdks/python/apache_beam/testing/benchmarks/wordcount/requirements.txt @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +google-cloud-monitoring>=2.27.0 \ No newline at end of file diff --git a/sdks/python/apache_beam/testing/benchmarks/wordcount/wordcount.py b/sdks/python/apache_beam/testing/benchmarks/wordcount/wordcount.py index 513ede47e80a..73662512f57c 100644 --- a/sdks/python/apache_beam/testing/benchmarks/wordcount/wordcount.py +++ b/sdks/python/apache_beam/testing/benchmarks/wordcount/wordcount.py @@ -24,7 +24,7 @@ class WordcountCostBenchmark(DataflowCostBenchmark): def __init__(self): - super().__init__() + super().__init__(pcollection='Format.out0') def test(self): extra_opts = {} diff --git a/sdks/python/apache_beam/testing/load_tests/dataflow_cost_benchmark.py b/sdks/python/apache_beam/testing/load_tests/dataflow_cost_benchmark.py index 96a1cd31e298..87af2ef6a507 100644 --- a/sdks/python/apache_beam/testing/load_tests/dataflow_cost_benchmark.py +++ b/sdks/python/apache_beam/testing/load_tests/dataflow_cost_benchmark.py @@ -17,13 +17,19 @@ # pytype: skip-file import logging +import re import time +from datetime import datetime from typing import Any from typing import Optional +from google.cloud import monitoring_v3 +from google.protobuf.duration_pb2 import Duration + import apache_beam.testing.load_tests.dataflow_cost_consts as costs from apache_beam.metrics.execution import MetricResult from apache_beam.runners.dataflow.dataflow_runner import DataflowPipelineResult +from 
apache_beam.runners.dataflow.internal.apiclient import DataflowApplicationClient from apache_beam.runners.runner import PipelineState from apache_beam.testing.load_tests.load_test import LoadTest @@ -44,61 +50,88 @@ class DataflowCostBenchmark(LoadTest): calculate the cost of the job later, as different accelerators have different billing rates per hour of use. """ + + WORKER_START_PATTERN = re.compile( + r'^All workers have finished the startup processes and ' + r'began to receive work requests.*$') + WORKER_STOP_PATTERN = re.compile(r'^Stopping worker pool.*$') + def __init__( self, metrics_namespace: Optional[str] = None, is_streaming: bool = False, - gpu: Optional[costs.Accelerator] = None): + gpu: Optional[costs.Accelerator] = None, + pcollection: str = 'ProcessOutput.out0'): + """ + Initializes DataflowCostBenchmark. + + Args: + metrics_namespace (Optional[str]): Namespace for metrics. + is_streaming (bool): Whether the pipeline is streaming or batch. + gpu (Optional[costs.Accelerator]): Optional GPU type. + pcollection (str): PCollection name to monitor throughput. 
+ """ self.is_streaming = is_streaming self.gpu = gpu + self.pcollection = pcollection super().__init__(metrics_namespace=metrics_namespace) + self.dataflow_client = DataflowApplicationClient( + self.pipeline.get_pipeline_options()) + self.monitoring_client = monitoring_v3.MetricServiceClient() - def run(self): + def run(self) -> None: try: self.test() if not hasattr(self, 'result'): self.result = self.pipeline.run() - # Defaults to waiting forever unless timeout has been set state = self.result.wait_until_finish(duration=self.timeout_ms) assert state != PipelineState.FAILED + logging.info( 'Pipeline complete, sleeping for 4 minutes to allow resource ' 'metrics to populate.') time.sleep(240) + self.extra_metrics = self._retrieve_cost_metrics(self.result) + additional_metrics = self._get_additional_metrics(self.result) + self.extra_metrics.update(additional_metrics) + + logging.info(self.extra_metrics) self._metrics_monitor.publish_metrics(self.result, self.extra_metrics) finally: self.cleanup() def _retrieve_cost_metrics(self, result: DataflowPipelineResult) -> dict[str, Any]: + """Calculates estimated cost based on pipeline resource usage.""" job_id = result.job_id() metrics = result.metrics().all_metrics(job_id) metrics_dict = self._process_metrics_list(metrics) - logging.info(metrics_dict) + cost = 0.0 - if (self.is_streaming): + if self.is_streaming: cost += metrics_dict.get( "TotalVcpuTime", 0.0) / 3600 * costs.VCPU_PER_HR_STREAMING - cost += ( - metrics_dict.get("TotalMemoryUsage", 0.0) / - 1000) / 3600 * costs.MEM_PER_GB_HR_STREAMING + cost += metrics_dict.get( + "TotalMemoryUsage", 0.0) / 1000 / 3600 * costs.MEM_PER_GB_HR_STREAMING cost += metrics_dict.get( "TotalStreamingDataProcessed", 0.0) * costs.SHUFFLE_PER_GB_STREAMING else: cost += metrics_dict.get( "TotalVcpuTime", 0.0) / 3600 * costs.VCPU_PER_HR_BATCH - cost += ( - metrics_dict.get("TotalMemoryUsage", 0.0) / - 1000) / 3600 * costs.MEM_PER_GB_HR_BATCH + cost += metrics_dict.get( + 
"TotalMemoryUsage", 0.0) / 1000 / 3600 * costs.MEM_PER_GB_HR_BATCH cost += metrics_dict.get( "TotalStreamingDataProcessed", 0.0) * costs.SHUFFLE_PER_GB_BATCH - if (self.gpu): + + if self.gpu: rate = costs.ACCELERATOR_TO_COST[self.gpu] cost += metrics_dict.get("TotalGpuTime", 0.0) / 3600 * rate + cost += metrics_dict.get("TotalPdUsage", 0.0) / 3600 * costs.PD_PER_GB_HR cost += metrics_dict.get( "TotalSsdUsage", 0.0) / 3600 * costs.PD_SSD_PER_GB_HR + metrics_dict["EstimatedCost"] = cost return metrics_dict @@ -109,7 +142,87 @@ def _process_metrics_list(self, metric_key = entry.key metric = metric_key.metric if metric_key.step == '' and metric.namespace == 'dataflow/v1b3': - if entry.committed is None: - entry.committed = 0.0 - system_metrics[metric.name] = entry.committed + system_metrics[metric.name] = entry.committed or 0.0 return system_metrics + + def _get_worker_time_interval( + self, job_id: str) -> tuple[Optional[str], Optional[str]]: + """Extracts worker start and stop times from job messages.""" + messages, _ = self.dataflow_client.list_messages( + job_id=job_id, + start_time=None, + end_time=None, + minimum_importance='JOB_MESSAGE_DETAILED') + + start_time, end_time = None, None + for message in messages: + text = message.messageText + if text: + if self.WORKER_START_PATTERN.match(text): + start_time = message.time + if self.WORKER_STOP_PATTERN.match(text): + end_time = message.time + + return start_time, end_time + + def _get_throughput_metrics( + self, project: str, job_id: str, start_time: str, + end_time: str) -> dict[str, float]: + interval = monitoring_v3.TimeInterval( + start_time=start_time, end_time=end_time) + aggregation = monitoring_v3.Aggregation( + alignment_period=Duration(seconds=60), + per_series_aligner=monitoring_v3.Aggregation.Aligner.ALIGN_MEAN) + + requests = { + "Bytes": monitoring_v3.ListTimeSeriesRequest( + name=f"projects/{project}", + filter=f'metric.type=' + f'"dataflow.googleapis.com/job/estimated_bytes_produced_count" ' + f'AND 
metric.labels.job_id=' + f'"{job_id}" AND metric.labels.pcollection="{self.pcollection}"', + interval=interval, + aggregation=aggregation), + "Elements": monitoring_v3.ListTimeSeriesRequest( + name=f"projects/{project}", + filter=f'metric.type="dataflow.googleapis.com/job/element_count" ' + f'AND metric.labels.job_id="{job_id}" ' + f'AND metric.labels.pcollection="{self.pcollection}"', + interval=interval, + aggregation=aggregation) + } + + metrics = {} + for key, req in requests.items(): + time_series = self.monitoring_client.list_time_series(request=req) + values = [ + point.value.double_value for series in time_series + for point in series.points + ] + metrics[f"AvgThroughput{key}"] = sum(values) / len( + values) if values else 0.0 + + return metrics + + def _get_job_runtime(self, start_time: str, end_time: str) -> float: + """Calculates the job runtime duration in seconds.""" + start_dt = datetime.fromisoformat(start_time[:-1]) + end_dt = datetime.fromisoformat(end_time[:-1]) + return (end_dt - start_dt).total_seconds() + + def _get_additional_metrics(self, + result: DataflowPipelineResult) -> dict[str, Any]: + job_id = result.job_id() + job = self.dataflow_client.get_job(job_id) + project = job.projectId + start_time, end_time = self._get_worker_time_interval(job_id) + if not start_time or not end_time: + logging.warning('Could not find valid worker start/end times.') + return {} + + throughput_metrics = self._get_throughput_metrics( + project, job_id, start_time, end_time) + return { + **throughput_metrics, + "JobRuntimeSeconds": self._get_job_runtime(start_time, end_time), + } diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility.py b/sdks/python/apache_beam/typehints/native_type_compatibility.py index da5bd6b0c0c4..9a4722e38574 100644 --- a/sdks/python/apache_beam/typehints/native_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/native_type_compatibility.py @@ -66,8 +66,11 @@ collections.abc.MutableSet, 
collections.abc.Collection, collections.abc.Sequence, + collections.abc.Mapping, ] +_CONVERTED_MODULES = ('typing', 'collections', 'collections.abc') + def _get_args(typ): """Returns a list of arguments to the given type. @@ -127,6 +130,10 @@ def _match_is_primitive(match_against): return lambda user_type: _is_primitive(user_type, match_against) +def _match_is_dict(user_type): + return _is_primitive(user_type, dict) or _safe_issubclass(user_type, dict) + + def _match_is_exactly_mapping(user_type): # Avoid unintentionally catching all subtypes (e.g. strings and mappings). expected_origin = collections.abc.Mapping @@ -353,8 +360,7 @@ def convert_to_beam_type(typ): # This is needed to fix https://github.com/apache/beam/issues/33356 pass - elif (typ_module != 'typing') and (typ_module != - 'collections.abc') and not is_builtin(typ): + elif typ_module not in _CONVERTED_MODULES and not is_builtin(typ): # Only translate primitives and types from collections.abc and typing. return typ if (typ_module == 'collections.abc' and @@ -371,8 +377,7 @@ def convert_to_beam_type(typ): # unsupported. _TypeMapEntry(match=is_forward_ref, arity=0, beam_type=typehints.Any), _TypeMapEntry(match=is_any, arity=0, beam_type=typehints.Any), - _TypeMapEntry( - match=_match_is_primitive(dict), arity=2, beam_type=typehints.Dict), + _TypeMapEntry(match=_match_is_dict, arity=2, beam_type=typehints.Dict), _TypeMapEntry( match=_match_is_exactly_iterable, arity=1, @@ -414,6 +419,9 @@ def convert_to_beam_type(typ): match=_match_is_exactly_sequence, arity=1, beam_type=typehints.Sequence), + _TypeMapEntry( + match=_match_is_exactly_mapping, arity=2, + beam_type=typehints.Mapping), ] # Find the first matching entry. 
@@ -534,6 +542,9 @@ def convert_to_python_type(typ): return collections.abc.Sequence[convert_to_python_type(typ.inner_type)] if isinstance(typ, typehints.IteratorTypeConstraint): return collections.abc.Iterator[convert_to_python_type(typ.yielded_type)] + if isinstance(typ, typehints.MappingTypeConstraint): + return collections.abc.Mapping[convert_to_python_type(typ.key_type), + convert_to_python_type(typ.value_type)] raise ValueError('Failed to convert Beam type: %s' % typ) diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py index e5366260c88e..5808a56c6e5f 100644 --- a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py +++ b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py @@ -228,9 +228,9 @@ def test_convert_to_beam_type_with_collections_types(self): collections.abc.Iterable[tuple[str, int]], typehints.Iterable[typehints.Tuple[str, int]]), ( - 'mapping not caught', + 'mapping', collections.abc.Mapping[str, int], - collections.abc.Mapping[str, int]), + typehints.Mapping[str, int]), ('set', collections.abc.Set[int], typehints.Set[int]), ('mutable set', collections.abc.MutableSet[int], typehints.Set[int]), ( @@ -253,6 +253,15 @@ def test_convert_to_beam_type_with_collections_types(self): 'sequence of tuples', collections.abc.Sequence[tuple[str, int]], typehints.Sequence[typehints.Tuple[str, int]]), + ( + 'ordered dict', + collections.OrderedDict[str, int], + typehints.Dict[str, int]), + ( + 'default dict', + collections.defaultdict[str, int], + typehints.Dict[str, int]), + ('count', collections.Counter[str, int], typehints.Dict[str, int]), ] for test_case in test_cases: diff --git a/sdks/python/apache_beam/typehints/schemas_test.py b/sdks/python/apache_beam/typehints/schemas_test.py index 15144c6c2c17..fbbdc035e11e 100644 --- a/sdks/python/apache_beam/typehints/schemas_test.py +++ b/sdks/python/apache_beam/typehints/schemas_test.py 
@@ -30,7 +30,6 @@ from typing import Optional from typing import Sequence -import cloudpickle import dill import numpy as np from hypothesis import given @@ -38,6 +37,7 @@ from parameterized import parameterized from parameterized import parameterized_class +from apache_beam.internal.cloudpickle import cloudpickle from apache_beam.portability import common_urns from apache_beam.portability.api import schema_pb2 from apache_beam.typehints import row_type diff --git a/sdks/python/apache_beam/typehints/typehints.py b/sdks/python/apache_beam/typehints/typehints.py index 51b1b1ca68d0..0920a3cf36b0 100644 --- a/sdks/python/apache_beam/typehints/typehints.py +++ b/sdks/python/apache_beam/typehints/typehints.py @@ -505,20 +505,24 @@ class UnionHint(CompositeTypeHint): """ class UnionConstraint(TypeConstraint): def __init__(self, union_types): - self.union_types = set(normalize(t) for t in union_types) + # Use a list instead of a set to make the pickle serialization + # deterministic. + self.union_types = list(set(normalize(t) for t in union_types)) + # Sorting the type name strings simplifies unit tests. + self.union_types.sort(key=repr) def __eq__(self, other): return ( isinstance(other, UnionHint.UnionConstraint) and - self.union_types == other.union_types) + # The union types represent a set even thought they are stored as a + # list. + set(self.union_types) == set(other.union_types)) def __hash__(self): return 1 + sum(hash(t) for t in self.union_types) def __repr__(self): - # Sorting the type name strings simplifies unit tests. - return 'Union[%s]' % ( - ', '.join(sorted(repr(t) for t in self.union_types))) + return 'Union[%s]' % (', '.join(repr(t) for t in self.union_types)) def inner_types(self): for t in self.union_types: @@ -550,7 +554,7 @@ def type_check(self, instance): '%s type-constraint violated. 
Expected an instance of one of: %s, ' 'received %s instead.%s' % ( repr(self), - tuple(sorted(repr(t) for t in self.union_types)), + tuple(repr(t) for t in self.union_types), instance.__class__.__name__, error_msg)) @@ -583,7 +587,7 @@ def __getitem__(self, type_params): t, error_msg_prefix='All parameters to a Union hint') if isinstance(t, self.UnionConstraint): - params |= t.union_types + params |= set(t.union_types) elif isinstance(t, DictConstraint): if dict_union is None: dict_union = t @@ -1056,7 +1060,7 @@ def _consistent_with_check_(self, sub): for elem in sub.tuple_types) # TODO(https://github.com/apache/beam/issues/29135): allow for # consistency checks with Mapping types - elif isinstance(sub, DictConstraint): + elif isinstance(sub, (MappingTypeConstraint, DictConstraint)): return True elif not isinstance(sub, TypeConstraint): if getattr(sub, '__origin__', None) is not None and getattr( @@ -1111,6 +1115,146 @@ def __getitem__(self, type_param): ABCSequenceTypeConstraint = SequenceHint.ABCSequenceTypeConstraint +class MappingHint(CompositeTypeHint): + """A Mapping type-hint. + + Mapping[K, V] represents any mapping (dict-like object) where all keys are + of type K and all values are of type V. This is more general than Dict as it + supports any object implementing the Mapping ABC. 
+ + Examples of valid mappings include: + - dict + - collections.defaultdict + - collections.OrderedDict + - types.MappingProxyType + """ + class MappingTypeConstraint(TypeConstraint): + def __init__(self, key_type, value_type): + self.key_type = normalize(key_type) + self.value_type = normalize(value_type) + + def __repr__(self): + return 'Mapping[%s, %s]' % (repr(self.key_type), repr(self.value_type)) + + def __eq__(self, other): + return ( + type(self) == type(other) and self.key_type == other.key_type and + self.value_type == other.value_type) + + def __hash__(self): + return hash((type(self), self.key_type, self.value_type)) + + def _inner_types(self): + yield self.key_type + yield self.value_type + + def _consistent_with_check_(self, sub): + if isinstance(sub, (self.__class__, DictConstraint)): + # A Dict is a Mapping of the same types + return ( + is_consistent_with(sub.key_type, self.key_type) and + is_consistent_with(sub.value_type, self.value_type)) + elif hasattr(sub, '__origin__'): + # Handle collection subtypes using ABC + if issubclass(sub.__origin__, abc.Mapping): + args = getattr(sub, '__args__', None) + if args and len(args) == 2: + return ( + is_consistent_with(args[0], self.key_type) and + is_consistent_with(args[1], self.value_type)) + return True + return False + + def _raise_type_error(self, is_key, instance, inner_error_message=''): + type_desc = 'key' if is_key else 'value' + expected_type = self.key_type if is_key else self.value_type + + if inner_error_message: + raise CompositeTypeHintError( + '%s hint %s-type constraint violated. All %ss should be of type ' + '%s. Instead: %s' % ( + repr(self), + type_desc, + type_desc, + repr(expected_type), + inner_error_message, + )) + else: + raise CompositeTypeHintError( + '%s hint %s-type constraint violated. All %ss should be of ' + 'type %s. Instead, %s is of type %s.' 
% ( + repr(self), + type_desc, + type_desc, + repr(expected_type), + instance, + instance.__class__.__name__, + )) + + def type_check(self, instance): + if not isinstance(instance, abc.Mapping): + raise CompositeTypeHintError( + 'Mapping type-constraint violated. All passed instances must be of ' + 'type Mapping. %s is of type %s.' % + (instance, instance.__class__.__name__)) + + for key, value in instance.items(): + try: + check_constraint(self.key_type, key) + except CompositeTypeHintError as e: + self._raise_type_error(True, key, str(e)) + except SimpleTypeHintError: + self._raise_type_error(True, key) + + try: + check_constraint(self.value_type, value) + except CompositeTypeHintError as e: + self._raise_type_error(False, value, str(e)) + except SimpleTypeHintError: + self._raise_type_error(False, value) + + def match_type_variables(self, concrete_type): + if isinstance(concrete_type, (MappingTypeConstraint, DictConstraint)): + bindings = {} + bindings.update( + match_type_variables(self.key_type, concrete_type.key_type)) + bindings.update( + match_type_variables(self.value_type, concrete_type.value_type)) + return bindings + return {} + + def bind_type_variables(self, bindings): + bound_key_type = bind_type_variables(self.key_type, bindings) + bound_value_type = bind_type_variables(self.value_type, bindings) + if (bound_key_type, bound_value_type) == (self.key_type, self.value_type): + return self + return Mapping[bound_key_type, bound_value_type] + + def __getitem__(self, type_params): + if not isinstance(type_params, tuple): + raise TypeError( + 'Parameter to Mapping type-hint must be a tuple of types: ' + 'Mapping[.., ..].') + + if len(type_params) != 2: + raise TypeError( + 'Length of parameters to a Mapping type-hint must be exactly 2. ' + 'Passed parameters: %s, have a length of %s.' 
% + (type_params, len(type_params))) + + key_type, value_type = type_params + + validate_composite_type_param( + key_type, error_msg_prefix='Key-type parameter to a Mapping hint') + validate_composite_type_param( + value_type, error_msg_prefix='Value-type parameter to a Mapping hint') + + return self.MappingTypeConstraint(key_type, value_type) + + +MappingTypeConstraint = MappingHint.MappingTypeConstraint + + class IterableHint(CompositeTypeHint): """An Iterable type-hint. @@ -1138,6 +1282,8 @@ def _consistent_with_check_(self, sub): return all( is_consistent_with(elem, self.inner_type) for elem in sub.tuple_types) + elif isinstance(sub, MappingTypeConstraint): + return is_consistent_with(sub.key_type, self.inner_type) return False def __getitem__(self, type_param): @@ -1292,6 +1438,7 @@ def __getitem__(self, type_params): FrozenSet = FrozenSetHint() Collection = CollectionHint() Sequence = SequenceHint() +Mapping = MappingHint() Iterable = IterableHint() Iterator = IteratorHint() Generator = GeneratorHint() @@ -1315,7 +1462,7 @@ def normalize(x, none_as_type=False): elif x in _KNOWN_PRIMITIVE_TYPES: return _KNOWN_PRIMITIVE_TYPES[x] elif getattr(x, '__module__', - None) in ('typing', 'collections.abc') or getattr( + None) in ('typing', 'collections', 'collections.abc') or getattr( x, '__origin__', None) in _KNOWN_PRIMITIVE_TYPES: beam_type = native_type_compatibility.convert_to_beam_type(x) if beam_type != x: diff --git a/sdks/python/apache_beam/typehints/typehints_test.py b/sdks/python/apache_beam/typehints/typehints_test.py index a81da5abec40..d5c6c6d26593 100644 --- a/sdks/python/apache_beam/typehints/typehints_test.py +++ b/sdks/python/apache_beam/typehints/typehints_test.py @@ -248,17 +248,12 @@ def test_nested_compatibility(self): self.assertCompatible( Tuple[Any, Any], Union[Tuple[str, int], Tuple[str, float]]) - def test_union_hint_repr(self): - hint = typehints.Union[DummyTestClass1, str] - self.assertIn( + def test_union_hint_repr_ordered_by_type(self): + 
hint = typehints.Union[DummyTestClass1, str, int, bool] + self.assertEqual( str(hint), - # Uses frozen set internally, so order not guaranteed. - [ - "Union[, ]", - "Union[, ]" - ]) + "Union[, " + ", , ]") def test_union_hint_enforcement_composite_type_in_union(self): o = DummyTestClass1() @@ -715,6 +710,11 @@ def test_type_checks_not_dict(self): 'must be of type dict. [1, 2] is of type list.', e.exception.args[0]) + def test_type_check_collection(self): + hint = typehints.Dict[str, int] + l = collections.defaultdict(list[("blue", 2)]) + self.assertIsNone(hint.type_check(l)) + def test_type_check_invalid_key_type(self): hint = typehints.Dict[typehints.Tuple[int, int, int], typehints.List[str]] d = {(1, 2): ['m', '1', '2', '3']} @@ -767,12 +767,166 @@ def test_normalize_with_builtin_dict(self): converted_beam_type = typehints.normalize(dict[str, int], False) self.assertEqual(converted_beam_type, expected_beam_type) + def test_normalize_with_collections_dicts(self): + test_cases = [ + ( + 'default dict', + collections.defaultdict[str, bool], + typehints.Dict[str, bool]), + ( + 'ordered dict', + collections.OrderedDict[str, bool], + typehints.Dict[str, bool]), + ('counter', collections.Counter[str, int], typehints.Dict[str, int]), + ] + for test_case in test_cases: + description = test_case[0] + collections_type = test_case[1] + expected_beam_type = test_case[2] + converted_beam_type = typehints.normalize(collections_type) + self.assertEqual(converted_beam_type, expected_beam_type, description) + def test_builtin_and_type_compatibility(self): self.assertCompatible(dict, typing.Dict) self.assertCompatible(dict[str, int], typing.Dict[str, int]) self.assertCompatible( dict[str, list[int]], typing.Dict[str, typing.List[int]]) + def test_collections_subclass_compatibility(self): + self.assertCompatible( + collections.defaultdict[str, bool], typing.Dict[str, bool]) + self.assertCompatible( + collections.OrderedDict[str, int], typing.Dict[str, int]) + 
self.assertCompatible(collections.Counter[str, int], typing.Dict[str, int]) + + +class MappingHintTestCase(TypeHintTestCase): + def test_getitem_param_must_be_tuple(self): + with self.assertRaises(TypeError) as e: + typehints.Mapping[4] + + self.assertEqual( + 'Parameter to Mapping type-hint must be a tuple of ' + 'types: Mapping[.., ..].', + e.exception.args[0]) + + def test_getitem_param_must_have_length_2(self): + with self.assertRaises(TypeError) as e: + typehints.Mapping[float, int, bool] + + self.assertEqual( + "Length of parameters to a Mapping type-hint must be " + "exactly 2. Passed parameters: ({}, {}, {}), have a " + "length of 3.".format(float, int, bool), + e.exception.args[0]) + + def test_key_type_must_be_valid_composite_param(self): + try: + typehints.Mapping[list, int] + except TypeError: + self.fail("built-in composite raised TypeError unexpectedly") + + def test_value_type_must_be_valid_composite_param(self): + with self.assertRaises(TypeError): + typehints.Mapping[str, 5] + + def test_compatibility(self): + hint1 = typehints.Mapping[int, str] + hint2 = typehints.Mapping[bool, int] + hint3 = typehints.Mapping[int, + typehints.List[typehints.Tuple[str, str, str]]] + hint4 = typehints.Mapping[int, int] + + self.assertCompatible(hint1, hint1) + self.assertCompatible(hint3, hint3) + self.assertNotCompatible(hint3, 4) + self.assertNotCompatible(hint2, hint1) # Key incompatibility. + self.assertNotCompatible(hint1, hint4) # Value incompatibility. + + def test_repr(self): + hint3 = typehints.Mapping[int, + typehints.List[typehints.Tuple[str, str, str]]] + self.assertEqual( + 'Mapping[, List[Tuple[, ' \ + ', ]]]', + repr(hint3)) + + def test_type_checks_not_dict(self): + hint = typehints.Mapping[int, str] + l = [1, 2] + with self.assertRaises(TypeError) as e: + hint.type_check(l) + self.assertEqual( + 'Mapping type-constraint violated. All passed instances ' + 'must be of type Mapping. 
[1, 2] is of type list.', + e.exception.args[0]) + + def test_type_check_invalid_key_type(self): + hint = typehints.Mapping[typehints.Tuple[int, int, int], + typehints.List[str]] + d = {(1, 2): ['m', '1', '2', '3']} + with self.assertRaises((TypeError, TypeError)) as e: + hint.type_check(d) + self.assertEqual( + 'Mapping[Tuple[, , ], ' + 'List[]] hint key-type ' + 'constraint violated. All keys should be of type ' + 'Tuple[, , ]. Instead: ' + 'Passed object instance is of the proper type, but differs in ' + 'length from the hinted type. Expected a tuple of ' + 'length 3, received a tuple of length 2.', + e.exception.args[0]) + + def test_type_check_invalid_value_type(self): + hint = typehints.Mapping[str, typehints.Mapping[int, str]] + d = {'f': [1, 2, 3]} + with self.assertRaises(TypeError) as e: + hint.type_check(d) + self.assertEqual( + "Mapping[, Mapping[, ]] hint" + ' value-type constraint violated. All values should be of type' + " Mapping[, ]. Instead: Mapping" + ' type-constraint violated. All passed instances must be of type' + ' Mapping. 
[1, 2, 3] is of type list.', + e.exception.args[0], + ) + + def test_type_check_valid_simple_type(self): + hint = typehints.Mapping[int, str] + d = {4: 'f', 9: 'k'} + self.assertIsNone(hint.type_check(d)) + + def test_type_check_valid_composite_type(self): + hint = typehints.Mapping[typehints.Tuple[str, str], typehints.List[int]] + d = {('f', 'k'): [1, 2, 3], ('m', 'r'): [4, 6, 9]} + self.assertIsNone(hint.type_check(d)) + + def test_match_type_variables(self): + S = typehints.TypeVariable('S') # pylint: disable=invalid-name + T = typehints.TypeVariable('T') # pylint: disable=invalid-name + hint = typehints.Mapping[S, T] + self.assertEqual({ + S: int, T: str + }, + hint.match_type_variables(typehints.Mapping[int, str])) + + def test_builtin_and_type_compatibility(self): + self.assertCompatible(typing.Mapping, dict) + self.assertCompatible(typing.Mapping[str, int], dict[str, int]) + self.assertCompatible( + typing.Mapping[str, typing.List[int]], dict[str, list[int]]) + self.assertCompatible(typing.Iterable[str], typing.Mapping[str, int]) + self.assertNotCompatible(typing.Mapping[str, int], typing.Iterable[str]) + self.assertCompatible(typing.Mapping[str, int], typing.Mapping[str, int]) + + def test_collections_compatibility(self): + self.assertCompatible(typing.Mapping, collections.defaultdict) + self.assertCompatible(typing.Mapping, collections.defaultdict[Any, Any]) + self.assertCompatible( + typing.Mapping[str, int], collections.defaultdict[str, int]) + self.assertCompatible( + typing.Mapping[str, int], collections.OrderedDict[str, int]) + class BaseSetHintTest: class CommonTests(TypeHintTestCase): diff --git a/sdks/python/apache_beam/utils/retry.py b/sdks/python/apache_beam/utils/retry.py index 485fc9d627e9..03e42829b843 100644 --- a/sdks/python/apache_beam/utils/retry.py +++ b/sdks/python/apache_beam/utils/retry.py @@ -274,7 +274,9 @@ def with_exponential_backoff( The decorator is intended to be used on callables that make HTTP or RPC requests that can 
temporarily timeout or have transient errors. For instance the make_http_request() call below will be retried 16 times with exponential - backoff and fuzzing of the delay interval (default settings). + backoff and fuzzing of the delay interval (default settings). The callable + should return values directly instead of yielding them, as generators are not + evaluated within the try-catch block and will not be retried on exception. from apache_beam.utils import retry # ... diff --git a/sdks/python/apache_beam/utils/subprocess_server.py b/sdks/python/apache_beam/utils/subprocess_server.py index efb27715cd82..85d9286bddd0 100644 --- a/sdks/python/apache_beam/utils/subprocess_server.py +++ b/sdks/python/apache_beam/utils/subprocess_server.py @@ -423,7 +423,12 @@ def local_jar(cls, url, cache_dir=None): url_read = urlopen(url) with open(cached_jar + '.tmp', 'wb') as jar_write: shutil.copyfileobj(url_read, jar_write, length=1 << 20) - os.rename(cached_jar + '.tmp', cached_jar) + try: + os.rename(cached_jar + '.tmp', cached_jar) + except FileNotFoundError: + # A race when multiple programs run in parallel and the cached_jar + # is already moved. Safe to ignore. + pass except URLError as e: raise RuntimeError( f'Unable to fetch remote job server jar at {url}: {e}. 
If no ' diff --git a/sdks/python/apache_beam/yaml/readme_test.py b/sdks/python/apache_beam/yaml/readme_test.py index 555d1d0b583f..fffacf7f42b9 100644 --- a/sdks/python/apache_beam/yaml/readme_test.py +++ b/sdks/python/apache_beam/yaml/readme_test.py @@ -299,6 +299,8 @@ def extract_name(input_spec): if code_lines: if code_lines[0].startswith('- type:'): specs = yaml.load('\n'.join(code_lines), Loader=SafeLoader) + if 'dependencies:' in specs: + test_type = 'PARSE' is_chain = not any('input' in spec for spec in specs) if is_chain: undefined_inputs = set(['input']) diff --git a/sdks/python/apache_beam/yaml/standard_io.yaml b/sdks/python/apache_beam/yaml/standard_io.yaml index b2544e773552..271b4add4295 100644 --- a/sdks/python/apache_beam/yaml/standard_io.yaml +++ b/sdks/python/apache_beam/yaml/standard_io.yaml @@ -218,6 +218,8 @@ password: 'password' query: 'read_query' table: 'location' + partition_column : 'partition_column' + partitions: 'partitions' type: 'jdbc_type' username: 'username' 'WriteToJdbc': diff --git a/sdks/python/apache_beam/yaml/yaml_provider.py b/sdks/python/apache_beam/yaml/yaml_provider.py index 9e30c8ccae07..133a5fe5b714 100755 --- a/sdks/python/apache_beam/yaml/yaml_provider.py +++ b/sdks/python/apache_beam/yaml/yaml_provider.py @@ -41,7 +41,6 @@ from typing import Optional from typing import Union -import clonevirtualenv import docstring_parser import yaml @@ -661,6 +660,15 @@ def create_transform(self, type, args, yaml_create_transform): return self._transform_factories[type](yaml_create_transform, **args) +# Note: This function is used to override the default provider by some +# users, so a change here will be breaking to those users. Change with +# caution. 
+def get_default_sql_provider(): + return beam_jar( + urns={'Sql': 'beam:external:java:sql:v1'}, + gradle_target='sdks:java:extensions:sql:expansion-service:shadowJar') + + class SqlBackedProvider(Provider): def __init__( self, @@ -668,9 +676,7 @@ def __init__( sql_provider: Optional[Provider] = None): self._transforms = transforms if sql_provider is None: - sql_provider = beam_jar( - urns={'Sql': 'beam:external:java:sql:v1'}, - gradle_target='sdks:java:extensions:sql:expansion-service:shadowJar') + sql_provider = get_default_sql_provider() self._sql_provider = sql_provider def sql_provider(self): @@ -1190,6 +1196,8 @@ def _create_venv_from_clone( venv = cls._path(base_python, packages) if not os.path.exists(venv): try: + # Avoid hard dependency for environments where this is never used. + import clonevirtualenv clonable_venv = cls._create_venv_to_clone(base_python) clonevirtualenv.clone_virtualenv(clonable_venv, venv) venv_pip = os.path.join(venv, 'bin', 'pip') diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 161f88af8540..3911cad25f19 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -95,13 +95,13 @@ tasks.register("generateYamlDocs") { dependsOn ":sdks:java:extensions:sql:expansion-service:shadowJar" dependsOn ":sdks:java:io:expansion-service:build" dependsOn ":sdks:java:io:google-cloud-platform:expansion-service:build" - def extraPackages = "pyyaml markdown docstring_parser pandas pygments Jinja2" + def extraPackages = "pyyaml markdown docstring_parser pandas pygments Jinja2 virtualenv-clone" doLast { exec { executable 'sh' args '-c', "${envdir}/bin/pip install $extraPackages && " + - "${envdir}/bin/python -m apache_beam.yaml.generate_yaml_docs --html_file=${buildDir}/yaml-ref.html --examples_file=${buildDir}/yaml-examples" + "${envdir}/bin/python -m apache_beam.yaml.generate_yaml_docs --html_file=${buildDir}/yaml-ref.html --examples_file=${buildDir}/yaml-examples.html" } } outputs.file "${buildDir}/yaml-ref.html" 
diff --git a/sdks/python/container/base_image_requirements_manual.txt b/sdks/python/container/base_image_requirements_manual.txt index 9fa3a807a392..0e50702d807a 100644 --- a/sdks/python/container/base_image_requirements_manual.txt +++ b/sdks/python/container/base_image_requirements_manual.txt @@ -35,8 +35,7 @@ guppy3 mmh3 # Optimizes execution of some Beam codepaths. TODO: Make it Beam's dependency. nltk # Commonly used for natural language processing. nose==1.3.7 # For Dataflow internal testing. TODO: remove this. -# https://github.com/apache/beam/issues/34226 -python-snappy==0.6.1 # Optimizes execution of some Beam codepaths. +google-crc32c scipy scikit-learn build>=1.0,<2 # tool to build sdist from setup.py in stager. diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index 6020d6773cf5..d8c84479b50f 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -23,21 +23,20 @@ annotated-types==0.7.0 async-timeout==5.0.1 -attrs==25.1.0 +attrs==25.3.0 backports.tarfile==1.2.0 beautifulsoup4==4.13.3 bs4==0.0.2 build==1.2.2.post1 -cachetools==5.5.1 +cachetools==5.5.2 certifi==2025.1.31 cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 cloudpickle==2.2.1 -cramjam==2.9.1 crcmod==1.7 -cryptography==44.0.0 -Cython==3.0.11 +cryptography==44.0.2 +Cython==3.0.12 Deprecated==1.2.18 deprecation==2.1.0 dill==0.3.1.1 @@ -51,48 +50,48 @@ fastavro==1.10.0 fasteners==0.19 freezegun==1.5.1 future==1.0.0 -google-api-core==2.24.1 -google-api-python-client==2.160.0 +google-api-core==2.24.2 +google-api-python-client==2.166.0 google-apitools==0.5.31 google-auth==2.38.0 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.79.0 -google-cloud-bigquery==3.29.0 -google-cloud-bigquery-storage==2.28.0 -google-cloud-bigtable==2.28.1 -google-cloud-core==2.4.1 +google-cloud-aiplatform==1.87.0 +google-cloud-bigquery==3.31.0 
+google-cloud-bigquery-storage==2.30.0 +google-cloud-bigtable==2.30.0 +google-cloud-core==2.4.3 google-cloud-datastore==2.20.2 -google-cloud-dlp==3.26.0 -google-cloud-language==2.16.0 +google-cloud-dlp==3.29.0 +google-cloud-language==2.17.1 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.28.0 -google-cloud-pubsublite==1.11.1 -google-cloud-recommendations-ai==0.10.15 -google-cloud-resource-manager==1.14.0 -google-cloud-spanner==3.51.0 +google-cloud-pubsub==2.29.0 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.17 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.53.0 google-cloud-storage==2.19.0 -google-cloud-videointelligence==2.15.0 -google-cloud-vision==3.9.0 -google-crc32c==1.6.0 +google-cloud-videointelligence==2.16.1 +google-cloud-vision==3.10.1 +google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.67.0rc1 +googleapis-common-protos==1.69.2 greenlet==3.1.1 -grpc-google-iam-v1==0.14.0 +grpc-google-iam-v1==0.14.2 grpc-interceptor==0.15.4 grpcio==1.65.5 grpcio-status==1.65.5 guppy3==3.1.5 hdfs==2.7.3 httplib2==0.22.0 -hypothesis==6.125.2 +hypothesis==6.130.6 idna==3.10 -importlib_metadata==8.5.0 -iniconfig==2.0.0 +importlib_metadata==8.6.1 +iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 jaraco.functools==4.1.0 -jeepney==0.8.0 -Jinja2==3.1.5 +jeepney==0.9.0 +Jinja2==3.1.6 joblib==1.4.2 jsonpickle==3.4.2 jsonschema==4.23.0 @@ -101,70 +100,71 @@ keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 MarkupSafe==3.0.2 mmh3==5.1.0 -mock==5.1.0 +mock==5.2.0 more-itertools==10.6.0 nltk==3.9.1 nose==1.3.7 -numpy==2.2.2 +numpy==2.2.4 oauth2client==4.1.3 objsize==0.7.1 -opentelemetry-api==1.30.0 -opentelemetry-sdk==1.30.0 -opentelemetry-semantic-conventions==0.51b0 -orjson==3.10.15 +opentelemetry-api==1.31.1 +opentelemetry-sdk==1.31.1 +opentelemetry-semantic-conventions==0.52b1 +orjson==3.10.16 overrides==7.7.0 packaging==24.2 pandas==2.2.3 parameterized==0.9.0 pluggy==1.5.0 
-proto-plus==1.26.0 -protobuf==5.29.3 +proto-plus==1.26.1 +protobuf==5.29.4 psycopg2-binary==2.9.9 pyarrow==16.1.0 pyarrow-hotfix==0.6 pyasn1==0.6.1 -pyasn1_modules==0.4.1 +pyasn1_modules==0.4.2 pycparser==2.22 -pydantic==2.10.6 -pydantic_core==2.27.2 +pydantic==2.11.1 +pydantic_core==2.33.0 pydot==1.4.2 PyHamcrest==2.1.0 -pymongo==4.11 +pymongo==4.11.3 PyMySQL==1.1.1 -pyparsing==3.2.1 +pyparsing==3.2.3 pyproject_hooks==1.2.0 pytest==7.4.4 pytest-timeout==2.3.1 pytest-xdist==3.6.1 python-dateutil==2.9.0.post0 -python-snappy==0.6.1 -pytz==2025.1 +pytz==2025.2 PyYAML==6.0.2 redis==5.2.1 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 requests-mock==1.12.1 -rpds-py==0.22.3 +rpds-py==0.24.0 rsa==4.9 scikit-learn==1.6.1 -scipy==1.15.1 +scipy==1.15.2 SecretStorage==3.3.3 shapely==2.0.7 six==1.17.0 sortedcontainers==2.4.0 soupsieve==2.6 -SQLAlchemy==2.0.38 +SQLAlchemy==2.0.40 sqlparse==0.5.3 tenacity==8.5.0 testcontainers==3.7.1 -threadpoolctl==3.5.0 +threadpoolctl==3.6.0 tomli==2.2.1 tqdm==4.67.1 -typing_extensions==4.12.2 -tzdata==2025.1 +typing-inspection==0.4.0 +typing_extensions==4.13.0 +tzdata==2025.2 uritemplate==4.1.1 urllib3==2.3.0 +virtualenv-clone==0.5.7 wrapt==1.17.2 zipp==3.21.0 zstandard==0.23.0 diff --git a/sdks/python/container/py311/base_image_requirements.txt b/sdks/python/container/py311/base_image_requirements.txt index 5eb6808610d3..dc0c3f2b95a6 100644 --- a/sdks/python/container/py311/base_image_requirements.txt +++ b/sdks/python/container/py311/base_image_requirements.txt @@ -22,21 +22,20 @@ # Reach out to a committer if you need help. 
annotated-types==0.7.0 -attrs==25.1.0 +attrs==25.3.0 backports.tarfile==1.2.0 beautifulsoup4==4.13.3 bs4==0.0.2 build==1.2.2.post1 -cachetools==5.5.1 +cachetools==5.5.2 certifi==2025.1.31 cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 cloudpickle==2.2.1 -cramjam==2.9.1 crcmod==1.7 -cryptography==44.0.0 -Cython==3.0.11 +cryptography==44.0.2 +Cython==3.0.12 Deprecated==1.2.18 deprecation==2.1.0 dill==0.3.1.1 @@ -49,48 +48,48 @@ fastavro==1.10.0 fasteners==0.19 freezegun==1.5.1 future==1.0.0 -google-api-core==2.24.1 -google-api-python-client==2.160.0 +google-api-core==2.24.2 +google-api-python-client==2.166.0 google-apitools==0.5.31 google-auth==2.38.0 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.79.0 -google-cloud-bigquery==3.29.0 -google-cloud-bigquery-storage==2.28.0 -google-cloud-bigtable==2.28.1 -google-cloud-core==2.4.1 +google-cloud-aiplatform==1.87.0 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +google-cloud-bigtable==2.30.0 +google-cloud-core==2.4.3 google-cloud-datastore==2.20.2 -google-cloud-dlp==3.26.0 -google-cloud-language==2.16.0 +google-cloud-dlp==3.29.0 +google-cloud-language==2.17.1 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.28.0 -google-cloud-pubsublite==1.11.1 -google-cloud-recommendations-ai==0.10.15 -google-cloud-resource-manager==1.14.0 -google-cloud-spanner==3.51.0 +google-cloud-pubsub==2.29.0 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.17 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.53.0 google-cloud-storage==2.19.0 -google-cloud-videointelligence==2.15.0 -google-cloud-vision==3.9.0 -google-crc32c==1.6.0 +google-cloud-videointelligence==2.16.1 +google-cloud-vision==3.10.1 +google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.67.0rc1 +googleapis-common-protos==1.69.2 greenlet==3.1.1 -grpc-google-iam-v1==0.14.0 +grpc-google-iam-v1==0.14.2 grpc-interceptor==0.15.4 grpcio==1.65.5 grpcio-status==1.65.5 guppy3==3.1.5 
hdfs==2.7.3 httplib2==0.22.0 -hypothesis==6.125.2 +hypothesis==6.130.6 idna==3.10 -importlib_metadata==8.5.0 -iniconfig==2.0.0 +importlib_metadata==8.6.1 +iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 jaraco.functools==4.1.0 -jeepney==0.8.0 -Jinja2==3.1.5 +jeepney==0.9.0 +Jinja2==3.1.6 joblib==1.4.2 jsonpickle==3.4.2 jsonschema==4.23.0 @@ -99,69 +98,70 @@ keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 MarkupSafe==3.0.2 mmh3==5.1.0 -mock==5.1.0 +mock==5.2.0 more-itertools==10.6.0 nltk==3.9.1 nose==1.3.7 -numpy==2.2.2 +numpy==2.2.4 oauth2client==4.1.3 objsize==0.7.1 -opentelemetry-api==1.30.0 -opentelemetry-sdk==1.30.0 -opentelemetry-semantic-conventions==0.51b0 -orjson==3.10.15 +opentelemetry-api==1.31.1 +opentelemetry-sdk==1.31.1 +opentelemetry-semantic-conventions==0.52b1 +orjson==3.10.16 overrides==7.7.0 packaging==24.2 pandas==2.2.3 parameterized==0.9.0 pluggy==1.5.0 -proto-plus==1.26.0 -protobuf==5.29.3 +proto-plus==1.26.1 +protobuf==5.29.4 psycopg2-binary==2.9.9 pyarrow==16.1.0 pyarrow-hotfix==0.6 pyasn1==0.6.1 -pyasn1_modules==0.4.1 +pyasn1_modules==0.4.2 pycparser==2.22 -pydantic==2.10.6 -pydantic_core==2.27.2 +pydantic==2.11.1 +pydantic_core==2.33.0 pydot==1.4.2 PyHamcrest==2.1.0 -pymongo==4.11 +pymongo==4.11.3 PyMySQL==1.1.1 -pyparsing==3.2.1 +pyparsing==3.2.3 pyproject_hooks==1.2.0 pytest==7.4.4 pytest-timeout==2.3.1 pytest-xdist==3.6.1 python-dateutil==2.9.0.post0 -python-snappy==0.6.1 -pytz==2025.1 +pytz==2025.2 PyYAML==6.0.2 redis==5.2.1 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 requests-mock==1.12.1 -rpds-py==0.22.3 +rpds-py==0.24.0 rsa==4.9 scikit-learn==1.6.1 -scipy==1.15.1 +scipy==1.15.2 SecretStorage==3.3.3 shapely==2.0.7 six==1.17.0 sortedcontainers==2.4.0 soupsieve==2.6 -SQLAlchemy==2.0.38 +SQLAlchemy==2.0.40 sqlparse==0.5.3 tenacity==8.5.0 testcontainers==3.7.1 -threadpoolctl==3.5.0 +threadpoolctl==3.6.0 tqdm==4.67.1 -typing_extensions==4.12.2 -tzdata==2025.1 +typing-inspection==0.4.0 
+typing_extensions==4.13.0 +tzdata==2025.2 uritemplate==4.1.1 urllib3==2.3.0 +virtualenv-clone==0.5.7 wrapt==1.17.2 zipp==3.21.0 zstandard==0.23.0 diff --git a/sdks/python/container/py312/base_image_requirements.txt b/sdks/python/container/py312/base_image_requirements.txt index ef46fb7503ec..a34f4ccef489 100644 --- a/sdks/python/container/py312/base_image_requirements.txt +++ b/sdks/python/container/py312/base_image_requirements.txt @@ -22,20 +22,19 @@ # Reach out to a committer if you need help. annotated-types==0.7.0 -attrs==25.1.0 +attrs==25.3.0 beautifulsoup4==4.13.3 bs4==0.0.2 build==1.2.2.post1 -cachetools==5.5.1 +cachetools==5.5.2 certifi==2025.1.31 cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 cloudpickle==2.2.1 -cramjam==2.9.1 crcmod==1.7 -cryptography==44.0.0 -Cython==3.0.11 +cryptography==44.0.2 +Cython==3.0.12 Deprecated==1.2.18 deprecation==2.1.0 dill==0.3.1.1 @@ -48,48 +47,48 @@ fastavro==1.10.0 fasteners==0.19 freezegun==1.5.1 future==1.0.0 -google-api-core==2.24.1 -google-api-python-client==2.160.0 +google-api-core==2.24.2 +google-api-python-client==2.166.0 google-apitools==0.5.31 google-auth==2.38.0 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.79.0 -google-cloud-bigquery==3.29.0 -google-cloud-bigquery-storage==2.28.0 -google-cloud-bigtable==2.28.1 -google-cloud-core==2.4.1 +google-cloud-aiplatform==1.87.0 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +google-cloud-bigtable==2.30.0 +google-cloud-core==2.4.3 google-cloud-datastore==2.20.2 -google-cloud-dlp==3.26.0 -google-cloud-language==2.16.0 +google-cloud-dlp==3.29.0 +google-cloud-language==2.17.1 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.28.0 -google-cloud-pubsublite==1.11.1 -google-cloud-recommendations-ai==0.10.15 -google-cloud-resource-manager==1.14.0 -google-cloud-spanner==3.51.0 +google-cloud-pubsub==2.29.0 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.17 +google-cloud-resource-manager==1.14.2 
+google-cloud-spanner==3.53.0 google-cloud-storage==2.19.0 -google-cloud-videointelligence==2.15.0 -google-cloud-vision==3.9.0 -google-crc32c==1.6.0 +google-cloud-videointelligence==2.16.1 +google-cloud-vision==3.10.1 +google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.67.0rc1 +googleapis-common-protos==1.69.2 greenlet==3.1.1 -grpc-google-iam-v1==0.14.0 +grpc-google-iam-v1==0.14.2 grpc-interceptor==0.15.4 grpcio==1.65.5 grpcio-status==1.65.5 guppy3==3.1.5 hdfs==2.7.3 httplib2==0.22.0 -hypothesis==6.125.2 +hypothesis==6.130.6 idna==3.10 -importlib_metadata==8.5.0 -iniconfig==2.0.0 +importlib_metadata==8.6.1 +iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 jaraco.functools==4.1.0 -jeepney==0.8.0 -Jinja2==3.1.5 +jeepney==0.9.0 +Jinja2==3.1.6 joblib==1.4.2 jsonpickle==3.4.2 jsonschema==4.23.0 @@ -98,70 +97,71 @@ keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 MarkupSafe==3.0.2 mmh3==5.1.0 -mock==5.1.0 +mock==5.2.0 more-itertools==10.6.0 nltk==3.9.1 nose==1.3.7 -numpy==2.2.2 +numpy==2.2.4 oauth2client==4.1.3 objsize==0.7.1 -opentelemetry-api==1.30.0 -opentelemetry-sdk==1.30.0 -opentelemetry-semantic-conventions==0.51b0 -orjson==3.10.15 +opentelemetry-api==1.31.1 +opentelemetry-sdk==1.31.1 +opentelemetry-semantic-conventions==0.52b1 +orjson==3.10.16 overrides==7.7.0 packaging==24.2 pandas==2.2.3 parameterized==0.9.0 pluggy==1.5.0 -proto-plus==1.26.0 -protobuf==5.29.3 +proto-plus==1.26.1 +protobuf==5.29.4 psycopg2-binary==2.9.9 pyarrow==16.1.0 pyarrow-hotfix==0.6 pyasn1==0.6.1 -pyasn1_modules==0.4.1 +pyasn1_modules==0.4.2 pycparser==2.22 -pydantic==2.10.6 -pydantic_core==2.27.2 +pydantic==2.11.1 +pydantic_core==2.33.0 pydot==1.4.2 PyHamcrest==2.1.0 -pymongo==4.11 +pymongo==4.11.3 PyMySQL==1.1.1 -pyparsing==3.2.1 +pyparsing==3.2.3 pyproject_hooks==1.2.0 pytest==7.4.4 pytest-timeout==2.3.1 pytest-xdist==3.6.1 python-dateutil==2.9.0.post0 -python-snappy==0.6.1 -pytz==2025.1 +pytz==2025.2 PyYAML==6.0.2 redis==5.2.1 
referencing==0.36.2 regex==2024.11.6 requests==2.32.3 requests-mock==1.12.1 -rpds-py==0.22.3 +rpds-py==0.24.0 rsa==4.9 scikit-learn==1.6.1 -scipy==1.15.1 +scipy==1.15.2 SecretStorage==3.3.3 -setuptools==75.8.0 +setuptools==78.1.0 shapely==2.0.7 six==1.17.0 sortedcontainers==2.4.0 soupsieve==2.6 -SQLAlchemy==2.0.38 +SQLAlchemy==2.0.40 sqlparse==0.5.3 tenacity==8.5.0 testcontainers==3.7.1 -threadpoolctl==3.5.0 +threadpoolctl==3.6.0 tqdm==4.67.1 -typing_extensions==4.12.2 -tzdata==2025.1 +typing-inspection==0.4.0 +typing_extensions==4.13.0 +tzdata==2025.2 uritemplate==4.1.1 urllib3==2.3.0 +virtualenv-clone==0.5.7 wheel==0.45.1 wrapt==1.17.2 zipp==3.21.0 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt index ba676fdbae07..6be1fdd3b0d4 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -23,21 +23,20 @@ annotated-types==0.7.0 async-timeout==5.0.1 -attrs==25.1.0 +attrs==25.3.0 backports.tarfile==1.2.0 beautifulsoup4==4.13.3 bs4==0.0.2 build==1.2.2.post1 -cachetools==5.5.1 +cachetools==5.5.2 certifi==2025.1.31 cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 cloudpickle==2.2.1 -cramjam==2.9.1 crcmod==1.7 -cryptography==44.0.0 -Cython==3.0.11 +cryptography==44.0.2 +Cython==3.0.12 Deprecated==1.2.18 deprecation==2.1.0 dill==0.3.1.1 @@ -51,48 +50,48 @@ fastavro==1.10.0 fasteners==0.19 freezegun==1.5.1 future==1.0.0 -google-api-core==2.24.1 -google-api-python-client==2.160.0 +google-api-core==2.24.2 +google-api-python-client==2.166.0 google-apitools==0.5.31 google-auth==2.38.0 google-auth-httplib2==0.2.0 -google-cloud-aiplatform==1.79.0 -google-cloud-bigquery==3.29.0 -google-cloud-bigquery-storage==2.28.0 -google-cloud-bigtable==2.28.1 -google-cloud-core==2.4.1 +google-cloud-aiplatform==1.87.0 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +google-cloud-bigtable==2.30.0 +google-cloud-core==2.4.3 
google-cloud-datastore==2.20.2 -google-cloud-dlp==3.26.0 -google-cloud-language==2.16.0 +google-cloud-dlp==3.29.0 +google-cloud-language==2.17.1 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.28.0 -google-cloud-pubsublite==1.11.1 -google-cloud-recommendations-ai==0.10.15 -google-cloud-resource-manager==1.14.0 -google-cloud-spanner==3.51.0 +google-cloud-pubsub==2.29.0 +google-cloud-pubsublite==1.12.0 +google-cloud-recommendations-ai==0.10.17 +google-cloud-resource-manager==1.14.2 +google-cloud-spanner==3.53.0 google-cloud-storage==2.19.0 -google-cloud-videointelligence==2.15.0 -google-cloud-vision==3.9.0 -google-crc32c==1.6.0 +google-cloud-videointelligence==2.16.1 +google-cloud-vision==3.10.1 +google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.67.0rc1 +googleapis-common-protos==1.69.2 greenlet==3.1.1 -grpc-google-iam-v1==0.14.0 +grpc-google-iam-v1==0.14.2 grpc-interceptor==0.15.4 grpcio==1.65.5 grpcio-status==1.65.5 guppy3==3.1.5 hdfs==2.7.3 httplib2==0.22.0 -hypothesis==6.125.2 +hypothesis==6.130.6 idna==3.10 -importlib_metadata==8.5.0 -iniconfig==2.0.0 +importlib_metadata==8.6.1 +iniconfig==2.1.0 jaraco.classes==3.4.0 jaraco.context==6.0.1 jaraco.functools==4.1.0 -jeepney==0.8.0 -Jinja2==3.1.5 +jeepney==0.9.0 +Jinja2==3.1.6 joblib==1.4.2 jsonpickle==3.4.2 jsonschema==4.23.0 @@ -101,51 +100,50 @@ keyring==25.6.0 keyrings.google-artifactregistry-auth==1.1.2 MarkupSafe==3.0.2 mmh3==5.1.0 -mock==5.1.0 +mock==5.2.0 more-itertools==10.6.0 nltk==3.9.1 nose==1.3.7 numpy==2.0.2 oauth2client==4.1.3 objsize==0.7.1 -opentelemetry-api==1.30.0 -opentelemetry-sdk==1.30.0 -opentelemetry-semantic-conventions==0.51b0 -orjson==3.10.15 +opentelemetry-api==1.31.1 +opentelemetry-sdk==1.31.1 +opentelemetry-semantic-conventions==0.52b1 +orjson==3.10.16 overrides==7.7.0 packaging==24.2 pandas==2.2.3 parameterized==0.9.0 pluggy==1.5.0 -proto-plus==1.26.0 -protobuf==5.29.3 +proto-plus==1.26.1 +protobuf==5.29.4 psycopg2-binary==2.9.9 pyarrow==16.1.0 
pyarrow-hotfix==0.6 pyasn1==0.6.1 -pyasn1_modules==0.4.1 +pyasn1_modules==0.4.2 pycparser==2.22 -pydantic==2.10.6 -pydantic_core==2.27.2 +pydantic==2.11.1 +pydantic_core==2.33.0 pydot==1.4.2 PyHamcrest==2.1.0 -pymongo==4.11 +pymongo==4.11.3 PyMySQL==1.1.1 -pyparsing==3.2.1 +pyparsing==3.2.3 pyproject_hooks==1.2.0 pytest==7.4.4 pytest-timeout==2.3.1 pytest-xdist==3.6.1 python-dateutil==2.9.0.post0 -python-snappy==0.6.1 -pytz==2025.1 +pytz==2025.2 PyYAML==6.0.2 redis==5.2.1 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 requests-mock==1.12.1 -rpds-py==0.22.3 +rpds-py==0.24.0 rsa==4.9 scikit-learn==1.6.1 scipy==1.13.1 @@ -154,17 +152,19 @@ shapely==2.0.7 six==1.17.0 sortedcontainers==2.4.0 soupsieve==2.6 -SQLAlchemy==2.0.38 +SQLAlchemy==2.0.40 sqlparse==0.5.3 tenacity==8.5.0 testcontainers==3.7.1 -threadpoolctl==3.5.0 +threadpoolctl==3.6.0 tomli==2.2.1 tqdm==4.67.1 -typing_extensions==4.12.2 -tzdata==2025.1 +typing-inspection==0.4.0 +typing_extensions==4.13.0 +tzdata==2025.2 uritemplate==4.1.1 urllib3==2.3.0 +virtualenv-clone==0.5.7 wrapt==1.17.2 zipp==3.21.0 zstandard==0.23.0 diff --git a/sdks/python/scripts/run_pylint.sh b/sdks/python/scripts/run_pylint.sh index 89ea7fe441e4..9ea8b31d62fc 100755 --- a/sdks/python/scripts/run_pylint.sh +++ b/sdks/python/scripts/run_pylint.sh @@ -86,21 +86,22 @@ echo "Running isort..." 
ISORT_EXCLUDED=( "apiclient.py" "avroio_test.py" + "cloudpickle.py" "datastore_wordcount.py" "datastoreio_test.py" + "doctests_test.py" + "fast_coders_test.py" "hadoopfilesystem.py" "iobase_test.py" - "fast_coders_test.py" - "slow_coders_test.py" - "tfdv_analyze_and_validate.py" - "preprocess.py" + "main_test.py" "model.py" - "taxi.py" + "preprocess.py" "process_tfma.py" - "doctests_test.py" "render_test.py" + "slow_coders_test.py" + "taxi.py" + "tfdv_analyze_and_validate.py" "yaml/main.py" - "main_test.py" ) SKIP_PARAM="" for file in "${ISORT_EXCLUDED[@]}"; do diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 49d9ac368811..e1756176093b 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -211,9 +211,9 @@ def copy_tests_from_docs(): for path in glob.glob(os.path.join(docs_src, 'yaml*.md')): shutil.copy(path, docs_dest) else: - if not os.path.exists(docs_dest): - raise RuntimeError( - f'Could not locate yaml docs in {docs_src} or {docs_dest}.') + warnings.warn( + f'Could not locate yaml docs source directory {docs_src}. ' + f'Skipping copying tests from docs.') def generate_external_transform_wrappers(): @@ -347,11 +347,6 @@ def get_portability_package_data(): # dill on client and server, therefore list of allowed versions is # very narrow. See: https://github.com/uqfoundation/dill/issues/341. 'dill>=0.3.1.1,<0.3.2', - # It is prudent to use the same version of pickler at job submission - # and at runtime, therefore bounds need to be tight. 
- # To avoid depending on an old dependency, update the minor version on - # every Beam release, see: https://github.com/apache/beam/issues/23119 - 'cloudpickle~=2.2.1', 'fastavro>=0.23.6,<2', 'fasteners>=0.3,<1.0', # TODO(https://github.com/grpc/grpc/issues/37710): Unpin grpc diff --git a/website/www/site/config.toml b/website/www/site/config.toml index 776c63f9e444..6d7e8f706af0 100644 --- a/website/www/site/config.toml +++ b/website/www/site/config.toml @@ -104,7 +104,7 @@ github_project_repo = "https://github.com/apache/beam" [params] description = "Apache Beam is an open source, unified model and set of language-specific SDKs for defining and executing data processing workflows, and also data ingestion and integration flows, supporting Enterprise Integration Patterns (EIPs) and Domain Specific Languages (DSLs). Dataflow pipelines simplify the mechanics of large-scale batch and streaming data processing and can run on a number of runtimes like Apache Flink, Apache Spark, and Google Cloud Dataflow (a cloud service). Beam also brings DSL in different languages, allowing users to easily implement their data integration processes." -release_latest = "2.63.0" +release_latest = "2.64.0" # The repository and branch where the files live in Github or Colab. This is used # to serve and stage from your local branch, but publish to the master branch. # e.g. https://github.com/{{< param branch_repo >}}/path/to/notebook.ipynb diff --git a/website/www/site/content/en/blog/beam-2.63.0.md b/website/www/site/content/en/blog/beam-2.63.0.md index 3b740781d06b..148344ac88d8 100644 --- a/website/www/site/content/en/blog/beam-2.63.0.md +++ b/website/www/site/content/en/blog/beam-2.63.0.md @@ -77,9 +77,13 @@ For more information on changes in 2.63.0, check out the [detailed release notes * Fixed the user mailing list address ([#26013](https://github.com/apache/beam/issues/26013)). 
* [Dataflow Streaming] Fixed an issue where Dataflow Streaming workers were reporting lineage metrics as cumulative rather than delta. ([#33691](https://github.com/apache/beam/pull/33691)) +## Known Issues + +* (Java) Current version of protobuf has a [bug](https://github.com/protocolbuffers/protobuf/issues/20599) leading to incompatibilities with clients using older versions of Protobuf ([example issue](https://github.com/GoogleCloudPlatform/DataflowTemplates/issues/2191)). This issue has been seen in SpannerIO in particular. Tracked in [#34452](https://github.com/GoogleCloudPlatform/DataflowTemplates/issues/34452) + ## List of Contributors -According to git shortlog, the following people contributed to the 2.62.0 release. Thank you to all contributors! +According to git shortlog, the following people contributed to the 2.63.0 release. Thank you to all contributors! Ahmed Abualsaud, Alex Merose, diff --git a/website/www/site/content/en/blog/beam-2.64.0.md b/website/www/site/content/en/blog/beam-2.64.0.md new file mode 100644 index 000000000000..b24d8891ca05 --- /dev/null +++ b/website/www/site/content/en/blog/beam-2.64.0.md @@ -0,0 +1,174 @@ +--- +title: "Apache Beam 2.64.0" +date: 2025-03-31 10:30:00 -0500 +categories: + - blog + - release +authors: + - liferoad +--- + + +We are happy to present the new 2.64.0 release of Beam. +This release includes both improvements and new functionality. +See the [download page](/get-started/downloads/{$DOWNLOAD_ANCHOR}) for this release. + + + +For more information on changes in 2.64.0, check out the [detailed release notes](https://github.com/apache/beam/milestone/28). 
+## Highlights + +* Managed API for [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/managed/Managed.html) and [Python](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.managed.html#module-apache_beam.transforms.managed) supports [key I/O connectors](https://beam.apache.org/documentation/io/connectors/) Iceberg, Kafka, and BigQuery. + +## I/Os + +* [Java] Use API compatible with both com.google.cloud.bigdataoss:util 2.x and 3.x in BatchLoads ([#34105](https://github.com/apache/beam/pull/34105)) +* [IcebergIO] Added new CDC source for batch and streaming, available as `Managed.ICEBERG_CDC` ([#33504](https://github.com/apache/beam/pull/33504)) +* [IcebergIO] Address edge case where bundle retry following a successful data commit results in data duplication ([#34264](https://github.com/apache/beam/pull/34264)) + +## New Features / Improvements + +* [Python] Support custom coders in Reshuffle ([#29908](https://github.com/apache/beam/issues/29908), [#33356](https://github.com/apache/beam/issues/33356)). +* [Java] Upgrade SLF4J to 2.0.16. Update default Spark version to 3.5.0. ([#33574](https://github.com/apache/beam/pull/33574)) +* [Java] Support for `--add-modules` JVM option is added through a new pipeline option `JdkAddRootModules`. This allows extending the module graph with optional modules such as SDK incubator modules. Sample usage: ` --jdkAddRootModules=jdk.incubator.vector` ([#30281](https://github.com/apache/beam/issues/30281)). +* Managed API for [Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/managed/Managed.html) and [Python](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.managed.html#module-apache_beam.transforms.managed) supports [key I/O connectors](https://beam.apache.org/documentation/io/connectors/) Iceberg, Kafka, and BigQuery. +* Prism now supports event time triggers for most common cases. 
([#31438](https://github.com/apache/beam/issues/31438)) + * Prism does not yet support triggered side inputs, or triggers on merging windows (such as session windows). + +## Breaking Changes + +* [Python] Reshuffle now correctly respects user-specified type hints, fixing a previous bug where it might use FastPrimitivesCoder wrongly. This change could break pipelines with incorrect type hints in Reshuffle. If you have issues after upgrading, temporarily set update_compatibility_version to a previous Beam version to use the old behavior. The recommended solution is to fix the type hints in your code. ([#33932](https://github.com/apache/beam/pull/33932)) +* [Java] SparkReceiver 2 has been moved to SparkReceiver 3 that supports Spark 3.x. ([#33574](https://github.com/apache/beam/pull/33574)) +* [Python] Correct parsing of `collections.abc.Sequence` type hints was added, which can lead to pipelines failing type hint checks that were previously passing erroneously. These issues will be most commonly seen trying to consume a PCollection with a `Sequence` type hint after a GroupByKey or a CoGroupByKey. ([#33999](https://github.com/apache/beam/pull/33999)). + +## Bugfixes + +* (Python) Fixed occasional pipeline stuckness that was affecting Python 3.11 users ([#33966](https://github.com/apache/beam/issues/33966)). +* (Java) Fixed TIME field encodings for BigQuery Storage API writes on GenericRecords ([#34059](https://github.com/apache/beam/pull/34059)). +* (Java) Fixed a race condition in JdbcIO which could cause hangs trying to acquire a connection ([#34058](https://github.com/apache/beam/pull/34058)). +* (Java) Fix BigQuery Storage Write compatibility with Avro 1.8 ([#34281](https://github.com/apache/beam/pull/34281)). 
+* Fixed checkpoint recovery and streaming behavior in Spark Classic and Portable runner's Flatten transform by replacing queueStream with SingleEmitInputDStream ([#34080](https://github.com/apache/beam/pull/34080), [#18144](https://github.com/apache/beam/issues/18144), [#20426](https://github.com/apache/beam/issues/20426)) +* (Java) Fixed Read caching of UnboundedReader objects to effectively cache across multiple DoFns and avoid checkpointing unstarted reader. [#34146](https://github.com/apache/beam/pull/34146) [#33901](https://github.com/apache/beam/pull/33901) + +## Known Issues + +* (Java) Current version of protobuf has a [bug](https://github.com/protocolbuffers/protobuf/issues/20599) leading to incompatibilities with clients using older versions of Protobuf ([example issue](https://github.com/GoogleCloudPlatform/DataflowTemplates/issues/2191)). This issue has been seen in SpannerIO in particular. Tracked in [#34452](https://github.com/apache/beam/issues/34452). +* (Java) When constructing `SpannerConfig` for `SpannerIO`, calling `withHost` with a null or empty host will now result in a Null Pointer Exception (`java.lang.NullPointerException: Cannot invoke "java.lang.CharSequence.length()" because "this.text" is null`). See https://github.com/apache/beam/issues/34489 for context. + +## List of Contributors + +According to git shortlog, the following people contributed to the 2.64.0 release. Thank you to all contributors! 
+ +Ahmed Abualsaud + +akashorabek + +Arun Pandian + +Bentsi Leviav + +Chamikara Jayalath + +Charles Nguyen + +Claire McGinty + +claudevdm + +Damon + +Danny McCormick + +darshan-sj + +Derrick Williams + +fozzie15 + +Hai Joey Tran + +Jack McCluskey + +Jozef Vilcek + +jrmccluskey + +Kenneth Knowles + +Liam Miller-Cushon + +liferoad + +Luv Agarwal + +martin trieu + +Matar + +Matthew Suozzo + +Michel Davit + +Minbo Bae + +Mohamed Awnallah + +Naireen Hussain + +Pablo Rodriguez Defino + +Radosław Stankiewicz + +Rakesh Kumar + +Reuven Lax + +Robert Bradshaw + +Robert Burke + +Rohit + +Rohit Sinha + +Sam Whittle + +Saumil Patel + +Shunping Huang + +So-shi Nakachi + +Steven van Rossum + +Suvrat Acharya + +Svetak Sundhar + +synenka + +Talat UYARER + +tvalentyn + +twosom + +utkarshparekh + +Vitaly Terentyev + +XQ Hu + +Yi Hu + +Zilin Du diff --git a/website/www/site/content/en/documentation/programming-guide.md b/website/www/site/content/en/documentation/programming-guide.md index 955c2b8797d1..363175b306a1 100644 --- a/website/www/site/content/en/documentation/programming-guide.md +++ b/website/www/site/content/en/documentation/programming-guide.md @@ -1887,7 +1887,7 @@ PCollection sum = pc.apply( # The resulting PCollection, called result, contains one value: the sum of all # the elements in the input PCollection. pc = ... 
-{{< code_sample "sdks/python/apache_beam/examples/snippets/snippets_test.py" combine_custom_average_execute >}} +{{< code_sample "sdks/python/apache_beam/examples/snippets/snippets_test.py" global_sum >}} {{< /highlight >}} {{< highlight go >}} diff --git a/website/www/site/content/en/documentation/sdks/python-custom-multi-language-pipelines-guide.md b/website/www/site/content/en/documentation/sdks/python-custom-multi-language-pipelines-guide.md index 60523cbb3b2a..b927234a0237 100644 --- a/website/www/site/content/en/documentation/sdks/python-custom-multi-language-pipelines-guide.md +++ b/website/www/site/content/en/documentation/sdks/python-custom-multi-language-pipelines-guide.md @@ -270,6 +270,23 @@ inspect.signature(MyTransform) This metadata is generated directly from the provider's implementation. The class documentation is generated from the [optional **description** method](#additional-metadata). The signature information is generated from the `@SchemaFieldDescription` annotations in the [configuration object](#implement-a-configuration). +### Using Beam native Java SchemaTransforms +If there's an existing Beam native Java SchemaTransform you'd like to use, and you know which expansion service module it's in, you can connect to it using `BeamJarExpansionService`: + +```python +from apache_beam.transforms.external_transform_provider import ExternalTransformProvider +from apache_beam.transforms.external import BeamJarExpansionService + +identifier = "beam:schematransform:org.apache.beam:bigquery_fileloads:v1" +expansion_service = "sdks:java:io:google-cloud-platform:expansion-service:shadowJar" + +provider = ExternalTransformProvider(BeamJarExpansionService(expansion_service)) +BqFileLoads = provider.get_urn(identifier) + +with beam.Pipeline(argv=args) as p: + p | beam.Create(...) 
| BqFileLoads(table="project.dataset.table") +``` + ## Appendix ### Portable transform diff --git a/website/www/site/content/en/documentation/sdks/yaml-udf.md b/website/www/site/content/en/documentation/sdks/yaml-udf.md index ded40de8b85e..a3e4790c7b34 100644 --- a/website/www/site/content/en/documentation/sdks/yaml-udf.md +++ b/website/www/site/content/en/documentation/sdks/yaml-udf.md @@ -508,3 +508,33 @@ a `{type: 'basic_type_name'}` nesting. This can be especially useful to resolve errors involving the inability to handle the `beam:logical:pythonsdk_any:v1` type. + + +## Dependencies + +Often user defined functions need to rely on external dependencies. +These can be provided with a `dependencies` attribute in the transform +config. For example + +``` +- type: MapToFields + config: + language: python + dependencies: + - 'scipy>=1.15' + fields: + new_col: + callable: | + import scipy.special + + def func(t): + return scipy.special.zeta(complex(1/2, t)) +``` + +The dependencies are interpreted according to the language, e.g. +for Java one provides a list of maven identifiers and/or jars, +and for Python one provides a list of pypi package specifiers and/or sdist tarballs. +See also the full examples using +[java dependencies](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/elementwise/map_to_fields_with_java_deps.yaml) +and +[python dependencies](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/yaml/examples/transforms/elementwise/map_to_fields_with_deps.yaml). diff --git a/website/www/site/content/en/get-started/downloads.md b/website/www/site/content/en/get-started/downloads.md index 688d16238da8..c3ad74ef799f 100644 --- a/website/www/site/content/en/get-started/downloads.md +++ b/website/www/site/content/en/get-started/downloads.md @@ -96,11 +96,19 @@ versions denoted `0.x.y`. 
## Releases +### 2.64.0 (2025-03-31) + +Official [source code download](https://archive.apache.org/dist/beam/2.64.0/apache-beam-2.64.0-source-release.zip). +[SHA-512](https://archive.apache.org/dist/beam/2.64.0/apache-beam-2.64.0-source-release.zip.sha512). +[signature](https://archive.apache.org/dist/beam/2.64.0/apache-beam-2.64.0-source-release.zip.asc). + +[Release notes](https://github.com/apache/beam/releases/tag/v2.64.0) + ### 2.63.0 (2025-02-11) -Official [source code download](https://downloads.apache.org/beam/2.63.0/apache-beam-2.63.0-source-release.zip). -[SHA-512](https://downloads.apache.org/beam/2.63.0/apache-beam-2.63.0-source-release.zip.sha512). -[signature](https://downloads.apache.org/beam/2.63.0/apache-beam-2.63.0-source-release.zip.asc). +Official [source code download](https://archive.apache.org/dist/beam/2.63.0/apache-beam-2.63.0-source-release.zip). +[SHA-512](https://archive.apache.org/dist/beam/2.63.0/apache-beam-2.63.0-source-release.zip.sha512). +[signature](https://archive.apache.org/dist/beam/2.63.0/apache-beam-2.63.0-source-release.zip.asc). [Release notes](https://github.com/apache/beam/releases/tag/v2.63.0) diff --git a/website/www/site/content/en/performance/_index.md b/website/www/site/content/en/performance/_index.md index f821b0f25084..45f819d23bed 100644 --- a/website/www/site/content/en/performance/_index.md +++ b/website/www/site/content/en/performance/_index.md @@ -30,11 +30,22 @@ from a pipeline Job running on [Dataflow](/documentation/runners/dataflow/). See the [glossary](/performance/glossary) for a list of the metrics and their definition. -# Measured Beam IOs +# Measured Beam Java IOs See the following pages for performance measures recorded when reading from and writing to various Beam IOs. 
- [BigQuery](/performance/bigquery) - [BigTable](/performance/bigtable) -- [TextIO](/performance/textio) \ No newline at end of file +- [TextIO](/performance/textio) + +# Measured Beam Python ML Pipelines + +See the following pages for performance measures recorded when running various Beam ML pipelines. + +- [PyTorch Language Modeling BERT base](/performance/pytorchbertbase) +- [PyTorch Language Modeling BERT large](/performance/pytorchbertlarge) +- [PyTorch Vision Classification Resnet 101](/performance/pytorchresnet101) +- [PyTorch Vision Classification Resnet 152](/performance/pytorchresnet152) +- [PyTorch Vision Classification Resnet 152 Tesla T4 GPU](/performance/pytorchresnet152tesla) +- [TensorFlow MNIST Image Classification](/performance/tensorflowmnist) \ No newline at end of file diff --git a/website/www/site/content/en/performance/pytorchbertbase/_index.md b/website/www/site/content/en/performance/pytorchbertbase/_index.md new file mode 100644 index 000000000000..9ef6cb455cac --- /dev/null +++ b/website/www/site/content/en/performance/pytorchbertbase/_index.md @@ -0,0 +1,34 @@ +--- +title: "PyTorch Language Modeling BERT base Performance" +--- + + + +# PyTorch Language Modeling BERT base Performance + +The following graphs show various metrics when running Pytorch Language Modeling using Hugging Face bert-base-uncased model pipeline. +See the [glossary](/performance/glossary) for definitions. + +## What is the estimated cost to run the pipeline? + +{{< performance_looks io="pytorchbertbase" read_or_write="write" section="cost" >}} + +## How has various metrics changed when running the pipeline for different Beam SDK versions? + +{{< performance_looks io="pytorchbertbase" read_or_write="write" section="version" >}} + +## How has various metrics changed over time when running the pipeline? 
+ +{{< performance_looks io="pytorchbertbase" read_or_write="write" section="date" >}} diff --git a/website/www/site/content/en/performance/pytorchbertlarge/_index.md b/website/www/site/content/en/performance/pytorchbertlarge/_index.md new file mode 100644 index 000000000000..ea18712c1019 --- /dev/null +++ b/website/www/site/content/en/performance/pytorchbertlarge/_index.md @@ -0,0 +1,34 @@ +--- +title: "PyTorch Language Modeling BERT large Performance" +--- + + + +# PyTorch Language Modeling BERT large Performance + +The following graphs show various metrics when running Pytorch Language Modeling using Hugging Face bert-large-uncased model pipeline. +See the [glossary](/performance/glossary) for definitions. + +## What is the estimated cost to run the pipeline? + +{{< performance_looks io="pytorchbertlarge" read_or_write="write" section="cost" >}} + +## How has various metrics changed when running the pipeline for different Beam SDK versions? + +{{< performance_looks io="pytorchbertlarge" read_or_write="write" section="version" >}} + +## How has various metrics changed over time when running the pipeline? + +{{< performance_looks io="pytorchbertlarge" read_or_write="write" section="date" >}} diff --git a/website/www/site/content/en/performance/pytorchresnet101/_index.md b/website/www/site/content/en/performance/pytorchresnet101/_index.md new file mode 100644 index 000000000000..d65c5ec377fc --- /dev/null +++ b/website/www/site/content/en/performance/pytorchresnet101/_index.md @@ -0,0 +1,34 @@ +--- +title: "Pytorch Vision Classification with Resnet 101 Performance" +--- + + + +# Pytorch Vision Classification with Resnet 101 Performance + +The following graphs show various metrics when running Pytorch Vision Classification with Resnet 101 pipeline. +See the [glossary](/performance/glossary) for definitions. + +## What is the estimated cost to run the pipeline? 
+ +{{< performance_looks io="pytorchresnet101" read_or_write="write" section="cost" >}} + +## How has various metrics changed when running the pipeline for different Beam SDK versions? + +{{< performance_looks io="pytorchresnet101" read_or_write="write" section="version" >}} + +## How has various metrics changed over time when running the pipeline? + +{{< performance_looks io="pytorchresnet101" read_or_write="write" section="date" >}} diff --git a/website/www/site/content/en/performance/pytorchresnet152/_index.md b/website/www/site/content/en/performance/pytorchresnet152/_index.md new file mode 100644 index 000000000000..1270eb1b4f37 --- /dev/null +++ b/website/www/site/content/en/performance/pytorchresnet152/_index.md @@ -0,0 +1,34 @@ +--- +title: "Pytorch Vision Classification with Resnet 152 Performance" +--- + + + +# Pytorch Vision Classification with Resnet 152 Performance + +The following graphs show various metrics when running Pytorch Vision Classification with Resnet 152 pipeline. +See the [glossary](/performance/glossary) for definitions. + +## What is the estimated cost to run the pipeline? + +{{< performance_looks io="pytorchresnet152" read_or_write="write" section="cost" >}} + +## How has various metrics changed when running the pipeline for different Beam SDK versions? + +{{< performance_looks io="pytorchresnet152" read_or_write="write" section="version" >}} + +## How has various metrics changed over time when running the pipeline? 
+ +{{< performance_looks io="pytorchresnet152" read_or_write="write" section="date" >}} diff --git a/website/www/site/content/en/performance/pytorchresnet152tesla/_index.md b/website/www/site/content/en/performance/pytorchresnet152tesla/_index.md new file mode 100644 index 000000000000..cd03ce0d985d --- /dev/null +++ b/website/www/site/content/en/performance/pytorchresnet152tesla/_index.md @@ -0,0 +1,34 @@ +--- +title: "Pytorch Vision Classification with Resnet 152 with Tesla T4 GPU Performance" +--- + + + +# Pytorch Vision Classification with Resnet 152 with Tesla T4 GPU Performance + +The following graphs show various metrics when running Pytorch Vision Classification with Resnet 152 with Tesla T4 GPU pipeline. +See the [glossary](/performance/glossary) for definitions. + +## What is the estimated cost to run the pipeline? + +{{< performance_looks io="pytorchresnet152tesla" read_or_write="write" section="cost" >}} + +## How has various metrics changed when running the pipeline for different Beam SDK versions? + +{{< performance_looks io="pytorchresnet152tesla" read_or_write="write" section="version" >}} + +## How has various metrics changed over time when running the pipeline? + +{{< performance_looks io="pytorchresnet152tesla" read_or_write="write" section="date" >}} diff --git a/website/www/site/content/en/performance/tensorflowmnist/_index.md b/website/www/site/content/en/performance/tensorflowmnist/_index.md new file mode 100644 index 000000000000..350405fed1cd --- /dev/null +++ b/website/www/site/content/en/performance/tensorflowmnist/_index.md @@ -0,0 +1,34 @@ +--- +title: "TensorFlow MNIST Image Classification Performance" +--- + + + +# TensorFlow MNIST Image Classification Performance + +The following graphs show various metrics when running TensorFlow MNIST Image Classification pipeline. +See the [glossary](/performance/glossary) for definitions. + +## What is the estimated cost to run the pipeline? 
+ +{{< performance_looks io="tensorflowmnist" read_or_write="write" section="cost" >}} + +## How has various metrics changed when running the pipeline for different Beam SDK versions? + +{{< performance_looks io="tensorflowmnist" read_or_write="write" section="version" >}} + +## How has various metrics changed over time when running the pipeline? + +{{< performance_looks io="tensorflowmnist" read_or_write="write" section="date" >}} diff --git a/website/www/site/data/performance.yaml b/website/www/site/data/performance.yaml index dc375811c833..5fd8f613d1a5 100644 --- a/website/www/site/data/performance.yaml +++ b/website/www/site/data/performance.yaml @@ -106,3 +106,99 @@ looks: title: AvgInputThroughputBytesPerSec by Version - id: fVVHhXCrHNgBG52TJsTjR8VbmWCCQnVN title: AvgInputThroughputElementsPerSec by Version + pytorchbertbase: + write: + folder: 76 + cost: + - id: Vybj7cBtbvVWJG63RRcYCTBC8TrD3Sdm + title: RunTime and EstimatedCost + date: + - id: DZfwm7T8kyVXzBkd7Hm65y8JNfNzZzYT + title: AvgThroughputBytesPerSec by Date + - id: ZDnG6kH55T2WPSD7yQh5cF6pkrQdRHKr + title: AvgThroughputElementsPerSec by Date + version: + - id: YCGWnm7S84qRcVm6kPKRwwgnKpg5xyJW + title: AvgThroughputBytesPerSec by Version + - id: 2dPXDTthFxDhvdypyHYNp7bSbMJggW6x + title: AvgThroughputElementsPerSec by Version + pytorchbertlarge: + write: + folder: 77 + cost: + - id: gTN4qQbqFfJMWJKzwJHsXpjVV8McFbm8 + title: RunTime and EstimatedCost + date: + - id: jGS2p6kTK9pZq94sYdqmNcz67PP6pKFd + title: AvgThroughputBytesPerSec by Date + - id: wfhCtgfnqM5YjRYbp4624fnyJcT2zXcT + title: AvgThroughputElementsPerSec by Date + version: + - id: Z3k29nwZrdCXJZdg5Yg7SSKDm2T4y8rZ + title: AvgThroughputBytesPerSec by Version + - id: D5g8qkqGKTpNqC8RV9cK2mPPD7rqJ8f4 + title: AvgThroughputElementsPerSec by Version + pytorchresnet101: + write: + folder: 78 + cost: + - id: DKbt3WmgTxnxXd5FKMtPvf5SgxYSByPT + title: RunTime and EstimatedCost + date: + - id: GDMn2mY45d4wpvw3tZpJhYnC6gpqysvn + title: 
AvgThroughputBytesPerSec by Date + - id: VnXf9SqntCd2SRw3Br2bgfkytVGdGxrV + title: AvgThroughputElementsPerSec by Date + version: + - id: cmWSXFn4Vp2pvpFJK3NNQg3mdTk7ywBC + title: AvgThroughputBytesPerSec by Version + - id: BpPdzhWWJttM8gcmQ4WSpFKX38BfHwbk + title: AvgThroughputElementsPerSec by Version + pytorchresnet152: + write: + folder: 79 + cost: + - id: jkV2YJPv3MgqD22DRB65cbGNVjPDcJwT + title: RunTime and EstimatedCost + date: + - id: pvQwSM5JvxmJDcXpDJySctdYZkWDF69H + title: AvgThroughputBytesPerSec by Date + - id: JGctprgybxbfp2sBjspnBdRppmRXS5Sn + title: AvgThroughputElementsPerSec by Date + version: + - id: qc689x3JQxg5DWWVC4mBPqGCdx3hPSTG + title: AvgThroughputBytesPerSec by Version + - id: wS7Htr76CJ75gJ47tVP8ZT8rBw6BY3QW + title: AvgThroughputElementsPerSec by Version + pytorchresnet152tesla: + write: + folder: 80 + cost: + - id: YD3mVwkS3976Cv7bCSSmDP5f4jXFsFRF + title: RunTime and EstimatedCost + date: + - id: 8r96B3vsfhTpwgz4FgH7xbH5KY8d5k4b + title: AvgThroughputBytesPerSec by Date + - id: whGvSJZzRbpvfYrqMhnsJRHWk3mKyF7r + title: AvgThroughputElementsPerSec by Date + version: + - id: hGVcdDzrSndZh68P9jrY5MMTCQ6wwrKb + title: AvgThroughputBytesPerSec by Version + - id: DVhGKTmJWknSvfQVPQ9FDrvPYgdJ2dFd + title: AvgThroughputElementsPerSec by Version + tensorflowmnist: + write: + folder: 75 + cost: + - id: Vs9ZHMkCkrSgJF7FCPdQS5HwK8PQTyWb + title: RunTime and EstimatedCost + date: + - id: 7mYxWj4hDXQp2SZ28vMNTCZGhWcPQdwJ + title: AvgThroughputBytesPerSec by Date + - id: bWhWQ9t2jKGscc9ghgH77wRszTxwW8mM + title: AvgThroughputElementsPerSec by Date + version: + - id: y3jVqx2xKcZGpkMBTSCZCpGMPPFHrC8V + title: AvgThroughputBytesPerSec by Version + - id: YdD9SMWCDNJ7wCY4WZwyd2Jt9Ts38FY2 + title: AvgThroughputElementsPerSec by Version