diff --git a/.github/actions/action.yml b/.github/actions/action.yml index decaa5ff3f8..b49aebfcfa1 100644 --- a/.github/actions/action.yml +++ b/.github/actions/action.yml @@ -98,7 +98,8 @@ runs: --environment dev \ --platform dgx_h100 \ --tag ${{ inputs.tag }} \ - --container-image ${{ inputs.container-image }} + --container-image ${{ inputs.container-image }} \ + --hf-home /mnt/datadrive/TestData/nemo-fw/TestData/HF_HOME RUN_TEST_EOF ) @@ -186,6 +187,7 @@ runs: --platform dgx_h100 \ --container-image ${{ inputs.container-image }} \ --data-dir /mnt/datadrive/TestData/megatron-lm/artifacts \ + --hf-home /mnt/datadrive/TestData/nemo-fw/TestData/HF_HOME RUN_TEST_EOF ) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index ed3bbca7f2f..3442d4f1f02 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -25,7 +25,7 @@ on: workflow_dispatch: concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.ref || github.event.pull_request.number }} + group: ${{ github.workflow }}-${{ github.head_ref || github.event.merge_group.base_ref || github.ref }} cancel-in-progress: true permissions: diff --git a/tests/test_utils/python_scripts/launch_nemo_run_workload.py b/tests/test_utils/python_scripts/launch_nemo_run_workload.py index bf4bb37aa20..cdb730fae63 100644 --- a/tests/test_utils/python_scripts/launch_nemo_run_workload.py +++ b/tests/test_utils/python_scripts/launch_nemo_run_workload.py @@ -54,6 +54,7 @@ def is_flaky_failure(concat_allranks_logs: str) -> bool: "--n-repeat", required=False, type=int, help="Number of times to repeat the workload", default=1 ) @click.option("--data-dir", required=False, type=str, help="Data directory of the workload") +@click.option("--hf-home", required=False, type=str, help="HF home directory of the workload") @click.option("--tag", required=False, type=str, help="Tag of the workload") @click.option( "--enable-lightweight-mode", @@ -73,6 +74,7 @@ def main( container_image, n_repeat: int = 1, data_dir: Optional[str] = None, + hf_home: Optional[str] = None, tag: Optional[str] = None, enable_lightweight_mode: Optional[bool] = False, ): @@ -107,6 +109,8 @@ def main( artifacts.append(f"{os.getcwd()}:/opt/megatron-lm") if data_dir: artifacts.append(f"{pathlib.Path(data_dir)}:/mnt/artifacts") + if hf_home: + artifacts.append(f"{pathlib.Path(hf_home)}:/mnt/hf_home") executor = run.DockerExecutor( container_image=container_image, @@ -122,6 +126,8 @@ def main( "CLUSTER": "dgxh100_dgxc", "NCCL_DEBUG": "INFO", "NCCL_DEBUG_FILE": "/opt/megatron-lm/assets_dir/logs/nccl_debug.log", + "HF_HOME": "/mnt/hf_home", + "TRANSFORMERS_OFFLINE": "1", }, packager=run.Packager(), volumes=artifacts,