diff --git a/.gitignore b/.gitignore index 694624a3..efa46633 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,14 @@ presto/docker/config/generated*/ # Generated Presto Docker Compose files presto/docker/docker-compose/generated*/ +# Slurm logs and results +presto/slurm/presto-nvl72/logs/ +presto/slurm/presto-nvl72/*.err +presto/slurm/presto-nvl72/*.out +presto/slurm/presto-nvl72/result_dir/ +presto/slurm/presto-nvl72/kept_results/ +presto/slurm/presto-nvl72/worker_data/ +presto/slurm/presto-nvl72/profiles/ +presto/slurm/presto-nvl72/worker_info/ + devstate* diff --git a/benchmark_data_tools/duckdb_utils.py b/benchmark_data_tools/duckdb_utils.py index f7c85b74..0e16fc2a 100644 --- a/benchmark_data_tools/duckdb_utils.py +++ b/benchmark_data_tools/duckdb_utils.py @@ -6,6 +6,10 @@ import duckdb +def quote_ident(name: str) -> str: + return '"' + name.replace('"', '""') + '"' + + def init_benchmark_tables(benchmark_type, scale_factor): tables = duckdb.sql("SHOW TABLES").fetchall() assert len(tables) == 0 @@ -22,27 +26,27 @@ def init_benchmark_tables(benchmark_type, scale_factor): def drop_benchmark_tables(): tables = duckdb.sql("SHOW TABLES").fetchall() for (table,) in tables: - duckdb.sql(f"DROP TABLE {table}") + duckdb.sql(f"DROP TABLE {quote_ident(table)}") def create_table(table_name, data_path): - duckdb.sql(f"DROP TABLE IF EXISTS {table_name}") - duckdb.sql(f"CREATE TABLE {table_name} AS SELECT * FROM '{data_path}/*.parquet';") + duckdb.sql(f"DROP TABLE IF EXISTS {quote_ident(table_name)}") + duckdb.sql(f"CREATE TABLE {quote_ident(table_name)} AS SELECT * FROM '{data_path}/*.parquet';") # Generates a sample table with a small limit. # This is mainly used to extract the schema from the parquet files. 
def create_not_null_table_from_sample(table_name, data_path): - duckdb.sql(f"DROP TABLE IF EXISTS {table_name}") - duckdb.sql(f"CREATE TABLE {table_name} AS SELECT * FROM '{data_path}/*.parquet' LIMIT 10;") - ret = duckdb.sql(f"DESCRIBE TABLE {table_name}").fetchall() + duckdb.sql(f"DROP TABLE IF EXISTS {quote_ident(table_name)}") + duckdb.sql(f"CREATE TABLE {quote_ident(table_name)} AS SELECT * FROM '{data_path}/*.parquet' LIMIT 10;") + ret = duckdb.sql(f"DESCRIBE TABLE {quote_ident(table_name)}").fetchall() for row in ret: - duckdb.sql(f"ALTER TABLE {table_name} ALTER COLUMN {row[0]} SET NOT NULL;") + duckdb.sql(f"ALTER TABLE {quote_ident(table_name)} ALTER COLUMN {row[0]} SET NOT NULL;") def create_table_from_sample(table_name, data_path): - duckdb.sql(f"DROP TABLE IF EXISTS {table_name}") - duckdb.sql(f"CREATE TABLE {table_name} AS SELECT * FROM '{data_path}/*.parquet' LIMIT 10;") + duckdb.sql(f"DROP TABLE IF EXISTS {quote_ident(table_name)}") + duckdb.sql(f"CREATE TABLE {quote_ident(table_name)} AS SELECT * FROM '{data_path}/*.parquet' LIMIT 10;") def is_decimal_column(column_type): diff --git a/presto/docker/config/template/etc_coordinator/config_native.properties b/presto/docker/config/template/etc_coordinator/config_native.properties index fc2362b7..3e6ad0dc 100644 --- a/presto/docker/config/template/etc_coordinator/config_native.properties +++ b/presto/docker/config/template/etc_coordinator/config_native.properties @@ -48,7 +48,7 @@ optimizer.generate-domain-filters=true # Upper limit for broadcasted table size to avoid memory blowups. # See: https://github.com/prestodb/presto/issues/22161#issuecomment-1994128619 join-max-broadcast-table-size={{ .JoinMaxBroadcastTableSizeMb }}MB -# Default is AUTOMATIC, ucx exchange does not support BROADCAST partition type. +# overwritten to "PARTITIONED" in multi-node context join-distribution-type=AUTOMATIC # Client request timeout to avoid hung queries. 
@@ -58,7 +58,7 @@ query.execution-policy=phased # Kill queries based on total reservation on blocked nodes to recover memory. query.low-memory-killer.policy=total-reservation-on-blocked-nodes # Upper limit on query wall time to keep tests bounded. -query.max-execution-time=30m +query.max-execution-time=10m # Keep metadata of up to 1000 queries for UI and debugging. query.max-history=1000 # Memory quotas per node and cluster to protect stability. diff --git a/presto/docker/config/template/etc_worker/config_native.properties b/presto/docker/config/template/etc_worker/config_native.properties index b1ee1082..c36788c7 100644 --- a/presto/docker/config/template/etc_worker/config_native.properties +++ b/presto/docker/config/template/etc_worker/config_native.properties @@ -29,13 +29,15 @@ system-mem-limit-gb={{ sub .ContainerMemoryGb .GeneratorParameters.MemoryPushBac system-mem-shrink-gb=20 # Optimize for single-node execution when the entire query can run locally. +# overwritten to "false" in multi-node settings. single-node-execution-enabled=true # Enable cuDF (CPU mode will ignore this setting) cudf.enabled=true +# overwritten to "true" in multi-node settings. cudf.exchange=false -# Port number currently must be exactly 3 more than server port (ignored if cudf.exchange is false) -cudf.exchange.server.port=8083 +# overwritten when cudf.exchange is enabled (ignored otherwise) +cudf.exchange.server.port=0000 cudf.memory_resource=async async-data-cache-enabled=false diff --git a/presto/scripts/common_functions.sh b/presto/scripts/common_functions.sh index 9c7cc4df..a8fdf4aa 100644 --- a/presto/scripts/common_functions.sh +++ b/presto/scripts/common_functions.sh @@ -1,25 +1,12 @@ #!/bin/bash -# Copyright (c) 2025, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 function wait_for_worker_node_registration() { trap "rm -rf node_response.json" RETURN echo "Waiting for a worker node to be registered..." - HOSTNAME=${1:-localhost} - PORT=${2:-8080} COORDINATOR_URL=http://${HOSTNAME}:${PORT} echo "Coordinator URL: $COORDINATOR_URL" local -r MAX_RETRIES=12 diff --git a/presto/scripts/generate_presto_config.sh b/presto/scripts/generate_presto_config.sh index 71735825..cc20b82a 100755 --- a/presto/scripts/generate_presto_config.sh +++ b/presto/scripts/generate_presto_config.sh @@ -30,19 +30,23 @@ if [ ! -x "${SCRIPT_DIR}/../pbench/pbench" ]; then echo_error "ERROR: generate_presto_config.sh script cannot find pbench at ${SCRIPT_DIR}/../pbench/pbench" fi +# This function duplicates the worker configs when we are running multiple workers. +# It also adds certain config options to the workers if those options apply only to multi-worker environments. function duplicate_worker_configs() { local worker_id=$1 echo "Duplicating worker configs for GPU ID $worker_id" local worker_config="${CONFIG_DIR}/etc_worker_${worker_id}" - local worker_native_config="${worker_config}/config_native.properties" local coord_config="${CONFIG_DIR}/etc_coordinator" + local worker_native_config="${worker_config}/config_native.properties" local coord_native_config="${coord_config}/config_native.properties" + # Need to stagger the port numbers because ucx exchange currently expects to be exactly + # 3 higher than the http port. 
local http_port="10$(printf "%02d\n" "$worker_id")0" local exch_port="10$(printf "%02d\n" "$worker_id")3" rm -rf ${worker_config} cp -r ${CONFIG_DIR}/etc_worker ${worker_config} - # Single node execution needs to be disabled if we are running multiple workers. + # Some configs should only be applied if we are in a multi-worker environment. if [[ ${NUM_WORKERS} -gt 1 ]]; then sed -i "s+single-node-execution-enabled.*+single-node-execution-enabled=false+g" ${coord_native_config} sed -i "s+single-node-execution-enabled.*+single-node-execution-enabled=false+g" ${worker_native_config} @@ -72,7 +76,7 @@ RAM_GB=$(lsmem -b | grep "Total online memory" | awk '{print int($4 / (1024*1024 if [[ -z ${VARIANT_TYPE} || ! ${VARIANT_TYPE} =~ ^(cpu|gpu|java)$ ]]; then echo_error "ERROR: VARIANT_TYPE must be set to a valid variant type (cpu, gpu, java)." fi -if [[ -z ${VCPU_PER_WORKER} ]]; then +if [[ -z ${VCPU_PER_WORKER:-} ]]; then if [[ "${VARIANT_TYPE}" == "gpu" ]]; then VCPU_PER_WORKER=2 else @@ -122,6 +126,7 @@ EOF fi COORD_CONFIG="${CONFIG_DIR}/etc_coordinator/config_native.properties" + WORKER_CONFIG="${CONFIG_DIR}/etc_worker/config_native.properties" # now perform other variant-specific modifications to the generated configs if [[ "${VARIANT_TYPE}" == "gpu" ]]; then # for GPU variant, uncomment these optimizer settings @@ -158,10 +163,13 @@ fi # We want to propagate any changes from the original worker config to the new worker configs even if # we did not re-generate the configs. 
-if [[ -n "$NUM_WORKERS" && -n "$GPU_IDS" && "$VARIANT_TYPE" == "gpu" ]]; then - # Count the number of GPU IDs provided - IFS=',' read -ra GPU_ID_ARRAY <<< "$GPU_IDS" - for i in "${GPU_ID_ARRAY[@]}"; do +if [[ -n "$NUM_WORKERS" && "$VARIANT_TYPE" == "gpu" ]]; then + if [[ -n ${GPU_IDS:-} ]]; then + WORKER_IDS=($(echo "$GPU_IDS" | tr ',' ' ')) + else + WORKER_IDS=($(seq 0 $((NUM_WORKERS - 1)))) + fi + for i in "${WORKER_IDS[@]}"; do duplicate_worker_configs $i done fi diff --git a/presto/scripts/run_benchmark.sh b/presto/scripts/run_benchmark.sh index d651d71f..ff5117ef 100755 --- a/presto/scripts/run_benchmark.sh +++ b/presto/scripts/run_benchmark.sh @@ -30,6 +30,7 @@ OPTIONS: stored inside a directory under the --output-dir path with a name matching the tag name. Tags must contain only alphanumeric and underscore characters. -p, --profile Enable profiling of benchmark queries. + --profile-script-path Path to profiler functions script (default: ./profiler_functions.sh). --skip-drop-cache Skip dropping system caches before each benchmark query (dropped by default). -m, --metrics Collect detailed metrics from Presto REST API after each query. Metrics are stored in query-specific directories. 
@@ -147,6 +148,15 @@ parse_args() { PROFILE=true shift ;; + --profile-script-path) + if [[ -n $2 ]]; then + PROFILE_SCRIPT_PATH=$2 + shift 2 + else + echo "Error: --profile-script-path requires a value" + exit 1 + fi + ;; --skip-drop-cache) SKIP_DROP_CACHE=true shift @@ -218,7 +228,10 @@ if [[ -n ${TAG} ]]; then fi if [[ "${PROFILE}" == "true" ]]; then - PYTEST_ARGS+=("--profile --profile-script-path $(readlink -f ./profiler_functions.sh)") + if [[ -z "${PROFILE_SCRIPT_PATH:-}" ]]; then + PROFILE_SCRIPT_PATH="$(readlink -f ./profiler_functions.sh)" + fi + PYTEST_ARGS+=("--profile --profile-script-path ${PROFILE_SCRIPT_PATH}") fi if [[ "${METRICS}" == "true" ]]; then diff --git a/presto/scripts/setup_benchmark_helper_check_instance_and_parse_args.sh b/presto/scripts/setup_benchmark_helper_check_instance_and_parse_args.sh index 044c4fe6..bcf35d0a 100644 --- a/presto/scripts/setup_benchmark_helper_check_instance_and_parse_args.sh +++ b/presto/scripts/setup_benchmark_helper_check_instance_and_parse_args.sh @@ -31,6 +31,7 @@ OPTIONS: -s, --schema-name Name of the schema that will contain the created tables. -d, --data-dir-name Name of the directory inside the PRESTO_DATA_DIR path for the benchmark data. --skip-analyze-tables Skip analyzing tables after creating them. Default is to analyze tables. + --no-docker Skip the setup/teardown steps that require docker. 
$SCRIPT_EXTRA_OPTIONS_DESCRIPTION EXAMPLES: @@ -49,6 +50,7 @@ fi # Compute the directory where this script resides (if not already set by caller) SCRIPT_DIR="${SCRIPT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}" +NO_DOCKER=false SKIP_ANALYZE_TABLES=false parse_args() { while [[ $# -gt 0 ]]; do @@ -88,6 +90,10 @@ parse_args() { SKIP_ANALYZE_TABLES=true shift ;; + --no-docker) + NO_DOCKER=true + shift + ;; *) SCRIPT_EXTRA_OPTIONS_UNKNOWN_ARG=true if [[ -n $SCRIPT_EXTRA_OPTIONS_PARSER ]]; then diff --git a/presto/scripts/setup_benchmark_tables.sh b/presto/scripts/setup_benchmark_tables.sh index d9662219..e69391a5 100755 --- a/presto/scripts/setup_benchmark_tables.sh +++ b/presto/scripts/setup_benchmark_tables.sh @@ -32,11 +32,14 @@ function cleanup() { trap cleanup EXIT -"${SCRIPT_DIR}/start_native_cpu_presto.sh" - -source "${SCRIPT_DIR}/common_functions.sh" +# These scripts are used in some non-docker environments, so provide the option to skip +# the docker setup/teardown. +if [[ "$NO_DOCKER" == "false" ]]; then + "${SCRIPT_DIR}/start_native_cpu_presto.sh" + source "${SCRIPT_DIR}/common_functions.sh" + wait_for_worker_node_registration +fi -wait_for_worker_node_registration "${SCRIPT_DIR}/../../scripts/run_py_script.sh" -p $SCHEMA_GEN_SCRIPT_PATH \ --benchmark-type $BENCHMARK_TYPE \ @@ -53,4 +56,6 @@ if [[ "$SKIP_ANALYZE_TABLES" == "false" ]]; then "${SCRIPT_DIR}/analyze_tables.sh" -s $SCHEMA_NAME fi -"${SCRIPT_DIR}/stop_presto.sh" +if [[ "$NO_DOCKER" == "false" ]]; then + "${SCRIPT_DIR}/stop_presto.sh" +fi diff --git a/presto/scripts/start_presto_helper.sh b/presto/scripts/start_presto_helper.sh index 465fac38..7714701a 100755 --- a/presto/scripts/start_presto_helper.sh +++ b/presto/scripts/start_presto_helper.sh @@ -19,7 +19,12 @@ fi SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Get the root of the git repository -REPO_ROOT="$(git -C "${SCRIPT_DIR}" rev-parse --show-toplevel)" +if command -v git &> /dev/null; then + REPO_ROOT="$(git -C "${SCRIPT_DIR}" 
rev-parse --show-toplevel)" +else + REPO_ROOT="$SCRIPT_DIR/../.." +fi + # Validate sibling repos if [[ "$VARIANT_TYPE" == "java" ]]; then @@ -88,12 +93,6 @@ else echo "Internal error: unexpected VARIANT_TYPE value: $VARIANT_TYPE" fi -# Default GPU_IDS if NUM_WORKERS is set but GPU_IDS is not -if [[ -n $NUM_WORKERS && -z $GPU_IDS ]]; then - # Generate default GPU IDs: 0,1,2,...,N-1 - export GPU_IDS=$(seq -s, 0 $((NUM_WORKERS - 1))) -fi - "${SCRIPT_DIR}/stop_presto.sh" "${SCRIPT_DIR}/generate_presto_config.sh" diff --git a/presto/slurm/presto-nvl72/README.md b/presto/slurm/presto-nvl72/README.md index baed39d0..b2fb24e3 100644 --- a/presto/slurm/presto-nvl72/README.md +++ b/presto/slurm/presto-nvl72/README.md @@ -17,43 +17,61 @@ presto-nvl72/ ## Quick Start -### Running the Benchmark +### Running the benchmark via launcher (recommended) ```bash -cd /mnt/data/bzaitlen/presto-nvl72 -./launch-run.sh +cd presto/slurm/presto-nvl72 +./launch-run.sh -n -s [-i ] [additional sbatch options] + +# examples +./launch-run.sh -n 8 -s 3000 +./launch-run.sh -n 4 -s 10000 -i 3 --partition gpu --account myacct ``` -Or submit directly: +The launcher: +- requires node count (-n/--nodes) and scale factor (-s/--scale-factor) +- accepts optional iterations (-i/--iterations, default 1) +- embeds nodes/SF/iterations in .out/.err filenames +- prints the first node’s hostname/IP when allocated and a ready-to-run SSH port-forward command to access the Presto Web UI on your machine (http://localhost:9200) + +### Submitting directly (advanced) ```bash -sbatch run-presto-benchmarks.slurm +export SCALE_FACTOR=3000 +export NUM_ITERATIONS=1 +sbatch --nodes 8 \ + --output "presto-tpch-run_n8_sf3000_i1_%j.out" \ + --error "presto-tpch-run_n8_sf3000_i1_%j.err" \ + --export "ALL,SCALE_FACTOR=${SCALE_FACTOR},NUM_ITERATIONS=${NUM_ITERATIONS}" \ + run-presto-benchmarks.slurm ``` ## Configuration -**To change settings, edit the values directly in `run-presto-benchmarks.slurm`** +Primary configuration is 
passed via the launcher flags and environment. The `.slurm` script validates that required variables are set. -All configuration is at the top of the file in the "User Configuration" section. +Key variables: -### Configuration Variables +- SCALE_FACTOR: required (provided via `-s/--scale-factor`) +- NUM_ITERATIONS: required by the job; launcher defaults to 1 (`-i/--iterations` to override) +- NUM_NODES: derived from Slurm allocation; provided via `-n/--nodes` to launcher +- REPO_ROOT: auto-detected from script location +- LOGS: `${SCRIPT_DIR}/logs` by default +- IMAGE_DIR, DATA, CONFIGS: see below or override via environment if needed -| Variable | Current Value | Description | -|----------|---------------|-------------| -| `SCALE_FACTOR` | 300 | TPC-H scale factor | -| `NUM_ITERATIONS` | 5 | Number of query iterations | -| `WORKER_IMAGE` | presto-native-worker-gpu | Worker container image | -| `NUM_NODES` | 4 | Number of nodes to allocate | -| `NUM_GPUS_PER_NODE` | 4 | GPUs per node | -| `DATA` | /mnt/data/tpch-rs/scale-300 | Data directory | -| `IMAGE_DIR` | /mnt/home/misiug/images | Container image directory | -| `LOGS` | /mnt/data/bzaitlen/presto-nvl72/logs | Log directory | +Other defaults: +- WORKER_IMAGE: `presto-native-worker-gpu` +- NUM_GPUS_PER_NODE: `4` +- DATA: `/mnt/data/tpch-rs` +- IMAGE_DIR: `/mnt/data/images/presto` +- CONFIGS: `${REPO_ROOT}/presto/docker/config/generated/gpu` ### SBATCH Directives - **Time limit**: 1 hour (adjust `--time` if needed) - **Node allocation**: Full node (144 CPUs, 4 GPUs, exclusive) - **Memory**: All available (`--mem=0`) +- `--nodes`, `--output`, and `--error` are passed by the launcher instead of being embedded in the `.slurm` file. ## Monitoring @@ -62,7 +80,7 @@ All configuration is at the top of the file in the "User Configuration" section. 
squeue -u $USER # Monitor job output -tail -f presto-tpch-run_.out +tail -f presto-tpch-run_n_sf_i_.out # Check logs during execution tail -f logs/coord.log @@ -70,12 +88,26 @@ tail -f logs/cli.log tail -f logs/worker_0.log ``` +## Coordinator IP and Web UI + +After submission, the launcher waits until nodes are allocated, then prints: +- the first node’s hostname/IP +- an SSH port-forward command you can run locally to access the Presto Web UI + +Example output snippet: + +```text +Run this command on a machine to get access to the webUI: + ssh -N -L 9200::9200 +The UI will be available at http://localhost:9200 +``` + ## Results Results are saved to: - **Logs**: `logs/` directory - **CSV Summary**: `result_dir/summary.csv` -- **Historical Results**: `${WORKSPACE}/benchmark-storage/YYYY/MM/DD/` +- **Historical Results**: `${REPO_ROOT}/benchmark-storage/YYYY/MM/DD/` ## Prerequisites @@ -85,7 +117,7 @@ Results are saved to: 2. **Data directory** must be accessible at `${DATA}` (will be mounted in containers) -3. **velox-testing repo** will be auto-cloned to `${WORKSPACE}/velox-testing` if not present +3. **velox-testing repo** will be auto-cloned to `${REPO_ROOT}/velox-testing` if not present ## Troubleshooting @@ -104,7 +136,7 @@ cat logs/worker_*.log ### Image not found Verify images exist: ```bash -ls -lh /mnt/home/misiug/images/*.sqsh +ls -lh /mnt/data/images/presto/*.sqsh ``` ### Data directory issues diff --git a/presto/slurm/presto-nvl72/create-presto-benchmarks.sh b/presto/slurm/presto-nvl72/create-presto-benchmarks.sh index 116192b2..4b219c29 100755 --- a/presto/slurm/presto-nvl72/create-presto-benchmarks.sh +++ b/presto/slurm/presto-nvl72/create-presto-benchmarks.sh @@ -1,4 +1,7 @@ #!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + set -e set -x @@ -8,29 +11,15 @@ set -x # This script creates the Presto schema and tables for existing TPC-H data # Source helper functions -source /mnt/home/misiug/veloxtesting/presto-nvl72/echo_helpers.sh -source /mnt/home/misiug/veloxtesting/presto-nvl72/functions.sh +source ./echo_helpers.sh +source ./functions.sh # ============================================================================== # Setup and Validation # ============================================================================== echo "Setting up Presto environment for schema creation..." -export VARIANT_TYPE=cpu setup -worker_config="${CONFIGS}/etc_worker/config_native.properties" -sed -i "s/system-memory-gb.*/system-memory-gb=400/g" ${worker_config} -sed -i "s/query-memory-gb.*/query-memory-gb=400/g" ${worker_config} -sed -i "s/query\.max-memory-per-node.*/query\.max-memory-per-node=400GB/g" ${worker_config} - -coord_config="${CONFIGS}/etc_coordinator/config_native.properties" -sed -i "s/memory\.heap-headroom-per-node.*/memory\.heap-headroom-per-node=120GB/g" ${coord_config} -sed -i "s/query\.max-total-memory-per-node.*/query\.max-total-memory-per-node=300GB/g" ${coord_config} -sed -i "s/query\.max-total-memory.*/query\.max-total-memory=300GB/g" ${coord_config} -sed -i "s/query\.max-memory-per-node.*/query\.max-memory-per-node=250GB/g" ${coord_config} -sed -i "s/query\.max-memory.*/query\.max-memory=250GB/g" ${coord_config} -sed -i "s/cluster-tag.*//g" ${coord_config} - # ============================================================================== # Start Coordinator # ============================================================================== @@ -38,8 +27,6 @@ echo "Starting Presto coordinator on ${COORD}..." 
run_coordinator wait_until_coordinator_is_running - - # ============================================================================== # Start Workers (GPU workers for schema creation) # ============================================================================== diff --git a/presto/slurm/presto-nvl72/create-presto-benchmarks.slurm b/presto/slurm/presto-nvl72/create-presto-benchmarks.slurm index adaac17d..1bea2354 100644 --- a/presto/slurm/presto-nvl72/create-presto-benchmarks.slurm +++ b/presto/slurm/presto-nvl72/create-presto-benchmarks.slurm @@ -1,9 +1,6 @@ #!/bin/bash #SBATCH --job-name=presto-tpch-create -#SBATCH --output=/mnt/home/misiug/veloxtesting/presto-nvl72/%x_%j.out -#SBATCH --error=/mnt/home/misiug/veloxtesting/presto-nvl72/%x_%j.err #SBATCH --time=01:00:00 -#SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=144 #SBATCH --mem=0 @@ -14,21 +11,31 @@ # User Configuration - Edit these values directly # ============================================================================== # TPC-H Configuration -export SCALE_FACTOR=10000 +if [ -z "${SCALE_FACTOR:-}" ]; then + echo "Error: SCALE_FACTOR is required. Set via launcher: -s|--scale-factor" >&2 + exit 1 +fi +export SCALE_FACTOR +if [ -z "${SCRIPT_DIR:-}" ]; then + echo "Error: SCRIPT_DIR is required." + exit 1 +fi +export SCRIPT_DIR # Directory Configuration -export WORKSPACE=/mnt/home/misiug +export VT_ROOT="$(cd -- "${SCRIPT_DIR}/../../.." 
>/dev/null 2>&1 && pwd -P)" export DATA=/mnt/data/tpch-rs -export IMAGE_DIR=/mnt/home/misiug/images -export LOGS=/mnt/home/misiug/veloxtesting/presto-nvl72/logs -export CONFIGS=${WORKSPACE}/config/generated/cpu +export IMAGE_DIR=/mnt/data/images/presto +export LOGS=$SCRIPT_DIR/logs +export VARIANT_TYPE=cpu +export CONFIGS=$VT_ROOT/presto/docker/config/generated/$VARIANT_TYPE # Container Images # Coordinator: ${IMAGE_DIR}/presto-coordinator-test.sqsh # Worker: ${IMAGE_DIR}/${WORKER_IMAGE}.sqsh (CPU workers required for ANALYZE) -export WORKER_IMAGE=presto-native-worker-cpu -export NUM_NODES=1 -export NUM_GPUS_PER_NODE=1 +export WORKER_IMAGE=presto-native-worker-$VARIANT_TYPE +export NUM_NODES=$SLURM_JOB_NUM_NODES +export NUM_GPUS_PER_NODE=4 # Presto Configuration export PORT=9200 @@ -67,7 +74,7 @@ echo "========================================" # Create necessary directories mkdir -p ${LOGS} mkdir -p ${DATA} -mkdir -p ${WORKSPACE}/.hive_metastore +mkdir -p ${VT_ROOT}/.hive_metastore # Launch the job script -bash ${WORKSPACE}/veloxtesting/presto-nvl72/create-presto-benchmarks.sh +bash ${SCRIPT_DIR}/create-presto-benchmarks.sh diff --git a/presto/slurm/presto-nvl72/functions.sh b/presto/slurm/presto-nvl72/functions.sh index 5f07d6dc..e337a667 100755 --- a/presto/slurm/presto-nvl72/functions.sh +++ b/presto/slurm/presto-nvl72/functions.sh @@ -1,18 +1,6 @@ #!/bin/bash - -# UCX Configuration -export UCX_TLS=^ib,ud:aux,sm -export UCX_MAX_RNDV_RAILS=1 -export UCX_RNDV_PIPELINE_ERROR_HANDLING=y -export UCX_TCP_KEEPINTVL=1ms -export UCX_KEEPALIVE_INTERVAL=1ms - - -# Image directory for presto container images (can be overridden via environment) -IMAGE_DIR="${IMAGE_DIR:-${WORKSPACE}/images}" - -# Logs directory for presto execution logs (can be overridden via environment) -LOGS="${LOGS:-/mnt/home/misiug/veloxtesting/presto-nvl72/logs}" +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 # Validates job preconditions and assigns default values for presto execution. function setup { @@ -20,46 +8,34 @@ function setup { [ -z "$SLURM_JOB_ACCOUNT" ] && echo "required argument '--account' not specified" && exit 1 [ -z "$SLURM_JOB_PARTITION" ] && echo "required argument '--partition' not specified" && exit 1 [ -z "$SLURM_NNODES" ] && echo "required argument '--nodes' not specified" && exit 1 - [ -z "$NUM_NODES" ] && echo "NUM_WORKERS must be set" && exit 1 + [ -z "$IMAGE_DIR" ] && echo "IMAGE_DIR must be set" && exit 1 + [ -z "$LOGS" ] && echo "LOGS must be set" && exit 1 + [ -z "$CONFIGS" ] && echo "CONFIGS must be set" && exit 1 + [ -z "$NUM_NODES" ] && echo "NUM_NODES must be set" && exit 1 [ -z "$NUM_GPUS_PER_NODE" ] && echo "NUM_GPUS_PER_NODE env variable must be set" && exit 1 - [ ! -d "$WORKSPACE" ] && echo "WORKSPACE must be a valid directory" && exit 1 + [ ! -d "$VT_ROOT" ] && echo "VT_ROOT must be a valid directory" && exit 1 [ ! -d "$DATA" ] && echo "DATA must be a valid directory" && exit 1 - NUM_WORKERS=$(( $NUM_NODES * $NUM_GPUS_PER_NODE )) - mkdir -p ${LOGS} - # Only set CONFIGS if not already set (allow override from environment) - #CONFIGS="${CONFIGS:-${WORKSPACE}/config/generated/gpu}" - #CONFIGS="${CONFIGS:-${WORKSPACE}/config/generated/cpu}" - CONFIGS="${CONFIGS:-${WORKSPACE}/config/generated/${VARIANT_TYPE}}" - COORD=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -1) - PORT=9200 - CUDF_LIB=/usr/lib64/presto-native-libs - if [ "${NUM_WORKERS}" -eq "1" ]; then - SINGLE_NODE_EXECUTION=true + if [ ! -d ${VT_ROOT}/.hive_metastore ]; then + echo "Copying hive metastore from data source." + copy_hive_metastore else - SINGLE_NODE_EXECUTION=false + echo "Hive metastore already exists. Reusing." fi - if [ ! 
-d ${WORKSPACE}/velox-testing ]; then - git clone -b misiug/cluster https://github.com/rapidsai/velox-testing.git ${WORKSPACE}/velox-testing - #sed -i "s/python3 /python3.12 /g" ${WORKSPACE}/velox-testing/scripts/py_env_functions.sh - fi + [ ! -d ${VT_ROOT}/.hive_metastore/tpchsf${SCALE_FACTOR} ] && echo "Schema for SF ${SCALE_FACTOR} does not exist in hive metastore." && exit 1 - [ ! -d ${CONFIGS} ] && generate_configs + generate_configs validate_config_directory } function generate_configs { mkdir -p ${CONFIGS} - pushd ${WORKSPACE}/velox-testing/presto/scripts - #VARIANT_TYPE=cpu ./generate_presto_config.sh - #VARIANT_TYPE=gpu ./generate_presto_config.sh + pushd ${VT_ROOT}/presto/scripts OVERWRITE_CONFIG=true ./generate_presto_config.sh popd - mv ${WORKSPACE}/velox-testing/presto/docker/config/generated/${VARIANT_TYPE}/* ${CONFIGS}/ - #mv ${WORKSPACE}/velox-testing/presto/docker/config/generated/gpu/* ${CONFIGS}/ - #mv ${WORKSPACE}/velox-testing/presto/docker/config/generated/cpu/* ${CONFIGS}/ + # These options are require to run in some cluster contexts. echo "--add-modules=java.management,jdk.management" >> ${CONFIGS}/etc_common/jvm.config echo "-Dcom.sun.management.jmxremote=false" >> ${CONFIGS}/etc_common/jvm.config echo "-XX:-UseContainerSupport" >> ${CONFIGS}/etc_common/jvm.config @@ -82,17 +58,15 @@ function validate_environment_preconditions { # Execute script through the coordinator image (used for coordinator and cli executables) function run_coord_image { [ $# -ne 2 ] && echo_error "$0 expected one argument for ' + + + + + + + + + + + + + + +
+
+ +
+
+
Loading...
+
+
+ +
+
+ Query Details +
+
Loading...
+
+ +
+ + + + + + + + diff --git a/presto/slurm/presto-nvl72/launch-run.sh b/presto/slurm/presto-nvl72/launch-run.sh index 52841bb1..cdb7dc6f 100755 --- a/presto/slurm/presto-nvl72/launch-run.sh +++ b/presto/slurm/presto-nvl72/launch-run.sh @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + + #!/bin/bash # ============================================================================== # Presto TPC-H Benchmark Launcher @@ -5,7 +9,10 @@ # Simple launcher script to submit the presto benchmark job to slurm # # Usage: -# ./launch-run.sh [additional sbatch options] +# ./launch-run.sh -n|--nodes -s|--scale-factor [-i|--iterations ] [-p|--profile] [additional sbatch options] +# +# Options: +# -p, --profile Enable profiling of benchmark queries (creates .nsys-rep files for each worker) # # To change configuration, edit run-presto-benchmarks.slurm directly # ============================================================================== @@ -15,23 +22,151 @@ set -e # Change to script directory cd "$(dirname "$0")" -# Clean up old output files -rm -f result_dir/* logs/* *.out *.err 2>/dev/null || true -mkdir -p result_dir logs +# Clean up old output files, worker info directory, and worker data directories +rm -f profiles/* result_dir/* logs/* *.out *.err 2>/dev/null || true +rm -rf worker_info worker_data 2>/dev/null || true +mkdir -p result_dir logs worker_info worker_data echo "Submitting Presto TPC-H benchmark job..." 
echo "Configuration is set in run-presto-benchmarks.slurm" echo "" -# Submit job -JOB_ID=$(sbatch "$@" run-presto-benchmarks.slurm | awk '{print $NF}') -#JOB_ID=$(sbatch "$@" create-presto-benchmarks.slurm | awk '{print $NF}') +# Parse required -n/--nodes and -s/--scale-factor, optional -i/--iterations, and collect extra sbatch args +NODES_COUNT="" +SCALE_FACTOR="" +NUM_ITERATIONS="1" +EXTRA_ARGS=() +NUM_GPUS_PER_NODE="4" +WORKER_IMAGE="presto-native-worker-gpu" +COORD_IMAGE="presto-coordinator" +ENABLE_PROFILING="false" +while [[ $# -gt 0 ]]; do + case "$1" in + -n|--nodes) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + NODES_COUNT="$2" + shift 2 + else + echo "Error: -n|--nodes requires a value." + echo "Usage: $0 -n|--nodes -s|--scale-factor [additional sbatch options]" + exit 1 + fi + ;; + -s|--scale-factor) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + SCALE_FACTOR="$2" + shift 2 + else + echo "Error: -s|--scale-factor requires a value." + echo "Usage: $0 -n|--nodes -s|--scale-factor [additional sbatch options]" + exit 1 + fi + ;; + -i|--iterations) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + NUM_ITERATIONS="$2" + shift 2 + else + echo "Error: -i|--iterations requires a value" + echo "Usage: $0 -n|--nodes -s|--scale-factor [-i|--iterations ] [additional sbatch options]" + exit 1 + fi + ;; + -g|--num-gpus-per-node) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + NUM_GPUS_PER_NODE="$2" + shift 2 + else + echo "Error: -g|--num-gpus-per-node requires a value" + echo "Usage: $0 -n|--nodes -s|--scale-factor [-i|--iterations ] [additional sbatch options]" + exit 1 + fi + ;; + -w|--worker-image) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + WORKER_IMAGE="$2" + shift 2 + else + echo "Error: -w|--worker-image requires a value" + echo "Usage: $0 -n|--nodes -s|--scale-factor [-i|--iterations ] [additional sbatch options]" + exit 1 + fi + ;; + -c|--coord-image) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + COORD_IMAGE="$2" + shift 2 + 
else + echo "Error: -c|--coord-image requires a value" + echo "Usage: $0 -n|--nodes -s|--scale-factor [-i|--iterations ] [additional sbatch options]" + exit 1 + fi + ;; + -p|--profile) + ENABLE_PROFILING="true" + shift + ;; + --) + shift + break + ;; + *) + EXTRA_ARGS+=("$1") + shift + ;; + esac +done + +if [[ -z "${NODES_COUNT}" ]]; then + echo "Error: -n|--nodes is required" + echo "Usage: $0 -n|--nodes -s|--scale-factor [-i|--iterations ] [additional sbatch options]" + exit 1 +fi +if [[ -z "${SCALE_FACTOR}" ]]; then + echo "Error: -s|--scale-factor is required" + echo "Usage: $0 -n|--nodes -s|--scale-factor [-i|--iterations ] [additional sbatch options]" + exit 1 +fi + +# Submit job (include nodes/SF/iterations in file names) +OUT_FMT="presto-tpch-run_n${NODES_COUNT}_sf${SCALE_FACTOR}_i${NUM_ITERATIONS}_%j.out" +ERR_FMT="presto-tpch-run_n${NODES_COUNT}_sf${SCALE_FACTOR}_i${NUM_ITERATIONS}_%j.err" +SCRIPT_DIR="$PWD" +JOB_ID=$(sbatch --nodes="${NODES_COUNT}" --export="ALL,SCALE_FACTOR=${SCALE_FACTOR},NUM_ITERATIONS=${NUM_ITERATIONS},SCRIPT_DIR=${SCRIPT_DIR},NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE},WORKER_IMAGE=${WORKER_IMAGE},COORD_IMAGE=${COORD_IMAGE},ENABLE_PROFILING=${ENABLE_PROFILING}" \ +--output="${OUT_FMT}" --error="${ERR_FMT}" "${EXTRA_ARGS[@]}" --gres="gpu:${NUM_GPUS_PER_NODE}" \ +run-presto-benchmarks.slurm | awk '{print $NF}') +OUT_FILE="${OUT_FMT//%j/${JOB_ID}}" +ERR_FILE="${ERR_FMT//%j/${JOB_ID}}" + +# Resolve and print first node IP once nodes are allocated +echo "Resolving first node IP..." 
+for i in {1..60}; do + STATE=$(squeue -j "$JOB_ID" -h -o "%T" 2>/dev/null || true) + NODELIST=$(squeue -j "$JOB_ID" -h -o "%N" 2>/dev/null || true) + if [[ -n "${NODELIST:-}" && "${NODELIST}" != "(null)" ]]; then + FIRST_NODE=$(scontrol show hostnames "$NODELIST" | head -n 1) + if [[ -n "${FIRST_NODE:-}" ]]; then + part=$(scontrol getaddrs "$FIRST_NODE" 2>/dev/null | awk 'NR==1{print $2}') + FIRST_IP="${part%%:*}" + echo "Run this command on a machine to get access to the webUI: + ssh -N -L 9200:$FIRST_IP:9200 sunk.pocf62-use13a.coreweave.app +The UI will be available at http://localhost:9200" + echo "" + break + fi + fi + sleep 5 +done echo "Job submitted with ID: $JOB_ID" echo "" echo "Monitor job with:" echo " squeue -j $JOB_ID" -echo " tail -f presto-tpch-run_${JOB_ID}.out" +echo " tail -f ${OUT_FILE}" +echo " tail -f ${ERR_FILE}" +echo " tail -f logs/coord.log" +echo " tail -f logs/worker_*.log" +echo " tail -f logs/cli.log" echo "" echo "Waiting for job to complete..." @@ -44,8 +179,10 @@ echo "" echo "Job completed!" echo "" echo "Output files:" -ls -lh presto-tpch-run_${JOB_ID}.{out,err} 2>/dev/null || echo "No output files found" +ls -lh "${OUT_FILE}" "${ERR_FILE}" 2>/dev/null || echo "No output files found" echo "" echo "Showing job output:" echo "========================================" -cat presto-tpch-run_${JOB_ID}.out 2>/dev/null || echo "No output available" +cat "${OUT_FILE}" 2>/dev/null || echo "No output available" +echo "Showing benchmark results:" +cat logs/cli.log 2>/dev/null || echo "No CLI output available" diff --git a/presto/slurm/presto-nvl72/profiler_functions_slurm.sh b/presto/slurm/presto-nvl72/profiler_functions_slurm.sh new file mode 100755 index 00000000..a3e13e03 --- /dev/null +++ b/presto/slurm/presto-nvl72/profiler_functions_slurm.sh @@ -0,0 +1,192 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +set -e + +# This script provides profiling functions for SLURM/Singularity-based Presto workers. +# It uses srun to execute commands in worker containers instead of docker exec. + +# Set default values if not provided (when called from within container) +# IMPORTANT: SCRIPT_DIR must point to the slurm directory where worker info files are stored +# If it's set to the scripts directory, fix it +if [[ -z "${SCRIPT_DIR:-}" ]] || [[ "${SCRIPT_DIR}" == *"/presto/scripts"* ]]; then + SCRIPT_DIR="/workspace/presto/slurm/presto-nvl72" +fi +VT_ROOT="${VT_ROOT:-/workspace}" +IMAGE_DIR="${IMAGE_DIR:-/mnt/data/images/presto}" + +# Get the worker node and image for a specific worker ID +function get_worker_info() { + local worker_id=$1 + local worker_info_file="${SCRIPT_DIR}/worker_info/worker_${worker_id}.info" + + if [ ! -f "$worker_info_file" ]; then + echo "Error: Worker info file not found for worker ${worker_id} at ${worker_info_file}" >&2 + echo "SCRIPT_DIR is: ${SCRIPT_DIR}" >&2 + echo "Looking for: ${worker_info_file}" >&2 + ls -la "${SCRIPT_DIR}/worker_info/"* 2>&1 || echo "No worker info files found" >&2 + return 1 + fi + + source "$worker_info_file" + if [ -z "${WORKER_NODE:-}" ] || [ -z "${WORKER_IMAGE:-}" ]; then + echo "Error: Worker info file incomplete for worker ${worker_id}" >&2 + return 1 + fi + echo "${WORKER_NODE}:${WORKER_IMAGE}" +} + +# Execute a command in a worker container using srun +function exec_in_worker() { + local worker_id=$1 + local command=$2 + + local worker_info + worker_info=$(get_worker_info "$worker_id") || return 1 + + local worker_node="${worker_info%%:*}" + local worker_image="${worker_info##*:}" + local worker_image_path="${IMAGE_DIR}/${worker_image}.sqsh" + + # Execute command in the worker container + # Note: This assumes we're running from within the SLURM job context where srun is available + srun -N1 -w "$worker_node" --ntasks=1 --overlap \ + --container-image="${worker_image_path}" \ + 
--export=ALL \ + --container-mounts="${VT_ROOT}:/workspace,${SCRIPT_DIR}/profiles:/presto_profiles,${SCRIPT_DIR}/worker_info:/worker_info" \ + -- bash -c "$command" +} + +# Check if profiling directory exists in worker container, create it if it doesn't +function check_profile_output_directory() { + local worker_id=$1 + + # Try to create the directory if it doesn't exist + exec_in_worker "$worker_id" "mkdir -p /presto_profiles" >/dev/null 2>&1 || true + + # Verify it exists now + if ! exec_in_worker "$worker_id" "[[ -d /presto_profiles ]]" 2>/dev/null; then + echo "Warning: Could not create /presto_profiles directory in worker ${worker_id} container" >&2 + return 1 + fi +} + +# Get the PID of presto_server process in a worker container +function get_presto_pid() { + local worker_id=$1 + local pid_file="/worker_info/worker_${worker_id}_pid.txt" + + # Try to read PID from file first + local pid=$(exec_in_worker "$worker_id" "cat ${pid_file} 2>/dev/null" 2>/dev/null | tr -d '\n\r ' || echo "") + + # If not found in file, try to find it by process name + if [ -z "$pid" ] || [ "$pid" = "0" ] || ! 
kill -0 "$pid" 2>/dev/null; then + pid=$(exec_in_worker "$worker_id" "pgrep -f 'presto_server.*--etc-dir' | head -1" 2>/dev/null | tr -d '\n\r ' || echo "") + fi + + if [ -z "$pid" ] || [ "$pid" = "0" ]; then + echo "Error: Could not find presto_server PID for worker ${worker_id}" >&2 + return 1 + fi + + echo "$pid" +} + +# Start profiling on a specific worker using nsys attach +function start_profiler_worker() { + local worker_id=$1 + local profile_output_file_path=$2 + + check_profile_output_directory "$worker_id" + + # Get the PID of the presto_server process + local pid + pid=$(get_presto_pid "$worker_id") || return 1 + + local profile_basename=$(basename "$profile_output_file_path") + local output_file="/presto_profiles/${profile_basename}.nsys-rep" + + # Use nsys attach to attach to the running process + # Note: This must be executed from the HOST, not from inside a container + # We'll write a command file that gets executed from the host + echo "Attaching nsys to presto_server (PID: $pid) in worker ${worker_id}" >&2 + + # For now, try to execute from container - this will fail but show the approach + # The real solution requires executing from host, which we'll implement via a command file + exec_in_worker "$worker_id" \ + "nsys attach --pid=$pid --gpu-metrics-devices=all -t nvtx,cuda,osrt,ucx --cuda-memory-usage=true --cuda-um-cpu-page-faults=true --cuda-um-gpu-page-faults=true --cudabacktrace=true -o ${output_file}" || { + echo "Warning: nsys attach failed. 
Trying alternative: writing command to file for host execution" >&2 + # Write command to a file that can be executed from the host + echo "nsys attach --pid=$pid -o ${output_file}" > "${SCRIPT_DIR}/profiles/.profiler_cmd_${worker_id}.sh" + return 1 + } +} + +# Stop profiling on a specific worker and ensure file is accessible +function stop_profiler_worker() { + local worker_id=$1 + local profile_output_file_path=$2 + + check_profile_output_directory "$worker_id" + + local profile_basename=$(basename "$profile_output_file_path") + local container_file_path="/presto_profiles/${profile_basename}.nsys-rep" + + # Stop profiling + exec_in_worker "$worker_id" "nsys stop" + + # Change ownership so file is accessible + exec_in_worker "$worker_id" "chown -R \$(id -u):\$(id -g) /presto_profiles" + + # The file should already be accessible via the mounted directory at ${SCRIPT_DIR}/profiles/ + # But we verify it exists + local host_file_path="${SCRIPT_DIR}/profiles/${profile_basename}.nsys-rep" + if [ ! -f "$host_file_path" ]; then + echo "Warning: Profile file not found at expected location: $host_file_path" >&2 + return 1 + fi + + echo "Profile saved to: $host_file_path" +} + +# Start profiling on all workers +function start_profiler() { + local profile_output_file_path=$1 + + if [ -z "${NUM_WORKERS:-}" ]; then + echo "Error: NUM_WORKERS not set" >&2 + return 1 + fi + + echo "Starting profiling on ${NUM_WORKERS} workers for profile: ${profile_output_file_path}" >&2 + for ((worker_id=0; worker_id<NUM_WORKERS; worker_id++)); do + local worker_profile_path="${profile_output_file_path}_worker_${worker_id}" + echo "Worker ${worker_id} -> ${worker_profile_path}" >&2 + if !
start_profiler_worker "$worker_id" "$worker_profile_path"; then + echo "Warning: Failed to start profiler on worker ${worker_id}" >&2 + fi + done +} + +# Stop profiling on all workers +function stop_profiler() { + local profile_output_file_path=$1 + + if [ -z "${NUM_WORKERS:-}" ]; then + echo "Error: NUM_WORKERS not set" >&2 + return 1 + fi + + echo "Stopping profiling on ${NUM_WORKERS} workers for profile: ${profile_output_file_path}" >&2 + for ((worker_id=0; worker_id<NUM_WORKERS; worker_id++)); do + local worker_profile_path="${profile_output_file_path}_worker_${worker_id}" + echo "Worker ${worker_id} -> ${worker_profile_path}" >&2 + if ! stop_profiler_worker "$worker_id" "$worker_profile_path"; then + echo "Warning: Failed to stop profiler on worker ${worker_id}" >&2 + fi + done +} + diff --git a/presto/slurm/presto-nvl72/run-presto-benchmarks.sh b/presto/slurm/presto-nvl72/run-presto-benchmarks.sh index 3f444caf..4dc7b81d 100755 --- a/presto/slurm/presto-nvl72/run-presto-benchmarks.sh +++ b/presto/slurm/presto-nvl72/run-presto-benchmarks.sh @@ -1,4 +1,7 @@ #!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + set -e set -x @@ -9,16 +12,14 @@ set -x # by the slurm launcher script. All configuration is passed via environment vars. # Source helper functions -source /mnt/home/misiug/veloxtesting/presto-nvl72/echo_helpers.sh -source /mnt/home/misiug/veloxtesting/presto-nvl72/functions.sh +source $SCRIPT_DIR/echo_helpers.sh +source $SCRIPT_DIR/functions.sh # ============================================================================== # Setup and Validation # ============================================================================== echo "Setting up Presto environment..." -export VARIANT_TYPE=gpu setup -echo "Environment setup" # ============================================================================== # Start Coordinator @@ -47,9 +48,7 @@ done echo "Waiting for ${NUM_WORKERS} workers to register with coordinator..."
wait_for_workers_to_register $NUM_WORKERS -# ============================================================================== -# Create Schema and Register Tables -# ============================================================================== +# Not currently needed because we are copying the hive metastore from the data source. #echo "Creating TPC-H schema and registering tables for scale factor ${SCALE_FACTOR}..." #setup_benchmark ${SCALE_FACTOR} @@ -59,16 +58,38 @@ wait_for_workers_to_register $NUM_WORKERS echo "Running TPC-H queries (${NUM_ITERATIONS} iterations, scale factor ${SCALE_FACTOR})..." run_queries ${NUM_ITERATIONS} ${SCALE_FACTOR} +# ============================================================================== +# Stop Workers (if profiling, this ensures profile files are created) +# ============================================================================== +if [ "${ENABLE_PROFILING:-false}" == "true" ]; then + echo "Stopping workers to finalize profile files..." + stop_workers +fi + # ============================================================================== # Process Results # ============================================================================== echo "Processing results..." -mkdir -p /mnt/home/misiug/veloxtesting/presto-nvl72/result_dir -#tpch_summary_to_csv ${LOGS}/cli.log /mnt/home/misiug/veloxtesting/presto-nvl72/result_dir/summary.csv -#push_csv +mkdir -p ${SCRIPT_DIR}/result_dir +cp -r ${LOGS}/cli.log ${SCRIPT_DIR}/result_dir/summary.txt + +# Check for profile files if profiling was enabled +if [ "${ENABLE_PROFILING:-false}" == "true" ]; then + echo "Checking for profile files..." 
+ if [ -d "${SCRIPT_DIR}/profiles" ]; then + profile_count=$(find ${SCRIPT_DIR}/profiles -name "*.nsys-rep" 2>/dev/null | wc -l) + echo "Found ${profile_count} profile file(s) in ${SCRIPT_DIR}/profiles" + ls -lh ${SCRIPT_DIR}/profiles/*.nsys-rep 2>/dev/null || echo "No .nsys-rep files found (workers may still be running)" + else + echo "Profiles directory does not exist" + fi +fi echo "========================================" echo "Benchmark complete!" -echo "Results saved to: /mnt/home/misiug/veloxtesting/presto-nvl72/results_dir" +echo "Results saved to: ${SCRIPT_DIR}/result_dir" echo "Logs available at: ${LOGS}" +if [ "${ENABLE_PROFILING:-false}" == "true" ]; then + echo "Profiles directory: ${SCRIPT_DIR}/profiles" +fi echo "========================================" diff --git a/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm b/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm index bfe8016c..0d785912 100755 --- a/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm +++ b/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm @@ -1,44 +1,60 @@ #!/bin/bash #SBATCH --job-name=presto-tpch-run -#SBATCH --output=/mnt/home/misiug/veloxtesting/presto-nvl72/%x_%j.out -#SBATCH --error=/mnt/home/misiug/veloxtesting/presto-nvl72/%x_%j.err -#SBATCH --time=01:00:00 -#SBATCH --nodes=10 +#SBATCH --time=03:00:00 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=144 #SBATCH --mem=0 -#SBATCH --gres=gpu:4 #SBATCH --exclusive # ============================================================================== # User Configuration - Edit these values directly # ============================================================================== # TPC-H Configuration -export SCALE_FACTOR=10000 -export NUM_ITERATIONS=1 +if [ -z "${SCALE_FACTOR:-}" ]; then + echo "Error: SCALE_FACTOR is required. Set via launcher: -s|--scale-factor" >&2 + exit 1 +fi +export SCALE_FACTOR +if [ -z "${NUM_ITERATIONS:-}" ]; then + echo "Error: NUM_ITERATIONS is required.
Set via launcher: -i|--iterations" >&2 + exit 1 +fi +export NUM_ITERATIONS +if [ -z "${SCRIPT_DIR:-}" ]; then + echo "Error: SCRIPT_DIR is required." + exit 1 +fi +export SCRIPT_DIR +if [ -z "${WORKER_IMAGE:-}" ]; then + echo "Error: WORKER_IMAGE is required." + exit 1 +fi +export WORKER_IMAGE +if [ -z "${COORD_IMAGE:-}" ]; then + echo "Error: COORD_IMAGE is required." + exit 1 +fi +export COORD_IMAGE -# Directory Configuration -export WORKSPACE=/mnt/home/misiug +# Assumes the repo root is four steps up from the script directory. This should refer to velox-testing. +export VT_ROOT="$(cd -- "${SCRIPT_DIR}/../../.." >/dev/null 2>&1 && pwd -P)" export DATA=/mnt/data/tpch-rs -export IMAGE_DIR=/mnt/home/misiug/images -export LOGS=/mnt/home/misiug/veloxtesting/presto-nvl72/logs -export CONFIGS=/mnt/home/misiug/veloxtesting/config/generated/gpu -#export CONFIGS=/mnt/home/misiug/veloxtesting/config/generated/cpu +export IMAGE_DIR=/mnt/data/images/presto +export LOGS=$SCRIPT_DIR/logs +export VARIANT_TYPE=gpu +export CONFIGS=$VT_ROOT/presto/docker/config/generated/$VARIANT_TYPE # Container Images # Coordinator: ${IMAGE_DIR}/presto-coordinator-test.sqsh # Worker: ${IMAGE_DIR}/${WORKER_IMAGE}.sqsh -#export WORKER_IMAGE=presto-native-worker-cpu -export WORKER_IMAGE=presto-native-worker-gpu export NUM_NODES=$SLURM_JOB_NUM_NODES -export NUM_GPUS_PER_NODE=4 # Presto Configuration export PORT=9200 export CUDF_LIB=/usr/lib64/presto-native-libs # UCX Configuration -export UCX_TLS=^ib,ud:aux +export UCX_TLS=^ib,ud:aux,sm export UCX_MAX_RNDV_RAILS=1 export UCX_RNDV_PIPELINE_ERROR_HANDLING=y export UCX_TCP_KEEPINTVL=1ms @@ -67,6 +83,7 @@ echo "Nodes: $SLURM_JOB_NUM_NODES" echo "Node list: $SLURM_JOB_NODELIST" echo "Coordinator node: $COORD" echo "Worker image: $WORKER_IMAGE" +echo "Coord image: $COORD_IMAGE" echo "Scale factor: $SCALE_FACTOR" echo "Iterations: $NUM_ITERATIONS" echo "Data directory: $DATA" @@ -77,7 +94,6 @@ echo "========================================" # Create 
necessary directories mkdir -p ${LOGS} -mkdir -p ${DATA} # Launch the job script -bash /mnt/home/misiug/veloxtesting/presto-nvl72/run-presto-benchmarks.sh +bash $SCRIPT_DIR/run-presto-benchmarks.sh diff --git a/presto/slurm/presto-nvl72/run_multiple.sh b/presto/slurm/presto-nvl72/run_multiple.sh new file mode 100755 index 00000000..b3c9d9bd --- /dev/null +++ b/presto/slurm/presto-nvl72/run_multiple.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +NUM_ITERATIONS=2 +while [[ $# -gt 0 ]]; do + case "$1" in + -n|--nodes) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + NODES_COUNT="$2" + shift 2 + else + echo "Error: -n|--nodes requires a set of comma separated values. E.g. (2,4,8)" + echo "Usage: $0 -n|--nodes -s|--scale-factor -w -c [additional sbatch options]" + exit 1 + fi + ;; + -s|--scale-factor) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + SCALE_FACTOR="$2" + shift 2 + else + echo "Error: -s|--scale-factor requires a set of comma separated values. E.g. 
(1000,3000)" + echo "Usage: $0 -n|--nodes -s|--scale-factor -w -c [additional sbatch options]" + exit 1 + fi + ;; + -i|--iterations) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + NUM_ITERATIONS="$2" + shift 2 + else + echo "Error: -i|--iterations requires a value" + echo "Usage: $0 -n|--nodes -s|--scale-factor -w -c [additional sbatch options]" + exit 1 + fi + ;; + -w|--worker-image) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + WORKER_IMAGE="$2" + shift 2 + else + echo "Error: -w|--worker-image requires a value" + echo "Usage: $0 -n|--nodes -s|--scale-factor -w -c [additional sbatch options]" + exit 1 + fi + ;; + -c|--coord-image) + if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then + COORD_IMAGE="$2" + shift 2 + else + echo "Error: -c|--coord-image requires a value" + echo "Usage: $0 -n|--nodes -s|--scale-factor -w -c [additional sbatch options]" + exit 1 + fi + ;; + *) + EXTRA_ARGS+=("$1") + shift + ;; + esac +done + +if [[ -z "${NODES_COUNT}" ]]; then + echo "Error: -n|--nodes is required" + exit 1 +fi +if [[ -z "${SCALE_FACTOR}" ]]; then + echo "Error: -s|--scale-factor is required" + exit 1 +fi +if [[ -z "${WORKER_IMAGE}" ]]; then + echo "Error: -w|--worker-image is required" + exit 1 +fi +if [[ -z "${COORD_IMAGE}" ]]; then + echo "Error: -c|--coord-image is required" + exit 1 +fi + + +mkdir -p kept_results + +IFS=',' read -ra NODES_ARRAY <<< "$NODES_COUNT" +IFS=',' read -ra SF_ARRAY <<< "$SCALE_FACTOR" +for s in "${SF_ARRAY[@]}"; do + for n in "${NODES_ARRAY[@]}"; do + ./launch-run.sh -s $s -n $n -i $NUM_ITERATIONS -w $WORKER_IMAGE -c $COORD_IMAGE + cp logs/cli.log kept_results/${n}N-${s}SF-summary.txt + done +done diff --git a/presto/testing/integration_tests/create_hive_tables.py b/presto/testing/integration_tests/create_hive_tables.py index ed2365a2..001b557d 100644 --- a/presto/testing/integration_tests/create_hive_tables.py +++ b/presto/testing/integration_tests/create_hive_tables.py @@ -55,7 +55,12 @@ def drop_schema(presto_cursor, 
schema_name): ) args = parser.parse_args() - conn = prestodb.dbapi.connect(host="localhost", port=8080, user="test_user", catalog="hive") + conn = prestodb.dbapi.connect( + host=os.environ.get("HOSTNAME", "localhost"), + port=int(os.environ.get("PORT", "8080")), + user="test_user", + catalog="hive", + ) cursor = conn.cursor() data_sub_directory = f"user_data/{args.data_dir_name}" create_tables(cursor, args.schema_name, args.schemas_dir_path, data_sub_directory) diff --git a/presto/testing/performance_benchmarks/common_fixtures.py b/presto/testing/performance_benchmarks/common_fixtures.py index 3198805b..fadbd58e 100644 --- a/presto/testing/performance_benchmarks/common_fixtures.py +++ b/presto/testing/performance_benchmarks/common_fixtures.py @@ -3,6 +3,7 @@ from pathlib import Path +import pandas as pd import prestodb import pytest @@ -82,8 +83,10 @@ def benchmark_query(request, presto_cursor, benchmark_queries, benchmark_result_ if profile: assert profile_script_path is not None + print(f"[Profiler] Profiling enabled with script: {profile_script_path}") profile_output_dir_path = Path(f"{bench_output_dir}/profiles/{benchmark_type}") profile_output_dir_path.mkdir(parents=True, exist_ok=True) + print(f"[Profiler] Profile output directory: {profile_output_dir_path}") benchmark_result_collector[benchmark_type] = { BenchmarkKeys.RAW_TIMES_KEY: {}, @@ -103,14 +106,28 @@ def benchmark_query_function(query_id): if profile: # Base path without .nsys-rep extension: {dir}/{query_id} profile_output_file_path = f"{profile_output_dir_path.absolute()}/{query_id}" + print(f"[Profiler] Starting profiler for query {query_id}, output: {profile_output_file_path}") start_profiler(profile_script_path, profile_output_file_path) result = [] - for _ in range(iterations): + for iteration_num in range(iterations): cursor = presto_cursor.execute( "--" + str(benchmark_type) + "_" + str(query_id) + "--" + "\n" + benchmark_queries[query_id] ) result.append(cursor.stats["elapsedTimeMillis"]) + # 
Save query results to Parquet (only on first iteration) + rows = cursor.fetchall() + columns = [desc[0] for desc in cursor.description] + df = pd.DataFrame(rows, columns=columns) + + # Save to Parquet format to match expected results + results_dir = Path(f"{bench_output_dir}/query_results") + results_dir.mkdir(parents=True, exist_ok=True) + parquet_path = results_dir / f"{query_id.lower()}.parquet" + df.to_parquet(parquet_path, index=False) + + print(f"Saved {query_id} results to {parquet_path}") + # Collect metrics after each query iteration if enabled if metrics: presto_query_id = cursor._query.query_id @@ -129,6 +146,7 @@ def benchmark_query_function(query_id): raise finally: if profile and profile_output_file_path is not None: + print(f"[Profiler] Stopping profiler for query {query_id}") stop_profiler(profile_script_path, profile_output_file_path) return benchmark_query_function diff --git a/presto/testing/performance_benchmarks/conftest.py b/presto/testing/performance_benchmarks/conftest.py index 0a15a36a..b10c13aa 100644 --- a/presto/testing/performance_benchmarks/conftest.py +++ b/presto/testing/performance_benchmarks/conftest.py @@ -137,8 +137,10 @@ def pytest_sessionfinish(session, exitstatus): ] else: AGG_KEYS = [BenchmarkKeys.LUKEWARM_KEY] + if not hasattr(session, "benchmark_results"): return + for benchmark_type, result in session.benchmark_results.items(): compute_aggregate_timings(result) json_result[benchmark_type] = { diff --git a/presto/testing/performance_benchmarks/profiler_utils.py b/presto/testing/performance_benchmarks/profiler_utils.py index 59430270..74f19558 100644 --- a/presto/testing/performance_benchmarks/profiler_utils.py +++ b/presto/testing/performance_benchmarks/profiler_utils.py @@ -26,11 +26,56 @@ def stop_profiler(profile_script_path, profile_output_file_path): def execute_profiler_function(profile_script_path, profile_output_file_path, profiler_function): + # Ensure SCRIPT_DIR is set correctly - it should point to the slurm 
directory + # where worker info files are stored, not the scripts directory + env = os.environ.copy() + # If SCRIPT_DIR is not set or points to scripts, fix it + script_dir = env.get("SCRIPT_DIR", "") + if not script_dir or "scripts" in script_dir: + # Try to derive from profile_script_path + if "presto-nvl72" in profile_script_path: + env["SCRIPT_DIR"] = "/workspace/presto/slurm/presto-nvl72" + else: + env["SCRIPT_DIR"] = script_dir if script_dir else "/workspace/presto/slurm/presto-nvl72" + + # IMPORTANT: We need to execute the profiler script from the HOST, not from inside the container + # because srun is only available on the host. We'll write a wrapper script that gets executed + # from the host via a mechanism that can escape the container. + # + # Since we're inside a container, we need to use a different approach: + # Option 1: Use nsys attach to attach to running processes (requires PID) + # Option 2: Write commands to a file that a host process reads + # Option 3: Use a mechanism to execute from host + + # For now, let's try to detect if we're in a container and provide a helpful error + print(f"[Profiler] Executing {profiler_function} with script: {profile_script_path}, output: {profile_output_file_path}") + print(f"[Profiler] SCRIPT_DIR={env.get('SCRIPT_DIR', 'NOT SET')}, VT_ROOT={env.get('VT_ROOT', 'NOT SET')}, IMAGE_DIR={env.get('IMAGE_DIR', 'NOT SET')}, NUM_WORKERS={env.get('NUM_WORKERS', 'NOT SET')}") + + # Check if we're in a container + in_container = os.path.exists("/.singularity.d/runscript") or "SINGULARITY" in env + + if in_container: + print(f"[Profiler] WARNING: Running inside container. 
Profiling via srun requires host execution.") + print(f"[Profiler] Attempting to use alternative method: nsys attach to running processes") + # We'll need to use nsys attach instead - this requires finding the PID of presto_server + # For now, let's try the original method and see if it fails gracefully + pass + profiler_command = ["bash", "-c", f"source {profile_script_path}; {profiler_function} {profile_output_file_path}"] - - result = subprocess.run(profiler_command, capture_output=True, text=True, env=os.environ) + result = subprocess.run(profiler_command, capture_output=True, text=True, env=env) + + # Always print output for debugging + if result.stdout: + print(f"[Profiler] stdout: {result.stdout}") + if result.stderr: + print(f"[Profiler] stderr: {result.stderr}") + if result.returncode != 0: - raise RuntimeError( + error_msg = ( f"{profiler_function} returned error code: {result.returncode}, " f"stdout: {result.stdout}, stderr: {result.stderr}" ) + print(f"[Profiler] ERROR: {error_msg}") + raise RuntimeError(error_msg) + else: + print(f"[Profiler] {profiler_function} completed successfully") diff --git a/presto/testing/requirements.txt b/presto/testing/requirements.txt index 4a2dfa8c..e9eeb1da 100644 --- a/presto/testing/requirements.txt +++ b/presto/testing/requirements.txt @@ -5,8 +5,10 @@ duckdb==1.3.2 idna==3.10 iniconfig==2.1.0 packaging==25.0 +pandas>=2.0.0 pluggy==1.6.0 presto-python-client==0.8.4 +pyarrow>=10.0.0 Pygments==2.19.2 pytest==8.4.1 requests==2.32.4 diff --git a/scripts/py_env_functions.sh b/scripts/py_env_functions.sh index 50018344..3a15e5b1 100755 --- a/scripts/py_env_functions.sh +++ b/scripts/py_env_functions.sh @@ -51,7 +51,7 @@ function init_python_virtual_env() { local venv_dir=${1:-".venv"} rm -rf $venv_dir - if python3 -m venv $venv_dir &>/dev/null; then + if python3.12 -m venv $venv_dir &>/dev/null; then echo "Created virtual environment using the venv module" else if [[ -z $MINIFORGE_HOME ]]; then