From 470c0ca50793ad4bda03f703f07f00605f6f1dab Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Mon, 16 Feb 2026 09:34:50 -0800 Subject: [PATCH 01/24] Added context json for benchmark runs --- .../etc_worker/config_native.properties | 2 +- presto/scripts/run_benchmark.sh | 9 + .../performance_benchmarks/benchmark_keys.py | 9 + .../performance_benchmarks/conftest.py | 42 +++ .../performance_benchmarks/run_context.py | 297 ++++++++++++++++++ 5 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 presto/testing/performance_benchmarks/run_context.py diff --git a/presto/docker/config/template/etc_worker/config_native.properties b/presto/docker/config/template/etc_worker/config_native.properties index b1ee1082..45520aa1 100644 --- a/presto/docker/config/template/etc_worker/config_native.properties +++ b/presto/docker/config/template/etc_worker/config_native.properties @@ -1,7 +1,7 @@ # This node is a worker; it executes tasks but doesn't coordinate queries. coordinator=false # Worker REST/HTTP port for internal and admin endpoints. -http-server.http.port=8080 +http-server.http.port=10000 # Address of the coordinator's discovery service for registration. discovery.uri=http://presto-coordinator:8080 diff --git a/presto/scripts/run_benchmark.sh b/presto/scripts/run_benchmark.sh index d651d71f..c0095751 100755 --- a/presto/scripts/run_benchmark.sh +++ b/presto/scripts/run_benchmark.sh @@ -34,6 +34,14 @@ OPTIONS: -m, --metrics Collect detailed metrics from Presto REST API after each query. Metrics are stored in query-specific directories. +ENVIRONMENT: + PRESTO_BENCHMARK_DEBUG Set to 1 to print debug logs for worker/engine detection + (e.g. node URIs, reachability, metrics, Docker containers). + Use when engine is misdetected or the run fails. + Docker In Docker setups, engine is inferred from running worker + images (presto-native-worker-gpu/cpu, presto-java-worker) + whose tag equals the username. Ensure 'docker ps' is available. 
+ EXAMPLES: $0 -b tpch -s bench_sf100 $0 -b tpch -q "1,2" -s bench_sf100 @@ -41,6 +49,7 @@ EXAMPLES: $0 -b tpch -s bench_sf100 -t gh200_cpu_sf100 $0 -b tpch -s bench_sf100 --profile $0 -b tpch -s bench_sf100 --metrics + PRESTO_BENCHMARK_DEBUG=1 $0 -b tpch -s bench_sf100 $0 -h EOF diff --git a/presto/testing/performance_benchmarks/benchmark_keys.py b/presto/testing/performance_benchmarks/benchmark_keys.py index 3cd28bbc..297fc598 100644 --- a/presto/testing/performance_benchmarks/benchmark_keys.py +++ b/presto/testing/performance_benchmarks/benchmark_keys.py @@ -32,3 +32,12 @@ class BenchmarkKeys(str, Enum): CONTEXT_KEY = "context" ITERATIONS_COUNT_KEY = "iterations_count" SCHEMA_NAME_KEY = "schema_name" + SCALE_FACTOR_KEY = "scale_factor" + # Run configuration (from run context; written to benchmark_config.json and context) + TIMESTAMP_KEY = "timestamp" + NUM_WORKERS_KEY = "n_workers" + GPU_NAME_KEY = "gpu_name" + WORKER_IMAGE_KEY = "worker_image" + ENGINE_KEY = "engine" + KIND_KEY = "kind" + GPU_COUNT_KEY = "gpu_count" diff --git a/presto/testing/performance_benchmarks/conftest.py b/presto/testing/performance_benchmarks/conftest.py index 0a15a36a..7f455c20 100644 --- a/presto/testing/performance_benchmarks/conftest.py +++ b/presto/testing/performance_benchmarks/conftest.py @@ -3,10 +3,12 @@ import json import statistics +from datetime import datetime, timezone from pathlib import Path from ..common.conftest import * # noqa: F403 from .benchmark_keys import BenchmarkKeys +from .run_context import gather_run_context def pytest_addoption(parser): @@ -109,6 +111,28 @@ def write_section(terminalreporter, text_report, content, **kwargs): text_report.append(f" {content} ".center(120, sep)) +def _build_run_config(session): + """ + Build run-config dict from execution context (Presto nodes, nvidia-smi, schema + data source, env). Used for benchmark_config.json and context in benchmark_result.json. 
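+
+    Illustrative example of the returned dict (exact values depend on the cluster
+    and schema under test):
+    {"engine": "velox-gpu", "kind": "2-node", "n_workers": 2, "gpu_count": 2,
+     "gpu_name": "NVIDIA GH200 480GB", "scale_factor": 100,
+     "timestamp": "2026-02-16T17:34:50Z"}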
+ """ + hostname = session.config.getoption("--hostname") + port = session.config.getoption("--port") + user = session.config.getoption("--user") + schema_name = session.config.getoption("--schema-name") + scale_factor_override = session.config.getoption("--scale-factor") + + ctx = gather_run_context( + hostname=hostname, + port=port, + user=user, + schema_name=schema_name, + scale_factor_override=scale_factor_override, + ) + ctx["timestamp"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + return ctx + + def pytest_sessionfinish(session, exitstatus): iterations = session.config.getoption("--iterations") schema_name = session.config.getoption("--schema-name") @@ -123,6 +147,10 @@ def pytest_sessionfinish(session, exitstatus): if tag: json_result[BenchmarkKeys.CONTEXT_KEY][BenchmarkKeys.TAG_KEY] = tag + run_config = _build_run_config(session) + for key, value in run_config.items(): + json_result[BenchmarkKeys.CONTEXT_KEY][key] = value + bench_output_dir = get_output_dir(session.config) bench_output_dir.mkdir(parents=True, exist_ok=True) @@ -138,6 +166,10 @@ def pytest_sessionfinish(session, exitstatus): else: AGG_KEYS = [BenchmarkKeys.LUKEWARM_KEY] if not hasattr(session, "benchmark_results"): + config_payload = {"benchmark": None, **run_config} + with open(f"{bench_output_dir}/benchmark_config.json", "w") as file: + json.dump(config_payload, file, indent=2) + file.write("\n") return for benchmark_type, result in session.benchmark_results.items(): compute_aggregate_timings(result) @@ -160,6 +192,16 @@ def pytest_sessionfinish(session, exitstatus): json.dump(json_result, file, indent=2) file.write("\n") + # Write run-config JSON (context from Presto, nvidia-smi, schema, env) + benchmark_types = list(session.benchmark_results.keys()) if hasattr(session, "benchmark_results") else [] + config_payload = { + "benchmark": benchmark_types[0] if len(benchmark_types) == 1 else benchmark_types, + **run_config, + } + with open(f"{bench_output_dir}/benchmark_config.json", "w") as file: + json.dump(config_payload, file, indent=2) + file.write("\n") + def get_output_dir(config): bench_output_dir = config.getoption("--output-dir") diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py new file mode 100644 index 00000000..61b8c9bb --- /dev/null +++ b/presto/testing/performance_benchmarks/run_context.py @@ -0,0 +1,297 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +""" +Gather run configuration from execution context. Engine (java / velox-cpu / velox-gpu) +is inferred only from: (1) Docker running images with expected names and tag = +username, or (2) SLURM nvidia-smi in LOGS/worker_0.log. Scale factor, n_workers, +gpu_name, etc. come from schema, Presto /v1/node (node count), and nvidia-smi/env. 
+""" + +import getpass +import json +import os +import re +import subprocess +from pathlib import Path + +import prestodb +import requests + +from ..common import test_utils + +# Set PRESTO_BENCHMARK_DEBUG=1 or DEBUG=1 to print engine-detection debug logs +_DEBUG = os.environ.get("PRESTO_BENCHMARK_DEBUG") or os.environ.get("DEBUG") + + +def _debug(msg: str) -> None: + if _DEBUG: + print(f"[run_context] {msg}") + + +def _fetch_json(url: str, timeout: int = 10): + try: + response = requests.get(url, timeout=timeout) + response.raise_for_status() + return response.json() + except Exception as e: + _debug(f"GET {url} failed: {e!r}") + return None + + +def get_node_count(hostname: str, port: int) -> int | None: + """Return number of nodes registered with the Presto coordinator (coordinator + workers).""" + nodes = _fetch_json(f"http://{hostname}:{port}/v1/node") + if nodes is None or not isinstance(nodes, list): + return None + return len(nodes) + + +def get_n_workers(hostname: str, port: int) -> int | None: + """Infer worker count from Presto node list. Single-node => 1, else nodes - 1.""" + n = get_node_count(hostname, port) + if n is None: + return None + return max(1, n - 1) # coordinator counts as one; single-node has 1 "worker" + + +def get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_name: str) -> int | float | None: + """ + Resolve scale factor from the schema's data source (metadata.json next to table data). + Uses same logic as test_utils.get_scale_factor but without pytest request. + """ + conn = None + try: + conn = prestodb.dbapi.connect( + host=hostname, port=port, user=user, catalog="hive", schema=schema_name + ) + cursor = conn.cursor() + tables = cursor.execute(f"SHOW TABLES IN {schema_name}").fetchall() + if not tables: + return None + table = tables[0][0] + location = test_utils.get_table_external_location(schema_name, table, cursor) + # metadata.json is typically in parent of table data dir + meta_path = Path(location).parent / "metadata.json" + if not meta_path.is_file(): + meta_path = (Path(location) / ".." / "metadata.json").resolve() + if not meta_path.is_file(): + return None + with open(meta_path) as f: + data = json.load(f) + return data.get("scale_factor") + except Exception: + return None + finally: + if conn is not None: + try: + conn.close() + except Exception: + pass + + +def _parse_gpu_name_from_text(line: str) -> str | None: + """Parse a single line of nvidia-smi -L output; return GPU model name or None.""" + line = line.strip() + if not line: + return None + match = re.search(r"GPU \d+:\s*(.+?)(?:\s*\(UUID)", line) + if match: + return match.group(1).strip() + if line.startswith("GPU "): + return line.split(":", 1)[-1].strip() + return None + + +def get_gpu_name_from_slurm_logs() -> str | None: + """ + When running under SLURM, workers run nvidia-smi -L and write to LOGS/worker_.log. + All workers are assumed the same GPU; read worker_0.log only. LOGS env must be set. + Returns None if not in SLURM, LOGS unset, or no matching line found. 
+ """ + if not os.environ.get("SLURM_JOB_ID"): + return None + logs_dir = os.environ.get("LOGS") + if not logs_dir: + return None + log_file = Path(logs_dir) / "worker_0.log" + if not log_file.is_file(): + return None + try: + with open(log_file) as f: + for line in f: + gpu_name = _parse_gpu_name_from_text(line) + if gpu_name: + return gpu_name + except Exception: + pass + return None + + +def get_engine_from_slurm() -> str | None: + """ + Infer engine when running under SLURM from nvidia-smi -L output in LOGS/worker_0.log. + If that log contains GPU lines (from nvidia-smi -L), return 'velox-gpu'; otherwise + 'velox-cpu'. Returns None if not in SLURM (SLURM_JOB_ID and LOGS unset) or LOGS + not available. Does not use the Presto API for engine type. + """ + if not os.environ.get("SLURM_JOB_ID"): + return None + if not os.environ.get("LOGS"): + return None + gpu_name = get_gpu_name_from_slurm_logs() + if gpu_name is not None: + _debug(f"SLURM: nvidia-smi in logs -> velox-gpu ({gpu_name!r})") + return "velox-gpu" + # SLURM but no GPU in logs -> CPU native + _debug("SLURM: no nvidia-smi in logs -> velox-cpu") + return "velox-cpu" + + +def get_gpu_name() -> str | None: + """ + Return GPU model name. Under SLURM, read from LOGS/worker_.log if LOGS is set; + otherwise run nvidia-smi -L on the current host (e.g. Docker host). + """ + gpu_from_logs = get_gpu_name_from_slurm_logs() + if gpu_from_logs is not None: + return gpu_from_logs + try: + result = subprocess.run( + ["nvidia-smi", "-L"], + capture_output=True, + text=True, + timeout=5, + check=False, + ) + if result.returncode != 0 or not result.stdout.strip(): + return None + first_line = result.stdout.strip().split("\n")[0] + return _parse_gpu_name_from_text(first_line) + except (FileNotFoundError, subprocess.TimeoutExpired, Exception): + return None + + +def get_worker_image() -> str | None: + """Return worker image name from env (set by cluster/container setup).""" + return os.environ.get("WORKER_IMAGE") + + +def _current_username() -> str: + """Return the username of the user running the process (for Docker image tag matching).""" + return os.environ.get("USER") or os.environ.get("USERNAME") or getpass.getuser() or "" + + +def get_engine_from_docker_containers(hostname: str, port: int) -> str | None: + """ + Infer engine from running Docker containers whose image has an expected name + (presto-native-worker-gpu, presto-native-worker-cpu, presto-java-worker) and + a tag equal to the username of the user running the benchmarks. Returns + 'velox-gpu', 'velox-cpu', 'java', or None. 
+ """ + username = _current_username() + if not username: + _debug("docker: could not determine username, skip Docker engine detection") + return None + try: + result = subprocess.run( + ["docker", "ps", "--format", "{{.Image}}"], + capture_output=True, + text=True, + timeout=10, + check=False, + ) + if result.returncode != 0 or not result.stdout.strip(): + return None + images = [s.strip() for s in result.stdout.strip().splitlines() if s.strip()] + except (FileNotFoundError, subprocess.TimeoutExpired, Exception): + _debug("docker ps failed or not available") + return None + has_gpu = has_cpu = has_java = False + for image in images: + parts = image.rsplit(":", 1) + name = parts[0] if parts else "" + tag = parts[1] if len(parts) == 2 else "" + if tag != username: + continue + if "presto-native-worker-gpu" in name: + has_gpu = True + if "presto-native-worker-cpu" in name: + has_cpu = True + if "presto-java-worker" in name: + has_java = True + if has_gpu or has_cpu or has_java: + _debug(f"docker (image tag={username!r}): gpu={has_gpu}, cpu={has_cpu}, java={has_java}") + if has_gpu: + return "velox-gpu" + if has_cpu: + return "velox-cpu" + if has_java: + return "java" + return None + + +def gather_run_context( + hostname: str, + port: int, + user: str, + schema_name: str, + scale_factor_override: str | int | None = None, +) -> dict: + """ + Build run-config dict from context. Engine is taken only from Docker (running + images presto-native-worker-gpu/cpu or presto-java-worker with tag = username) + or SLURM (nvidia-smi in LOGS/worker_0.log). scale_factor_override takes + precedence over schema-derived scale factor. + """ + ctx = {} + # Scale factor: CLI override first, then from schema data source + if scale_factor_override is not None: + try: + ctx["scale_factor"] = int(scale_factor_override) + except (TypeError, ValueError): + ctx["scale_factor"] = scale_factor_override + else: + sf = get_scale_factor_from_schema(hostname, port, user, schema_name) + if sf is not None: + ctx["scale_factor"] = int(sf) if isinstance(sf, float) and sf == int(sf) else sf + + n_workers = get_n_workers(hostname, port) + # Engine only from Docker (container names) or SLURM (nvidia-smi in LOGS). No API fallback. + engine_from_docker = get_engine_from_docker_containers(hostname, port) + engine_from_slurm = get_engine_from_slurm() if engine_from_docker is None else None + engine = engine_from_docker or engine_from_slurm + if engine_from_docker is not None: + _debug(f"using engine from Docker containers: {engine_from_docker}") + elif engine_from_slurm is not None: + _debug(f"using engine from SLURM (nvidia-smi in logs): {engine_from_slurm}") + + if n_workers is not None: + ctx["n_workers"] = n_workers + ctx["kind"] = "single-node" if n_workers == 1 else f"{n_workers}-node" + + if engine == "velox-cpu": + ctx["gpu_count"] = 0 + ctx["gpu_name"] = "NA" + ctx["engine"] = "velox-cpu" + elif engine == "velox-gpu": + ctx["gpu_count"] = n_workers if n_workers is not None else 0 + gpu_name = get_gpu_name() + if gpu_name is not None: + ctx["gpu_name"] = gpu_name + ctx["engine"] = "velox-gpu" + elif engine == "java": + ctx["gpu_count"] = 0 + ctx["engine"] = "java" + else: + raise RuntimeError( + "Could not determine worker engine. Run in Docker (worker images " + "presto-native-worker-gpu, presto-native-worker-cpu, or presto-java-worker with " + "tag equal to your username) or on SLURM with LOGS set and nvidia-smi -L in LOGS/worker_0.log." 
+ ) + + worker_image = get_worker_image() + if worker_image is not None: + ctx["worker_image"] = worker_image + + return ctx From 50e3ec6522b0c66d585638882bc98472e5333bda Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Mon, 16 Feb 2026 09:55:43 -0800 Subject: [PATCH 02/24] debug multiple workers --- .../performance_benchmarks/run_context.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index 61b8c9bb..7a659795 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -41,10 +41,16 @@ def _fetch_json(url: str, timeout: int = 10): def get_node_count(hostname: str, port: int) -> int | None: """Return number of nodes registered with the Presto coordinator (coordinator + workers).""" - nodes = _fetch_json(f"http://{hostname}:{port}/v1/node") - if nodes is None or not isinstance(nodes, list): + url = f"http://{hostname}:{port}/v1/node" + raw = _fetch_json(url) + if raw is None: return None - return len(nodes) + if not isinstance(raw, list): + _debug(f"get_node_count: {url} returned type {type(raw).__name__}, expected list: {raw!r}") + return None + n = len(raw) + _debug(f"get_node_count: {url} -> {n} node(s). First node sample: {raw[0] if raw else 'N/A'}") + return n def get_n_workers(hostname: str, port: int) -> int | None: @@ -52,7 +58,9 @@ def get_n_workers(hostname: str, port: int) -> int | None: n = get_node_count(hostname, port) if n is None: return None - return max(1, n - 1) # coordinator counts as one; single-node has 1 "worker" + workers = max(1, n - 1) # coordinator counts as one; single-node has 1 "worker" + _debug(f"get_n_workers: node_count={n} -> n_workers={workers}") + return workers def get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_name: str) -> int | float | None: From 0e72c3366a7e4167cebcddaeae934bb50e6c4701 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Mon, 16 Feb 2026 10:00:56 -0800 Subject: [PATCH 03/24] fix node count --- .../testing/performance_benchmarks/run_context.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index 7a659795..f9320355 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -40,7 +40,7 @@ def _fetch_json(url: str, timeout: int = 10): def get_node_count(hostname: str, port: int) -> int | None: - """Return number of nodes registered with the Presto coordinator (coordinator + workers).""" + """Return number of nodes in the Presto /v1/node list (workers only; coordinator not listed).""" url = f"http://{hostname}:{port}/v1/node" raw = _fetch_json(url) if raw is None: @@ -53,16 +53,6 @@ def get_node_count(hostname: str, port: int) -> int | None: return n -def get_n_workers(hostname: str, port: int) -> int | None: - """Infer worker count from Presto node list. 
Single-node => 1, else nodes - 1.""" - n = get_node_count(hostname, port) - if n is None: - return None - workers = max(1, n - 1) # coordinator counts as one; single-node has 1 "worker" - _debug(f"get_n_workers: node_count={n} -> n_workers={workers}") - return workers - - def get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_name: str) -> int | float | None: """ Resolve scale factor from the schema's data source (metadata.json next to table data). @@ -264,7 +254,7 @@ def gather_run_context( if sf is not None: ctx["scale_factor"] = int(sf) if isinstance(sf, float) and sf == int(sf) else sf - n_workers = get_n_workers(hostname, port) + n_workers = get_node_count(hostname, port) # Engine only from Docker (container names) or SLURM (nvidia-smi in LOGS). No API fallback. engine_from_docker = get_engine_from_docker_containers(hostname, port) engine_from_slurm = get_engine_from_slurm() if engine_from_docker is None else None From ee385c23baeb10e4e247c5192ef63b40ccbc3adb Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Tue, 3 Mar 2026 18:22:09 -0800 Subject: [PATCH 04/24] fix import --- presto/testing/performance_benchmarks/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/presto/testing/performance_benchmarks/conftest.py b/presto/testing/performance_benchmarks/conftest.py index e97336be..4c4a1713 100644 --- a/presto/testing/performance_benchmarks/conftest.py +++ b/presto/testing/performance_benchmarks/conftest.py @@ -8,7 +8,6 @@ from pathlib import Path from ..common.conftest import * # noqa: F403 -from .benchmark_keys import BenchmarkKeys from .run_context import gather_run_context # ruff: noqa: I001 From 9121f7535c9149e52081bc79f354ea68926b0df1 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Tue, 3 Mar 2026 18:27:48 -0800 Subject: [PATCH 05/24] change benchmark.json to benchmark_config.json --- benchmark_reporting_tools/post_results.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py index 964e7628..5f386547 100644 --- a/benchmark_reporting_tools/post_results.py +++ b/benchmark_reporting_tools/post_results.py @@ -15,7 +15,7 @@ expected directory structure is: ../benchmark-root/ - ├── benchmark.json # optional + ├── benchmark_config.json # optional ├── configs # optional │ ├── coordinator.config │ └── worker.config @@ -160,7 +160,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "input_path", type=str, - help="Path to benchmark directory containing benchmark.json and result_dir/", + help="Path to benchmark directory containing benchmark_config.json and result_dir/", ) parser.add_argument( "--api-url", @@ -191,7 +191,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--engine-name", default=None, - help="Query engine name (optionally derived from benchmark.json 'engine' field)", + help="Query engine name (optionally derived from benchmark_config.json 'engine' field)", ) parser.add_argument( "--identifier-hash", @@ -242,7 +242,7 @@ def parse_args() -> argparse.Namespace: default=1, ) - # A bunch of optional arguments for when benchmark.json is not present. + # A bunch of optional arguments for when benchmark_config.json is not present. parser.add_argument( "--kind", help="Run kind (e.g. 'single-node', 'multi-node')", @@ -333,7 +333,7 @@ def build_submission_payload( """Build a BenchmarkSubmission payload from parsed dataclasses. 
Args: - benchmark_metadata: Parsed benchmark.json as BenchmarkMetadata + benchmark_metadata: Parsed benchmark_config.json as BenchmarkMetadata benchmark_results: Parsed benchmark_result.json as BenchmarkResults engine_config: Parsed config files as EngineConfig, optional sku_name: Hardware SKU name @@ -524,7 +524,7 @@ async def process_benchmark_dir( timeout: float, upload_logs: bool = True, benchmark_definition_name: str, - # all the optional arguments for when benchmark.json is not present. + # all the optional arguments for when benchmark_config.json is not present. concurrency_streams: int = 1, kind: str | None = None, benchmark: str | None = None, @@ -546,11 +546,11 @@ async def process_benchmark_dir( # Load metadata, results, and config - # benchmark.json is only optionally written out. + # benchmark_config.json is only optionally written out. # We give preference to getting this from the user CLI options, # falling back to - benchmark_json_path = benchmark_dir / "benchmark.json" + benchmark_json_path = benchmark_dir / "benchmark_config.json" if not benchmark_json_path.exists(): missing_args = [] @@ -574,7 +574,7 @@ async def process_benchmark_dir( missing_args.append("engine_name") if missing_args: - print(" Error: must provide benchmark metadata when benchmark.json is not present", file=sys.stderr) + print(" Error: must provide benchmark metadata when benchmark_config.json is not present", file=sys.stderr) print(f" Error: missing arguments: {', '.join(missing_args)}", file=sys.stderr) return 1 @@ -594,7 +594,7 @@ async def process_benchmark_dir( ) else: try: - benchmark_metadata = BenchmarkMetadata.from_file(benchmark_dir / "benchmark.json") + benchmark_metadata = BenchmarkMetadata.from_file(benchmark_dir / "benchmark_config.json") except (ValueError, json.JSONDecodeError, FileNotFoundError) as e: print(f" Error loading metadata: {e}", file=sys.stderr) return 1 From e8bda2acefd866cb3e8b3bc527bef0bcb7a00118 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Tue, 3 Mar 2026 18:41:46 -0800 Subject: [PATCH 06/24] add num_drivers to context --- .../performance_benchmarks/run_context.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index f9320355..e854b5ef 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -229,6 +229,49 @@ def get_engine_from_docker_containers(hostname: str, port: int) -> str | None: return None +_ENGINE_TO_VARIANT = { + "velox-gpu": "gpu", + "velox-cpu": "cpu", + "java": "java", +} + + +def get_num_drivers(engine: str) -> int | None: + """Read task.max-drivers-per-task from the generated worker config_native.properties. + + Falls back to None when the generated config directory does not exist + (e.g. pre-configured cluster or SLURM without local config generation). + """ + variant = _ENGINE_TO_VARIANT.get(engine) + if variant is None: + return None + + config_file = ( + Path(__file__).resolve().parent + / ".." + / ".." 
+ / "docker" + / "config" + / "generated" + / variant + / "etc_worker" + / "config_native.properties" + ) + if not config_file.is_file(): + _debug(f"num_drivers: config not found at {config_file}") + return None + + for line in config_file.read_text().splitlines(): + line = line.strip() + if line.startswith("task.max-drivers-per-task="): + try: + return int(line.split("=", 1)[1]) + except (ValueError, IndexError): + pass + _debug(f"num_drivers: task.max-drivers-per-task not found in {config_file}") + return None + + def gather_run_context( hostname: str, port: int, @@ -292,4 +335,10 @@ def gather_run_context( if worker_image is not None: ctx["worker_image"] = worker_image + num_drivers = get_num_drivers(engine) + if num_drivers is not None: + ctx["num_drivers"] = num_drivers + + ctx["execution_number"] = 1 + return ctx From 2ba4ffc59d46cdf4d2a08a04520e31262732a2fa Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Tue, 3 Mar 2026 18:49:46 -0800 Subject: [PATCH 07/24] remove result_dir expectation --- benchmark_reporting_tools/post_results.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py index 5f386547..410e8302 100644 --- a/benchmark_reporting_tools/post_results.py +++ b/benchmark_reporting_tools/post_results.py @@ -21,8 +21,7 @@ │ └── worker.config ├── logs # optional │ └── slurm-4575179.out - └── result_dir - └── benchmark_result.json + └── benchmark_result.json Usage: python benchmark_reporting_tools/post_results.py /path/to/benchmark/dir \ @@ -160,7 +159,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "input_path", type=str, - help="Path to benchmark directory containing benchmark_config.json and result_dir/", + help="Path to benchmark directory containing benchmark_config.json and benchmark_result.json", ) parser.add_argument( "--api-url", @@ -601,7 +600,7 @@ async def process_benchmark_dir( try: results = BenchmarkResults.from_file( - benchmark_dir / "result_dir" / "benchmark_result.json", benchmark_name=benchmark_metadata.benchmark + benchmark_dir / "benchmark_result.json", benchmark_name=benchmark_metadata.benchmark ) except (ValueError, json.JSONDecodeError, FileNotFoundError) as e: print(f" Error loading results: {e}", file=sys.stderr) From 4741a1b927f550427da20ddc374c35f067983ccd Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Wed, 4 Mar 2026 15:19:33 -0800 Subject: [PATCH 08/24] major refactor --- .gitignore | 1 + benchmark_reporting_tools/post_results.py | 203 ++++----------- common/testing/conftest.py | 18 +- .../performance_benchmarks/benchmark_keys.py | 3 +- .../performance_benchmarks/conftest.py | 32 +-- .../etc_worker/config_native.properties | 2 +- presto/docker/docker-compose.java.yml | 2 + presto/docker/docker-compose.native-cpu.yml | 2 + .../docker-compose.native-gpu.yml.jinja | 8 + presto/docker/launch_presto_servers.sh | 25 +- presto/scripts/generate_presto_config.sh | 8 +- presto/scripts/run_benchmark.sh | 13 +- presto/scripts/start_presto_helper.sh | 6 + .../performance_benchmarks/conftest.py | 156 +----------- .../metrics_collector.py | 33 +-- .../performance_benchmarks/presto_api.py | 60 +++++ .../performance_benchmarks/run_context.py | 238 ++++++------------ 17 files changed, 264 insertions(+), 546 deletions(-) create mode 100644 presto/testing/performance_benchmarks/presto_api.py diff --git a/.gitignore b/.gitignore index ad8182ca..db748e49 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ presto/docker/config/generated*/ 
# Generated Presto Docker Compose files presto/docker/docker-compose/generated*/ +presto/docker/worker_logs*/ # Slurm logs and results presto/slurm/presto-nvl72/logs/ diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py index 410e8302..f4bb09b4 100644 --- a/benchmark_reporting_tools/post_results.py +++ b/benchmark_reporting_tools/post_results.py @@ -15,13 +15,12 @@ expected directory structure is: ../benchmark-root/ - ├── benchmark_config.json # optional + ├── benchmark_result.json ├── configs # optional │ ├── coordinator.config │ └── worker.config - ├── logs # optional - │ └── slurm-4575179.out - └── benchmark_result.json + └── logs # optional + └── slurm-4575179.out Usage: python benchmark_reporting_tools/post_results.py /path/to/benchmark/dir \ @@ -72,13 +71,16 @@ class BenchmarkMetadata: engine: str @classmethod - def from_file(cls, file_path: Path) -> "BenchmarkMetadata": - data = json.loads(file_path.read_text()) - - # parse fields, like the timestamp + def from_result_file(cls, file_path: Path) -> "BenchmarkMetadata": + """Extract metadata from the 'context' section of benchmark_result.json.""" + raw = json.loads(file_path.read_text()) + data = raw["context"] data["timestamp"] = datetime.fromisoformat(data["timestamp"].replace("Z", "+00:00")) - return cls(**data) + known_fields = {f.name for f in dataclasses.fields(cls)} + filtered = {k: v for k, v in data.items() if k in known_fields} + + return cls(**filtered) def serialize(self) -> dict: out = dataclasses.asdict(self) @@ -109,7 +111,7 @@ def from_file(cls, file_path: Path, benchmark_name: str) -> "BenchmarkResults": ) -def parse_config_file(file_path: Path) -> dict[str, str]: +def _parse_config_file(file_path: Path) -> dict[str, str]: """Parse a key=value config file, ignoring comments and blank lines. Args: @@ -142,15 +144,15 @@ def from_dir(cls, configs_dir: Path) -> "EngineConfig": Expects coordinator.config and worker.config files. """ - coordinator_config = parse_config_file(configs_dir / "coordinator.config") - worker_config = parse_config_file(configs_dir / "worker.config") + coordinator_config = _parse_config_file(configs_dir / "coordinator.config") + worker_config = _parse_config_file(configs_dir / "worker.config") return cls(coordinator=coordinator_config, worker=worker_config) def serialize(self) -> dict: return dataclasses.asdict(self) -def parse_args() -> argparse.Namespace: +def _parse_args() -> argparse.Namespace: """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="Post Velox benchmark results to the API.", @@ -159,7 +161,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "input_path", type=str, - help="Path to benchmark directory containing benchmark_config.json and benchmark_result.json", + help="Path to benchmark directory containing benchmark_result.json", ) parser.add_argument( "--api-url", @@ -190,7 +192,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--engine-name", default=None, - help="Query engine name (optionally derived from benchmark_config.json 'engine' field)", + help="Query engine name (overrides the 'engine' field from benchmark_result.json context)", ) parser.add_argument( "--identifier-hash", @@ -241,61 +243,10 @@ def parse_args() -> argparse.Namespace: default=1, ) - # A bunch of optional arguments for when benchmark_config.json is not present. - parser.add_argument( - "--kind", - help="Run kind (e.g. 
'single-node', 'multi-node')", - ) - parser.add_argument( - "--benchmark", - help="Benchmark name (e.g. 'tpch')", - default="tpch", - ) - parser.add_argument( - "--timestamp", - help="Timestamp of the benchmark run", - default=None, - ) - parser.add_argument("--execution-number", help="Execution number of the benchmark run", type=int, default=1) - parser.add_argument( - "--n-workers", - help="Number of workers in the benchmark run", - type=int, - default=None, - ) - parser.add_argument( - "--scale-factor", - help="Scale factor of the benchmark run", - type=int, - default=None, - ) - parser.add_argument( - "--gpu-count", - help="Number of GPUs in the benchmark run", - type=int, - default=None, - ) - parser.add_argument( - "--gpu-name", - help="GPU name (e.g. 'H100')", - default=None, - ) - parser.add_argument( - "--num-drivers", - help="Number of drivers in the benchmark run", - type=int, - default=None, - ) - parser.add_argument( - "--worker-image", - help="Worker image (e.g. 'velox/worker:latest')", - default=None, - ) - - return parser.parse_args() + return parser._parse_args() -def normalize_api_url(url: str) -> str: +def _normalize_api_url(url: str) -> str: """Normalize a user-provided API URL to a base URL. Handles various formats: @@ -313,7 +264,7 @@ def normalize_api_url(url: str) -> str: return normalized.rstrip("/") -def build_submission_payload( +def _build_submission_payload( benchmark_metadata: BenchmarkMetadata, benchmark_results: BenchmarkResults, engine_config: EngineConfig | None, @@ -332,7 +283,7 @@ def build_submission_payload( """Build a BenchmarkSubmission payload from parsed dataclasses. Args: - benchmark_metadata: Parsed benchmark_config.json as BenchmarkMetadata + benchmark_metadata: Parsed from the 'context' section of benchmark_result.json benchmark_results: Parsed benchmark_result.json as BenchmarkResults engine_config: Parsed config files as EngineConfig, optional sku_name: Hardware SKU name @@ -435,8 +386,8 @@ def build_submission_payload( } -def build_http_client(api_url: str, api_key: str, timeout: float) -> httpx.AsyncClient: - base_url = normalize_api_url(api_url) +def _build_http_client(api_url: str, api_key: str, timeout: float) -> httpx.AsyncClient: + base_url = _normalize_api_url(api_url) transport = httpx.AsyncHTTPTransport(retries=3) return httpx.AsyncClient( base_url=base_url, @@ -446,7 +397,7 @@ def build_http_client(api_url: str, api_key: str, timeout: float) -> httpx.Async ) -async def upload_log_files( +async def _upload_log_files( benchmark_dir: Path, api_url: str, api_key: str, @@ -472,7 +423,7 @@ async def upload_log_files( print(f" Uploading {len(log_files)} log file(s) (max {max_concurrency} concurrent)...", file=sys.stderr) semaphore = asyncio.Semaphore(max_concurrency) - async with build_http_client(api_url, api_key, timeout) as client: + async with _build_http_client(api_url, api_key, timeout) as client: async def _upload_one(log_file: Path) -> int: async with semaphore: @@ -495,18 +446,18 @@ async def _upload_one(log_file: Path) -> int: return list(asset_ids) -async def post_submission(api_url: str, api_key: str, payload: dict, timeout: float) -> tuple[int, str]: +async def _post_submission(api_url: str, api_key: str, payload: dict, timeout: float) -> tuple[int, str]: """Post a benchmark submission to the API. 
Returns: Tuple of (status_code, response_text) """ - async with build_http_client(api_url, api_key, timeout) as client: + async with _build_http_client(api_url, api_key, timeout) as client: response = await client.post("/api/benchmark/", json=payload) return response.status_code, response.text -async def process_benchmark_dir( +async def _process_benchmark_dir( benchmark_dir: Path, *, sku_name: str, @@ -523,18 +474,7 @@ async def process_benchmark_dir( timeout: float, upload_logs: bool = True, benchmark_definition_name: str, - # all the optional arguments for when benchmark_config.json is not present. concurrency_streams: int = 1, - kind: str | None = None, - benchmark: str | None = None, - timestamp: str | None = None, - execution_number: int = 1, - n_workers: int | None = None, - scale_factor: int | None = None, - gpu_count: int | None = None, - gpu_name: str | None = None, - worker_image: str | None = None, - num_drivers: int | None = None, ) -> int: """Process a benchmark directory and post results to API. @@ -543,66 +483,23 @@ async def process_benchmark_dir( """ print(f"\nProcessing: {benchmark_dir}", file=sys.stderr) - # Load metadata, results, and config - - # benchmark_config.json is only optionally written out. - # We give preference to getting this from the user CLI options, - # falling back to - - benchmark_json_path = benchmark_dir / "benchmark_config.json" - - if not benchmark_json_path.exists(): - missing_args = [] - if kind is None: - missing_args.append("kind") - if benchmark is None: - missing_args.append("benchmark") - if timestamp is None: - missing_args.append("timestamp") - if n_workers is None: - missing_args.append("n_workers") - if scale_factor is None: - missing_args.append("scale_factor") - if gpu_count is None: - missing_args.append("gpu_count") - if gpu_name is None: - missing_args.append("gpu_name") - if num_drivers is None: - missing_args.append("num_drivers") - if engine_name is None: - missing_args.append("engine_name") - - if missing_args: - print(" Error: must provide benchmark metadata when benchmark_config.json is not present", file=sys.stderr) - print(f" Error: missing arguments: {', '.join(missing_args)}", file=sys.stderr) - return 1 + # Load metadata and results from benchmark_result.json. + # The "context" section contains run metadata; benchmark data sits + # under a top-level key matching the benchmark name (e.g. "tpch"). - # mypy doesn't realize that kind, benchmark, etc. have been narrowed to not-None by the check above. 
- benchmark_metadata = BenchmarkMetadata( - kind=kind, # type: ignore[arg-type] - benchmark=benchmark, # type: ignore[arg-type] - timestamp=datetime.fromisoformat(timestamp.replace("Z", "+00:00")), # type: ignore[union-attr] - execution_number=execution_number, - n_workers=n_workers, # type: ignore[arg-type] - scale_factor=scale_factor, # type: ignore[arg-type] - gpu_count=gpu_count, # type: ignore[arg-type] - gpu_name=gpu_name, # type: ignore[arg-type] - num_drivers=num_drivers, # type: ignore[arg-type] - worker_image=worker_image, - engine=engine_name, # type: ignore[arg-type] - ) - else: - try: - benchmark_metadata = BenchmarkMetadata.from_file(benchmark_dir / "benchmark_config.json") - except (ValueError, json.JSONDecodeError, FileNotFoundError) as e: - print(f" Error loading metadata: {e}", file=sys.stderr) - return 1 + result_file = benchmark_dir / "benchmark_result.json" + + try: + benchmark_metadata = BenchmarkMetadata.from_result_file(result_file) + except (ValueError, KeyError, json.JSONDecodeError, FileNotFoundError) as e: + print(f" Error loading metadata: {e}", file=sys.stderr) + return 1 try: results = BenchmarkResults.from_file( - benchmark_dir / "benchmark_result.json", benchmark_name=benchmark_metadata.benchmark + result_file, benchmark_name=benchmark_metadata.benchmark ) - except (ValueError, json.JSONDecodeError, FileNotFoundError) as e: + except (ValueError, KeyError, json.JSONDecodeError, FileNotFoundError) as e: print(f" Error loading results: {e}", file=sys.stderr) return 1 @@ -623,14 +520,14 @@ async def process_benchmark_dir( ) else: try: - asset_ids = await upload_log_files(benchmark_dir, api_url, api_key, timeout) + asset_ids = await _upload_log_files(benchmark_dir, api_url, api_key, timeout) except (RuntimeError, httpx.RequestError) as e: print(f" Error uploading logs: {e}", file=sys.stderr) return 1 # Build submission payload try: - payload = build_submission_payload( + payload = _build_submission_payload( benchmark_metadata=benchmark_metadata, benchmark_results=results, engine_config=engine_config, @@ -664,7 +561,7 @@ async def process_benchmark_dir( # Post to API try: - status_code, response_text = await post_submission(api_url, api_key, payload, timeout) + status_code, response_text = await _post_submission(api_url, api_key, payload, timeout) print(f" Status: {status_code}", file=sys.stderr) if status_code >= 400: print(f" Response: {response_text}", file=sys.stderr) @@ -678,7 +575,7 @@ async def process_benchmark_dir( async def main() -> int: - args = parse_args() + args = _parse_args() # Resolve to str (parser already falls back to BENCHMARK_API_URL / BENCHMARK_API_KEY) api_url = args.api_url or "" @@ -704,7 +601,7 @@ async def main() -> int: print(f"Error: Input path is not a directory: {args.input_path}", file=sys.stderr) return 1 - result = await process_benchmark_dir( + result = await _process_benchmark_dir( benchmark_dir, sku_name=args.sku_name, storage_configuration_name=args.storage_configuration_name, @@ -720,16 +617,6 @@ async def main() -> int: timeout=args.timeout, upload_logs=args.upload_logs, benchmark_definition_name=args.benchmark_name, - kind=args.kind, - benchmark=args.benchmark, - timestamp=args.timestamp, - execution_number=args.execution_number, - n_workers=args.n_workers, - scale_factor=args.scale_factor, - gpu_count=args.gpu_count, - gpu_name=args.gpu_name, - worker_image=args.worker_image, - num_drivers=args.num_drivers, concurrency_streams=args.concurrency_streams, ) diff --git a/common/testing/conftest.py 
b/common/testing/conftest.py index fc3bc52c..830def6d 100644 --- a/common/testing/conftest.py +++ b/common/testing/conftest.py @@ -6,7 +6,7 @@ def pytest_generate_tests(metafunc): TPCH_FIXTURE_NAME = "tpch_query_id" if TPCH_FIXTURE_NAME in metafunc.fixturenames: TPCH_NUM_QUERIES = 22 - set_query_id_param(metafunc, TPCH_FIXTURE_NAME, TPCH_NUM_QUERIES, []) + _set_query_id_param(metafunc, TPCH_FIXTURE_NAME, TPCH_NUM_QUERIES, []) TPCDS_FIXTURE_NAME = "tpcds_query_id" if TPCDS_FIXTURE_NAME in metafunc.fixturenames: @@ -14,22 +14,22 @@ def pytest_generate_tests(metafunc): TPCDS_DISABLED_QUERIES = [ # All queries now pass with SQL fixes ] - set_query_id_param(metafunc, TPCDS_FIXTURE_NAME, TPCDS_NUM_QUERIES, TPCDS_DISABLED_QUERIES) + _set_query_id_param(metafunc, TPCDS_FIXTURE_NAME, TPCDS_NUM_QUERIES, TPCDS_DISABLED_QUERIES) -def set_query_id_param(metafunc, param_name, num_queries, disabled_queries): +def _set_query_id_param(metafunc, param_name, num_queries, disabled_queries): queries = metafunc.config.getoption("--queries") - metafunc.parametrize(param_name, get_query_ids(num_queries, queries, disabled_queries)) + metafunc.parametrize(param_name, _get_query_ids(num_queries, queries, disabled_queries)) -def get_query_ids(num_queries, selected_query_ids, disabled_queries): - query_ids = parse_selected_query_ids(selected_query_ids, num_queries) +def _get_query_ids(num_queries, selected_query_ids, disabled_queries): + query_ids = _parse_selected_query_ids(selected_query_ids, num_queries) if len(query_ids) == 0: query_ids = [id for id in range(1, num_queries + 1) if id not in disabled_queries] - return format_query_ids(query_ids) + return _format_query_ids(query_ids) -def parse_selected_query_ids(selected_query_ids, num_queries): +def _parse_selected_query_ids(selected_query_ids, num_queries): query_ids = [] if selected_query_ids and selected_query_ids.strip(): for id_str in selected_query_ids.split(","): @@ -40,5 +40,5 @@ def parse_selected_query_ids(selected_query_ids, num_queries): return query_ids -def format_query_ids(query_ids): +def _format_query_ids(query_ids): return [f"Q{query_id}" for query_id in query_ids] diff --git a/common/testing/performance_benchmarks/benchmark_keys.py b/common/testing/performance_benchmarks/benchmark_keys.py index 579dbdcc..358ce6b2 100644 --- a/common/testing/performance_benchmarks/benchmark_keys.py +++ b/common/testing/performance_benchmarks/benchmark_keys.py @@ -21,8 +21,7 @@ class BenchmarkKeys(str, Enum): CONTEXT_KEY = "context" ITERATIONS_COUNT_KEY = "iterations_count" SCHEMA_NAME_KEY = "schema_name" - SCALE_FACTOR_KEY = "scale_factor" - # Run configuration (from run context; written to benchmark_config.json and context) + # Run configuration (from run context; written to context in benchmark_result.json) TIMESTAMP_KEY = "timestamp" NUM_WORKERS_KEY = "n_workers" GPU_NAME_KEY = "gpu_name" diff --git a/common/testing/performance_benchmarks/conftest.py b/common/testing/performance_benchmarks/conftest.py index daf23d19..33206499 100644 --- a/common/testing/performance_benchmarks/conftest.py +++ b/common/testing/performance_benchmarks/conftest.py @@ -31,17 +31,17 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): for benchmark_type, result in terminalreporter._session.benchmark_results.items(): assert BenchmarkKeys.AGGREGATE_TIMES_KEY in result - write_line(terminalreporter, text_report, "") - write_section( + _write_line(terminalreporter, text_report, "") + _write_section( terminalreporter, text_report, f"{benchmark_type} Benchmark Summary", 
sep="-", bold=True, yellow=True ) - write_line(terminalreporter, text_report, "") - write_line(terminalreporter, text_report, f"Iterations Count: {iterations}") - write_line(terminalreporter, text_report, f"{data_location_display_name} Name: {data_location_name}") + _write_line(terminalreporter, text_report, "") + _write_line(terminalreporter, text_report, f"Iterations Count: {iterations}") + _write_line(terminalreporter, text_report, f"{data_location_display_name} Name: {data_location_name}") if tag: - write_line(terminalreporter, text_report, f"Tag: {tag}") - write_line(terminalreporter, text_report, "") + _write_line(terminalreporter, text_report, f"Tag: {tag}") + _write_line(terminalreporter, text_report, "") if iterations > 1: AGG_HEADERS = [ @@ -59,9 +59,9 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): header = " Query ID " for agg_header in AGG_HEADERS: header += f"|{agg_header:^{width}}" - write_line(terminalreporter, text_report, "-" * len(header), bold=True, yellow=True) - write_line(terminalreporter, text_report, header) - write_line(terminalreporter, text_report, "-" * len(header), bold=True, yellow=True) + _write_line(terminalreporter, text_report, "-" * len(header), bold=True, yellow=True) + _write_line(terminalreporter, text_report, header) + _write_line(terminalreporter, text_report, "-" * len(header), bold=True, yellow=True) for query_id, agg_timings in result[BenchmarkKeys.AGGREGATE_TIMES_KEY].items(): line = f"{query_id:^10}" if agg_timings: @@ -70,10 +70,10 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): line += f"|{agg_timing:^{width}}" else: line += f"|{'NULL':^{width}}" * len(AGG_HEADERS) - write_line(terminalreporter, text_report, line) + _write_line(terminalreporter, text_report, line) # Print SUM row. - write_line(terminalreporter, text_report, "-" * len(header)) + _write_line(terminalreporter, text_report, "-" * len(header)) agg_sums = result[BenchmarkKeys.AGGREGATE_TIMES_SUM_KEY] line = f"{'SUM':^10}" if agg_sums: @@ -83,8 +83,8 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): else: line += f"|{'NULL':^{width}}" * len(AGG_HEADERS) - write_line(terminalreporter, text_report, line) - write_line(terminalreporter, text_report, "") + _write_line(terminalreporter, text_report, line) + _write_line(terminalreporter, text_report, "") bench_output_dir = get_output_dir(config) assert bench_output_dir.is_dir() @@ -93,12 +93,12 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): file.write(f"{report_text}\n") -def write_line(terminalreporter, text_report, content, **kwargs): +def _write_line(terminalreporter, text_report, content, **kwargs): terminalreporter.write_line(content, **kwargs) text_report.append(content) -def write_section(terminalreporter, text_report, content, **kwargs): +def _write_section(terminalreporter, text_report, content, **kwargs): terminalreporter.section(content, **kwargs) sep = kwargs.get("sep", " ") diff --git a/presto/docker/config/template/etc_worker/config_native.properties b/presto/docker/config/template/etc_worker/config_native.properties index 353ad0b0..c36788c7 100644 --- a/presto/docker/config/template/etc_worker/config_native.properties +++ b/presto/docker/config/template/etc_worker/config_native.properties @@ -1,7 +1,7 @@ # This node is a worker; it executes tasks but doesn't coordinate queries. coordinator=false # Worker REST/HTTP port for internal and admin endpoints. 
-http-server.http.port=10000 +http-server.http.port=8080 # Address of the coordinator's discovery service for registration. discovery.uri=http://presto-coordinator:8080 diff --git a/presto/docker/docker-compose.java.yml b/presto/docker/docker-compose.java.yml index e728c291..014e4581 100644 --- a/presto/docker/docker-compose.java.yml +++ b/presto/docker/docker-compose.java.yml @@ -20,5 +20,7 @@ services: - ./config/generated/java/etc_worker/config_java.properties:/opt/presto-server/etc/config.properties - ./config/generated/java/etc_worker/node.properties:/opt/presto-server/etc/node.properties - ./config/generated/java/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties + - ./worker_logs:/opt/presto-server/logs + - ./launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro depends_on: - presto-coordinator diff --git a/presto/docker/docker-compose.native-cpu.yml b/presto/docker/docker-compose.native-cpu.yml index b017dc81..cc5bc39d 100644 --- a/presto/docker/docker-compose.native-cpu.yml +++ b/presto/docker/docker-compose.native-cpu.yml @@ -25,3 +25,5 @@ services: - ./config/generated/cpu/etc_worker/node.properties:/opt/presto-server/etc/node.properties - ./config/generated/cpu/etc_worker/config_native.properties:/opt/presto-server/etc/config.properties - ./config/generated/cpu/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties + - ./worker_logs:/opt/presto-server/logs + - ./launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro diff --git a/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja b/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja index d5ad844d..0dbff164 100644 --- a/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja +++ b/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja @@ -49,6 +49,8 @@ x-presto-native-worker-gpu: &gpu_worker_base - ../../config/generated/gpu/etc_worker/node.properties:/opt/presto-server/etc/node.properties - ../../config/generated/gpu/etc_worker/config_native.properties:/opt/presto-server/etc/config.properties - ../../config/generated/gpu/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties + - ../../worker_logs:/opt/presto-server/logs + - ../../launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro services: presto-coordinator: @@ -68,6 +70,7 @@ services: <<: *gpu_worker_base container_name: presto-native-worker-gpu-{{ gpu_id }} environment: + WORKER_ID: {{ gpu_id }} NVIDIA_VISIBLE_DEVICES: all PROFILE: ${PROFILE} PROFILE_ARGS: ${PROFILE_ARGS} @@ -82,9 +85,12 @@ services: KVIKIO_NTHREADS: {{ kvikio_threads }} CUDA_VISIBLE_DEVICES: {{ gpu_id }} volumes: + - ../../config/generated/gpu/etc_common:/opt/presto-server/etc - ../../config/generated/gpu/etc_worker_{{ gpu_id }}/node.properties:/opt/presto-server/etc/node.properties - ../../config/generated/gpu/etc_worker_{{ gpu_id }}/config_native.properties:/opt/presto-server/etc/config.properties - ../../config/generated/gpu/etc_worker_{{ gpu_id }}/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties + - ../../worker_logs:/opt/presto-server/logs + - ../../launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro {% endfor %} {% else %} # Combined GPU worker - runs {{ workers|length if workers else 1 }} presto servers in parallel, each pinned to a specific GPU @@ -124,6 +130,8 @@ services: CUDA_VISIBLE_DEVICES: 0 {%- endif %} volumes: + - ../../worker_logs:/opt/presto-server/logs + - 
../../launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro {%- if workers %} # Mount all etc directories for workers {{ workers|join(', ') }} {%- for gpu_id in workers %} diff --git a/presto/docker/launch_presto_servers.sh b/presto/docker/launch_presto_servers.sh index 16dfd352..e390e9cc 100644 --- a/presto/docker/launch_presto_servers.sh +++ b/presto/docker/launch_presto_servers.sh @@ -4,14 +4,27 @@ set -e # Run ldconfig once echo ldconfig +LOGS_DIR="/opt/presto-server/logs" +mkdir -p "${LOGS_DIR}" + if [ $# -eq 0 ]; then - presto_server --etc-dir="/opt/presto-server/etc/" & + # Single worker mode. Use WORKER_ID env var for log filename (defaults to 0). + local_id="${WORKER_ID:-0}" + log_file="${LOGS_DIR}/worker_${local_id}.log" + nvidia-smi -L > "${log_file}" 2>&1 || true + presto_server --etc-dir="/opt/presto-server/etc/" >> "${log_file}" 2>&1 & else -# Launch workers in parallel, each pinned to a different GPU -# The GPU IDs are passed as command-line arguments -for gpu_id in "$@"; do - CUDA_VISIBLE_DEVICES=$gpu_id presto_server --etc-dir="/opt/presto-server/etc${gpu_id}" & -done + # Multi-worker single-container mode. Each GPU ID is an argument. + worker_id=0 + for gpu_id in "$@"; do + ( + export CUDA_VISIBLE_DEVICES=$gpu_id + log_file="${LOGS_DIR}/worker_${worker_id}.log" + nvidia-smi -L > "${log_file}" 2>&1 || true + exec presto_server --etc-dir="/opt/presto-server/etc${gpu_id}" >> "${log_file}" 2>&1 + ) & + worker_id=$((worker_id + 1)) + done fi # Wait for all background processes diff --git a/presto/scripts/generate_presto_config.sh b/presto/scripts/generate_presto_config.sh index cc20b82a..7bbbe771 100755 --- a/presto/scripts/generate_presto_config.sh +++ b/presto/scripts/generate_presto_config.sh @@ -133,16 +133,10 @@ EOF # optimizer.joins-not-null-inference-strategy=USE_FUNCTION_METADATA # optimizer.default-filter-factor-enabled=true sed -i 's/\#optimizer/optimizer/g' ${COORD_CONFIG} - - if [[ ${NUM_WORKERS} -eq 1 ]]; then - # Adds a cluster tag for gpu variant - echo "cluster-tag=native-gpu" >> ${COORD_CONFIG} - fi + echo "cluster-tag=native-gpu" >> ${COORD_CONFIG} fi - # now perform other variant-specific modifications to the generated configs if [[ "${VARIANT_TYPE}" == "cpu" ]]; then - # Adds a cluster tag for cpu variant echo "cluster-tag=native-cpu" >> ${COORD_CONFIG} fi diff --git a/presto/scripts/run_benchmark.sh b/presto/scripts/run_benchmark.sh index 28dfd928..fbea6f21 100755 --- a/presto/scripts/run_benchmark.sh +++ b/presto/scripts/run_benchmark.sh @@ -8,6 +8,11 @@ set -e # Compute the directory where this script resides SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# Default LOGS to the Docker worker_logs directory so that run_context.py +# can find nvidia-smi output written by the worker containers. SLURM +# environments set LOGS themselves, so this only takes effect for Docker. +export LOGS="${LOGS:-${SCRIPT_DIR}/../docker/worker_logs}" + source "${SCRIPT_DIR}/presto_connection_defaults.sh" print_help() { @@ -41,11 +46,11 @@ OPTIONS: ENVIRONMENT: PRESTO_BENCHMARK_DEBUG Set to 1 to print debug logs for worker/engine detection - (e.g. node URIs, reachability, metrics, Docker containers). + (e.g. node URIs, reachability, metrics). Use when engine is misdetected or the run fails. - Docker In Docker setups, engine is inferred from running worker - images (presto-native-worker-gpu/cpu, presto-java-worker) - whose tag equals the username. Ensure 'docker ps' is available. + LOGS Directory containing worker_.log files with nvidia-smi + output. 
Defaults to the Docker worker_logs directory. + SLURM environments set this to their own logs path. EXAMPLES: $0 -b tpch -s bench_sf100 diff --git a/presto/scripts/start_presto_helper.sh b/presto/scripts/start_presto_helper.sh index 872b1cbc..b51d3c9b 100755 --- a/presto/scripts/start_presto_helper.sh +++ b/presto/scripts/start_presto_helper.sh @@ -213,5 +213,11 @@ if (( ${#BUILD_TARGET_ARG[@]} )); then ${BUILD_TARGET_ARG[@]} fi +# Prepare a clean worker_logs directory so containers can write nvidia-smi +# output before starting the server. Stale logs from a prior run are removed. +WORKER_LOGS_DIR="${SCRIPT_DIR}/../docker/worker_logs" +rm -rf "${WORKER_LOGS_DIR}" +mkdir -p "${WORKER_LOGS_DIR}" + # Start all services defined in the rendered docker-compose file. docker compose -f $DOCKER_COMPOSE_FILE_PATH up -d diff --git a/presto/testing/performance_benchmarks/conftest.py b/presto/testing/performance_benchmarks/conftest.py index 4c4a1713..ad79247d 100644 --- a/presto/testing/performance_benchmarks/conftest.py +++ b/presto/testing/performance_benchmarks/conftest.py @@ -3,9 +3,7 @@ import json -import statistics from datetime import datetime, timezone -from pathlib import Path from ..common.conftest import * # noqa: F403 from .run_context import gather_run_context @@ -21,7 +19,8 @@ ) from common.testing.performance_benchmarks.conftest import ( DataLocation, - pytest_sessionfinish, # noqa: F401 + compute_aggregate_timings, + get_output_dir, pytest_terminal_summary, # noqa: F401 ) @@ -51,107 +50,21 @@ def pytest_addoption(parser): parser.addoption("--skip-drop-cache", action="store_true", default=False) -def pytest_terminal_summary(terminalreporter, exitstatus, config): - text_report = [] - iterations = config.getoption("--iterations") - schema_name = config.getoption("--schema-name") - tag = config.getoption("--tag") - if not hasattr(terminalreporter._session, "benchmark_results"): - return - for benchmark_type, result in terminalreporter._session.benchmark_results.items(): - assert BenchmarkKeys.AGGREGATE_TIMES_KEY in result - - write_line(terminalreporter, text_report, "") - write_section( - terminalreporter, text_report, f"{benchmark_type} Benchmark Summary", sep="-", bold=True, yellow=True - ) - - write_line(terminalreporter, text_report, "") - write_line(terminalreporter, text_report, f"Iterations Count: {iterations}") - write_line(terminalreporter, text_report, f"Schema Name: {schema_name}") - if tag: - write_line(terminalreporter, text_report, f"Tag: {tag}") - write_line(terminalreporter, text_report, "") - - if iterations > 1: - AGG_HEADERS = [ - "Avg Hot(ms)", - "Min Hot(ms)", - "Max Hot(ms)", - "Median Hot(ms)", - "GMean Hot(ms)", - "Lukewarm(ms)", - ] - else: - AGG_HEADERS = ["Lukewarm(ms)"] - width = max([len(agg_header) for agg_header in AGG_HEADERS]) - width = max(width, result[BenchmarkKeys.FORMAT_WIDTH_KEY]) + 2 # Additional padding on each side - header = " Query ID " - for agg_header in AGG_HEADERS: - header += f"|{agg_header:^{width}}" - write_line(terminalreporter, text_report, "-" * len(header), bold=True, yellow=True) - write_line(terminalreporter, text_report, header) - write_line(terminalreporter, text_report, "-" * len(header), bold=True, yellow=True) - for query_id, agg_timings in result[BenchmarkKeys.AGGREGATE_TIMES_KEY].items(): - line = f"{query_id:^10}" - if agg_timings: - assert len(AGG_HEADERS) == len(agg_timings) - for agg_timing in agg_timings: - line += f"|{agg_timing:^{width}}" - else: - line += f"|{'NULL':^{width}}" * len(AGG_HEADERS) - 
write_line(terminalreporter, text_report, line) - - # Print SUM row. - write_line(terminalreporter, text_report, "-" * len(header)) - agg_sums = result[BenchmarkKeys.AGGREGATE_TIMES_SUM_KEY] - line = f"{'SUM':^10}" - if agg_sums: - assert len(AGG_HEADERS) == len(agg_sums) - for agg_sum in agg_sums: - line += f"|{agg_sum:^{width}}" - else: - line += f"|{'NULL':^{width}}" * len(AGG_HEADERS) - - write_line(terminalreporter, text_report, line) - write_line(terminalreporter, text_report, "") - - bench_output_dir = get_output_dir(config) - assert bench_output_dir.is_dir() - with open(f"{bench_output_dir}/benchmark_result.txt", "w") as file: - report_text = "\n".join(text_report) - file.write(f"{report_text}\n") - - -def write_line(terminalreporter, text_report, content, **kwargs): - terminalreporter.write_line(content, **kwargs) - text_report.append(content) - - -def write_section(terminalreporter, text_report, content, **kwargs): - terminalreporter.section(content, **kwargs) - - sep = kwargs.get("sep", " ") - text_report.append(f" {content} ".center(120, sep)) - - def _build_run_config(session): """ Build run-config dict from execution context (Presto nodes, nvidia-smi, schema - data source, env). Used for benchmark_config.json and context in benchmark_result.json. + data source, env). Used for the context section in benchmark_result.json. """ hostname = session.config.getoption("--hostname") port = session.config.getoption("--port") user = session.config.getoption("--user") schema_name = session.config.getoption("--schema-name") - scale_factor_override = session.config.getoption("--scale-factor") ctx = gather_run_context( hostname=hostname, port=port, user=user, schema_name=schema_name, - scale_factor_override=scale_factor_override, ) ctx["timestamp"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") return ctx @@ -190,11 +103,11 @@ def pytest_sessionfinish(session, exitstatus): else: AGG_KEYS = [BenchmarkKeys.LUKEWARM_KEY] if not hasattr(session, "benchmark_results"): - config_payload = {"benchmark": None, **run_config} - with open(f"{bench_output_dir}/benchmark_config.json", "w") as file: - json.dump(config_payload, file, indent=2) - file.write("\n") return + benchmark_types = list(session.benchmark_results.keys()) + json_result[BenchmarkKeys.CONTEXT_KEY]["benchmark"] = ( + benchmark_types[0] if len(benchmark_types) == 1 else benchmark_types + ) for benchmark_type, result in session.benchmark_results.items(): compute_aggregate_timings(result) json_result[benchmark_type] = { @@ -216,61 +129,6 @@ def pytest_sessionfinish(session, exitstatus): json.dump(json_result, file, indent=2) file.write("\n") - # Write run-config JSON (context from Presto, nvidia-smi, schema, env) - benchmark_types = list(session.benchmark_results.keys()) if hasattr(session, "benchmark_results") else [] - config_payload = { - "benchmark": benchmark_types[0] if len(benchmark_types) == 1 else benchmark_types, - **run_config, - } - with open(f"{bench_output_dir}/benchmark_config.json", "w") as file: - json.dump(config_payload, file, indent=2) - file.write("\n") - - -def get_output_dir(config): - bench_output_dir = config.getoption("--output-dir") - tag = config.getoption("--tag") - if tag: - bench_output_dir = f"{bench_output_dir}/{tag}" - return Path(bench_output_dir) - - -def compute_aggregate_timings(benchmark_results): - raw_times = benchmark_results[BenchmarkKeys.RAW_TIMES_KEY] - benchmark_results[BenchmarkKeys.AGGREGATE_TIMES_KEY] = {} - format_width = 0 - for query_id, timings in raw_times.items(): - if 
timings: - first_iteration = timings[0] - if len(timings) > 1: - hot_timings = timings[1:] - stats = ( - round(statistics.mean(hot_timings), 2), - min(hot_timings), - max(hot_timings), - statistics.median(hot_timings), - round(statistics.geometric_mean(hot_timings), 2), - first_iteration, - ) - else: - stats = (first_iteration,) - format_width = max(format_width, *[len(str(stat)) for stat in stats]) - else: - stats = None - benchmark_results[BenchmarkKeys.AGGREGATE_TIMES_KEY][query_id] = stats - - agg_sums = None - for _, stats in benchmark_results[BenchmarkKeys.AGGREGATE_TIMES_KEY].items(): - if stats: - if agg_sums is None: - agg_sums = list(stats) - else: - assert len(agg_sums) == len(stats) - for i, stat in enumerate(stats): - agg_sums[i] = round(agg_sums[i] + stat, 2) - benchmark_results[BenchmarkKeys.AGGREGATE_TIMES_SUM_KEY] = agg_sums - benchmark_results[BenchmarkKeys.FORMAT_WIDTH_KEY] = format_width - def pytest_configure(config): pytest.data_location = DataLocation("--schema-name", "Schema", BenchmarkKeys.SCHEMA_NAME_KEY) diff --git a/presto/testing/performance_benchmarks/metrics_collector.py b/presto/testing/performance_benchmarks/metrics_collector.py index 96d1242d..25e9d854 100644 --- a/presto/testing/performance_benchmarks/metrics_collector.py +++ b/presto/testing/performance_benchmarks/metrics_collector.py @@ -22,7 +22,7 @@ from typing import Any from urllib.parse import urlparse -import requests +from .presto_api import fetch_json, fetch_text, get_nodes def collect_metrics(query_id: str, query_name: str, hostname: str, port: int, output_dir: str) -> None: @@ -43,12 +43,12 @@ def collect_metrics(query_id: str, query_name: str, hostname: str, port: int, ou combined = {} # Collect node information - nodes = _fetch_json(f"{base_url}/v1/node") + nodes = get_nodes(hostname, port) if nodes: combined["nodes"] = nodes # Collect query details and extract task information - query_info = _fetch_json(f"{base_url}/v1/query/{query_id}") + query_info = fetch_json(f"{base_url}/v1/query/{query_id}") if query_info: combined["query"] = query_info tasks, metrics = _collect_worker_data(query_info) @@ -65,27 +65,6 @@ def collect_metrics(query_id: str, query_name: str, hostname: str, port: int, ou json.dump(combined, f, indent=2) -def _fetch_json(url: str, timeout: int = 30) -> Any | None: - """Fetch JSON from a URL, returning None on error.""" - try: - response = requests.get(url, timeout=timeout) - response.raise_for_status() - return response.json() - except Exception as e: - print(f"Warning: Failed to fetch {url}: {e}") - return None - - -def _fetch_text(url: str, timeout: int = 30) -> str | None: - """Fetch text from a URL, returning None on error.""" - try: - response = requests.get(url, timeout=timeout) - response.raise_for_status() - return response.text - except Exception as e: - print(f"Warning: Failed to fetch {url}: {e}") - return None - def _collect_worker_data(query_info: dict) -> tuple[dict, dict]: """Collect task details and metrics from each worker.""" @@ -115,7 +94,7 @@ def _collect_worker_data(query_info: dict) -> tuple[dict, dict]: # Collect detailed task info from worker worker_tasks = [] for task_id in task_ids: - task_data = _fetch_json(f"{worker_uri}/v1/task/{task_id}") + task_data = fetch_json(f"{worker_uri}/v1/task/{task_id}") if task_data: worker_tasks.append(task_data) @@ -156,7 +135,7 @@ def _extract_embedded_tasks(query_info: dict) -> dict: def _fetch_worker_metrics(worker_uri: str) -> dict | None: """Fetch metrics from a worker node and convert to nested object 
structure.""" - text = _fetch_text(f"{worker_uri}/v1/info/metrics") + text = fetch_text(f"{worker_uri}/v1/info/metrics") if text is None: return None metrics = _parse_prometheus_metrics(text) @@ -301,7 +280,7 @@ def interactive_collect(base_url: str, output_dir: str) -> None: return print(f"Fetching query list from {base_url}/v1/query ...") - queries = _fetch_json(f"{base_url}/v1/query") + queries = fetch_json(f"{base_url}/v1/query") if not queries: print("No queries returned from coordinator.") return diff --git a/presto/testing/performance_benchmarks/presto_api.py b/presto/testing/performance_benchmarks/presto_api.py new file mode 100644 index 00000000..da64d9ef --- /dev/null +++ b/presto/testing/performance_benchmarks/presto_api.py @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +"""Shared HTTP and Presto REST API helpers.""" + +from typing import Any + +import requests + + +def fetch_json(url: str, timeout: int = 10) -> Any | None: + """Fetch JSON from a URL, returning None on error.""" + try: + response = requests.get(url, timeout=timeout) + response.raise_for_status() + return response.json() + except Exception as e: + print(f"Warning: Failed to fetch {url}: {e}") + return None + + +def fetch_text(url: str, timeout: int = 10) -> str | None: + """Fetch text from a URL, returning None on error.""" + try: + response = requests.get(url, timeout=timeout) + response.raise_for_status() + return response.text + except Exception as e: + print(f"Warning: Failed to fetch {url}: {e}") + return None + + +def get_cluster_tag(hostname: str, port: int) -> str | None: + """Fetch the cluster-tag from the coordinator's /v1/cluster endpoint. + + Returns the tag string (e.g. 'native-gpu', 'native-cpu', 'java'), + or None if the endpoint is unavailable or the tag is not set. + """ + url = f"http://{hostname}:{port}/v1/cluster" + data = fetch_json(url) + if data is None or not isinstance(data, dict): + return None + tag = data.get("clusterTag") + if not tag or not isinstance(tag, str): + return None + return tag + + +def get_nodes(hostname: str, port: int) -> list | None: + """Fetch the worker node list from Presto's /v1/node endpoint. + + Returns the list of node dicts, or None on failure. + """ + url = f"http://{hostname}:{port}/v1/node" + raw = fetch_json(url) + if raw is None: + return None + if not isinstance(raw, list): + return None + return raw diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index e854b5ef..8fc52c09 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -3,22 +3,20 @@ """ Gather run configuration from execution context. Engine (java / velox-cpu / velox-gpu) -is inferred only from: (1) Docker running images with expected names and tag = -username, or (2) SLURM nvidia-smi in LOGS/worker_0.log. Scale factor, n_workers, -gpu_name, etc. come from schema, Presto /v1/node (node count), and nvidia-smi/env. +is determined from the coordinator's cluster-tag (via /v1/cluster). GPU name is +parsed from nvidia-smi output in worker log files (LOGS env var). Scale factor and +n_workers come from schema and Presto /v1/node respectively. 
""" -import getpass import json import os import re -import subprocess from pathlib import Path import prestodb -import requests from ..common import test_utils +from .presto_api import get_cluster_tag, get_nodes # Set PRESTO_BENCHMARK_DEBUG=1 or DEBUG=1 to print engine-detection debug logs _DEBUG = os.environ.get("PRESTO_BENCHMARK_DEBUG") or os.environ.get("DEBUG") @@ -29,31 +27,17 @@ def _debug(msg: str) -> None: print(f"[run_context] {msg}") -def _fetch_json(url: str, timeout: int = 10): - try: - response = requests.get(url, timeout=timeout) - response.raise_for_status() - return response.json() - except Exception as e: - _debug(f"GET {url} failed: {e!r}") - return None - - -def get_node_count(hostname: str, port: int) -> int | None: +def _get_node_count(hostname: str, port: int) -> int | None: """Return number of nodes in the Presto /v1/node list (workers only; coordinator not listed).""" - url = f"http://{hostname}:{port}/v1/node" - raw = _fetch_json(url) - if raw is None: - return None - if not isinstance(raw, list): - _debug(f"get_node_count: {url} returned type {type(raw).__name__}, expected list: {raw!r}") + nodes = get_nodes(hostname, port) + if nodes is None: return None - n = len(raw) - _debug(f"get_node_count: {url} -> {n} node(s). First node sample: {raw[0] if raw else 'N/A'}") + n = len(nodes) + _debug(f"get_node_count: {n} node(s)") return n -def get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_name: str) -> int | float | None: +def _get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_name: str) -> int | float | None: """ Resolve scale factor from the schema's data source (metadata.json next to table data). Uses same logic as test_utils.get_scale_factor but without pytest request. @@ -101,14 +85,14 @@ def _parse_gpu_name_from_text(line: str) -> str | None: return None -def get_gpu_name_from_slurm_logs() -> str | None: - """ - When running under SLURM, workers run nvidia-smi -L and write to LOGS/worker_.log. - All workers are assumed the same GPU; read worker_0.log only. LOGS env must be set. - Returns None if not in SLURM, LOGS unset, or no matching line found. +def _get_gpu_name_from_worker_logs() -> str | None: + """Parse GPU model from nvidia-smi output in worker log files. + + Both Docker and SLURM workers write nvidia-smi -L output to + LOGS/worker_.log before starting the server. All workers are + assumed to have the same GPU model; only worker_0.log is read. + Returns None when LOGS is unset or no matching line is found. """ - if not os.environ.get("SLURM_JOB_ID"): - return None logs_dir = os.environ.get("LOGS") if not logs_dir: return None @@ -126,107 +110,51 @@ def get_gpu_name_from_slurm_logs() -> str | None: return None -def get_engine_from_slurm() -> str | None: - """ - Infer engine when running under SLURM from nvidia-smi -L output in LOGS/worker_0.log. - If that log contains GPU lines (from nvidia-smi -L), return 'velox-gpu'; otherwise - 'velox-cpu'. Returns None if not in SLURM (SLURM_JOB_ID and LOGS unset) or LOGS - not available. Does not use the Presto API for engine type. 
- """ - if not os.environ.get("SLURM_JOB_ID"): - return None - if not os.environ.get("LOGS"): - return None - gpu_name = get_gpu_name_from_slurm_logs() - if gpu_name is not None: - _debug(f"SLURM: nvidia-smi in logs -> velox-gpu ({gpu_name!r})") - return "velox-gpu" - # SLURM but no GPU in logs -> CPU native - _debug("SLURM: no nvidia-smi in logs -> velox-cpu") - return "velox-cpu" +_CLUSTER_TAG_TO_ENGINE = { + "native-gpu": "velox-gpu", + "native-cpu": "velox-cpu", + "java": "java", +} -def get_gpu_name() -> str | None: - """ - Return GPU model name. Under SLURM, read from LOGS/worker_.log if LOGS is set; - otherwise run nvidia-smi -L on the current host (e.g. Docker host). +def _get_engine(hostname: str, port: int) -> str: + """Determine worker engine type from the coordinator's cluster-tag. + + Queries /v1/cluster for the tag set in the coordinator's config + (e.g. 'native-gpu', 'native-cpu', 'java') and maps it to our + internal engine name. Raises RuntimeError if the tag is missing or + unrecognised. """ - gpu_from_logs = get_gpu_name_from_slurm_logs() - if gpu_from_logs is not None: - return gpu_from_logs - try: - result = subprocess.run( - ["nvidia-smi", "-L"], - capture_output=True, - text=True, - timeout=5, - check=False, + tag = get_cluster_tag(hostname, port) + if tag is None: + raise RuntimeError( + "Could not determine worker engine: cluster-tag is not set on the " + "coordinator. Ensure the coordinator's config.properties includes " + "cluster-tag=native-gpu, cluster-tag=native-cpu, or cluster-tag=java." ) - if result.returncode != 0 or not result.stdout.strip(): - return None - first_line = result.stdout.strip().split("\n")[0] - return _parse_gpu_name_from_text(first_line) - except (FileNotFoundError, subprocess.TimeoutExpired, Exception): - return None - + engine = _CLUSTER_TAG_TO_ENGINE.get(tag) + if engine is None: + raise RuntimeError( + f"Unrecognised cluster-tag '{tag}'. " + f"Expected one of: {', '.join(sorted(_CLUSTER_TAG_TO_ENGINE))}." + ) + _debug(f"cluster-tag={tag!r} -> engine={engine!r}") + return engine -def get_worker_image() -> str | None: - """Return worker image name from env (set by cluster/container setup).""" - return os.environ.get("WORKER_IMAGE") +def _get_gpu_name() -> str | None: + """Return GPU model name from worker log files. -def _current_username() -> str: - """Return the username of the user running the process (for Docker image tag matching).""" - return os.environ.get("USER") or os.environ.get("USERNAME") or getpass.getuser() or "" + Worker containers (Docker and SLURM) run nvidia-smi -L and write the + output to LOGS/worker_.log. Returns None when LOGS is unset or + no GPU info is found in the logs. + """ + return _get_gpu_name_from_worker_logs() -def get_engine_from_docker_containers(hostname: str, port: int) -> str | None: - """ - Infer engine from running Docker containers whose image has an expected name - (presto-native-worker-gpu, presto-native-worker-cpu, presto-java-worker) and - a tag equal to the username of the user running the benchmarks. Returns - 'velox-gpu', 'velox-cpu', 'java', or None. 
- """ - username = _current_username() - if not username: - _debug("docker: could not determine username, skip Docker engine detection") - return None - try: - result = subprocess.run( - ["docker", "ps", "--format", "{{.Image}}"], - capture_output=True, - text=True, - timeout=10, - check=False, - ) - if result.returncode != 0 or not result.stdout.strip(): - return None - images = [s.strip() for s in result.stdout.strip().splitlines() if s.strip()] - except (FileNotFoundError, subprocess.TimeoutExpired, Exception): - _debug("docker ps failed or not available") - return None - has_gpu = has_cpu = has_java = False - for image in images: - parts = image.rsplit(":", 1) - name = parts[0] if parts else "" - tag = parts[1] if len(parts) == 2 else "" - if tag != username: - continue - if "presto-native-worker-gpu" in name: - has_gpu = True - if "presto-native-worker-cpu" in name: - has_cpu = True - if "presto-java-worker" in name: - has_java = True - if has_gpu or has_cpu or has_java: - _debug(f"docker (image tag={username!r}): gpu={has_gpu}, cpu={has_cpu}, java={has_java}") - if has_gpu: - return "velox-gpu" - if has_cpu: - return "velox-cpu" - if has_java: - return "java" - return None +def _get_worker_image() -> str | None: + """Return worker image name from env (set by cluster/container setup).""" + return os.environ.get("WORKER_IMAGE") _ENGINE_TO_VARIANT = { @@ -236,7 +164,7 @@ def get_engine_from_docker_containers(hostname: str, port: int) -> str | None: } -def get_num_drivers(engine: str) -> int | None: +def _get_num_drivers(engine: str) -> int | None: """Read task.max-drivers-per-task from the generated worker config_native.properties. Falls back to None when the generated config directory does not exist @@ -277,65 +205,41 @@ def gather_run_context( port: int, user: str, schema_name: str, - scale_factor_override: str | int | None = None, ) -> dict: """ - Build run-config dict from context. Engine is taken only from Docker (running - images presto-native-worker-gpu/cpu or presto-java-worker with tag = username) - or SLURM (nvidia-smi in LOGS/worker_0.log). scale_factor_override takes - precedence over schema-derived scale factor. + Build run-config dict from context. Engine is determined from the + coordinator's cluster-tag (via /v1/cluster). Scale factor is read + from the metadata file next to the schema's table data. """ ctx = {} - # Scale factor: CLI override first, then from schema data source - if scale_factor_override is not None: - try: - ctx["scale_factor"] = int(scale_factor_override) - except (TypeError, ValueError): - ctx["scale_factor"] = scale_factor_override - else: - sf = get_scale_factor_from_schema(hostname, port, user, schema_name) - if sf is not None: - ctx["scale_factor"] = int(sf) if isinstance(sf, float) and sf == int(sf) else sf - - n_workers = get_node_count(hostname, port) - # Engine only from Docker (container names) or SLURM (nvidia-smi in LOGS). No API fallback. 
- engine_from_docker = get_engine_from_docker_containers(hostname, port) - engine_from_slurm = get_engine_from_slurm() if engine_from_docker is None else None - engine = engine_from_docker or engine_from_slurm - if engine_from_docker is not None: - _debug(f"using engine from Docker containers: {engine_from_docker}") - elif engine_from_slurm is not None: - _debug(f"using engine from SLURM (nvidia-smi in logs): {engine_from_slurm}") + sf = _get_scale_factor_from_schema(hostname, port, user, schema_name) + if sf is not None: + ctx["scale_factor"] = int(sf) if isinstance(sf, float) and sf == int(sf) else sf + + n_workers = _get_node_count(hostname, port) + engine = _get_engine(hostname, port) if n_workers is not None: ctx["n_workers"] = n_workers ctx["kind"] = "single-node" if n_workers == 1 else f"{n_workers}-node" - if engine == "velox-cpu": - ctx["gpu_count"] = 0 - ctx["gpu_name"] = "NA" - ctx["engine"] = "velox-cpu" - elif engine == "velox-gpu": + ctx["engine"] = engine + if engine == "velox-gpu": ctx["gpu_count"] = n_workers if n_workers is not None else 0 - gpu_name = get_gpu_name() + gpu_name = _get_gpu_name() if gpu_name is not None: ctx["gpu_name"] = gpu_name - ctx["engine"] = "velox-gpu" + elif engine == "velox-cpu": + ctx["gpu_count"] = 0 + ctx["gpu_name"] = "NA" elif engine == "java": ctx["gpu_count"] = 0 - ctx["engine"] = "java" - else: - raise RuntimeError( - "Could not determine worker engine. Run in Docker (worker images " - "presto-native-worker-gpu, presto-native-worker-cpu, or presto-java-worker with " - "tag equal to your username) or on SLURM with LOGS set and nvidia-smi -L in LOGS/worker_0.log." - ) - worker_image = get_worker_image() + worker_image = _get_worker_image() if worker_image is not None: ctx["worker_image"] = worker_image - num_drivers = get_num_drivers(engine) + num_drivers = _get_num_drivers(engine) if num_drivers is not None: ctx["num_drivers"] = num_drivers From a7d365c46a7aaa4ef9c65dbe55e8d663e2d0abfc Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Wed, 4 Mar 2026 15:33:54 -0800 Subject: [PATCH 09/24] address minor issues --- benchmark_reporting_tools/post_results.py | 2 +- common/testing/performance_benchmarks/conftest.py | 2 +- presto/docker/launch_presto_servers.sh | 2 +- presto/testing/performance_benchmarks/run_context.py | 7 +++++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py index f4bb09b4..406c059a 100644 --- a/benchmark_reporting_tools/post_results.py +++ b/benchmark_reporting_tools/post_results.py @@ -243,7 +243,7 @@ def _parse_args() -> argparse.Namespace: default=1, ) - return parser._parse_args() + return parser.parse_args() def _normalize_api_url(url: str) -> str: diff --git a/common/testing/performance_benchmarks/conftest.py b/common/testing/performance_benchmarks/conftest.py index 33206499..c01461d9 100644 --- a/common/testing/performance_benchmarks/conftest.py +++ b/common/testing/performance_benchmarks/conftest.py @@ -109,7 +109,7 @@ def pytest_sessionfinish(session, exitstatus): iterations = session.config.getoption("--iterations") data_location_option = pytest.data_location.option_name - data_location_key = pytest.data_location.option_name + data_location_key = pytest.data_location.key data_location_name = session.config.getoption(data_location_option) json_result = { BenchmarkKeys.CONTEXT_KEY: { diff --git a/presto/docker/launch_presto_servers.sh b/presto/docker/launch_presto_servers.sh index e390e9cc..38e6c7c8 100644 --- 
a/presto/docker/launch_presto_servers.sh +++ b/presto/docker/launch_presto_servers.sh @@ -2,7 +2,7 @@ set -e # Run ldconfig once -echo ldconfig +ldconfig LOGS_DIR="/opt/presto-server/logs" mkdir -p "${LOGS_DIR}" diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index 8fc52c09..d6030326 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -62,7 +62,8 @@ def _get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_na with open(meta_path) as f: data = json.load(f) return data.get("scale_factor") - except Exception: + except Exception as e: + _debug(f"scale factor lookup failed: {e}") return None finally: if conn is not None: @@ -149,7 +150,9 @@ def _get_gpu_name() -> str | None: output to LOGS/worker_.log. Returns None when LOGS is unset or no GPU info is found in the logs. """ - return _get_gpu_name_from_worker_logs() + gpu_name = _get_gpu_name_from_worker_logs() + _debug(f"gpu_name: {gpu_name!r}") + return gpu_name def _get_worker_image() -> str | None: From 951b04bba9a9e123df583010d224690cd9c75b27 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Wed, 4 Mar 2026 15:41:36 -0800 Subject: [PATCH 10/24] More minor fixes --- benchmark_reporting_tools/post_results.py | 5 ++++- presto/scripts/run_benchmark.sh | 2 +- presto/testing/performance_benchmarks/run_context.py | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py index 406c059a..ec01e0df 100644 --- a/benchmark_reporting_tools/post_results.py +++ b/benchmark_reporting_tools/post_results.py @@ -26,14 +26,17 @@ python benchmark_reporting_tools/post_results.py /path/to/benchmark/dir \ --sku-name PDX-H100 \ --storage-configuration-name pdx-lustre-sf-100 \ + --benchmark-name tpch \ + --identifier-hash abc123 \ --cache-state warm # With optional version info python benchmark_reporting_tools/post_results.py /path/to/benchmark/dir \ --sku-name PDX-H100 \ --storage-configuration-name pdx-lustre-sf-100 \ - --cache-state warm \ + --benchmark-name tpch \ --identifier-hash abc123 \ + --cache-state warm \ --version 1.0.0 \ --commit-hash def456 diff --git a/presto/scripts/run_benchmark.sh b/presto/scripts/run_benchmark.sh index fbea6f21..aacbff38 100755 --- a/presto/scripts/run_benchmark.sh +++ b/presto/scripts/run_benchmark.sh @@ -239,7 +239,7 @@ if [[ -n ${TAG} ]]; then fi if [[ "${PROFILE}" == "true" ]]; then - PYTEST_ARGS+=("--profile --profile-script-path $(readlink -f ./profiler_functions.sh)") + PYTEST_ARGS+=("--profile --profile-script-path $(readlink -f "${SCRIPT_DIR}/profiler_functions.sh")") fi if [[ "${METRICS}" == "true" ]]; then diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index d6030326..02693f54 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -237,6 +237,7 @@ def gather_run_context( ctx["gpu_name"] = "NA" elif engine == "java": ctx["gpu_count"] = 0 + ctx["gpu_name"] = "NA" worker_image = _get_worker_image() if worker_image is not None: From b6e633a4d050cb6e5d2f959c2ab61e017767f848 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Wed, 4 Mar 2026 16:02:53 -0800 Subject: [PATCH 11/24] pre-commit --- benchmark_reporting_tools/post_results.py | 4 +--- presto/docker/launch_presto_servers.sh | 2 ++ 
presto/testing/performance_benchmarks/metrics_collector.py | 1 - presto/testing/performance_benchmarks/run_context.py | 7 ++----- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py index ec01e0df..ebd7a583 100644 --- a/benchmark_reporting_tools/post_results.py +++ b/benchmark_reporting_tools/post_results.py @@ -499,9 +499,7 @@ async def _process_benchmark_dir( return 1 try: - results = BenchmarkResults.from_file( - result_file, benchmark_name=benchmark_metadata.benchmark - ) + results = BenchmarkResults.from_file(result_file, benchmark_name=benchmark_metadata.benchmark) except (ValueError, KeyError, json.JSONDecodeError, FileNotFoundError) as e: print(f" Error loading results: {e}", file=sys.stderr) return 1 diff --git a/presto/docker/launch_presto_servers.sh b/presto/docker/launch_presto_servers.sh index 38e6c7c8..59b1f3eb 100644 --- a/presto/docker/launch_presto_servers.sh +++ b/presto/docker/launch_presto_servers.sh @@ -1,4 +1,6 @@ #!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 set -e # Run ldconfig once diff --git a/presto/testing/performance_benchmarks/metrics_collector.py b/presto/testing/performance_benchmarks/metrics_collector.py index 25e9d854..59136c86 100644 --- a/presto/testing/performance_benchmarks/metrics_collector.py +++ b/presto/testing/performance_benchmarks/metrics_collector.py @@ -65,7 +65,6 @@ def collect_metrics(query_id: str, query_name: str, hostname: str, port: int, ou json.dump(combined, f, indent=2) - def _collect_worker_data(query_info: dict) -> tuple[dict, dict]: """Collect task details and metrics from each worker.""" # Group tasks by worker diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index 02693f54..4ae82237 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -44,9 +44,7 @@ def _get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_na """ conn = None try: - conn = prestodb.dbapi.connect( - host=hostname, port=port, user=user, catalog="hive", schema=schema_name - ) + conn = prestodb.dbapi.connect(host=hostname, port=port, user=user, catalog="hive", schema=schema_name) cursor = conn.cursor() tables = cursor.execute(f"SHOW TABLES IN {schema_name}").fetchall() if not tables: @@ -136,8 +134,7 @@ def _get_engine(hostname: str, port: int) -> str: engine = _CLUSTER_TAG_TO_ENGINE.get(tag) if engine is None: raise RuntimeError( - f"Unrecognised cluster-tag '{tag}'. " - f"Expected one of: {', '.join(sorted(_CLUSTER_TAG_TO_ENGINE))}." + f"Unrecognised cluster-tag '{tag}'. Expected one of: {', '.join(sorted(_CLUSTER_TAG_TO_ENGINE))}." 
) _debug(f"cluster-tag={tag!r} -> engine={engine!r}") return engine From a96a1f586784148efa54dcaf009ceacda7ee35b5 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Wed, 4 Mar 2026 16:16:53 -0800 Subject: [PATCH 12/24] remove worker_image from context --- presto/testing/performance_benchmarks/run_context.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index 4ae82237..6ee68643 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -152,11 +152,6 @@ def _get_gpu_name() -> str | None: return gpu_name -def _get_worker_image() -> str | None: - """Return worker image name from env (set by cluster/container setup).""" - return os.environ.get("WORKER_IMAGE") - - _ENGINE_TO_VARIANT = { "velox-gpu": "gpu", "velox-cpu": "cpu", @@ -236,10 +231,6 @@ def gather_run_context( ctx["gpu_count"] = 0 ctx["gpu_name"] = "NA" - worker_image = _get_worker_image() - if worker_image is not None: - ctx["worker_image"] = worker_image - num_drivers = _get_num_drivers(engine) if num_drivers is not None: ctx["num_drivers"] = num_drivers From db14057a7c1eb6c62e262f03088c87be910953b5 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Thu, 5 Mar 2026 13:14:40 -0800 Subject: [PATCH 13/24] remove WORKER_IMAGE_KEY --- common/testing/performance_benchmarks/benchmark_keys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/testing/performance_benchmarks/benchmark_keys.py b/common/testing/performance_benchmarks/benchmark_keys.py index 358ce6b2..dbfbccd3 100644 --- a/common/testing/performance_benchmarks/benchmark_keys.py +++ b/common/testing/performance_benchmarks/benchmark_keys.py @@ -25,7 +25,7 @@ class BenchmarkKeys(str, Enum): TIMESTAMP_KEY = "timestamp" NUM_WORKERS_KEY = "n_workers" GPU_NAME_KEY = "gpu_name" - WORKER_IMAGE_KEY = "worker_image" + ENGINE_KEY = "engine" KIND_KEY = "kind" GPU_COUNT_KEY = "gpu_count" From ce11e7a13a7704e653684986bbb4d530dbaf7da9 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Fri, 6 Mar 2026 11:27:07 -0800 Subject: [PATCH 14/24] Changed engine names to include 'presto' --- .../performance_benchmarks/run_context.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index 6ee68643..48f356cb 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 """ -Gather run configuration from execution context. Engine (java / velox-cpu / velox-gpu) +Gather run configuration from execution context. Engine (presto-java / presto-velox-cpu / presto-velox-gpu) is determined from the coordinator's cluster-tag (via /v1/cluster). GPU name is parsed from nvidia-smi output in worker log files (LOGS env var). Scale factor and n_workers come from schema and Presto /v1/node respectively. 
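For illustration, the detection path that these patches converge on is small enough to sketch in a few self-contained lines. The snippet below is not the patched module; it is a minimal Python sketch that mirrors presto_api.get_cluster_tag together with the renamed _CLUSTER_TAG_TO_ENGINE mapping, and the coordinator address in the usage line is a placeholder.

    # Hedged sketch of cluster-tag -> engine detection after this rename.
    # Mirrors presto_api.get_cluster_tag + _CLUSTER_TAG_TO_ENGINE; the
    # hostname/port used at the bottom are placeholders for illustration.
    import requests

    _CLUSTER_TAG_TO_ENGINE = {
        "native-gpu": "presto-velox-gpu",
        "native-cpu": "presto-velox-cpu",
        "java": "presto-java",
    }

    def detect_engine(hostname: str, port: int) -> str:
        # Ask the coordinator which cluster-tag its config.properties declares.
        resp = requests.get(f"http://{hostname}:{port}/v1/cluster", timeout=10)
        resp.raise_for_status()
        tag = resp.json().get("clusterTag")
        engine = _CLUSTER_TAG_TO_ENGINE.get(tag)
        if engine is None:
            raise RuntimeError(f"Unrecognised or missing cluster-tag: {tag!r}")
        return engine

    if __name__ == "__main__":
        # Placeholder coordinator address; prints e.g. 'presto-velox-gpu'.
        print(detect_engine("localhost", 8080))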
@@ -110,9 +110,9 @@ def _get_gpu_name_from_worker_logs() -> str | None: _CLUSTER_TAG_TO_ENGINE = { - "native-gpu": "velox-gpu", - "native-cpu": "velox-cpu", - "java": "java", + "native-gpu": "presto-velox-gpu", + "native-cpu": "presto-velox-cpu", + "java": "presto-java", } @@ -153,9 +153,9 @@ def _get_gpu_name() -> str | None: _ENGINE_TO_VARIANT = { - "velox-gpu": "gpu", - "velox-cpu": "cpu", - "java": "java", + "presto-velox-gpu": "gpu", + "presto-velox-cpu": "cpu", + "presto-java": "java", } @@ -219,15 +219,15 @@ def gather_run_context( ctx["kind"] = "single-node" if n_workers == 1 else f"{n_workers}-node" ctx["engine"] = engine - if engine == "velox-gpu": + if engine == "presto-velox-gpu": ctx["gpu_count"] = n_workers if n_workers is not None else 0 gpu_name = _get_gpu_name() if gpu_name is not None: ctx["gpu_name"] = gpu_name - elif engine == "velox-cpu": + elif engine == "presto-velox-cpu": ctx["gpu_count"] = 0 ctx["gpu_name"] = "NA" - elif engine == "java": + elif engine == "presto-java": ctx["gpu_count"] = 0 ctx["gpu_name"] = "NA" From 289dcd38354fcf91be1925caed55ac181cbe6044 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Mon, 9 Mar 2026 12:49:11 -0700 Subject: [PATCH 15/24] PR feedback --- .gitignore | 4 +- benchmark_reporting_tools/post_results.py | 7 +- .../performance_benchmarks/benchmark_keys.py | 2 +- .../performance_benchmarks/conftest.py | 41 +++++++---- presto/docker/docker-compose.common.yml | 5 ++ presto/docker/docker-compose.java.yml | 2 - presto/docker/docker-compose.native-cpu.yml | 2 - .../docker-compose.native-gpu.yml.jinja | 6 -- presto/docker/launch_coordinator.sh | 11 +++ presto/scripts/run_benchmark.sh | 5 +- presto/scripts/start_presto_helper.sh | 16 +++-- .../scripts/start_presto_helper_parse_args.sh | 12 ++++ presto/slurm/presto-nvl72/functions.sh | 2 +- .../presto-nvl72/run-presto-benchmarks.slurm | 7 +- .../performance_benchmarks/conftest.py | 51 +++----------- .../performance_benchmarks/run_context.py | 69 ++++++++----------- scripts/py_env_functions.sh | 2 +- 17 files changed, 116 insertions(+), 128 deletions(-) create mode 100755 presto/docker/launch_coordinator.sh diff --git a/.gitignore b/.gitignore index db748e49..0fba6429 100644 --- a/.gitignore +++ b/.gitignore @@ -23,10 +23,12 @@ presto/docker/config/generated*/ # Generated Presto Docker Compose files presto/docker/docker-compose/generated*/ +presto/docker/worker_logs presto/docker/worker_logs*/ # Slurm logs and results -presto/slurm/presto-nvl72/logs/ +presto/slurm/presto-nvl72/logs +presto/slurm/presto-nvl72/logs_*/ presto/slurm/presto-nvl72/*.err presto/slurm/presto-nvl72/*.out presto/slurm/presto-nvl72/result_dir/ diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py index ebd7a583..154f9c8a 100644 --- a/benchmark_reporting_tools/post_results.py +++ b/benchmark_reporting_tools/post_results.py @@ -20,7 +20,8 @@ │ ├── coordinator.config │ └── worker.config └── logs # optional - └── slurm-4575179.out + ├── worker_0.log + └── worker_1.log Usage: python benchmark_reporting_tools/post_results.py /path/to/benchmark/dir \ @@ -65,7 +66,7 @@ class BenchmarkMetadata: benchmark: str timestamp: datetime execution_number: int - n_workers: int + worker_count: int scale_factor: int gpu_count: int num_drivers: int @@ -379,7 +380,7 @@ def _build_submission_payload( "commit_hash": commit_hash, }, "run_at": benchmark_metadata.timestamp.isoformat(), - "node_count": benchmark_metadata.n_workers, + "node_count": benchmark_metadata.worker_count, "query_logs": query_logs, 
"concurrency_streams": concurrency_streams, "engine_config": engine_config.serialize() if engine_config else {}, diff --git a/common/testing/performance_benchmarks/benchmark_keys.py b/common/testing/performance_benchmarks/benchmark_keys.py index dbfbccd3..90f1ce35 100644 --- a/common/testing/performance_benchmarks/benchmark_keys.py +++ b/common/testing/performance_benchmarks/benchmark_keys.py @@ -23,7 +23,7 @@ class BenchmarkKeys(str, Enum): SCHEMA_NAME_KEY = "schema_name" # Run configuration (from run context; written to context in benchmark_result.json) TIMESTAMP_KEY = "timestamp" - NUM_WORKERS_KEY = "n_workers" + NUM_WORKERS_KEY = "worker_count" GPU_NAME_KEY = "gpu_name" ENGINE_KEY = "engine" diff --git a/common/testing/performance_benchmarks/conftest.py b/common/testing/performance_benchmarks/conftest.py index c01461d9..451859c5 100644 --- a/common/testing/performance_benchmarks/conftest.py +++ b/common/testing/performance_benchmarks/conftest.py @@ -105,22 +105,13 @@ def _write_section(terminalreporter, text_report, content, **kwargs): text_report.append(f" {content} ".center(120, sep)) -def pytest_sessionfinish(session, exitstatus): - iterations = session.config.getoption("--iterations") - - data_location_option = pytest.data_location.option_name - data_location_key = pytest.data_location.key - data_location_name = session.config.getoption(data_location_option) - json_result = { - BenchmarkKeys.CONTEXT_KEY: { - BenchmarkKeys.ITERATIONS_COUNT_KEY: iterations, - data_location_key: data_location_name, - }, - } +def build_and_write_benchmark_result(session, json_result): + """Compute aggregate timings and write benchmark_result.json. - tag = session.config.getoption("--tag") - if tag: - json_result[BenchmarkKeys.CONTEXT_KEY][BenchmarkKeys.TAG_KEY] = tag + Callers populate json_result[CONTEXT_KEY] with their own context + before calling this function. 
+ """ + iterations = session.config.getoption("--iterations") bench_output_dir = get_output_dir(session.config) bench_output_dir.mkdir(parents=True, exist_ok=True) @@ -162,6 +153,26 @@ def pytest_sessionfinish(session, exitstatus): file.write("\n") +def pytest_sessionfinish(session, exitstatus): + iterations = session.config.getoption("--iterations") + + data_location_option = pytest.data_location.option_name + data_location_key = pytest.data_location.key + data_location_name = session.config.getoption(data_location_option) + json_result = { + BenchmarkKeys.CONTEXT_KEY: { + BenchmarkKeys.ITERATIONS_COUNT_KEY: iterations, + data_location_key: data_location_name, + }, + } + + tag = session.config.getoption("--tag") + if tag: + json_result[BenchmarkKeys.CONTEXT_KEY][BenchmarkKeys.TAG_KEY] = tag + + build_and_write_benchmark_result(session, json_result) + + def get_output_dir(config): bench_output_dir = config.getoption("--output-dir") tag = config.getoption("--tag") diff --git a/presto/docker/docker-compose.common.yml b/presto/docker/docker-compose.common.yml index 281a1f68..f38297f5 100644 --- a/presto/docker/docker-compose.common.yml +++ b/presto/docker/docker-compose.common.yml @@ -4,6 +4,8 @@ services: - ./.hive_metastore:/var/lib/presto/data/hive/metastore - ../../common/testing/integration_tests/data:/var/lib/presto/data/hive/data/integration_test - ${PRESTO_DATA_DIR:-/dev/null}:/var/lib/presto/data/hive/data/user_data + - ./worker_logs:/opt/presto-server/logs + - ./launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro presto-base-java: extends: @@ -18,6 +20,9 @@ services: image: presto-coordinator:${PRESTO_IMAGE_TAG} ports: - 8080:8080 + volumes: + - ./launch_coordinator.sh:/opt/launch_coordinator.sh:ro + command: ["bash", "/opt/launch_coordinator.sh"] presto-base-native-worker: extends: diff --git a/presto/docker/docker-compose.java.yml b/presto/docker/docker-compose.java.yml index 014e4581..e728c291 100644 --- a/presto/docker/docker-compose.java.yml +++ b/presto/docker/docker-compose.java.yml @@ -20,7 +20,5 @@ services: - ./config/generated/java/etc_worker/config_java.properties:/opt/presto-server/etc/config.properties - ./config/generated/java/etc_worker/node.properties:/opt/presto-server/etc/node.properties - ./config/generated/java/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties - - ./worker_logs:/opt/presto-server/logs - - ./launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro depends_on: - presto-coordinator diff --git a/presto/docker/docker-compose.native-cpu.yml b/presto/docker/docker-compose.native-cpu.yml index cc5bc39d..b017dc81 100644 --- a/presto/docker/docker-compose.native-cpu.yml +++ b/presto/docker/docker-compose.native-cpu.yml @@ -25,5 +25,3 @@ services: - ./config/generated/cpu/etc_worker/node.properties:/opt/presto-server/etc/node.properties - ./config/generated/cpu/etc_worker/config_native.properties:/opt/presto-server/etc/config.properties - ./config/generated/cpu/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties - - ./worker_logs:/opt/presto-server/logs - - ./launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro diff --git a/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja b/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja index 0dbff164..50915495 100644 --- a/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja +++ b/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja @@ -49,8 +49,6 @@ 
x-presto-native-worker-gpu: &gpu_worker_base - ../../config/generated/gpu/etc_worker/node.properties:/opt/presto-server/etc/node.properties - ../../config/generated/gpu/etc_worker/config_native.properties:/opt/presto-server/etc/config.properties - ../../config/generated/gpu/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties - - ../../worker_logs:/opt/presto-server/logs - - ../../launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro services: presto-coordinator: @@ -89,8 +87,6 @@ services: - ../../config/generated/gpu/etc_worker_{{ gpu_id }}/node.properties:/opt/presto-server/etc/node.properties - ../../config/generated/gpu/etc_worker_{{ gpu_id }}/config_native.properties:/opt/presto-server/etc/config.properties - ../../config/generated/gpu/etc_worker_{{ gpu_id }}/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties - - ../../worker_logs:/opt/presto-server/logs - - ../../launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro {% endfor %} {% else %} # Combined GPU worker - runs {{ workers|length if workers else 1 }} presto servers in parallel, each pinned to a specific GPU @@ -130,8 +126,6 @@ services: CUDA_VISIBLE_DEVICES: 0 {%- endif %} volumes: - - ../../worker_logs:/opt/presto-server/logs - - ../../launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro {%- if workers %} # Mount all etc directories for workers {{ workers|join(', ') }} {%- for gpu_id in workers %} diff --git a/presto/docker/launch_coordinator.sh b/presto/docker/launch_coordinator.sh new file mode 100755 index 00000000..02203395 --- /dev/null +++ b/presto/docker/launch_coordinator.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -e + +LOGS_DIR="/opt/presto-server/logs" +mkdir -p "${LOGS_DIR}" +log_file="${LOGS_DIR}/coordinator.log" + +exec /opt/presto-server/bin/launcher run >> "${log_file}" 2>&1 diff --git a/presto/scripts/run_benchmark.sh b/presto/scripts/run_benchmark.sh index aacbff38..4f610354 100755 --- a/presto/scripts/run_benchmark.sh +++ b/presto/scripts/run_benchmark.sh @@ -8,7 +8,7 @@ set -e # Compute the directory where this script resides SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# Default LOGS to the Docker worker_logs directory so that run_context.py +# LOGS needs to reflect the hard-coded paths to the Docker worker_logs directory so that run_context.py # can find nvidia-smi output written by the worker containers. SLURM # environments set LOGS themselves, so this only takes effect for Docker. export LOGS="${LOGS:-${SCRIPT_DIR}/../docker/worker_logs}" @@ -48,9 +48,6 @@ ENVIRONMENT: PRESTO_BENCHMARK_DEBUG Set to 1 to print debug logs for worker/engine detection (e.g. node URIs, reachability, metrics). Use when engine is misdetected or the run fails. - LOGS Directory containing worker_.log files with nvidia-smi - output. Defaults to the Docker worker_logs directory. - SLURM environments set this to their own logs path. EXAMPLES: $0 -b tpch -s bench_sf100 diff --git a/presto/scripts/start_presto_helper.sh b/presto/scripts/start_presto_helper.sh index b51d3c9b..e45c002b 100755 --- a/presto/scripts/start_presto_helper.sh +++ b/presto/scripts/start_presto_helper.sh @@ -213,11 +213,17 @@ if (( ${#BUILD_TARGET_ARG[@]} )); then ${BUILD_TARGET_ARG[@]} fi -# Prepare a clean worker_logs directory so containers can write nvidia-smi -# output before starting the server. Stale logs from a prior run are removed. 
-WORKER_LOGS_DIR="${SCRIPT_DIR}/../docker/worker_logs" -rm -rf "${WORKER_LOGS_DIR}" -mkdir -p "${WORKER_LOGS_DIR}" +# Create a timestamped directory for this run's worker logs and point the +# worker_logs symlink at it. Old runs are preserved in their own directories. +# If worker_logs is a real directory (pre-symlink migration), rename it first. +WORKER_LOGS_DIR="${WORKER_LOGS_DIR:-${SCRIPT_DIR}/../docker/worker_logs}" +if [ -d "${WORKER_LOGS_DIR}" ] && [ ! -L "${WORKER_LOGS_DIR}" ]; then + mv "${WORKER_LOGS_DIR}" "${WORKER_LOGS_DIR}_$(date -r "${WORKER_LOGS_DIR}" +"%Y%m%dT%H%M%S" 2>/dev/null || date +"%Y%m%dT%H%M%S")_migrated" +fi +TIMESTAMPED_LOGS_DIR="${WORKER_LOGS_DIR}_$(date +"%Y%m%dT%H%M%S")" +mkdir -p "${TIMESTAMPED_LOGS_DIR}" +rm -f "${WORKER_LOGS_DIR}" +ln -sfn "${TIMESTAMPED_LOGS_DIR}" "${WORKER_LOGS_DIR}" # Start all services defined in the rendered docker-compose file. docker compose -f $DOCKER_COMPOSE_FILE_PATH up -d diff --git a/presto/scripts/start_presto_helper_parse_args.sh b/presto/scripts/start_presto_helper_parse_args.sh index 20f4889c..47838fc1 100644 --- a/presto/scripts/start_presto_helper_parse_args.sh +++ b/presto/scripts/start_presto_helper_parse_args.sh @@ -33,6 +33,8 @@ OPTIONS: --profile-args Arguments to pass to the profiler when it launches the Presto server. This will override the default arguments. --overwrite-config Force config to be regenerated (will overwrite local changes). + --worker-logs-dir Directory for worker log files (default: /../docker/worker_logs). + Each startup creates a timestamped subdirectory and symlinks this path to it. --sccache Enable sccache distributed compilation caching (requires auth files in ~/.sccache-auth/). Run scripts/sccache/setup_sccache_auth.sh first. --sccache-version Install a specific version of rapidsai/sccache, e.g. "0.12.0-rapids.1" @@ -68,6 +70,7 @@ export PROFILE=OFF export NUM_WORKERS=1 export KVIKIO_THREADS=8 export VCPU_PER_WORKER="" +WORKER_LOGS_DIR="" ENABLE_SCCACHE=false SCCACHE_AUTH_DIR="${SCCACHE_AUTH_DIR:-$HOME/.sccache-auth}" SCCACHE_ENABLE_DIST=false @@ -172,6 +175,15 @@ parse_args() { OVERWRITE_CONFIG=true shift ;; + --worker-logs-dir) + if [[ -n $2 ]]; then + WORKER_LOGS_DIR=$2 + shift 2 + else + echo "Error: --worker-logs-dir requires a value" + exit 1 + fi + ;; --sccache) ENABLE_SCCACHE=true shift diff --git a/presto/slurm/presto-nvl72/functions.sh b/presto/slurm/presto-nvl72/functions.sh index 016c35b3..e233a128 100755 --- a/presto/slurm/presto-nvl72/functions.sh +++ b/presto/slurm/presto-nvl72/functions.sh @@ -360,7 +360,7 @@ function generate_json() { benchmark: $benchmark, timestamp: $timestamp, execution_number: 1, - n_workers: ($num_workers | tonumber), + worker_count: ($num_workers | tonumber), scale_factor: ($scale_factor | tonumber), gpu_count: ($num_workers | tonumber), num_drivers: ($num_drivers | tonumber), diff --git a/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm b/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm index 0d785912..108b8b77 100755 --- a/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm +++ b/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm @@ -40,7 +40,10 @@ export COORD_IMAGE export VT_ROOT="$(cd -- "${SCRIPT_DIR}/../../.." 
>/dev/null 2>&1 && pwd -P)" export DATA=/mnt/data/tpch-rs export IMAGE_DIR=/mnt/data/images/presto -export LOGS=$SCRIPT_DIR/logs +TIMESTAMPED_LOGS="${SCRIPT_DIR}/logs_$(date +"%Y%m%dT%H%M%S")" +mkdir -p "${TIMESTAMPED_LOGS}" +ln -sfn "${TIMESTAMPED_LOGS}" "${SCRIPT_DIR}/logs" +export LOGS="${SCRIPT_DIR}/logs" export VARIANT_TYPE=gpu export CONFIGS=$VT_ROOT/presto/docker/config/generated/$VARIANT_TYPE @@ -92,7 +95,7 @@ echo "Total workers: $NUM_WORKERS (${NUM_NODES} nodes × ${NUM_GPUS_PER_NODE} GP echo "Single node execution: $SINGLE_NODE_EXECUTION" echo "========================================" -# Create necessary directories +# Ensure logs directory exists (symlink target was already created above) mkdir -p ${LOGS} # Launch the job script diff --git a/presto/testing/performance_benchmarks/conftest.py b/presto/testing/performance_benchmarks/conftest.py index ad79247d..0008d996 100644 --- a/presto/testing/performance_benchmarks/conftest.py +++ b/presto/testing/performance_benchmarks/conftest.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 -import json from datetime import datetime, timezone from ..common.conftest import * # noqa: F403 @@ -19,8 +18,7 @@ ) from common.testing.performance_benchmarks.conftest import ( DataLocation, - compute_aggregate_timings, - get_output_dir, + build_and_write_benchmark_result, pytest_terminal_summary, # noqa: F401 ) @@ -88,46 +86,13 @@ def pytest_sessionfinish(session, exitstatus): for key, value in run_config.items(): json_result[BenchmarkKeys.CONTEXT_KEY][key] = value - bench_output_dir = get_output_dir(session.config) - bench_output_dir.mkdir(parents=True, exist_ok=True) - - if iterations > 1: - AGG_KEYS = [ - BenchmarkKeys.AVG_KEY, - BenchmarkKeys.MIN_KEY, - BenchmarkKeys.MAX_KEY, - BenchmarkKeys.MEDIAN_KEY, - BenchmarkKeys.GMEAN_KEY, - BenchmarkKeys.LUKEWARM_KEY, - ] - else: - AGG_KEYS = [BenchmarkKeys.LUKEWARM_KEY] - if not hasattr(session, "benchmark_results"): - return - benchmark_types = list(session.benchmark_results.keys()) - json_result[BenchmarkKeys.CONTEXT_KEY]["benchmark"] = ( - benchmark_types[0] if len(benchmark_types) == 1 else benchmark_types - ) - for benchmark_type, result in session.benchmark_results.items(): - compute_aggregate_timings(result) - json_result[benchmark_type] = { - BenchmarkKeys.AGGREGATE_TIMES_KEY: {}, - BenchmarkKeys.RAW_TIMES_KEY: result[BenchmarkKeys.RAW_TIMES_KEY], - BenchmarkKeys.FAILED_QUERIES_KEY: result[BenchmarkKeys.FAILED_QUERIES_KEY], - } - json_agg_timings = json_result[benchmark_type][BenchmarkKeys.AGGREGATE_TIMES_KEY] - for agg_key in AGG_KEYS: - json_agg_timings[agg_key] = {} - - for query_id, agg_timings in result[BenchmarkKeys.AGGREGATE_TIMES_KEY].items(): - if agg_timings: - assert len(AGG_KEYS) == len(agg_timings) - for i, agg_key in enumerate(AGG_KEYS): - json_agg_timings[agg_key][query_id] = agg_timings[i] - - with open(f"{bench_output_dir}/benchmark_result.json", "w") as file: - json.dump(json_result, file, indent=2) - file.write("\n") + if hasattr(session, "benchmark_results"): + benchmark_types = list(session.benchmark_results.keys()) + json_result[BenchmarkKeys.CONTEXT_KEY]["benchmark"] = ( + benchmark_types[0] if len(benchmark_types) == 1 else benchmark_types + ) + + build_and_write_benchmark_result(session, json_result) def pytest_configure(config): diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index 48f356cb..06df8d56 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ 
b/presto/testing/performance_benchmarks/run_context.py @@ -51,10 +51,7 @@ def _get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_na return None table = tables[0][0] location = test_utils.get_table_external_location(schema_name, table, cursor) - # metadata.json is typically in parent of table data dir - meta_path = Path(location).parent / "metadata.json" - if not meta_path.is_file(): - meta_path = (Path(location) / ".." / "metadata.json").resolve() + meta_path = (Path(location).parent / "metadata.json").resolve() if not meta_path.is_file(): return None with open(meta_path) as f: @@ -152,46 +149,34 @@ def _get_gpu_name() -> str | None: return gpu_name -_ENGINE_TO_VARIANT = { - "presto-velox-gpu": "gpu", - "presto-velox-cpu": "cpu", - "presto-java": "java", -} - +def _get_num_drivers() -> int | None: + """Parse task.max-drivers-per-task from worker log files. -def _get_num_drivers(engine: str) -> int | None: - """Read task.max-drivers-per-task from the generated worker config_native.properties. - - Falls back to None when the generated config directory does not exist - (e.g. pre-configured cluster or SLURM without local config generation). + The Presto native server logs its configuration at startup. + Returns None when LOGS is unset or the property is not found. """ - variant = _ENGINE_TO_VARIANT.get(engine) - if variant is None: + logs_dir = os.environ.get("LOGS") + if not logs_dir: return None - - config_file = ( - Path(__file__).resolve().parent - / ".." - / ".." - / "docker" - / "config" - / "generated" - / variant - / "etc_worker" - / "config_native.properties" - ) - if not config_file.is_file(): - _debug(f"num_drivers: config not found at {config_file}") + log_file = Path(logs_dir) / "worker_0.log" + if not log_file.is_file(): + _debug(f"num_drivers: log not found at {log_file}") return None - - for line in config_file.read_text().splitlines(): - line = line.strip() - if line.startswith("task.max-drivers-per-task="): - try: - return int(line.split("=", 1)[1]) - except (ValueError, IndexError): - pass - _debug(f"num_drivers: task.max-drivers-per-task not found in {config_file}") + try: + with open(log_file) as f: + for line in f: + line = line.strip() + if "task.max-drivers-per-task" in line: + parts = line.split("task.max-drivers-per-task=", 1) + if len(parts) == 2: + value = parts[1].split()[0].rstrip(",") + try: + return int(value) + except ValueError: + pass + except Exception: + pass + _debug(f"num_drivers: task.max-drivers-per-task not found in {log_file}") return None @@ -215,7 +200,7 @@ def gather_run_context( engine = _get_engine(hostname, port) if n_workers is not None: - ctx["n_workers"] = n_workers + ctx["worker_count"] = n_workers ctx["kind"] = "single-node" if n_workers == 1 else f"{n_workers}-node" ctx["engine"] = engine @@ -231,7 +216,7 @@ def gather_run_context( ctx["gpu_count"] = 0 ctx["gpu_name"] = "NA" - num_drivers = _get_num_drivers(engine) + num_drivers = _get_num_drivers() if num_drivers is not None: ctx["num_drivers"] = num_drivers diff --git a/scripts/py_env_functions.sh b/scripts/py_env_functions.sh index 0f991057..daca387b 100755 --- a/scripts/py_env_functions.sh +++ b/scripts/py_env_functions.sh @@ -81,7 +81,7 @@ function init_python_virtual_env() { function delete_python_virtual_env() { local venv_dir=${1:-".venv"} - if [ -n "$LOCAL_CONDA_INIT" ] && command -v conda &> /dev/null; then + if [ -n "${LOCAL_CONDA_INIT:-}" ] && command -v conda &> /dev/null; then echo "Deactivating conda environment" conda deactivate LOCAL_CONDA_INIT="" 
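With the context assembly now shared between the common and Presto conftest modules, the context section of benchmark_result.json carries both the pytest options and the gathered run configuration. The sketch below shows an illustrative shape of that section after the worker_count rename; the values are made up, and the exact key set (for example whether tag, gpu_name, or num_drivers appear) depends on the actual run, so treat it as an example rather than a schema.

    # Illustrative "context" section of benchmark_result.json (values are
    # examples only; keys come from gather_run_context plus the conftest
    # additions such as iterations_count, schema_name, tag, and benchmark).
    import json

    example_context = {
        "iterations_count": 3,
        "schema_name": "bench_sf100",
        "tag": "sf100_gpu_run",            # only present when --tag is given
        "timestamp": "2026-03-09T23:00:00Z",
        "scale_factor": 100,
        "worker_count": 4,
        "kind": "4-node",
        "engine": "presto-velox-gpu",
        "gpu_count": 4,
        "gpu_name": "NVIDIA GH200",        # parsed from LOGS/worker_0.log
        "num_drivers": 8,
        "benchmark": "tpch",
    }

    print(json.dumps(example_context, indent=2))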
From e5aae733a4ea5f4c5c6004b12698d7872497f394 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Mon, 9 Mar 2026 16:12:29 -0700 Subject: [PATCH 16/24] More PR feedback --- presto/scripts/run_benchmark.sh | 22 ++++++++++--------- presto/scripts/start_presto_helper.sh | 19 ++++++++-------- .../scripts/start_presto_helper_parse_args.sh | 10 ++++----- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/presto/scripts/run_benchmark.sh b/presto/scripts/run_benchmark.sh index 4f610354..d411c60e 100755 --- a/presto/scripts/run_benchmark.sh +++ b/presto/scripts/run_benchmark.sh @@ -8,10 +8,10 @@ set -e # Compute the directory where this script resides SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# LOGS needs to reflect the hard-coded paths to the Docker worker_logs directory so that run_context.py -# can find nvidia-smi output written by the worker containers. SLURM -# environments set LOGS themselves, so this only takes effect for Docker. -export LOGS="${LOGS:-${SCRIPT_DIR}/../docker/worker_logs}" +# LOGS points to the directory where server log files (including nvidia-smi +# output) are written so that run_context.py can parse GPU info. SLURM +# environments set LOGS themselves; this default covers Docker. +export LOGS="${LOGS:-${SCRIPT_DIR}/presto_logs}" source "${SCRIPT_DIR}/presto_connection_defaults.sh" @@ -43,11 +43,9 @@ OPTIONS: --skip-drop-cache Skip dropping system caches before each benchmark query (dropped by default). -m, --metrics Collect detailed metrics from Presto REST API after each query. Metrics are stored in query-specific directories. - -ENVIRONMENT: - PRESTO_BENCHMARK_DEBUG Set to 1 to print debug logs for worker/engine detection - (e.g. node URIs, reachability, metrics). - Use when engine is misdetected or the run fails. + -v, --verbose Print debug logs for worker/engine detection + (e.g. node URIs, cluster-tag, GPU model). + Use when engine is misdetected or the run fails. EXAMPLES: $0 -b tpch -s bench_sf100 @@ -56,7 +54,7 @@ EXAMPLES: $0 -b tpch -s bench_sf100 -t gh200_cpu_sf100 $0 -b tpch -s bench_sf100 --profile $0 -b tpch -s bench_sf100 --metrics - PRESTO_BENCHMARK_DEBUG=1 $0 -b tpch -s bench_sf100 + $0 -b tpch -s bench_sf100 --verbose $0 -h EOF @@ -171,6 +169,10 @@ parse_args() { METRICS=true shift ;; + -v|--verbose) + export PRESTO_BENCHMARK_DEBUG=1 + shift + ;; *) echo "Error: Unknown argument $1" print_help diff --git a/presto/scripts/start_presto_helper.sh b/presto/scripts/start_presto_helper.sh index e45c002b..c45b4f1d 100755 --- a/presto/scripts/start_presto_helper.sh +++ b/presto/scripts/start_presto_helper.sh @@ -213,17 +213,18 @@ if (( ${#BUILD_TARGET_ARG[@]} )); then ${BUILD_TARGET_ARG[@]} fi -# Create a timestamped directory for this run's worker logs and point the -# worker_logs symlink at it. Old runs are preserved in their own directories. -# If worker_logs is a real directory (pre-symlink migration), rename it first. -WORKER_LOGS_DIR="${WORKER_LOGS_DIR:-${SCRIPT_DIR}/../docker/worker_logs}" -if [ -d "${WORKER_LOGS_DIR}" ] && [ ! -L "${WORKER_LOGS_DIR}" ]; then - mv "${WORKER_LOGS_DIR}" "${WORKER_LOGS_DIR}_$(date -r "${WORKER_LOGS_DIR}" +"%Y%m%dT%H%M%S" 2>/dev/null || date +"%Y%m%dT%H%M%S")_migrated" +# Create a timestamped directory for this run's logs and point the +# logs symlink at it. Old runs are preserved in their own directories. +# If the target is a real directory (pre-symlink migration), rename it first. +LOGS_DIR="${LOGS_DIR:-${SCRIPT_DIR}/presto_logs}" +if [ -d "${LOGS_DIR}" ] && [ ! 
-L "${LOGS_DIR}" ]; then + mv "${LOGS_DIR}" "${LOGS_DIR}_$(date -r "${LOGS_DIR}" +"%Y%m%dT%H%M%S" 2>/dev/null || date +"%Y%m%dT%H%M%S")_migrated" fi -TIMESTAMPED_LOGS_DIR="${WORKER_LOGS_DIR}_$(date +"%Y%m%dT%H%M%S")" +TIMESTAMPED_LOGS_DIR="${LOGS_DIR}_$(date +"%Y%m%dT%H%M%S")" mkdir -p "${TIMESTAMPED_LOGS_DIR}" -rm -f "${WORKER_LOGS_DIR}" -ln -sfn "${TIMESTAMPED_LOGS_DIR}" "${WORKER_LOGS_DIR}" +rm -f "${LOGS_DIR}" +ln -sfn "${TIMESTAMPED_LOGS_DIR}" "${LOGS_DIR}" +export LOGS_DIR # Start all services defined in the rendered docker-compose file. docker compose -f $DOCKER_COMPOSE_FILE_PATH up -d diff --git a/presto/scripts/start_presto_helper_parse_args.sh b/presto/scripts/start_presto_helper_parse_args.sh index 47838fc1..9d156545 100644 --- a/presto/scripts/start_presto_helper_parse_args.sh +++ b/presto/scripts/start_presto_helper_parse_args.sh @@ -33,7 +33,7 @@ OPTIONS: --profile-args Arguments to pass to the profiler when it launches the Presto server. This will override the default arguments. --overwrite-config Force config to be regenerated (will overwrite local changes). - --worker-logs-dir Directory for worker log files (default: /../docker/worker_logs). + --logs-dir Directory for server log files (default: /presto_logs). Each startup creates a timestamped subdirectory and symlinks this path to it. --sccache Enable sccache distributed compilation caching (requires auth files in ~/.sccache-auth/). Run scripts/sccache/setup_sccache_auth.sh first. @@ -70,7 +70,7 @@ export PROFILE=OFF export NUM_WORKERS=1 export KVIKIO_THREADS=8 export VCPU_PER_WORKER="" -WORKER_LOGS_DIR="" +LOGS_DIR="" ENABLE_SCCACHE=false SCCACHE_AUTH_DIR="${SCCACHE_AUTH_DIR:-$HOME/.sccache-auth}" SCCACHE_ENABLE_DIST=false @@ -175,12 +175,12 @@ parse_args() { OVERWRITE_CONFIG=true shift ;; - --worker-logs-dir) + --logs-dir) if [[ -n $2 ]]; then - WORKER_LOGS_DIR=$2 + LOGS_DIR=$2 shift 2 else - echo "Error: --worker-logs-dir requires a value" + echo "Error: --logs-dir requires a value" exit 1 fi ;; From 44b6cc05548fffbd81b50f50798e9bf1483bad0c Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Mon, 9 Mar 2026 16:53:08 -0700 Subject: [PATCH 17/24] PR updates --- .gitignore | 4 +- benchmark_reporting_tools/post_results.py | 173 +++++++++++++++--- .../performance_benchmarks/conftest.py | 10 + presto/docker/docker-compose.common.yml | 2 +- presto/docker/launch_presto_servers.sh | 4 +- presto/slurm/presto-nvl72/functions.sh | 2 +- .../performance_benchmarks/conftest.py | 40 +--- .../performance_benchmarks/run_context.py | 33 +--- 8 files changed, 175 insertions(+), 93 deletions(-) diff --git a/.gitignore b/.gitignore index 0fba6429..92196f8d 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,8 @@ presto/docker/config/generated*/ # Generated Presto Docker Compose files presto/docker/docker-compose/generated*/ -presto/docker/worker_logs -presto/docker/worker_logs*/ +presto/scripts/presto_logs +presto/scripts/presto_logs*/ # Slurm logs and results presto/slurm/presto-nvl72/logs diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py index 154f9c8a..df597a77 100644 --- a/benchmark_reporting_tools/post_results.py +++ b/benchmark_reporting_tools/post_results.py @@ -11,35 +11,34 @@ """ CLI for posting Velox benchmark results to the API. -This script operates on the parsed output of the benchmark runner. 
The -expected directory structure is: - - ../benchmark-root/ - ├── benchmark_result.json - ├── configs # optional - │ ├── coordinator.config - │ └── worker.config - └── logs # optional - ├── worker_0.log - └── worker_1.log +The script reads benchmark_result.json from the input directory. +Engine configs and worker logs are automatically loaded from the +velox-testing repo by detecting the engine variant from the +benchmark results context. Paths can be overridden with +--config-dir and --logs-dir. + +Default locations (derived from the repo root and detected variant): + configs: presto/docker/config/generated/{variant}/ + logs: presto/scripts/presto_logs/ (symlink to latest run) Usage: - python benchmark_reporting_tools/post_results.py /path/to/benchmark/dir \ + # Auto-detect configs/logs from the repo (default): + python benchmark_reporting_tools/post_results.py /path/to/benchmark_output \ --sku-name PDX-H100 \ --storage-configuration-name pdx-lustre-sf-100 \ --benchmark-name tpch \ --identifier-hash abc123 \ --cache-state warm - # With optional version info - python benchmark_reporting_tools/post_results.py /path/to/benchmark/dir \ + # Override with explicit paths: + python benchmark_reporting_tools/post_results.py /path/to/benchmark_output \ + --config-dir /custom/path/to/configs \ + --logs-dir /custom/path/to/logs \ --sku-name PDX-H100 \ --storage-configuration-name pdx-lustre-sf-100 \ --benchmark-name tpch \ --identifier-hash abc123 \ - --cache-state warm \ - --version 1.0.0 \ - --commit-hash def456 + --cache-state warm Environment variables: BENCHMARK_API_URL: API URL @@ -59,6 +58,31 @@ import httpx +_ENGINE_TO_VARIANT = { + "presto-velox-gpu": "gpu", + "presto-velox-cpu": "cpu", + "presto-java": "java", +} + + +def _repo_root() -> Path: + """Return the velox-testing repo root (parent of benchmark_reporting_tools/).""" + return Path(__file__).resolve().parent.parent + + +def _default_config_dir(variant: str) -> Path | None: + """Derive the generated config directory for a given variant.""" + d = _repo_root() / "presto" / "docker" / "config" / "generated" / variant + return d if d.is_dir() else None + + +def _default_logs_dir() -> Path | None: + """Return the presto_logs directory, resolving the symlink to the latest run.""" + link = _repo_root() / "presto" / "scripts" / "presto_logs" + if link.exists(): + return link.resolve() + return None + @dataclasses.dataclass(kw_only=True) class BenchmarkMetadata: @@ -137,19 +161,60 @@ def _parse_config_file(file_path: Path) -> dict[str, str]: return config +def _find_config_file(configs_dir: Path, subdir: str, legacy_name: str, variant: str | None = None) -> Path | None: + """Locate a config file in either the generated repo layout or legacy flat layout. + + Repo layout: {configs_dir}/{subdir}/config_native.properties (or config_java.properties) + Legacy layout: {configs_dir}/{legacy_name} + + When variant is known, the correct properties file is selected directly + (config_java for 'java', config_native for 'gpu'/'cpu'). 
+ """ + sub = configs_dir / subdir + if sub.is_dir(): + if variant == "java": + candidate = sub / "config_java.properties" + elif variant in ("gpu", "cpu"): + candidate = sub / "config_native.properties" + else: + candidate = None + if candidate and candidate.is_file(): + return candidate + for fallback in sorted(sub.glob("config_*.properties")): + return fallback + legacy = configs_dir / legacy_name + if legacy.is_file(): + return legacy + return None + + @dataclasses.dataclass class EngineConfig: coordinator: dict[str, str] worker: dict[str, str] @classmethod - def from_dir(cls, configs_dir: Path) -> "EngineConfig": + def from_dir(cls, configs_dir: Path, variant: str | None = None) -> "EngineConfig": """Load engine configuration from a configs directory. - Expects coordinator.config and worker.config files. + Supports two layouts: + 1. Generated repo layout: etc_coordinator/config_*.properties, + etc_worker/config_*.properties + 2. Legacy flat layout: coordinator.config, worker.config + + When variant is provided ('gpu', 'cpu', or 'java'), selects the + matching properties file (config_native vs config_java). """ - coordinator_config = _parse_config_file(configs_dir / "coordinator.config") - worker_config = _parse_config_file(configs_dir / "worker.config") + coord_file = _find_config_file(configs_dir, "etc_coordinator", "coordinator.config", variant) + worker_file = _find_config_file(configs_dir, "etc_worker", "worker.config", variant) + if coord_file is None or worker_file is None: + raise FileNotFoundError( + f"Could not find coordinator/worker config files in {configs_dir}. " + "Expected either etc_coordinator/config_*.properties + etc_worker/config_*.properties, " + "or coordinator.config + worker.config." + ) + coordinator_config = _parse_config_file(coord_file) + worker_config = _parse_config_file(worker_file) return cls(coordinator=coordinator_config, worker=worker_config) def serialize(self) -> dict: @@ -246,6 +311,20 @@ def _parse_args() -> argparse.Namespace: type=int, default=1, ) + parser.add_argument( + "--config-dir", + type=str, + default=None, + help="Override config directory. Default: auto-detected from the engine variant " + "in benchmark_result.json → presto/docker/config/generated/{variant}/.", + ) + parser.add_argument( + "--logs-dir", + type=str, + default=None, + help="Override server logs directory. Default: presto/scripts/presto_logs/ " + "(symlink to the latest timestamped run).", + ) return parser.parse_args() @@ -479,6 +558,8 @@ async def _process_benchmark_dir( upload_logs: bool = True, benchmark_definition_name: str, concurrency_streams: int = 1, + config_dir: Path | None = None, + logs_dir: Path | None = None, ) -> int: """Process a benchmark directory and post results to API. 
@@ -505,27 +586,57 @@ async def _process_benchmark_dir( print(f" Error loading results: {e}", file=sys.stderr) return 1 - if (benchmark_dir / "configs").exists(): - print(" Loading engine config...", file=sys.stderr) - engine_config = EngineConfig.from_dir(benchmark_dir / "configs") + # Resolve config directory: explicit override → auto-detect from variant + effective_config_dir = config_dir + variant = _ENGINE_TO_VARIANT.get(benchmark_metadata.engine) + if effective_config_dir is None: + if variant: + effective_config_dir = _default_config_dir(variant) + if effective_config_dir: + print(f" Auto-detected variant '{variant}' → config dir: {effective_config_dir}", file=sys.stderr) + else: + print(f" Auto-detected variant '{variant}' but config dir does not exist.", file=sys.stderr) + else: + print(f" Could not map engine '{benchmark_metadata.engine}' to a variant.", file=sys.stderr) + + if effective_config_dir and effective_config_dir.exists(): + print(f" Loading engine config from {effective_config_dir}...", file=sys.stderr) + try: + engine_config = EngineConfig.from_dir(effective_config_dir, variant=variant) + except FileNotFoundError as e: + print(f" Warning: could not load engine config: {e}", file=sys.stderr) + engine_config = None else: - print(" No engine config found.", file=sys.stderr) + if effective_config_dir: + print(f" Warning: config directory does not exist: {effective_config_dir}", file=sys.stderr) + else: + print(" Warning: no config directory found. Use --config-dir to specify one.", file=sys.stderr) engine_config = None - # Upload log files as assets + # Resolve logs directory: explicit override → auto-detect from repo + effective_logs_dir = logs_dir + if effective_logs_dir is None: + effective_logs_dir = _default_logs_dir() + if effective_logs_dir: + print(f" Auto-detected logs dir: {effective_logs_dir}", file=sys.stderr) + asset_ids = None - if upload_logs: + if upload_logs and effective_logs_dir and effective_logs_dir.exists(): if dry_run: - log_files = sorted(benchmark_dir.glob("*.log")) + log_files = sorted(effective_logs_dir.glob("*.log")) print( - f" [DRY RUN] Would upload {len(log_files)} log file(s): {[f.name for f in log_files]}", file=sys.stderr + f" [DRY RUN] Would upload {len(log_files)} log file(s) from {effective_logs_dir}: " + f"{[f.name for f in log_files]}", + file=sys.stderr, ) else: try: - asset_ids = await _upload_log_files(benchmark_dir, api_url, api_key, timeout) + asset_ids = await _upload_log_files(effective_logs_dir, api_url, api_key, timeout) except (RuntimeError, httpx.RequestError) as e: print(f" Error uploading logs: {e}", file=sys.stderr) return 1 + elif upload_logs: + print(" No logs directory found; skipping log upload.", file=sys.stderr) # Build submission payload try: @@ -620,6 +731,8 @@ async def main() -> int: upload_logs=args.upload_logs, benchmark_definition_name=args.benchmark_name, concurrency_streams=args.concurrency_streams, + config_dir=Path(args.config_dir) if args.config_dir else None, + logs_dir=Path(args.logs_dir) if args.logs_dir else None, ) return result diff --git a/common/testing/performance_benchmarks/conftest.py b/common/testing/performance_benchmarks/conftest.py index 451859c5..d1720b48 100644 --- a/common/testing/performance_benchmarks/conftest.py +++ b/common/testing/performance_benchmarks/conftest.py @@ -170,6 +170,16 @@ def pytest_sessionfinish(session, exitstatus): if tag: json_result[BenchmarkKeys.CONTEXT_KEY][BenchmarkKeys.TAG_KEY] = tag + if hasattr(session, "run_context"): + for key, value in 
session.run_context.items(): + json_result[BenchmarkKeys.CONTEXT_KEY][key] = value + + if hasattr(session, "benchmark_results"): + benchmark_types = list(session.benchmark_results.keys()) + json_result[BenchmarkKeys.CONTEXT_KEY]["benchmark"] = ( + benchmark_types[0] if len(benchmark_types) == 1 else benchmark_types + ) + build_and_write_benchmark_result(session, json_result) diff --git a/presto/docker/docker-compose.common.yml b/presto/docker/docker-compose.common.yml index f38297f5..a74d4e99 100644 --- a/presto/docker/docker-compose.common.yml +++ b/presto/docker/docker-compose.common.yml @@ -4,7 +4,7 @@ services: - ./.hive_metastore:/var/lib/presto/data/hive/metastore - ../../common/testing/integration_tests/data:/var/lib/presto/data/hive/data/integration_test - ${PRESTO_DATA_DIR:-/dev/null}:/var/lib/presto/data/hive/data/user_data - - ./worker_logs:/opt/presto-server/logs + - ${LOGS_DIR:-../scripts/presto_logs}:/opt/presto-server/logs - ./launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro presto-base-java: diff --git a/presto/docker/launch_presto_servers.sh b/presto/docker/launch_presto_servers.sh index 59b1f3eb..6e99ba67 100644 --- a/presto/docker/launch_presto_servers.sh +++ b/presto/docker/launch_presto_servers.sh @@ -13,7 +13,7 @@ if [ $# -eq 0 ]; then # Single worker mode. Use WORKER_ID env var for log filename (defaults to 0). local_id="${WORKER_ID:-0}" log_file="${LOGS_DIR}/worker_${local_id}.log" - nvidia-smi -L > "${log_file}" 2>&1 || true + echo "GPU Name: $(nvidia-smi --query-gpu=name --format=csv,noheader | head -n 1)" > "${log_file}" 2>&1 || true presto_server --etc-dir="/opt/presto-server/etc/" >> "${log_file}" 2>&1 & else # Multi-worker single-container mode. Each GPU ID is an argument. @@ -22,7 +22,7 @@ else ( export CUDA_VISIBLE_DEVICES=$gpu_id log_file="${LOGS_DIR}/worker_${worker_id}.log" - nvidia-smi -L > "${log_file}" 2>&1 || true + echo "GPU Name: $(nvidia-smi --query-gpu=name --format=csv,noheader | head -n 1)" > "${log_file}" 2>&1 || true exec presto_server --etc-dir="/opt/presto-server/etc${gpu_id}" >> "${log_file}" 2>&1 ) & worker_id=$((worker_id + 1)) diff --git a/presto/slurm/presto-nvl72/functions.sh b/presto/slurm/presto-nvl72/functions.sh index e233a128..dd2664e7 100755 --- a/presto/slurm/presto-nvl72/functions.sh +++ b/presto/slurm/presto-nvl72/functions.sh @@ -193,7 +193,7 @@ ${VT_ROOT}/.hive_metastore:/var/lib/presto/data/hive/metastore \ --container-env=LD_LIBRARY_PATH="$CUDF_LIB:$LD_LIBRARY_PATH" \ --container-env=GLOG_vmodule=IntraNodeTransferRegistry=3,ExchangeOperator=3 \ --container-env=GLOG_logtostderr=1 \ --- /bin/bash -c "export CUDA_VISIBLE_DEVICES=${gpu_id}; echo \"CUDA_VISIBLE_DEVICES=\$CUDA_VISIBLE_DEVICES\"; echo \"--- Environment Variables ---\"; set | grep -E 'UCX_|CUDA_VISIBLE_DEVICES'; nvidia-smi -L; /usr/bin/presto_server --etc-dir=/opt/presto-server/etc" > ${LOGS}/worker_${worker_id}.log 2>&1 & +-- /bin/bash -c "export CUDA_VISIBLE_DEVICES=${gpu_id}; echo \"CUDA_VISIBLE_DEVICES=\$CUDA_VISIBLE_DEVICES\"; echo \"--- Environment Variables ---\"; set | grep -E 'UCX_|CUDA_VISIBLE_DEVICES'; echo \"GPU Name: \$(nvidia-smi --query-gpu=name --format=csv,noheader | head -n 1)\"; /usr/bin/presto_server --etc-dir=/opt/presto-server/etc" > ${LOGS}/worker_${worker_id}.log 2>&1 & } function copy_hive_metastore { diff --git a/presto/testing/performance_benchmarks/conftest.py b/presto/testing/performance_benchmarks/conftest.py index 0008d996..5fd67d73 100644 --- a/presto/testing/performance_benchmarks/conftest.py +++ 
b/presto/testing/performance_benchmarks/conftest.py @@ -18,7 +18,7 @@ ) from common.testing.performance_benchmarks.conftest import ( DataLocation, - build_and_write_benchmark_result, + pytest_sessionfinish, # noqa: F401 pytest_terminal_summary, # noqa: F401 ) @@ -48,10 +48,11 @@ def pytest_addoption(parser): parser.addoption("--skip-drop-cache", action="store_true", default=False) -def _build_run_config(session): - """ - Build run-config dict from execution context (Presto nodes, nvidia-smi, schema - data source, env). Used for the context section in benchmark_result.json. +def pytest_sessionstart(session): + """Gather Presto-specific run context and attach it to the session. + + The common pytest_sessionfinish merges session.run_context into the + benchmark_result.json context section. """ hostname = session.config.getoption("--hostname") port = session.config.getoption("--port") @@ -65,34 +66,7 @@ def _build_run_config(session): schema_name=schema_name, ) ctx["timestamp"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - return ctx - - -def pytest_sessionfinish(session, exitstatus): - iterations = session.config.getoption("--iterations") - schema_name = session.config.getoption("--schema-name") - json_result = { - BenchmarkKeys.CONTEXT_KEY: { - BenchmarkKeys.ITERATIONS_COUNT_KEY: iterations, - BenchmarkKeys.SCHEMA_NAME_KEY: schema_name, - }, - } - - tag = session.config.getoption("--tag") - if tag: - json_result[BenchmarkKeys.CONTEXT_KEY][BenchmarkKeys.TAG_KEY] = tag - - run_config = _build_run_config(session) - for key, value in run_config.items(): - json_result[BenchmarkKeys.CONTEXT_KEY][key] = value - - if hasattr(session, "benchmark_results"): - benchmark_types = list(session.benchmark_results.keys()) - json_result[BenchmarkKeys.CONTEXT_KEY]["benchmark"] = ( - benchmark_types[0] if len(benchmark_types) == 1 else benchmark_types - ) - - build_and_write_benchmark_result(session, json_result) + session.run_context = ctx def pytest_configure(config): diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index 06df8d56..af627fa3 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -4,13 +4,12 @@ """ Gather run configuration from execution context. Engine (presto-java / presto-velox-cpu / presto-velox-gpu) is determined from the coordinator's cluster-tag (via /v1/cluster). GPU name is -parsed from nvidia-smi output in worker log files (LOGS env var). Scale factor and -n_workers come from schema and Presto /v1/node respectively. +read from worker log files (LOGS env var). Scale factor and n_workers come from +schema and Presto /v1/node respectively. 
""" import json import os -import re from pathlib import Path import prestodb @@ -18,7 +17,7 @@ from ..common import test_utils from .presto_api import get_cluster_tag, get_nodes -# Set PRESTO_BENCHMARK_DEBUG=1 or DEBUG=1 to print engine-detection debug logs +# Enabled by run_benchmark.sh --verbose (sets PRESTO_BENCHMARK_DEBUG=1) _DEBUG = os.environ.get("PRESTO_BENCHMARK_DEBUG") or os.environ.get("DEBUG") @@ -68,25 +67,12 @@ def _get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_na pass -def _parse_gpu_name_from_text(line: str) -> str | None: - """Parse a single line of nvidia-smi -L output; return GPU model name or None.""" - line = line.strip() - if not line: - return None - match = re.search(r"GPU \d+:\s*(.+?)(?:\s*\(UUID)", line) - if match: - return match.group(1).strip() - if line.startswith("GPU "): - return line.split(":", 1)[-1].strip() - return None - - def _get_gpu_name_from_worker_logs() -> str | None: - """Parse GPU model from nvidia-smi output in worker log files. + """Read GPU model name from worker log files. - Both Docker and SLURM workers write nvidia-smi -L output to - LOGS/worker_.log before starting the server. All workers are - assumed to have the same GPU model; only worker_0.log is read. + Both Docker and SLURM workers write a 'GPU Name: ' line + to LOGS/worker_.log before starting the server. All workers + are assumed to have the same GPU; only worker_0.log is read. Returns None when LOGS is unset or no matching line is found. """ logs_dir = os.environ.get("LOGS") @@ -98,9 +84,8 @@ def _get_gpu_name_from_worker_logs() -> str | None: try: with open(log_file) as f: for line in f: - gpu_name = _parse_gpu_name_from_text(line) - if gpu_name: - return gpu_name + if line.startswith("GPU Name:"): + return line.split(":", 1)[1].strip() except Exception: pass return None From 961ed8c428ca36c987659b7596506815ab0dede1 Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Mon, 9 Mar 2026 17:54:41 -0700 Subject: [PATCH 18/24] self review --- benchmark_reporting_tools/post_results.py | 72 +++++++++++-------- presto/docker/docker-compose.common.yml | 2 +- presto/docker/launch_presto_servers.sh | 6 +- presto/scripts/run_benchmark.sh | 1 - .../performance_benchmarks/run_context.py | 18 +++-- 5 files changed, 60 insertions(+), 39 deletions(-) diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py index df597a77..105e2c19 100644 --- a/benchmark_reporting_tools/post_results.py +++ b/benchmark_reporting_tools/post_results.py @@ -51,6 +51,7 @@ import dataclasses import json import os +import re import sys from datetime import datetime from pathlib import Path @@ -86,23 +87,21 @@ def _default_logs_dir() -> Path | None: @dataclasses.dataclass(kw_only=True) class BenchmarkMetadata: - kind: str benchmark: str timestamp: datetime - execution_number: int - worker_count: int - scale_factor: int - gpu_count: int - num_drivers: int - worker_image: str | None = None - gpu_name: str engine: str + kind: str | None = None + execution_number: int = 1 + worker_count: int | None = None + scale_factor: int | None = None + gpu_count: int | None = None + num_drivers: int | None = None + gpu_name: str | None = None @classmethod - def from_result_file(cls, file_path: Path) -> "BenchmarkMetadata": - """Extract metadata from the 'context' section of benchmark_result.json.""" - raw = json.loads(file_path.read_text()) - data = raw["context"] + def from_parsed(cls, raw: dict) -> "BenchmarkMetadata": + """Extract metadata from the 'context' section of 
a parsed benchmark_result.json.""" + data = dict(raw["context"]) data["timestamp"] = datetime.fromisoformat(data["timestamp"].replace("Z", "+00:00")) known_fields = {f.name for f in dataclasses.fields(cls)} @@ -123,11 +122,9 @@ class BenchmarkResults: failed_queries: dict[str, str] @classmethod - def from_file(cls, file_path: Path, benchmark_name: str) -> "BenchmarkResults": - data = json.loads(file_path.read_text()) - - if benchmark_name not in data.keys(): - raise KeyError(f"Expected '{benchmark_name}' key in {file_path}, got: {sorted(data.keys())}") + def from_parsed(cls, data: dict, benchmark_name: str) -> "BenchmarkResults": + if benchmark_name not in data: + raise KeyError(f"Expected '{benchmark_name}' key, got: {sorted(data.keys())}") raw_times_ms = data[benchmark_name]["raw_times_ms"] failed_queries = data[benchmark_name]["failed_queries"] @@ -394,8 +391,14 @@ def _build_submission_payload( raw_times = benchmark_results.raw_times_ms failed_queries = benchmark_results.failed_queries - # Sort query names for consistent ordering (Q1, Q2, ..., Q22) - query_names = sorted(raw_times.keys(), key=lambda x: int(x[1:])) + def _query_sort_key(name: str): + stripped = name.lstrip("Qq") + match = re.match(r"(\d+)(.*)", stripped) + if match: + return (int(match.group(1)), match.group(2)) + return (float("inf"), name) + + query_names = sorted(raw_times.keys(), key=_query_sort_key) for query_name in query_names: times = raw_times[query_name] @@ -437,14 +440,17 @@ def _build_submission_payload( ) execution_order += 1 - # Build extra info from metadata + # Build extra info from metadata, omitting None values extra_info = { - "kind": benchmark_metadata.kind, - "gpu_count": benchmark_metadata.gpu_count, - "gpu_name": benchmark_metadata.gpu_name, - "num_drivers": benchmark_metadata.num_drivers, - "worker_image": benchmark_metadata.worker_image, - "execution_number": benchmark_metadata.execution_number, + k: v + for k, v in { + "kind": benchmark_metadata.kind, + "gpu_count": benchmark_metadata.gpu_count, + "gpu_name": benchmark_metadata.gpu_name, + "num_drivers": benchmark_metadata.num_drivers, + "execution_number": benchmark_metadata.execution_number, + }.items() + if v is not None } return { @@ -575,14 +581,20 @@ async def _process_benchmark_dir( result_file = benchmark_dir / "benchmark_result.json" try: - benchmark_metadata = BenchmarkMetadata.from_result_file(result_file) - except (ValueError, KeyError, json.JSONDecodeError, FileNotFoundError) as e: + raw = json.loads(result_file.read_text()) + except (json.JSONDecodeError, FileNotFoundError) as e: + print(f" Error reading {result_file}: {e}", file=sys.stderr) + return 1 + + try: + benchmark_metadata = BenchmarkMetadata.from_parsed(raw) + except (ValueError, KeyError) as e: print(f" Error loading metadata: {e}", file=sys.stderr) return 1 try: - results = BenchmarkResults.from_file(result_file, benchmark_name=benchmark_metadata.benchmark) - except (ValueError, KeyError, json.JSONDecodeError, FileNotFoundError) as e: + results = BenchmarkResults.from_parsed(raw, benchmark_name=benchmark_metadata.benchmark) + except (ValueError, KeyError) as e: print(f" Error loading results: {e}", file=sys.stderr) return 1 diff --git a/presto/docker/docker-compose.common.yml b/presto/docker/docker-compose.common.yml index a74d4e99..10b54a09 100644 --- a/presto/docker/docker-compose.common.yml +++ b/presto/docker/docker-compose.common.yml @@ -4,7 +4,7 @@ services: - ./.hive_metastore:/var/lib/presto/data/hive/metastore - 
../../common/testing/integration_tests/data:/var/lib/presto/data/hive/data/integration_test - ${PRESTO_DATA_DIR:-/dev/null}:/var/lib/presto/data/hive/data/user_data - - ${LOGS_DIR:-../scripts/presto_logs}:/opt/presto-server/logs + - ${LOGS_DIR:?LOGS_DIR must be set to an absolute path}:/opt/presto-server/logs - ./launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro presto-base-java: diff --git a/presto/docker/launch_presto_servers.sh b/presto/docker/launch_presto_servers.sh index 6e99ba67..5a208a04 100644 --- a/presto/docker/launch_presto_servers.sh +++ b/presto/docker/launch_presto_servers.sh @@ -13,7 +13,8 @@ if [ $# -eq 0 ]; then # Single worker mode. Use WORKER_ID env var for log filename (defaults to 0). local_id="${WORKER_ID:-0}" log_file="${LOGS_DIR}/worker_${local_id}.log" - echo "GPU Name: $(nvidia-smi --query-gpu=name --format=csv,noheader | head -n 1)" > "${log_file}" 2>&1 || true + gpu_name="$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -n 1)" + echo "GPU Name: ${gpu_name:-unknown}" > "${log_file}" presto_server --etc-dir="/opt/presto-server/etc/" >> "${log_file}" 2>&1 & else # Multi-worker single-container mode. Each GPU ID is an argument. @@ -22,7 +23,8 @@ else ( export CUDA_VISIBLE_DEVICES=$gpu_id log_file="${LOGS_DIR}/worker_${worker_id}.log" - echo "GPU Name: $(nvidia-smi --query-gpu=name --format=csv,noheader | head -n 1)" > "${log_file}" 2>&1 || true + gpu_name="$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -n 1)" + echo "GPU Name: ${gpu_name:-unknown}" > "${log_file}" exec presto_server --etc-dir="/opt/presto-server/etc${gpu_id}" >> "${log_file}" 2>&1 ) & worker_id=$((worker_id + 1)) diff --git a/presto/scripts/run_benchmark.sh b/presto/scripts/run_benchmark.sh index d411c60e..6b231f3c 100755 --- a/presto/scripts/run_benchmark.sh +++ b/presto/scripts/run_benchmark.sh @@ -55,7 +55,6 @@ EXAMPLES: $0 -b tpch -s bench_sf100 --profile $0 -b tpch -s bench_sf100 --metrics $0 -b tpch -s bench_sf100 --verbose - $0 -h EOF } diff --git a/presto/testing/performance_benchmarks/run_context.py b/presto/testing/performance_benchmarks/run_context.py index af627fa3..b04e3da0 100644 --- a/presto/testing/performance_benchmarks/run_context.py +++ b/presto/testing/performance_benchmarks/run_context.py @@ -10,6 +10,7 @@ import json import os +import re from pathlib import Path import prestodb @@ -18,7 +19,7 @@ from .presto_api import get_cluster_tag, get_nodes # Enabled by run_benchmark.sh --verbose (sets PRESTO_BENCHMARK_DEBUG=1) -_DEBUG = os.environ.get("PRESTO_BENCHMARK_DEBUG") or os.environ.get("DEBUG") +_DEBUG = os.environ.get("PRESTO_BENCHMARK_DEBUG", "") == "1" or os.environ.get("DEBUG", "") == "1" def _debug(msg: str) -> None: @@ -36,11 +37,17 @@ def _get_node_count(hostname: str, port: int) -> int | None: return n +_SAFE_IDENTIFIER_RE = re.compile(r"^[a-zA-Z0-9_]+$") + + def _get_scale_factor_from_schema(hostname: str, port: int, user: str, schema_name: str) -> int | float | None: """ Resolve scale factor from the schema's data source (metadata.json next to table data). Uses same logic as test_utils.get_scale_factor but without pytest request. 
""" + if not _SAFE_IDENTIFIER_RE.match(schema_name): + _debug(f"schema_name {schema_name!r} contains unsafe characters, skipping scale factor lookup") + return None conn = None try: conn = prestodb.dbapi.connect(host=hostname, port=port, user=user, catalog="hive", schema=schema_name) @@ -86,8 +93,8 @@ def _get_gpu_name_from_worker_logs() -> str | None: for line in f: if line.startswith("GPU Name:"): return line.split(":", 1)[1].strip() - except Exception: - pass + except Exception as e: + _debug(f"failed to read GPU name from worker logs: {e}") return None @@ -159,8 +166,8 @@ def _get_num_drivers() -> int | None: return int(value) except ValueError: pass - except Exception: - pass + except Exception as e: + _debug(f"failed to parse num_drivers from worker logs: {e}") _debug(f"num_drivers: task.max-drivers-per-task not found in {log_file}") return None @@ -205,6 +212,7 @@ def gather_run_context( if num_drivers is not None: ctx["num_drivers"] = num_drivers + # Always 1 for single-run invocations; reserved for future multi-execution support. ctx["execution_number"] = 1 return ctx From 8c8d608596191ce8a475a6f1612cc4525a430c0c Mon Sep 17 00:00:00 2001 From: misiugodfrey Date: Mon, 9 Mar 2026 19:05:07 -0700 Subject: [PATCH 19/24] Refactor logging --- .gitignore | 6 +- presto/docker/docker-compose.common.yml | 8 ++- .../docker-compose.native-gpu.yml.jinja | 3 + presto/docker/launch_coordinator.sh | 3 +- presto/docker/launch_presto_servers.sh | 5 +- presto/scripts/run_benchmark.sh | 7 +-- presto/scripts/start_presto_helper.sh | 25 ++++---- presto/slurm/presto-nvl72/README.md | 12 ++-- presto/slurm/presto-nvl72/functions.sh | 27 ++++---- presto/slurm/presto-nvl72/launch-run.sh | 8 ++- .../presto-nvl72/run-presto-benchmarks.sh | 4 +- .../presto-nvl72/run-presto-benchmarks.slurm | 17 +++--- presto/slurm/presto-nvl72/run_multiple.sh | 4 +- .../performance_benchmarks/run_context.py | 61 +++++++++++++------ scripts/py_env_functions.sh | 7 +++ scripts/run_py_script.sh | 13 ++-- 16 files changed, 128 insertions(+), 82 deletions(-) diff --git a/.gitignore b/.gitignore index 92196f8d..d86b7454 100644 --- a/.gitignore +++ b/.gitignore @@ -23,12 +23,10 @@ presto/docker/config/generated*/ # Generated Presto Docker Compose files presto/docker/docker-compose/generated*/ -presto/scripts/presto_logs -presto/scripts/presto_logs*/ +presto/scripts/presto_logs/ # Slurm logs and results -presto/slurm/presto-nvl72/logs -presto/slurm/presto-nvl72/logs_*/ +presto/slurm/presto-nvl72/logs/ presto/slurm/presto-nvl72/*.err presto/slurm/presto-nvl72/*.out presto/slurm/presto-nvl72/result_dir/ diff --git a/presto/docker/docker-compose.common.yml b/presto/docker/docker-compose.common.yml index 10b54a09..5b4fd06a 100644 --- a/presto/docker/docker-compose.common.yml +++ b/presto/docker/docker-compose.common.yml @@ -4,7 +4,7 @@ services: - ./.hive_metastore:/var/lib/presto/data/hive/metastore - ../../common/testing/integration_tests/data:/var/lib/presto/data/hive/data/integration_test - ${PRESTO_DATA_DIR:-/dev/null}:/var/lib/presto/data/hive/data/user_data - - ${LOGS_DIR:?LOGS_DIR must be set to an absolute path}:/opt/presto-server/logs + - ${LOGS_DIR:-/dev/null}:/opt/presto-server/logs - ./launch_presto_servers.sh:/opt/launch_presto_servers.sh:ro presto-base-java: @@ -20,9 +20,12 @@ services: image: presto-coordinator:${PRESTO_IMAGE_TAG} ports: - 8080:8080 + environment: + - RUN_TIMESTAMP=${RUN_TIMESTAMP:-} volumes: - ./launch_coordinator.sh:/opt/launch_coordinator.sh:ro - command: ["bash", "/opt/launch_coordinator.sh"] + 
entrypoint: ["bash", "/opt/launch_coordinator.sh"] + command: [] presto-base-native-worker: extends: @@ -32,3 +35,4 @@ services: dockerfile: velox-testing/presto/docker/native_build.dockerfile environment: - GLOG_logtostderr=1 + - RUN_TIMESTAMP=${RUN_TIMESTAMP:-} diff --git a/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja b/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja index 50915495..0997ffc7 100644 --- a/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja +++ b/presto/docker/docker-compose/template/docker-compose.native-gpu.yml.jinja @@ -72,6 +72,7 @@ services: NVIDIA_VISIBLE_DEVICES: all PROFILE: ${PROFILE} PROFILE_ARGS: ${PROFILE_ARGS} + RUN_TIMESTAMP: ${RUN_TIMESTAMP:-} UCX_LOG_LEVEL: info #debug UCX_RNDV_PIPELINE_ERROR_HANDLING: y UCX_TLS: tcp,cuda_copy,cuda_ipc @@ -99,6 +100,7 @@ services: NVIDIA_VISIBLE_DEVICES: all PROFILE: ${PROFILE} PROFILE_ARGS: ${PROFILE_ARGS} + RUN_TIMESTAMP: ${RUN_TIMESTAMP:-} UCX_LOG_LEVEL: info #debug UCX_RNDV_PIPELINE_ERROR_HANDLING: y UCX_TLS: tcp,cuda_copy,cuda_ipc @@ -114,6 +116,7 @@ services: NVIDIA_VISIBLE_DEVICES: all PROFILE: ${PROFILE} PROFILE_ARGS: ${PROFILE_ARGS} + RUN_TIMESTAMP: ${RUN_TIMESTAMP:-} UCX_LOG_LEVEL: info #debug UCX_RNDV_PIPELINE_ERROR_HANDLING: y UCX_TLS: tcp,cuda_copy,cuda_ipc diff --git a/presto/docker/launch_coordinator.sh b/presto/docker/launch_coordinator.sh index 02203395..1ac7537e 100755 --- a/presto/docker/launch_coordinator.sh +++ b/presto/docker/launch_coordinator.sh @@ -6,6 +6,7 @@ set -e LOGS_DIR="/opt/presto-server/logs" mkdir -p "${LOGS_DIR}" -log_file="${LOGS_DIR}/coordinator.log" +RUN_TIMESTAMP="${RUN_TIMESTAMP:-$(date +"%Y%m%dT%H%M%S")}" +log_file="${LOGS_DIR}/coordinator_${RUN_TIMESTAMP}.log" exec /opt/presto-server/bin/launcher run >> "${log_file}" 2>&1 diff --git a/presto/docker/launch_presto_servers.sh b/presto/docker/launch_presto_servers.sh index 5a208a04..913f1118 100644 --- a/presto/docker/launch_presto_servers.sh +++ b/presto/docker/launch_presto_servers.sh @@ -8,11 +8,12 @@ ldconfig LOGS_DIR="/opt/presto-server/logs" mkdir -p "${LOGS_DIR}" +RUN_TIMESTAMP="${RUN_TIMESTAMP:-$(date +"%Y%m%dT%H%M%S")}" if [ $# -eq 0 ]; then # Single worker mode. Use WORKER_ID env var for log filename (defaults to 0). local_id="${WORKER_ID:-0}" - log_file="${LOGS_DIR}/worker_${local_id}.log" + log_file="${LOGS_DIR}/worker_${local_id}_${RUN_TIMESTAMP}.log" gpu_name="$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -n 1)" echo "GPU Name: ${gpu_name:-unknown}" > "${log_file}" presto_server --etc-dir="/opt/presto-server/etc/" >> "${log_file}" 2>&1 & @@ -22,7 +23,7 @@ else for gpu_id in "$@"; do ( export CUDA_VISIBLE_DEVICES=$gpu_id - log_file="${LOGS_DIR}/worker_${worker_id}.log" + log_file="${LOGS_DIR}/worker_${worker_id}_${RUN_TIMESTAMP}.log" gpu_name="$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -n 1)" echo "GPU Name: ${gpu_name:-unknown}" > "${log_file}" exec presto_server --etc-dir="/opt/presto-server/etc${gpu_id}" >> "${log_file}" 2>&1 diff --git a/presto/scripts/run_benchmark.sh b/presto/scripts/run_benchmark.sh index 6b231f3c..f54aca7e 100755 --- a/presto/scripts/run_benchmark.sh +++ b/presto/scripts/run_benchmark.sh @@ -8,10 +8,9 @@ set -e # Compute the directory where this script resides SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# LOGS points to the directory where server log files (including nvidia-smi -# output) are written so that run_context.py can parse GPU info. 
SLURM -# environments set LOGS themselves; this default covers Docker. -export LOGS="${LOGS:-${SCRIPT_DIR}/presto_logs}" +# LOGS_DIR points to the directory where server log files (including nvidia-smi +# output) are written so that run_context.py can parse GPU info. +export LOGS_DIR="${LOGS_DIR:-${SCRIPT_DIR}/presto_logs}" source "${SCRIPT_DIR}/presto_connection_defaults.sh" diff --git a/presto/scripts/start_presto_helper.sh b/presto/scripts/start_presto_helper.sh index 73e89aee..9145f53f 100755 --- a/presto/scripts/start_presto_helper.sh +++ b/presto/scripts/start_presto_helper.sh @@ -140,6 +140,18 @@ else echo "Internal error: unexpected VARIANT_TYPE value: $VARIANT_TYPE" fi +# Set up LOGS_DIR before any docker compose commands (stop, build, or up) +# since docker-compose.common.yml requires it via ${LOGS_DIR:?...}. +LOGS_DIR="${LOGS_DIR:-${SCRIPT_DIR}/presto_logs}" +[ -L "${LOGS_DIR}" ] && rm -f "${LOGS_DIR}" +mkdir -p "${LOGS_DIR}" +if compgen -G "${LOGS_DIR}/*.log" > /dev/null 2>&1; then + mkdir -p "${LOGS_DIR}/archive" + mv "${LOGS_DIR}"/*.log "${LOGS_DIR}/archive/" +fi +export RUN_TIMESTAMP="$(date +"%Y%m%dT%H%M%S")" +export LOGS_DIR + "${SCRIPT_DIR}/stop_presto.sh" "${SCRIPT_DIR}/generate_presto_config.sh" @@ -220,18 +232,5 @@ if (( ${#BUILD_TARGET_ARG[@]} )); then ${BUILD_TARGET_ARG[@]} fi -# Create a timestamped directory for this run's logs and point the -# logs symlink at it. Old runs are preserved in their own directories. -# If the target is a real directory (pre-symlink migration), rename it first. -LOGS_DIR="${LOGS_DIR:-${SCRIPT_DIR}/presto_logs}" -if [ -d "${LOGS_DIR}" ] && [ ! -L "${LOGS_DIR}" ]; then - mv "${LOGS_DIR}" "${LOGS_DIR}_$(date -r "${LOGS_DIR}" +"%Y%m%dT%H%M%S" 2>/dev/null || date +"%Y%m%dT%H%M%S")_migrated" -fi -TIMESTAMPED_LOGS_DIR="${LOGS_DIR}_$(date +"%Y%m%dT%H%M%S")" -mkdir -p "${TIMESTAMPED_LOGS_DIR}" -rm -f "${LOGS_DIR}" -ln -sfn "${TIMESTAMPED_LOGS_DIR}" "${LOGS_DIR}" -export LOGS_DIR - # Start all services defined in the rendered docker-compose file. 
docker compose -f $DOCKER_COMPOSE_FILE_PATH up -d diff --git a/presto/slurm/presto-nvl72/README.md b/presto/slurm/presto-nvl72/README.md index b2fb24e3..007f3bcd 100644 --- a/presto/slurm/presto-nvl72/README.md +++ b/presto/slurm/presto-nvl72/README.md @@ -56,7 +56,7 @@ Key variables: - NUM_ITERATIONS: required by the job; launcher defaults to 1 (`-i/--iterations` to override) - NUM_NODES: derived from Slurm allocation; provided via `-n/--nodes` to launcher - REPO_ROOT: auto-detected from script location -- LOGS: `${SCRIPT_DIR}/logs` by default +- LOGS_DIR: `${SCRIPT_DIR}/logs` by default (log files are timestamped; old logs archived to `logs/archive/`) - IMAGE_DIR, DATA, CONFIGS: see below or override via environment if needed Other defaults: @@ -82,10 +82,10 @@ squeue -u $USER # Monitor job output tail -f presto-tpch-run_n_sf_i_.out -# Check logs during execution -tail -f logs/coord.log -tail -f logs/cli.log -tail -f logs/worker_0.log +# Check logs during execution (filenames include a run timestamp) +tail -f logs/coord_*.log +tail -f logs/cli_*.log +tail -f logs/worker_0_*.log ``` ## Coordinator IP and Web UI @@ -124,7 +124,7 @@ Results are saved to: ### Coordinator fails to start Check coordinator logs: ```bash -cat logs/coord.log +cat logs/coord_*.log ``` ### Workers not registering diff --git a/presto/slurm/presto-nvl72/functions.sh b/presto/slurm/presto-nvl72/functions.sh index dd2664e7..7c4144df 100755 --- a/presto/slurm/presto-nvl72/functions.sh +++ b/presto/slurm/presto-nvl72/functions.sh @@ -9,7 +9,7 @@ function setup { [ -z "$SLURM_JOB_PARTITION" ] && echo "required argument '--partition' not specified" && exit 1 [ -z "$SLURM_NNODES" ] && echo "required argument '--nodes' not specified" && exit 1 [ -z "$IMAGE_DIR" ] && echo "IMAGE_DIR must be set" && exit 1 - [ -z "$LOGS" ] && echo "LOGS must be set" && exit 1 + [ -z "$LOGS_DIR" ] && echo "LOGS_DIR must be set" && exit 1 [ -z "$CONFIGS" ] && echo "CONFIGS must be set" && exit 1 [ -z "$NUM_NODES" ] && echo "NUM_NODES must be set" && exit 1 [ -z "$NUM_GPUS_PER_NODE" ] && echo "NUM_GPUS_PER_NODE env variable must be set" && exit 1 @@ -58,11 +58,11 @@ function validate_environment_preconditions { # Execute script through the coordinator image (used for coordinator and cli executables) function run_coord_image { [ $# -ne 2 ] && echo_error "$0 expected one argument for '