From ba0b0331d8f70519056f3b5f7e3011020decff50 Mon Sep 17 00:00:00 2001 From: Angelic Ebbers Date: Wed, 14 Jan 2026 11:27:36 -1000 Subject: [PATCH] Improved the Memory Map Burst Write test by changing warm-up strategy and generating statistics over multiple runs in seperate JVM processes. --- .../MemoryMapWriteBurst.java | 163 ++++++++++++------ image processing timing/run_burst_stats.sh | 137 +++++++++++++++ 2 files changed, 245 insertions(+), 55 deletions(-) create mode 100755 image processing timing/run_burst_stats.sh diff --git a/image processing timing/MemoryMapWriteBurst.java b/image processing timing/MemoryMapWriteBurst.java index 0b531a1..7d9cea9 100644 --- a/image processing timing/MemoryMapWriteBurst.java +++ b/image processing timing/MemoryMapWriteBurst.java @@ -14,14 +14,13 @@ * Writes MANY frames into ONE container file using memory-mapped I/O * with <= 2 GiB windows. Each frame uses a single bulk put(...). * - * Stability strategy (per TRIAL, using a fresh file each time): - * 1) Prefault (READ_ONLY page-touch across the whole container; no dirty pages) - * 2) Short CPU warm-up to stabilize clocks/JIT - * 3) Timed BURN-IN (printed, not summarized), then force()+sleep to drain - * 4) Measured trial (copy/end-to-end/remap), then force()+sleep - * - * This isolates trials from each other and prevents steady writeback backlog - * growth across trials, which was inflating later measurements. + * Updated methodology: + * 1) Prefault (optional): READ_ONLY page-touch across the whole container (no dirty pages) + * 2) Warmup (NEW): mapping-only warmup across all windows (READ_WRITE map/unmap, no writes) + * 3) Warmup (NEW, optional): minimal dirty warmup (write 1 page per window; e.g., 4096 bytes) + * 4) CPU warmup spin + * 5) SINGLE measured burst write per trial (no full-size burn-in write) + * 6) force()+optional sleep after the measured burst * * Usage: * java MemoryMapWriteBurst [width height frameCount [outDir] [bytesPerPixel] [pattern] [trials]] @@ -30,7 +29,7 @@ * outDir output directory (default /tmp/aps) * bytesPerPixel 1=8-bit, 2=16-bit, 4=32-bit (default 2) * pattern { zero | random | ramp | alt } (default random) - * trials number of timed trials (default 1) + * trials number of timed trials/files to generate (default 1) */ public class MemoryMapWriteBurst { // Defaults @@ -41,13 +40,19 @@ public class MemoryMapWriteBurst { static final long WINDOW_BYTES = 2_000_000_000L; // Per-trial isolation & pacing (no CLI) - private static final boolean ROTATE_PATHS = true; // create a fresh file per trial - private static final boolean ENABLE_PREFAULT = true; // read-only page-touch - private static final int CPU_WARMUP_MS = 150; // small spin to stabilize clocks/JIT - private static final boolean FORCE_FLUSH_AFTER_BURNIN = false; // drain before measured copy - private static final int SLEEP_AFTER_BURNIN_MS = 0; // give writeback time to settle - private static final boolean FORCE_FLUSH_AFTER_TRIAL = false; // drain after measured copy - private static final int SLEEP_AFTER_TRIAL_MS = 0; // brief breather + private static final boolean ROTATE_PATHS = true; // create a fresh file per trial + + // Optional stabilization + private static final boolean ENABLE_PREFAULT = false; // read-only page-touch (no dirty pages) + private static final boolean ENABLE_MAP_WARMUP = true; // NEW: map/unmap each window READ_WRITE (no write) + private static final boolean ENABLE_DIRTY_WARMUP = true; // NEW: dirty only a small amount per window + private static final int DIRTY_WARMUP_BYTES = 4096; // NEW: dirty 1 page per window (<= len) + + private static final int CPU_WARMUP_MS = 150; // small spin to stabilize clocks/JIT + + // Post-trial drain (keep visible to diagnose writeback pressure) + private static final boolean FORCE_FLUSH_AFTER_TRIAL = false; + private static final int SLEEP_AFTER_TRIAL_MS = 0; public static void main(String[] args) throws IOException { if (args.length == 1 || args.length == 2 || args.length > 7) { @@ -82,12 +87,15 @@ public static void main(String[] args) throws IOException { System.out.printf("Burst -> %dx%d @ %d B/px (%s), %d frames, container %,d bytes%n", width, height, bpp, pattern, frames, containerBytes); + System.out.printf("Warmup: prefault=%s, mapWarmup=%s, dirtyWarmup=%s (%d bytes/window), cpuWarmupMs=%d%n", + ENABLE_PREFAULT, ENABLE_MAP_WARMUP, ENABLE_DIRTY_WARMUP, DIRTY_WARMUP_BYTES, CPU_WARMUP_MS); // Per-trial totals List copyMsList = new ArrayList<>(trials); List end2endMsList = new ArrayList<>(trials); List remapMsList = new ArrayList<>(trials); List remapsList = new ArrayList<>(trials); + List forceMsList = new ArrayList<>(trials); for (int t = 1; t <= trials; t++) { Path path = outDir.resolve(String.format( @@ -104,30 +112,23 @@ public static void main(String[] args) throws IOException { // -------------------- Prefault (READ_ONLY page-touch) -------------------- if (ENABLE_PREFAULT) { - final int page = 4096; - for (long start = 0; start < containerBytes; start += WINDOW_BYTES) { - int len = (int) Math.min(WINDOW_BYTES, containerBytes - start); - MappedByteBuffer ro = ch.map(FileChannel.MapMode.READ_ONLY, start, len); - for (int p = 0; p < len; p += page) { - byte b = ro.get(p); - if ((b & 1) == 2) System.out.print(""); // keep JIT from eliding - } - } + prefaultReadOnly(ch, containerBytes); } - // Short CPU warm-up - cpuWarmupMillis(CPU_WARMUP_MS); + // -------------------- Warmup: mapping-only across windows (no writes) -------------------- + if (ENABLE_MAP_WARMUP) { + warmupMappingsOnly(ch, containerBytes); + } - // -------------------- Timed BURN-IN (printed, not summarized) -------------------- - TrialResult burn = runOneTimedTrial(ch, src, frameBytes, containerBytes); - System.out.printf("Trial %d burn-in: copy=%.3f ms | end-to-end=%.3f ms | remap=%.3f ms | remaps=%d -> %s%n", - t, burn.copyMs, burn.endToEndMs, burn.remapMs, burn.remaps, path.toAbsolutePath()); + // -------------------- Warmup: minimal dirty write per window -------------------- + if (ENABLE_DIRTY_WARMUP) { + warmupDirtyOnePagePerWindow(ch, containerBytes, DIRTY_WARMUP_BYTES); + } - // Drain before measured trial - double burnFlushMs = forceAndTime(ch); - if (SLEEP_AFTER_BURNIN_MS > 0) sleepMs(SLEEP_AFTER_BURNIN_MS); + // Short CPU warm-up + cpuWarmupMillis(CPU_WARMUP_MS); - // -------------------- Measured trial -------------------- + // -------------------- SINGLE measured burst -------------------- TrialResult r = runOneTimedTrial(ch, src, frameBytes, containerBytes); copyMsList.add(r.copyMs); @@ -135,14 +136,17 @@ public static void main(String[] args) throws IOException { remapMsList.add(r.remapMs); remapsList.add(r.remaps); - System.out.printf("Trial %d/%d: copy=%.3f ms | end-to-end=%.3f ms | remap=%.3f ms | remaps=%d | force()=%.3f ms%n", - t, trials, r.copyMs, r.endToEndMs, r.remapMs, r.remaps, burnFlushMs); + // Drain after measured trial (optional) + always record force() time for diagnostics + double forceMs = forceAndTime(ch); + forceMsList.add(forceMs); - // Drain after measured trial - double flushMs = forceAndTime(ch); + System.out.printf("Trial %d/%d: copy=%.3f ms | end-to-end=%.3f ms | remap=%.3f ms | remaps=%d | force()=%.3f ms -> %s%n", + t, trials, r.copyMs, r.endToEndMs, r.remapMs, r.remaps, forceMs, path.toAbsolutePath()); + + if (FORCE_FLUSH_AFTER_TRIAL) { + // force() already called above; this flag is preserved for legacy toggling semantics + } if (SLEEP_AFTER_TRIAL_MS > 0) sleepMs(SLEEP_AFTER_TRIAL_MS); - // Print flush time so you can see whether writeback is the culprit - System.out.printf("Trial %d post-flush: force()=%.3f ms%n", t, flushMs); } } @@ -150,13 +154,63 @@ public static void main(String[] args) throws IOException { summarize("copy ms", copyMsList); summarize("end-to-end", end2endMsList); summarize("remap ms", remapMsList); + summarize("force ms", forceMsList); + int[] remArr = remapsList.stream().mapToInt(Integer::intValue).toArray(); System.out.println("Remaps (count): min=" + Arrays.stream(remArr).min().orElse(0) + " max=" + Arrays.stream(remArr).max().orElse(0) + " mean=" + String.format("%.2f", mean(remArr))); } -// -------------------- Core timed trial (FIXED windowing logic) -------------------- + // -------------------- Prefault (READ_ONLY) -------------------- + private static void prefaultReadOnly(FileChannel ch, long containerBytes) throws IOException { + final int page = 4096; + for (long start = 0; start < containerBytes; start += WINDOW_BYTES) { + int len = (int) Math.min(WINDOW_BYTES, containerBytes - start); + MappedByteBuffer ro = ch.map(FileChannel.MapMode.READ_ONLY, start, len); + for (int p = 0; p < len; p += page) { + byte b = ro.get(p); + if ((b & 1) == 2) System.out.print(""); // keep JIT from eliding + } + if ((len % page) != 0) { + byte b = ro.get(len - 1); + if ((b & 1) == 2) System.out.print(""); + } + } + } + + // -------------------- Warmup: map/unmap windows (READ_WRITE, no writes) -------------------- + private static void warmupMappingsOnly(FileChannel ch, long containerBytes) throws IOException { + for (long start = 0; start < containerBytes; start += WINDOW_BYTES) { + int len = (int) Math.min(WINDOW_BYTES, containerBytes - start); + MappedByteBuffer map = ch.map(FileChannel.MapMode.READ_WRITE, start, len); + // Touch a couple of positions via reads only (still non-dirty) to avoid complete elision. + // Reading from READ_WRITE mapping does not dirty pages. + byte b0 = map.get(0); + byte b1 = map.get(len - 1); + if (((b0 ^ b1) & 1) == 2) System.out.print(""); + // Let map go out of scope; GC/unmap timing is OS-dependent, but warmup still exercises mapping path. + } + } + + // -------------------- Warmup: dirty only one small page per window -------------------- + private static void warmupDirtyOnePagePerWindow(FileChannel ch, long containerBytes, int dirtyBytes) throws IOException { + if (dirtyBytes <= 0) return; + + byte[] tiny = new byte[Math.min(dirtyBytes, 4096)]; + // Deterministic, non-zero pattern + for (int i = 0; i < tiny.length; i++) tiny[i] = (byte) (i * 31 + 7); + + for (long start = 0; start < containerBytes; start += WINDOW_BYTES) { + int len = (int) Math.min(WINDOW_BYTES, containerBytes - start); + MappedByteBuffer map = ch.map(FileChannel.MapMode.READ_WRITE, start, len); + int n = Math.min(tiny.length, len); + map.position(0); + map.put(tiny, 0, n); // dirties only first page(s) of each window + } + } + + // -------------------- Core timed trial (fixed windowing logic) -------------------- private static TrialResult runOneTimedTrial(FileChannel ch, byte[] src, int frameBytes, long containerBytes) throws IOException { long copyNsTotal = 0L; @@ -165,7 +219,7 @@ private static TrialResult runOneTimedTrial(FileChannel ch, byte[] src, int remaps = 0; final int totalFrames = (int) (containerBytes / frameBytes); - + MappedByteBuffer map = null; long currentWindowStart = -1; int currentWindowLen = 0; @@ -173,26 +227,21 @@ private static TrialResult runOneTimedTrial(FileChannel ch, byte[] src, for (int frameIndex = 0; frameIndex < totalFrames; frameIndex++) { long frameOffset = (long) frameIndex * frameBytes; long frameEnd = frameOffset + frameBytes; - - // Check if current frame fits in the current window - boolean needsRemap = (map == null) || - (frameOffset < currentWindowStart) || + + boolean needsRemap = (map == null) || + (frameOffset < currentWindowStart) || (frameEnd > currentWindowStart + currentWindowLen); - + if (needsRemap) { - // Start new window at this frame's offset long windowStart = frameOffset; - - // Window extends as far as possible (up to WINDOW_BYTES) but must stay within container long maxWindowEnd = Math.min(windowStart + WINDOW_BYTES, containerBytes); int windowLen = (int) (maxWindowEnd - windowStart); - - // Create new mapping + long t0 = System.nanoTime(); map = ch.map(FileChannel.MapMode.READ_WRITE, windowStart, windowLen); - map.position(0); + map.position(0); long t1 = System.nanoTime(); - + currentWindowStart = windowStart; currentWindowLen = windowLen; remaps++; @@ -257,6 +306,10 @@ private static void sleepMs(int ms) { // -------------------- Stats helpers -------------------- private static void summarize(String label, List vals) { + if (vals.isEmpty()) { + System.out.printf("Summary (%s): no data%n", label); + return; + } double[] a = vals.stream().mapToDouble(Double::doubleValue).toArray(); Arrays.sort(a); double mn = a[0], mx = a[a.length - 1]; diff --git a/image processing timing/run_burst_stats.sh b/image processing timing/run_burst_stats.sh new file mode 100755 index 0000000..43701ff --- /dev/null +++ b/image processing timing/run_burst_stats.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ---- Config (edit as needed) ---- +JAVA_CMD=${JAVA_CMD:-java} # e.g. "java" or "/path/to/java" +CLASS=${CLASS:-MemoryMapWriteBurst} # class name (no .java) +WIDTH=${WIDTH:-2048} +HEIGHT=${HEIGHT:-2048} +FRAMES=${FRAMES:-1000} +OUTDIR=${OUTDIR:-/tmp/aps} +BPP=${BPP:-2} +PATTERN=${PATTERN:-random} +N=${N:-10} # number of separate-process runs + +# Pause between runs to let background writeback settle (set 0 for stress) +SLEEP_BETWEEN=${SLEEP_BETWEEN:-3} + +mkdir -p "$OUTDIR" +TS=$(date +"%Y%m%d_%H%M%S") +CSV="$OUTDIR/burst_stats_${WIDTH}x${HEIGHT}_${BPP}bpp_${FRAMES}frames_${TS}.csv" +LOG="$OUTDIR/burst_stats_${WIDTH}x${HEIGHT}_${BPP}bpp_${FRAMES}frames_${TS}.log" + +# Container size (bytes) = width * height * bpp * frames +# Use awk for 64-bit safety on macOS bash +CONTAINER_BYTES=$(awk -v w="$WIDTH" -v h="$HEIGHT" -v b="$BPP" -v f="$FRAMES" 'BEGIN{printf "%.0f", w*h*b*f}') +CONTAINER_MB=$(awk -v cb="$CONTAINER_BYTES" 'BEGIN{printf "%.3f", cb/1e6}') + +echo "Container: ${CONTAINER_BYTES} bytes (${CONTAINER_MB} MB)" +echo "Writing CSV to: $CSV" +echo "Full logs to: $LOG" +echo + +echo "run,copy_ms,end2end_ms,remap_ms,remaps,force_ms,throughput_MBps,throughput_durable_MBps,path" > "$CSV" + +for i in $(seq 1 "$N"); do + echo "=== Run $i/$N ===" | tee -a "$LOG" + + # NOTE: assumes you've already compiled and are running from the directory containing .class + OUT="$($JAVA_CMD -cp . "$CLASS" "$WIDTH" "$HEIGHT" "$FRAMES" "$OUTDIR" "$BPP" "$PATTERN" 1 2>&1 | tee -a "$LOG")" + + LINE=$(echo "$OUT" | grep -E "Trial 1/1:" | tail -n 1) + + if [[ -z "$LINE" ]]; then + echo "ERROR: Could not parse Trial line in run $i. See log: $LOG" | tee -a "$LOG" + exit 1 + fi + + copy_ms=$(echo "$LINE" | sed -E 's/.*copy=([0-9.]+) ms.*/\1/') + end2end_ms=$(echo "$LINE" | sed -E 's/.*end-to-end=([0-9.]+) ms.*/\1/') + remap_ms=$(echo "$LINE" | sed -E 's/.*remap=([0-9.]+) ms.*/\1/') + remaps=$(echo "$LINE" | sed -E 's/.*remaps=([0-9]+).*/\1/') + force_ms=$(echo "$LINE" | sed -E 's/.*force\(\)=([0-9.]+) ms.*/\1/') + path=$(echo "$LINE" | sed -E 's/.*-> (.*)$/\1/') + + # Buffered throughput (MB/s) = containerBytes / (end2endSeconds) / 1e6 + throughput=$(awk -v cb="$CONTAINER_BYTES" -v ms="$end2end_ms" \ + 'BEGIN{ if(ms<=0){print "nan"} else { printf "%.3f", (cb/(ms/1000.0))/1e6 } }') + + # Durable throughput (MB/s) = containerBytes / ((end2end+force)Seconds) / 1e6 + throughput_durable=$(awk -v cb="$CONTAINER_BYTES" -v e="$end2end_ms" -v f="$force_ms" \ + 'BEGIN{ t=(e+f)/1000.0; if(t<=0){print "nan"} else { printf "%.3f", (cb/t)/1e6 } }') + + echo "$i,$copy_ms,$end2end_ms,$remap_ms,$remaps,$force_ms,$throughput,$throughput_durable,\"$path\"" >> "$CSV" + + if [[ "$SLEEP_BETWEEN" != "0" ]]; then + sleep "$SLEEP_BETWEEN" + fi +done + +echo +echo "Done." +echo "CSV: $CSV" +echo "LOG: $LOG" +echo + +# ---- Stats printer (awk / macOS compatible) ---- +# Prints: n, min, max, mean, std, p50, p95 +stats() { + local col="$1" + local label="$2" + + awk -F, -v COL="$col" -v LABEL="$label" ' + # Percentile helper (linear interpolation). Expects sorted array a[1..n]. + function pct(p, rank, lo, hi, w){ + if(n==1) return a[1] + rank = (p/100.0) * (n-1) + 1 + lo = int(rank) + hi = lo + 1 + if(hi>n) return a[n] + w = rank - lo + return a[lo]*(1-w) + a[hi]*w + } + + NR==1 { next } # skip header + + { + v = $COL + 0 + n++ + a[n] = v + sum += v + sumsq += v*v + if(n==1 || v < min) min = v + if(n==1 || v > max) max = v + } + + END { + if(n==0){ printf "%s: no data\n", LABEL; exit } + + mean = sum / n + var = (sumsq / n) - (mean*mean) + if(var < 0) var = 0 + std = sqrt(var) + + # sort a[1..n] (simple O(n^2), fine for small N) + for(i=1;i<=n;i++){ + for(j=i+1;j<=n;j++){ + if(a[j] < a[i]){ + t=a[i]; a[i]=a[j]; a[j]=t + } + } + } + + p50 = pct(50) + p95 = pct(95) + + printf "%s: n=%d | min=%.3f | max=%.3f | mean=%.3f | std=%.3f | p50=%.3f | p95=%.3f\n", + LABEL, n, min, max, mean, std, p50, p95 + } + ' "$CSV" +} + +echo "=== Summary statistics (${WIDTH}x${HEIGHT}, ${BPP} B/px, ${FRAMES} frames; ${CONTAINER_MB} MB) ===" +stats 3 "end2end_ms" +stats 2 "copy_ms" +stats 6 "force_ms" +stats 7 "throughput_MBps" +stats 8 "throughput_durable_MBps" \ No newline at end of file