From 372f23fad694d1214f8b3a7a021d0c89f2afc33b Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 10 Nov 2025 18:11:16 -0500 Subject: [PATCH 1/3] Create histogram.mjs --- histogram.mjs | 563 ++++++++++++++++++++++++++++++++++++++++++++++++++ util.mjs | 28 +++ 2 files changed, 591 insertions(+) create mode 100644 histogram.mjs diff --git a/histogram.mjs b/histogram.mjs new file mode 100644 index 0000000..0ef2f3f --- /dev/null +++ b/histogram.mjs @@ -0,0 +1,563 @@ +import { range, arrayProd, datatypeToTypedArray, datatypeToBytes, createUniformBuffer } from "./util.mjs"; +import { + BasePrimitive, + Kernel, + AllocateBuffer, +} from "./primitive.mjs"; +import { BaseTestSuite } from "./testsuite.mjs"; +import { BinOpAddU32, BinOpAddF32 } from "./binop.mjs"; + + + +export class BaseHistogram extends BasePrimitive { + constructor(args) { + super(args); + + // Required parameters + for (const required of ["datatype", "numBins"]) { + if (!this[required]) { + throw new Error(`${this.constructor.name}: ${required} is required`); + } + } + + // Histogram always outputs u32 + this.binop = args.binop ?? new BinOpAddU32(); + + if (this.binop.datatype !== "u32") { + throw new Error( + `${this.constructor.name}: binop datatype must be u32 (histogram output is always u32), but got ${this.binop.datatype}.` + ); + } + this.minValue = args.minValue ?? 0.0; + this.maxValue = args.maxValue ?? 1.0; + + + this.knownBuffers = ["inputBuffer", "outputBuffer"]; + + for (const knownBuffer of this.knownBuffers) { + if (knownBuffer in args) { + this.registerBuffer({ label: knownBuffer, buffer: args[knownBuffer] }); + delete this[knownBuffer]; + } + } + this.getDispatchGeometry = this.getSimpleDispatchGeometry; + } + + get bytesTransferred() { + return ( + this.getBuffer("inputBuffer").size + this.getBuffer("outputBuffer").size + ); + } + + validate = (args = {}) => { + const memsrc = args.inputBuffer ?? this.getBuffer("inputBuffer").cpuBuffer; + const memdest = args.outputBuffer ?? this.getBuffer("outputBuffer").cpuBuffer; + let referenceOutput; + try { + referenceOutput = new Uint32Array(this.numBins); + } catch (error) { + console.error(error, "Tried to allocate array of length", this.numBins); + } + for (let bin = 0; bin < this.numBins; bin++) { + referenceOutput[bin] = 0; + } + for (let i = 0; i < memsrc.length; i++) { + const value = memsrc[i]; + + const normalized = (value - this.minValue) / (this.maxValue - this.minValue); + let binIndex = Math.floor(normalized * this.numBins); + binIndex = Math.max(0, Math.min(binIndex, this.numBins - 1)); + referenceOutput[binIndex] = referenceOutput[binIndex] + 1; + } + function validates(args) { + return args.cpu == args.gpu; + } + + let returnString = ""; + let allowedErrors = 5; + + for (let bin = 0; bin < memdest.length; bin++) { + if (allowedErrors == 0) { + break; + } + if ( + !validates({ + cpu: referenceOutput[bin], + gpu: memdest[bin], + datatype: this.datatype, + }) + ) { + const ref = referenceOutput[bin]; + const gpu = memdest[bin]; + const diff = ref === 0 ? Math.abs(gpu - ref) : Math.abs((ref - gpu) / ref); + returnString += `\nBin ${bin}: expected ${ref}, instead saw ${gpu} (diff: ${diff}).`; + if (this.getBuffer("debugBuffer")) { + returnString += ` debug[${bin}] = ${this.getBuffer("debugBuffer").cpuBuffer[bin]}.`; + } + if (this.getBuffer("debug2Buffer")) { + returnString += ` debug2[${bin}] = ${this.getBuffer("debug2Buffer").cpuBuffer[bin]}.`; + } + allowedErrors--; + } + } + + console.log( + this.label, + "histogram", + "with input", + memsrc, + "should validate to", + referenceOutput, + "and actually validates to", + memdest, + this.getBuffer("debugBuffer") ? "\ndebugBuffer" : "", + this.getBuffer("debugBuffer") + ? this.getBuffer("debugBuffer").cpuBuffer + : "", + this.getBuffer("debug2Buffer") ? "\ndebug2Buffer" : "", + this.getBuffer("debug2Buffer") + ? this.getBuffer("debug2Buffer").cpuBuffer + : "", + this.binop.constructor.name, + this.binop.datatype, + "identity is", + this.binop.identity, + "bins:", + this.numBins, + "range:", + this.minValue, + "to", + this.maxValue, + "input length:", + memsrc.length + ); + + return returnString; + }; +} + +export const histogramBandwidthPlot = { + x: { field: "inputBytes", label: "Input array size (B)" }, + y: { field: "bandwidth", label: "Achieved bandwidth (GB/s)" }, + stroke: { field: "timing" }, // Lines colored by GPU vs CPU + text_br: "gpuinfo.description", + caption: "Histogram Bandwidth (GPU vs CPU)", +}; + +function histogramWGCountFnPlot() { + return { + x: { field: "inputBytes", label: "Input array size (B)" }, + y: { field: (d) => d.bandwidth, label: "Achieved bandwidth (GB/s)" }, + stroke: { field: "workgroupCount" }, + text_br: (d) => `${d.gpuinfo.description}`, + caption: `${this.category} | ${this.testSuite} | Lines are workgroup count`, + }; +} + +const histogramWGSizeBinOpPlot = { + x: { field: "inputBytes", label: "Input array size (B)" }, + y: { field: "bandwidth", label: "Achieved bandwidth (GB/s)" }, + fy: { field: "binop" }, + stroke: { field: "workgroupSize" }, + text_br: "gpuinfo.description", + caption: "Lines are workgroup size", +}; + +//https://developer.nvidia.com/blog/gpu-pro-tip-fast-histograms-using-shared-atomics-maxwell/ + +/*Step 1: clear private bins (like setting to 0). +Step 2: each thread counts elements into its workgroup’s private histogram. +Step 3: merge your private histogram into the final global result. */ + +export class WGHistogram extends BaseHistogram { + constructor(args) { + super(args); + } + finalizeRuntimeParameters() { + this.workgroupSize = this.workgroupSize ?? 256; + this.maxGSLWorkgroupCount = this.maxGSLWorkgroupCount ?? 256; + + this.workgroupCount = Math.min(Math.ceil(this.getBuffer("inputBuffer").size / this.workgroupSize), this.maxGSLWorkgroupCount); + this.numPartials = this.workgroupCount; + + const inputLength = this.getBuffer("inputBuffer").size / datatypeToBytes(this.datatype); + + this.histogramUniformsBuffer = createUniformBuffer([ + { type: 'u32', value: inputLength }, + { type: 'u32', value: this.numBins }, + { type: 'u32', value: this.workgroupCount }, + { type: 'f32', value: this.minValue }, + { type: 'f32', value: this.maxValue }, + ]); + + } + histogramKernelDefinition = () => { + return /*wgsl*/ ` + @group(0) @binding(0) var inputBuffer: array<${this.datatype}>; + @group(0) @binding(1) varoutputBuffer:array>; + + struct HistogramUniforms{ + inputLength:u32, + numBins:u32, + numWorkgroups:u32, + minValue:f32, + maxValue:f32, + } + @group(0) @binding(2) var uniforms:HistogramUniforms; + + varprivateHistogram:array,${this.numBins}>; + + @compute @workgroup_size(${this.workgroupSize}) + fn main( + @builtin(global_invocation_id)globalId:vec3, + @builtin(local_invocation_id)localId:vec3, + @builtin(workgroup_id)wg_id:vec3 + ) { + let gwIndex:u32 =globalId.x; + let localIndex:u32=localId.x; + let wgIndex:u32=wg_id.x; + + var i:u32=localIndex; + let WGS: u32=${this.workgroupSize}u; + let NB:u32=uniforms.numBins; + + loop{ + if(i>=NB){break;} + atomicExchange(&privateHistogram[i],0u); + i=i+WGS; + } + workgroupBarrier(); + + var idx: u32 = gwIndex; + let inputLen: u32 = uniforms.inputLength; + + while (idx < inputLen) { + let value: ${this.datatype} = inputBuffer[idx]; + + let range: f32 = uniforms.maxValue - uniforms.minValue; + let normalized: f32 = (f32(value) - uniforms.minValue) / range; + var binIndex: i32 = i32(floor(normalized * f32(NB))); + binIndex = clamp(binIndex, 0, i32(NB) - 1); + + atomicAdd(&privateHistogram[u32(binIndex)], 1u); + + idx = idx + uniforms.numWorkgroups * WGS; + } + + workgroupBarrier(); + + var b: u32 = localIndex; + while (b < NB) { + let partialCount: u32 = atomicLoad(&privateHistogram[b]); + if (partialCount > 0u) { + atomicAdd(&outputBuffer[b], partialCount); + } + b = b + WGS; + } + } + `; + } + + compute() { + this.finalizeRuntimeParameters(); + + return [ + new AllocateBuffer({ + label: "histogramUniforms", + size: this.histogramUniformsBuffer.byteLength, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + populateWith: this.histogramUniformsBuffer, + }), + + new Kernel({ + kernel: this.histogramKernelDefinition, + bufferTypes: [["read-only-storage", "storage", "uniform"]], + bindings: [["inputBuffer", "outputBuffer", "histogramUniforms"]], + label: "histogram kernel", + getDispatchGeometry: () => { + return [this.workgroupCount]; + }, + }), + ]; + } +} + +/*https://developer.nvidia.com/blog/gpu-pro-tip-fast-histograms-using-shared-atomics-maxwell/ */ + +export class HierarchicalHistogram extends BaseHistogram { + constructor(args) { + super(args); + } + finalizeRuntimeParameters() { + this.workgroupSize = this.workgroupSize ?? 256; + this.maxGSLWorkgroupCount = this.maxGSLWorkgroupCount ?? 512; + this.numThreadsPerWorkgroup = arrayProd(this.workgroupSize); + + const inputLength = this.getBuffer("inputBuffer").size / datatypeToBytes(this.datatype); + const idealWorkgroupCount = Math.ceil(inputLength / this.numThreadsPerWorkgroup); + this.workgroupCount = Math.min(idealWorkgroupCount, this.maxGSLWorkgroupCount); + this.numPartials = this.workgroupCount; + + this.histogramUniformsBuffer = createUniformBuffer([ + { type: 'u32', value: inputLength }, + { type: 'u32', value: this.numBins }, + { type: 'u32', value: this.workgroupCount }, + { type: 'f32', value: this.minValue }, + { type: 'f32', value: this.maxValue }, + ]); + + this.clearUniformsBuffer = createUniformBuffer([ + { type: 'u32', value: this.numBins } + ]); + + this.accumulateDispatchCount = this.numBins; + this.accumulateUniformsBuffer = createUniformBuffer([ + { type: 'u32', value: this.numBins }, + { type: 'u32', value: this.workgroupCount } + ]); + } + // Kernel 1: Each workgroup builds its own local histogram using workgroup atomics + histogramPerWorkgroupKernel = () => { + return /*wgsl*/` + @group(0) @binding(0) varinputBuffer:array<${this.datatype}>; + @group(0) @binding(1) varpartials:array; + + struct HistogramUniforms{ + inputLength:u32, + numBins:u32, + numWorkgroups:u32, + minValue:f32, + maxValue:f32, + } + @group(0) @binding(2) varuniforms:HistogramUniforms; + + var privateHistogram: array, ${this.numBins}>; + + @compute @workgroup_size(${this.workgroupSize}) + fn histogramPerWorkgroupKernel( + @builtin(global_invocation_id) globalId: vec3, + @builtin(local_invocation_id) localId: vec3, + @builtin(workgroup_id) wgId: vec3 + ) { + let gwIndex: u32 = globalId.x; + let localIndex: u32 = localId.x; + let wgIndex: u32 = wgId.x; + + let WGS: u32 = ${this.workgroupSize}u; + let NB: u32 = uniforms.numBins; + + var i: u32 = localIndex; + loop { + if (i >= NB) { break; } + atomicStore(&privateHistogram[i], 0u); + i = i + WGS; + } + workgroupBarrier(); + + let inputLen: u32 = uniforms.inputLength; + + var idx: u32 = gwIndex; + + while (idx < inputLen) { + let value: ${this.datatype} = inputBuffer[idx]; + + let range: f32 = uniforms.maxValue - uniforms.minValue; + let normalized: f32 = (f32(value) - uniforms.minValue) / range; + var binIndex: i32 = i32(floor(normalized * f32(NB))); + binIndex = clamp(binIndex, 0, i32(NB) - 1); + + atomicAdd(&privateHistogram[u32(binIndex)], 1u); + + idx = idx + uniforms.numWorkgroups * WGS; + } + + workgroupBarrier(); + + var b: u32 = localIndex; + while (b < NB) { + let partialCount: u32 = atomicLoad(&privateHistogram[b]); + let partialIndex: u32 = b * uniforms.numWorkgroups + wgIndex; + partials[partialIndex] = partialCount; + b = b + WGS; + } + }`; + }; + // kernel 2: for accumulated partial + accumulateHistogramsKernel = () => { + return /* wgsl */ ` + @group(0) @binding(0) var partials: array; + @group(0) @binding(1) var outputBuffer: array>; + + struct AccumulateUniforms { + numBins: u32, + numWorkgroups: u32, + } + @group(0) @binding(2) var uniforms: AccumulateUniforms; + + var localSum: atomic; + + @compute @workgroup_size(${this.workgroupSize}) + fn accumulateHistogramsKernel( + @builtin(global_invocation_id) globalId: vec3, + @builtin(local_invocation_id) localId: vec3, + @builtin(workgroup_id) workgroupId: vec3 + ) { + let localIdx: u32 = localId.x; + let binIdx: u32 = workgroupId.x; + let NB: u32 = uniforms.numBins; + let numWG: u32 = uniforms.numWorkgroups; + let WGS: u32 = ${this.workgroupSize}u; + + if (binIdx >= NB) { + return; + } + + if (localIdx == 0u) { + atomicStore(&localSum, 0u); + } + workgroupBarrier(); + + var wgIdx: u32 = localIdx; + while (wgIdx < numWG) { + let partialIndex: u32 = binIdx * numWG + wgIdx; + let value: u32 = partials[partialIndex]; + atomicAdd(&localSum, value); + wgIdx = wgIdx + WGS; + } + + workgroupBarrier(); + + if (localIdx == 0u) { + let total: u32 = atomicLoad(&localSum); + atomicStore(&outputBuffer[binIdx], total); + } + }`; + }; + + // Kernel to clear the output buffer + clearOutputBufferKernel = () => { + return /* wgsl */ ` + @group(0) @binding(0) var outputBuffer: array>; + + struct ClearUniforms { + numBins: u32, + } + @group(0) @binding(1) var uniforms: ClearUniforms; + + @compute @workgroup_size(${this.workgroupSize}) + fn clearOutputBufferKernel( + @builtin(global_invocation_id) globalId: vec3 + ) { + let idx: u32 = globalId.x; + if (idx < uniforms.numBins) { + atomicStore(&outputBuffer[idx], 0u); + } + }`; + }; + + compute() { + this.finalizeRuntimeParameters(); + + return [ + // Allocate partials buffer: transposed layout [numBins][numWorkgroups] + new AllocateBuffer({ + label: "partials", + size: this.numBins * this.workgroupCount * 4, // 4 bytes per u32 + }), + + // Allocate and populate histogram uniforms + new AllocateBuffer({ + label: "histogramUniforms", + size: this.histogramUniformsBuffer.byteLength, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + populateWith: this.histogramUniformsBuffer, + }), + + // Allocate and populate accumulate uniforms + new AllocateBuffer({ + label: "accumulateUniforms", + size: this.accumulateUniformsBuffer.byteLength, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + populateWith: this.accumulateUniformsBuffer, + }), + + // Kernel 1: Each workgroup builds local histogram from contiguous chunks + new Kernel({ + kernel: this.histogramPerWorkgroupKernel, + bufferTypes: [["read-only-storage", "storage", "uniform"]], + bindings: [["inputBuffer", "partials", "histogramUniforms"]], + label: "histogram per workgroup", + logKernelCodeToConsole: false, + getDispatchGeometry: () => { + return [this.workgroupCount]; + }, + }), + + // Kernel 2: Accumulate all per-workgroup histograms (NO ATOMICS!) + new Kernel({ + kernel: this.accumulateHistogramsKernel, + bufferTypes: [["read-only-storage", "storage", "uniform"]], + bindings: [["partials", "outputBuffer", "accumulateUniforms"]], + label: "accumulate histograms", + logKernelCodeToConsole: false, + getDispatchGeometry: () => { + // One workgroup per bin = numBins workgroups + return [this.numBins]; + }, + }), + ]; + } + +} + +const HistogramParams = { + inputLength: [2 ** 20, 2 ** 22, 2 ** 24, 2 ** 26, 2 ** 27], + numBins: [64, 256], + workgroupSize: [256], + maxGSLWorkgroupCount: [256, 512, 1024], + minValue: [-1024.0], + maxValue: [1024.0], +}; + +const HistogramParamsSingleton = { + inputLength: [2 ** 10], + numBins: [64], + workgroupSize: [256], + maxGSLWorkgroupCount: [64], + minValue: [-1024.0], + maxValue: [1024.0], +}; + +export const WGHistogramTestSuite = new BaseTestSuite({ + category: "histogram", + testSuite: "workgroup histogram", + trials: 10, + params: HistogramParams, + uniqueRuns: ["inputLength", "numBins", "workgroupSize"], + primitive: WGHistogram, + primitiveArgs: { + datatype: "f32", + binop: BinOpAddU32, + gputimestamps: true, + }, + plots: [ + histogramBandwidthPlot + ], +}); + +export const HierarchicalHistogramTestSuite = new BaseTestSuite({ + category: "histogram", + testSuite: "hierarchical histogram", + trials: 10, + params: HistogramParams, + uniqueRuns: ["inputLength", "numBins", "maxGSLWorkgroupCount"], + primitive: HierarchicalHistogram, + primitiveArgs: { + datatype: "f32", + binop: BinOpAddU32, + gputimestamps: true, + }, + plots: [ + histogramBandwidthPlot + ], +}); diff --git a/util.mjs b/util.mjs index d789e0b..43782c6 100644 --- a/util.mjs +++ b/util.mjs @@ -257,3 +257,31 @@ export function formatWGSL(wgslCode) { }); return formattedLines.join("\n"); } +export function createUniformBuffer(fields) { + const dataSize = fields.length * 4; + + const alignedSize = Math.ceil(dataSize / 16) * 16; + + const buffer = new ArrayBuffer(alignedSize); + const dataView = new DataView(buffer); + + let offset = 0; + for (const field of fields) { + switch (field.type) { + case 'u32': + dataView.setUint32(offset, field.value, true); + break; + case 'i32': + dataView.setInt32(offset, field.value, true); + break; + case 'f32': + dataView.setFloat32(offset, field.value, true); + break; + default: + throw new Error(`Unsupported uniform type: ${field.type}`); + } + offset += 4; + } + + // Remaining paddings are automatically zero + return new Uint \ No newline at end of file From 4791600a0a6b12af177777745590a511f4b90d28 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 10 Nov 2025 18:13:03 -0500 Subject: [PATCH 2/3] Update util.mjs --- util.mjs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/util.mjs b/util.mjs index 43782c6..385be12 100644 --- a/util.mjs +++ b/util.mjs @@ -214,9 +214,9 @@ export function formatWGSL(wgslCode) { const pushLeft = /* lines like ") -> f32 {" */ braceCount == 0 && - (trimmedLine.startsWith(")") || - trimmedLine.startsWith("]") || - trimmedLine.startsWith("}")) + (trimmedLine.startsWith(")") || + trimmedLine.startsWith("]") || + trimmedLine.startsWith("}")) ? -1 : 0; @@ -260,6 +260,7 @@ export function formatWGSL(wgslCode) { export function createUniformBuffer(fields) { const dataSize = fields.length * 4; + //multiple of 16 bytes const alignedSize = Math.ceil(dataSize / 16) * 16; const buffer = new ArrayBuffer(alignedSize); @@ -284,4 +285,5 @@ export function createUniformBuffer(fields) { } // Remaining paddings are automatically zero - return new Uint \ No newline at end of file + return new Uint8Array(buffer); +} \ No newline at end of file From ca23fb6674c5f21a513401bb3e22232248005315 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 10 Nov 2025 18:30:58 -0500 Subject: [PATCH 3/3] added histogram benchmarking --- benchmarking.html | 2 +- benchmarking.mjs | 23 +++++++++++++++++------ benchmarking_chrome.mjs | 2 +- histogram.mjs | 12 ++++++------ 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/benchmarking.html b/benchmarking.html index d226103..3b852a4 100644 --- a/benchmarking.html +++ b/benchmarking.html @@ -15,7 +15,7 @@ window.location.protocol === "file:"; const script = document.createElement("script"); script.type = "module"; - script.src = (isLocalhost ? window.location.origin : "https://gridwise-webgpu.github.io") + "/gridwise/benchmarking_chrome.mjs"; + script.src = (isLocalhost ? window.location.origin : "https://gridwise-webgpu.github.io") + "/benchmarking_chrome.mjs"; document.body.appendChild(script); // diff --git a/benchmarking.mjs b/benchmarking.mjs index 62770df..28acaf6 100644 --- a/benchmarking.mjs +++ b/benchmarking.mjs @@ -22,9 +22,9 @@ if (typeof process !== "undefined" && process.release.name === "node") { ); /* begin https://github.com/sharonchoong/svg-exportJS */ /* svg-exportJS prerequisite: canvg */ - await import("https://cdnjs.cloudflare.com/ajax/libs/canvg/3.0.9/umd.js"); + // await import("https://cdnjs.cloudflare.com/ajax/libs/canvg/3.0.9/umd.js"); /* svg-exportJS plugin */ - await import("https://sharonchoong.github.io/svg-exportJS/svg-export.min.js"); + // await import("https://cdn.jsdelivr.net/npm/svg-export@1.0.1/dist/svg-export.min.js"); /* end https://github.com/sharonchoong/svg-exportJS */ const urlParams = new URL(window.location.href).searchParams; saveJSON = urlParams.get("saveJSON"); // string or undefined @@ -65,6 +65,10 @@ import { SortOneSweep64v32Suite, SortOneSweep64v321MNoPlotSuite, } from "./onesweep.mjs"; +import { + WGHistogramTestSuite, + HierarchicalHistogramTestSuite, +} from "./histogram.mjs"; import { BasePrimitive } from "./primitive.mjs"; async function main(navigator) { @@ -135,7 +139,7 @@ async function main(navigator) { //); // let testSuites = [DLDFScanMiniSuite]; // let testSuites = [DLDFScanAccuracyRegressionSuite]; - let testSuites = [DLDFPerfSuite]; + // let testSuites = [DLDFPerfSuite]; // let testSuites = [DLDFDottedCachePerfTestSuite]; // let testSuites = [DLDFDottedCachePerf2TestSuite]; // let testSuites = [DLDFSingletonWithTimingSuite]; @@ -144,6 +148,9 @@ async function main(navigator) { // let testSuites = [SortOneSweepFunctionalRegressionSuite]; // let testSuites = [SortOneSweep64v32Suite]; // let testSuites = [SortOneSweep64v321MNoPlotSuite]; + // let testSuites = [WGHistogramTestSuite]; + // let testSuites = [HierarchicalHistogramTestSuite]; + let testSuites = [WGHistogramTestSuite, HierarchicalHistogramTestSuite]; const expts = new Array(); // push new rows (experiments) onto this let primitiveCacheStats; @@ -240,13 +247,17 @@ async function main(navigator) { device, datatype: testSuite.category === "subgroups" && - testSuite.testSuite === "subgroupBallot" + testSuite.testSuite === "subgroupBallot" ? "vec4u" - : primitive.datatype, + : testSuite.category === "histogram" + ? "u32" + : primitive.datatype, length: "type" in primitive && primitive.type === "reduce" ? 1 - : primitive.inputLength, + : testSuite.category === "histogram" + ? primitive.numBins + : primitive.inputLength, label: "outputBuffer", createGPUBuffer: true, createMappableGPUBuffer: true, diff --git a/benchmarking_chrome.mjs b/benchmarking_chrome.mjs index 19f6d36..2d98a0d 100644 --- a/benchmarking_chrome.mjs +++ b/benchmarking_chrome.mjs @@ -5,7 +5,7 @@ const isLocalhost = const modulePath = (isLocalhost ? window.location.origin : "https://gridwise-webgpu.github.io") + - "/gridwise/benchmarking.mjs"; + "/benchmarking.mjs"; import(modulePath) .then(({ main }) => { diff --git a/histogram.mjs b/histogram.mjs index 0ef2f3f..8ec5c2e 100644 --- a/histogram.mjs +++ b/histogram.mjs @@ -180,10 +180,10 @@ export class WGHistogram extends BaseHistogram { this.workgroupCount = Math.min(Math.ceil(this.getBuffer("inputBuffer").size / this.workgroupSize), this.maxGSLWorkgroupCount); this.numPartials = this.workgroupCount; - const inputLength = this.getBuffer("inputBuffer").size / datatypeToBytes(this.datatype); - + this.inputLength = this.getBuffer("inputBuffer").size / datatypeToBytes(this.datatype); + this.histogramUniformsBuffer = createUniformBuffer([ - { type: 'u32', value: inputLength }, + { type: 'u32', value: this.inputLength }, { type: 'u32', value: this.numBins }, { type: 'u32', value: this.workgroupCount }, { type: 'f32', value: this.minValue }, @@ -293,13 +293,13 @@ export class HierarchicalHistogram extends BaseHistogram { this.maxGSLWorkgroupCount = this.maxGSLWorkgroupCount ?? 512; this.numThreadsPerWorkgroup = arrayProd(this.workgroupSize); - const inputLength = this.getBuffer("inputBuffer").size / datatypeToBytes(this.datatype); - const idealWorkgroupCount = Math.ceil(inputLength / this.numThreadsPerWorkgroup); + this.inputLength = this.getBuffer("inputBuffer").size / datatypeToBytes(this.datatype); + const idealWorkgroupCount = Math.ceil(this.inputLength / this.numThreadsPerWorkgroup); this.workgroupCount = Math.min(idealWorkgroupCount, this.maxGSLWorkgroupCount); this.numPartials = this.workgroupCount; this.histogramUniformsBuffer = createUniformBuffer([ - { type: 'u32', value: inputLength }, + { type: 'u32', value: this.inputLength }, { type: 'u32', value: this.numBins }, { type: 'u32', value: this.workgroupCount }, { type: 'f32', value: this.minValue },