From 8e1063feea0d9d02e36c4ad1d353bf812daf0f98 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Thu, 20 Nov 2025 18:37:14 -0500 Subject: [PATCH 01/17] Update jekyll.yml --- .github/workflows/jekyll.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml index 6df84b9..151c9d3 100644 --- a/.github/workflows/jekyll.yml +++ b/.github/workflows/jekyll.yml @@ -44,6 +44,9 @@ jobs: permissions: pages: write id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} steps: - name: Deploy to GitHub Pages id: deployment From 47ccef944af473e59a26df6f06d8cc3f713e2231 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Thu, 20 Nov 2025 18:54:24 -0500 Subject: [PATCH 02/17] Update _config.yml --- docs/_config.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/_config.yml b/docs/_config.yml index 97e0890..3e0d8ae 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -12,10 +12,14 @@ plugins: - jekyll-feed - jekyll-remote-theme -# Only show About in header navigation + header_pages: - about.markdown + +include: + - examples/ + exclude: - misc/ - Gemfile From 1df1b0422fd7d6df4d93a31ac0c237476a6c2d93 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Thu, 20 Nov 2025 18:57:32 -0500 Subject: [PATCH 03/17] added changes in workflow --- .github/workflows/jekyll.yml | 6 +++--- docs/_config.yml | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml index 151c9d3..41b152a 100644 --- a/.github/workflows/jekyll.yml +++ b/.github/workflows/jekyll.yml @@ -3,9 +3,6 @@ name: Deploy Jekyll site to Pages on: push: branches: ["main"] - paths: - - ".github/workflows/**" - - "docs/**" workflow_dispatch: permissions: @@ -24,6 +21,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Copy examples to docs + run: cp -r examples docs/examples + - name: Setup Pages uses: actions/configure-pages@v4 diff --git a/docs/_config.yml b/docs/_config.yml index 3e0d8ae..66a4014 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -17,9 +17,6 @@ header_pages: - about.markdown -include: - - examples/ - exclude: - misc/ - Gemfile From f9309295782f08028da5f1a34ea066697513c2e5 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Thu, 20 Nov 2025 18:59:45 -0500 Subject: [PATCH 04/17] Update jekyll.yml --- .github/workflows/jekyll.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml index 41b152a..49c2a83 100644 --- a/.github/workflows/jekyll.yml +++ b/.github/workflows/jekyll.yml @@ -21,8 +21,11 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Copy examples to docs - run: cp -r examples docs/examples + - name: Copy examples and source files to docs + run: | + cp -r examples docs/examples + cp -r src docs/src + cp *.mjs docs/ 2>/dev/null || true - name: Setup Pages uses: actions/configure-pages@v4 From 99a38b625747bb219c4a228eaa8f2c7f280da698 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Fri, 21 Nov 2025 14:41:32 -0500 Subject: [PATCH 05/17] Update jekyll.yml --- .github/workflows/jekyll.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml index 9aed94d..45b8186 100644 --- a/.github/workflows/jekyll.yml +++ b/.github/workflows/jekyll.yml @@ -24,7 +24,7 @@ jobs: - name: Copy examples and source files to docs run: | cp -r examples docs/examples - cp -r src docs/src + cp -r docs cp *.mjs docs/ 2>/dev/null || true - name: Setup Pages From 8b6391b04c67cf867721ae76f7473daa4349f4f1 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Fri, 21 Nov 2025 14:42:37 -0500 Subject: [PATCH 06/17] Update jekyll.yml --- .github/workflows/jekyll.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml index 45b8186..4414a3e 100644 --- a/.github/workflows/jekyll.yml +++ b/.github/workflows/jekyll.yml @@ -24,7 +24,6 @@ jobs: - name: Copy examples and source files to docs run: | cp -r examples docs/examples - cp -r docs cp *.mjs docs/ 2>/dev/null || true - name: Setup Pages From d298c17936eaccb71bb3dc83708af13298f32298 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 24 Nov 2025 14:13:29 -0500 Subject: [PATCH 07/17] example.html file --- docs/_includes/header.html | 1 + docs/example.html | 385 +++++++++++++++++-------------------- 2 files changed, 182 insertions(+), 204 deletions(-) diff --git a/docs/_includes/header.html b/docs/_includes/header.html index 7eff317..8951758 100644 --- a/docs/_includes/header.html +++ b/docs/_includes/header.html @@ -23,6 +23,7 @@ {{ my_page.title | escape }} {%- endif -%} {%- endfor -%} + Example Docs Try Interactive Example diff --git a/docs/example.html b/docs/example.html index ef5a575..0130729 100644 --- a/docs/example.html +++ b/docs/example.html @@ -1,206 +1,183 @@ --- -layout: null -permalink: /example/ +layout: home +title: Examples +permalink: /examples/ --- - - - - - WebGPU Interactive Example - - - -
-
-
-

Initializing WebGPU Example

-

Connecting to development server...

-
-
-
-
-
- - - - \ No newline at end of file + + +

Interactive examples demonstrating WebGPU primitives and operations.

+ + + +

Scan Example

+

Compact example that demonstrates exclusive vs inclusive scans using DLDFScan: how to choose scanType, build the + input array, run the primitive, and validate outputs. Good for understanding prefix-sum semantics and the effect of + different binary ops (add/max/min).

+

Open the page, pick parameters in the pane (datatype/binop/input length), click Start, then inspect the result + and plots.

+ +

Scan Types

+

Exclusive: Output[i] = sum of all elements before position i (output[0] is identity).

+

Inclusive: Output[i] = sum of all elements up to and including position i.

+ +

Binary Operations

+

Add, Min, Max — combines elements using the specified operation. Custom operations supported via binop.mjs.

+ +

Supported Data Types

+

u32, i32, f32 — all data types work with all binary operations.

+ +

Code Structure

+
+ // Exclusive scan with add operation
+ const primitive = new DLDFScan({
+   device,
+   binop: new BinOpAdd({ datatype: "u32" }),
+   type: "exclusive",
+   datatype: "u32"
+ });
+
+ // Inclusive scan with max operation
+ const primitive = new DLDFScan({
+   device,
+   binop: new BinOpMax({ datatype: "i32" }),
+   type: "inclusive",
+   datatype: "i32"
+ }); +
+ +

+ Source Code + Performance +

+ +

Key/Value Sort Demo

+

Demonstrates OneSweepSort with two modes: key-only sorting and key-value pair sorting with payload validation. Shows + how to configure the primitive for different operations and data types.

+

Open the page, pick parameters in the pane (datatype/binop/input length), click Start, then inspect the result + and plots.

+ +

Sort Operations

+

Sort Keys: Sorts only the keys array in ascending or descending order.

+

Sort Pairs: Sorts keys while maintaining associated payloads, useful for sorting complex data + structures.

+ +

Supported Data Types

+

u32, i32, f32 — all data types support both sort modes with configurable sort direction.

+ +

Code Structure

+
+ // Key-only sort
+ const primitive = new OneSweepSort({
+   device,
+   datatype: "u32",
+   direction: "ascending",
+   copyOutputToTemp: true,
+   inputLength: 1024
+ });
+
+ // Key-value pair sort
+ const primitive = new OneSweepSort({
+   device,
+   datatype: "u32",
+   direction: "ascending",
+   copyOutputToTemp: true,
+   type: "keyvalue",
+   inputLength: 1024
+ }); +
+ +

+ Source Code + Performance +

+ +

Reduce Operation

+

A minimal, hands-on example showing how to run the reduce primitive end-to-end: device setup, buffer upload, a single + execution, and result readback. Use this to learn the basic API calls and validation pattern. Parameters shown: + datatype and binop (add/max/min). Ideal as the first example before any benchmarking.

+

Open the page, pick parameters in the pane (datatype/binop/input length), click Start, then inspect the result + and plots.

+ +

What is Reduce?

+

Reduces an entire array to a single value by repeatedly applying a binary operation across all elements.

+ +

Binary Operations

+

Add, Min, Max — aggregates all elements using the specified operation. Custom operations supported via binop.mjs.

+ +

Supported Data Types

+

u32, i32, f32 — all data types work with all binary operations.

+ +

Code Structure

+
+ // Reduce with add operation
+ const primitive = new DLDFScan({
+   device,
+   binop: new BinOpAdd({ datatype: "u32" }),
+   type: "reduce",
+   datatype: "u32"
+ });
+
+ // Reduce with min operation
+ const primitive = new DLDFScan({
+   device,
+   binop: new BinOpMin({ datatype: "i32" }),
+   type: "reduce",
+   datatype: "i32"
+ }); +
+ +

+ Source Code + Performance +

+ +
\ No newline at end of file From 1143cf60514fe8938a31b1b3573540af4b699319 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 24 Nov 2025 14:22:51 -0500 Subject: [PATCH 08/17] docs.html file --- _config.yml | 2 + docs/_includes/header.html | 1 + docs/docs.html | 141 +++++++++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+) create mode 100644 docs/docs.html diff --git a/_config.yml b/_config.yml index ed3c58a..c1c68eb 100644 --- a/_config.yml +++ b/_config.yml @@ -15,6 +15,8 @@ plugins: header_pages: - about.markdown + - docs/example.html + - docs/docs.html exclude: diff --git a/docs/_includes/header.html b/docs/_includes/header.html index 8951758..46810d8 100644 --- a/docs/_includes/header.html +++ b/docs/_includes/header.html @@ -24,6 +24,7 @@ {%- endif -%} {%- endfor -%} Example Docs + Read Docs Try Interactive Example diff --git a/docs/docs.html b/docs/docs.html new file mode 100644 index 0000000..0ac26be --- /dev/null +++ b/docs/docs.html @@ -0,0 +1,141 @@ +--- +layout: home +title: Documentation +permalink: /docs/ +--- + + +

Comprehensive guides and technical documentation for GridWise WebGPU primitives.

+ + + +

Architecture

+

Overview of GridWise's system design, module structure, and how primitives are organized for extensibility and + performance. Learn about the high-level organization of GridWise components, including how different primitives + (scan, reduce, sort) are implemented as modular, reusable units. Understand the architectural decisions that enable + performance optimization while maintaining clean separation of concerns and ease of extension.

+

+ Read +

+ +

Primitive Design

+

Deep dive into the design principles behind GridWise primitives with focus on single-pass chained algorithms for + sort, scan, and reduce. Explores the tradeoffs between using subgroup instructions for maximum performance versus + software emulation for broader compatibility. Covers memory bandwidth considerations, the lookback and fallback + optimization techniques, and how to choose between chained algorithms and hybrid approaches for different use cases. +

+

+ Read +

+ +

Scan and Reduce

+

Comprehensive guide to scan (prefix sum) and reduce operations in GridWise. Explains the difference between exclusive + scan (first element is identity), inclusive scan (each element includes itself), and reduce (single output value). + Covers binary operations (Add, Min, Max), data type support (u32, i32, f32), API usage patterns with code examples, + and when to use each variant for optimal performance.

+

+ Read +

+ +

Sort

+

Complete documentation for GridWise's OneSweepSort implementation. Covers both key-only sorting and key-value pair + sorting with full payload support. Explains configurable sort direction (ascending/descending), supported data + types, buffer management strategies, and in-place versus temporary buffer approaches. Includes detailed API + documentation and performance characteristics across different input sizes and configurations.

+

+ Read +

+ +

Binary Operations

+

Guide to binary operations used in GridWise's scan and reduce primitives. Documents available operations (Add, Min, + Max, Multiply) and their properties. Explains how to implement custom binary operations by extending the binop + interface, including implementation requirements, data type constraints, and validation patterns. Critical for users + who need domain-specific aggregation operations.

+

+ Read +

+ +

Buffer Management

+

Best practices for allocating, managing, and optimizing GPU buffers in GridWise applications. Covers buffer creation + strategies, memory usage patterns, and how to minimize memory allocation overhead. Explains the relationship between + buffer sizes and performance, copy strategies for input/output, and how to handle edge cases with non-aligned input + lengths. Essential for building efficient GridWise applications.

+

+ Read +

+ +

Timing Strategy

+

Detailed explanation of timing mechanisms in GridWise for accurate performance measurement and benchmarking. Covers + both CPU timing (performance.now) and GPU timing (timestamp queries) approaches, their accuracy tradeoffs, and when + to use each. Explains warmup strategies, trial averaging, and how to interpret results across different hardware + configurations for reliable performance comparisons.

+

+ Read +

+ +

Subgroup Strategy

+

Detailed guide to GPU subgroups and their critical role in GridWise primitive performance. Explains what subgroups + are, how different GPU architectures have different subgroup sizes, and the performance benefits of subgroup + operations. Covers GridWise's approach to subgroup detection, optional subgroup acceleration, and fallback + strategies for hardware without subgroup support to maintain broad device compatibility.

+

+ Read +

+ +

Built-ins Strategy

+

Exploration of WebGPU WGSL built-in functions and how GridWise strategically selects and optimizes their use in + primitive implementations. Explains which built-ins provide the best performance for reduction operations, + aggregation patterns, and data movement. Covers vendor-specific optimizations and how to identify when built-in + usage versus hand-tuned WGSL code provides the best performance on different hardware.

+

+ Read +

+ +

WebGPU Object Caching Strategy

+

Comprehensive guide to GridWise's approach for caching and reusing WebGPU objects (compute pipelines, bind groups, + buffer layouts) across multiple invocations. Explains how object caching reduces GPU state setup overhead and + improves throughput for repeated operations. Covers caching strategies for different primitive configurations, + memory management of cached objects, and invalidation patterns for long-running applications.

+

+ Read +

+ +

Writing a WebGPU WGSL Workgroup Reduce Function

+

In-depth tutorial on implementing custom workgroup-level reduce functions in WGSL for integration with GridWise + primitives. Covers reduction patterns, memory synchronization with workgroup barriers, handling of non-power-of-2 + workgroup sizes, and optimization techniques using subgroups where available. Includes complete code examples and + validation strategies for custom reduce operations.

+

+ Read +

+ +
\ No newline at end of file From 84cdc79bba91e6c4062ab1e8f4653c76f0b49d04 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 24 Nov 2025 14:33:17 -0500 Subject: [PATCH 09/17] small changes --- _config.yml | 4 ++-- docs/_includes/header.html | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/_config.yml b/_config.yml index c1c68eb..5a65cf7 100644 --- a/_config.yml +++ b/_config.yml @@ -15,8 +15,6 @@ plugins: header_pages: - about.markdown - - docs/example.html - - docs/docs.html exclude: @@ -25,6 +23,8 @@ exclude: - Gemfile.lock - vendor/ - .bundle/ + - docs/example.html + - docs/docs.html include: - "*.mjs" diff --git a/docs/_includes/header.html b/docs/_includes/header.html index 46810d8..befd9fc 100644 --- a/docs/_includes/header.html +++ b/docs/_includes/header.html @@ -19,7 +19,7 @@
{%- for path in page_paths -%} {%- assign my_page = site.pages | where: "path", path | first -%} - {%- if my_page.title -%} + {%- if my_page.title and my_page.path != "docs/example.html" and my_page.path != "docs/docs.html" -%} {{ my_page.title | escape }} {%- endif -%} {%- endfor -%} From 1f1048afe23e3baf53400ee0f0de7de2b2e97339 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 24 Nov 2025 19:34:11 -0500 Subject: [PATCH 10/17] docs and example file with changes --- .github/workflows/jekyll.yml | 1 - .github/workflows/main.yml | 2 - _config.yml | 2 - docs/_includes/header.html | 4 +- docs/_posts/index.markdown | 104 +++++++++++++++++++++++++++ docs/docs.html | 26 ++++--- docs/{example.html => examples.html} | 67 ++++++++--------- docs/index.markdown | 6 -- 8 files changed, 146 insertions(+), 66 deletions(-) create mode 100644 docs/_posts/index.markdown rename docs/{example.html => examples.html} (80%) delete mode 100644 docs/index.markdown diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml index 4414a3e..72a1885 100644 --- a/.github/workflows/jekyll.yml +++ b/.github/workflows/jekyll.yml @@ -2,7 +2,6 @@ name: Deploy Jekyll site to Pages on: push: - branches: ["main"] workflow_dispatch: permissions: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f5233c1..6d5537d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,9 +2,7 @@ name: Test Jekyll Build on: push: - branches: [ test-css-fix-github-actions ] pull_request: - branches: [ main ] jobs: build: diff --git a/_config.yml b/_config.yml index 5a65cf7..ed3c58a 100644 --- a/_config.yml +++ b/_config.yml @@ -23,8 +23,6 @@ exclude: - Gemfile.lock - vendor/ - .bundle/ - - docs/example.html - - docs/docs.html include: - "*.mjs" diff --git a/docs/_includes/header.html b/docs/_includes/header.html index befd9fc..8662f35 100644 --- a/docs/_includes/header.html +++ b/docs/_includes/header.html @@ -23,8 +23,8 @@ {{ my_page.title | escape }} {%- endif -%} {%- endfor -%} - Example Docs - Read Docs + Example Docs + Read Docs Try Interactive Example
diff --git a/docs/_posts/index.markdown b/docs/_posts/index.markdown new file mode 100644 index 0000000..1e72ecf --- /dev/null +++ b/docs/_posts/index.markdown @@ -0,0 +1,104 @@ +--- +layout: home +permalink: / +--- + + + +Comprehensive guides and technical documentation for Gridwise WebGPU primitives. + +## Architecture + +Overview of Gridwise's system design, module structure, and how primitives are organized for extensibility and performance. Learn about the high-level organization of Gridwise components, including how different primitives (scan, reduce, sort) are implemented as modular, reusable units. Understand the architectural decisions that enable performance optimization while maintaining clean separation of concerns and ease of extension. + +[Read](https://gridwise-webgpu.github.io/gridwise/architecture/){:target="_blank" class="doc-btn"} + +## Primitive Design + +Deep dive into the design principles behind Gridwise primitives with focus on single-pass chained algorithms for sort, scan, and reduce. Explores the tradeoffs between using subgroup instructions for maximum performance versus software emulation for broader compatibility. Covers memory bandwidth considerations, the lookback and fallback optimization techniques, and how to choose between chained algorithms and hybrid approaches for different use cases. + +[Read](https://gridwise-webgpu.github.io/gridwise/primitive-design/){:target="_blank" class="doc-btn"} + +## Scan and Reduce + +Comprehensive guide to scan (prefix sum) and reduce operations in Gridwise. Explains the difference between exclusive scan (first element is identity), inclusive scan (each element includes itself), and reduce (single output value). Covers binary operations (Add, Min, Max), data type support (u32, i32, f32), API usage patterns with code examples, and when to use each variant for optimal performance. + +[Read](https://gridwise-webgpu.github.io/gridwise/scan-and-reduce/){:target="_blank" class="doc-btn"} + +## Sort + +Complete documentation for Gridwise's OneSweepSort implementation. Covers both key-only sorting and key-value pair sorting with full payload support. Explains configurable sort direction (ascending/descending), supported data types, buffer management strategies, and in-place versus temporary buffer approaches. Includes detailed API documentation and performance characteristics across different input sizes and configurations. + +[Read](https://gridwise-webgpu.github.io/gridwise/sort/){:target="_blank" class="doc-btn"} + +## Binary Operations + +Guide to binary operations used in Gridwise's scan and reduce primitives. Documents available operations (Add, Min, Max, Multiply) and their properties. Explains how to implement custom binary operations by extending the binop interface, including implementation requirements, data type constraints, and validation patterns. Critical for users who need domain-specific aggregation operations. + +[Read](https://gridwise-webgpu.github.io/gridwise/binop/){:target="_blank" class="doc-btn"} + +## Buffer Management + +Best practices for allocating, managing, and optimizing GPU buffers in Gridwise applications. Covers buffer creation strategies, memory usage patterns, and how to minimize memory allocation overhead. Explains the relationship between buffer sizes and performance, copy strategies for input/output, and how to handle edge cases with non-aligned input lengths. Essential for building efficient Gridwise applications. + +[Read](https://gridwise-webgpu.github.io/gridwise/buffer/){:target="_blank" class="doc-btn"} + +## Timing Strategy + +Detailed explanation of timing mechanisms in Gridwise for accurate performance measurement and benchmarking. Covers both CPU timing (performance.now) and GPU timing (timestamp queries) approaches, their accuracy tradeoffs, and when to use each. Explains warmup strategies, trial averaging, and how to interpret results across different hardware configurations for reliable performance comparisons. + +[Read](https://gridwise-webgpu.github.io/gridwise/timing-strategy/){:target="_blank" class="doc-btn"} + +## Subgroup Strategy + +Detailed guide to GPU subgroups and their critical role in Gridwise primitive performance. Explains what subgroups are, how different GPU architectures have different subgroup sizes, and the performance benefits of subgroup operations. Covers Gridwise's approach to subgroup detection, optional subgroup acceleration, and fallback strategies for hardware without subgroup support to maintain broad device compatibility. + +[Read](https://gridwise-webgpu.github.io/gridwise/subgroup-strategy/){:target="_blank" class="doc-btn"} + +## Built-ins Strategy + +Exploration of WebGPU WGSL built-in functions and how Gridwise strategically selects and optimizes their use in primitive implementations. Explains which built-ins provide the best performance for reduction operations, aggregation patterns, and data movement. Covers vendor-specific optimizations and how to identify when built-in usage versus hand-tuned WGSL code provides the best performance on different hardware. + +[Read](https://gridwise-webgpu.github.io/gridwise/builtins-strategy/){:target="_blank" class="doc-btn"} + +## WebGPU Object Caching Strategy + +Comprehensive guide to Gridwise's approach for caching and reusing WebGPU objects (compute pipelines, bind groups, buffer layouts) across multiple invocations. Explains how object caching reduces GPU state setup overhead and improves throughput for repeated operations. Covers caching strategies for different primitive configurations, memory management of cached objects, and invalidation patterns for long-running applications. + +[Read](https://gridwise-webgpu.github.io/gridwise/webgpu-object-caching-strategy/){:target="_blank" class="doc-btn"} + +## Writing a WebGPU WGSL Workgroup Reduce Function + +In-depth tutorial on implementing custom workgroup-level reduce functions in WGSL for integration with Gridwise primitives. Covers reduction patterns, memory synchronization with workgroup barriers, handling of non-power-of-2 workgroup sizes, and optimization techniques using subgroups where available. Includes complete code examples and validation strategies for custom reduce operations. + +[Read](https://gridwise-webgpu.github.io/gridwise/writing-a-webgpu-wgsl-workgroup-reduce-function/){:target="_blank" class="doc-btn"} diff --git a/docs/docs.html b/docs/docs.html index 0ac26be..4bc3af1 100644 --- a/docs/docs.html +++ b/docs/docs.html @@ -1,7 +1,7 @@ --- layout: home title: Documentation -permalink: /docs/ +permalink: / --- @@ -42,7 +42,7 @@

Architecture

(scan, reduce, sort) are implemented as modular, reusable units. Understand the architectural decisions that enable performance optimization while maintaining clean separation of concerns and ease of extension.

- Read + Read

Primitive Design

@@ -52,7 +52,7 @@

Primitive Design

optimization techniques, and how to choose between chained algorithms and hybrid approaches for different use cases.

- Read + Read

Scan and Reduce

@@ -61,7 +61,7 @@

Scan and Reduce

Covers binary operations (Add, Min, Max), data type support (u32, i32, f32), API usage patterns with code examples, and when to use each variant for optimal performance.

- Read + Read

Sort

@@ -70,7 +70,7 @@

Sort

types, buffer management strategies, and in-place versus temporary buffer approaches. Includes detailed API documentation and performance characteristics across different input sizes and configurations.

- Read + Read

Binary Operations

@@ -79,7 +79,7 @@

Binary Operations

interface, including implementation requirements, data type constraints, and validation patterns. Critical for users who need domain-specific aggregation operations.

- Read + Read

Buffer Management

@@ -88,7 +88,7 @@

Buffer Management

buffer sizes and performance, copy strategies for input/output, and how to handle edge cases with non-aligned input lengths. Essential for building efficient GridWise applications.

- Read + Read

Timing Strategy

@@ -97,7 +97,7 @@

Timing Strategy

to use each. Explains warmup strategies, trial averaging, and how to interpret results across different hardware configurations for reliable performance comparisons.

- Read + Read

Subgroup Strategy

@@ -106,7 +106,7 @@

Subgroup Strategy

operations. Covers GridWise's approach to subgroup detection, optional subgroup acceleration, and fallback strategies for hardware without subgroup support to maintain broad device compatibility.

- Read + Read

Built-ins Strategy

@@ -115,7 +115,7 @@

Built-ins Strategy

aggregation patterns, and data movement. Covers vendor-specific optimizations and how to identify when built-in usage versus hand-tuned WGSL code provides the best performance on different hardware.

- Read + Read

WebGPU Object Caching Strategy

@@ -124,8 +124,7 @@

WebGPU Object Caching Strategy

improves throughput for repeated operations. Covers caching strategies for different primitive configurations, memory management of cached objects, and invalidation patterns for long-running applications.

- Read + Read

Writing a WebGPU WGSL Workgroup Reduce Function

@@ -134,8 +133,7 @@

Writing a WebGPU WGSL Workgroup Reduce Function

workgroup sizes, and optimization techniques using subgroups where available. Includes complete code examples and validation strategies for custom reduce operations.

- Read + Read


\ No newline at end of file diff --git a/docs/example.html b/docs/examples.html similarity index 80% rename from docs/example.html rename to docs/examples.html index 0130729..cf1946e 100644 --- a/docs/example.html +++ b/docs/examples.html @@ -1,56 +1,43 @@ --- layout: home title: Examples -permalink: /examples/ +permalink: /examples-guide/ --- - -

Interactive examples demonstrating WebGPU primitives and operations.

- +

Explore practical examples demonstrating how to use Gridwise WebGPU primitives for scan, sort, and reduce + operations. Each example includes code snippets, explanations of key concepts, and links to source code and + performance benchmarks.

Scan Example

Compact example that demonstrates exclusive vs inclusive scans using DLDFScan: how to choose scanType, build the @@ -90,9 +77,9 @@

Code Structure

Source Code - Performance + class="doc-btn">Source Code + Performance

Key/Value Sort Demo

@@ -133,9 +120,9 @@

Code Structure

Source Code - Performance + class="doc-btn">Source Code + Performance

Reduce Operation

@@ -175,9 +162,11 @@

Code Structure

Source Code - Performance + class="doc-btn">Source Code + Performance +

+class="doc-btn">Performance


\ No newline at end of file diff --git a/docs/index.markdown b/docs/index.markdown deleted file mode 100644 index d57b7a8..0000000 --- a/docs/index.markdown +++ /dev/null @@ -1,6 +0,0 @@ ---- -layout: home -permalink: / ---- - -Gridwise provides WebGPU compute primitives in JavaScript. Its current supported primitives are reduce, scan, and sort, and it is built atop infrastructure to make the development and performance analysis of future primitives as straightforward as possible. Gridwise was developed during a sabbatical year at Google from August 2024--August 2025. From 2d25b13770fa58287c06543038206e5cb5fa2fab Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 24 Nov 2025 19:37:56 -0500 Subject: [PATCH 11/17] small changes(location) --- docs/_includes/header.html | 6 +++--- docs/docs.html | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/_includes/header.html b/docs/_includes/header.html index 8662f35..1fac2cb 100644 --- a/docs/_includes/header.html +++ b/docs/_includes/header.html @@ -2,7 +2,7 @@
{%- assign default_paths = site.pages | map: "path" -%} {%- assign page_paths = site.header_pages | default: default_paths -%} - + {%- if page_paths -%}
diff --git a/docs/docs.html b/docs/docs.html index 4bc3af1..658d8dc 100644 --- a/docs/docs.html +++ b/docs/docs.html @@ -115,7 +115,7 @@

Built-ins Strategy

aggregation patterns, and data movement. Covers vendor-specific optimizations and how to identify when built-in usage versus hand-tuned WGSL code provides the best performance on different hardware.

- Read + Read

WebGPU Object Caching Strategy

@@ -124,7 +124,7 @@

WebGPU Object Caching Strategy

improves throughput for repeated operations. Covers caching strategies for different primitive configurations, memory management of cached objects, and invalidation patterns for long-running applications.

- Read + Read

Writing a WebGPU WGSL Workgroup Reduce Function

@@ -133,7 +133,7 @@

Writing a WebGPU WGSL Workgroup Reduce Function

workgroup sizes, and optimization techniques using subgroups where available. Includes complete code examples and validation strategies for custom reduce operations.

- Read + Read


\ No newline at end of file From a89732df85f3eb4ec6abd0c7528648d8ed036957 Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 24 Nov 2025 19:39:51 -0500 Subject: [PATCH 12/17] workflow issue --- .github/workflows/main.yml | 5 +---- docs/_includes/header.html | 6 +++--- docs/docs.html | 22 +++++++++++----------- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6d5537d..5b39e4f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,11 +16,8 @@ jobs: with: ruby-version: '3.1' bundler-cache: true - working-directory: ./docs - name: Build site - run: | - cd docs - bundle exec jekyll build + run: bundle exec jekyll build env: JEKYLL_ENV: production diff --git a/docs/_includes/header.html b/docs/_includes/header.html index 1fac2cb..8662f35 100644 --- a/docs/_includes/header.html +++ b/docs/_includes/header.html @@ -2,7 +2,7 @@
{%- assign default_paths = site.pages | map: "path" -%} {%- assign page_paths = site.header_pages | default: default_paths -%} - + {%- if page_paths -%}
diff --git a/docs/docs.html b/docs/docs.html index 658d8dc..aeacdcb 100644 --- a/docs/docs.html +++ b/docs/docs.html @@ -42,7 +42,7 @@

Architecture

(scan, reduce, sort) are implemented as modular, reusable units. Understand the architectural decisions that enable performance optimization while maintaining clean separation of concerns and ease of extension.

- Read + Read

Primitive Design

@@ -52,7 +52,7 @@

Primitive Design

optimization techniques, and how to choose between chained algorithms and hybrid approaches for different use cases.

- Read + Read

Scan and Reduce

@@ -61,7 +61,7 @@

Scan and Reduce

Covers binary operations (Add, Min, Max), data type support (u32, i32, f32), API usage patterns with code examples, and when to use each variant for optimal performance.

- Read + Read

Sort

@@ -70,7 +70,7 @@

Sort

types, buffer management strategies, and in-place versus temporary buffer approaches. Includes detailed API documentation and performance characteristics across different input sizes and configurations.

- Read + Read

Binary Operations

@@ -79,7 +79,7 @@

Binary Operations

interface, including implementation requirements, data type constraints, and validation patterns. Critical for users who need domain-specific aggregation operations.

- Read + Read

Buffer Management

@@ -88,7 +88,7 @@

Buffer Management

buffer sizes and performance, copy strategies for input/output, and how to handle edge cases with non-aligned input lengths. Essential for building efficient GridWise applications.

- Read + Read

Timing Strategy

@@ -97,7 +97,7 @@

Timing Strategy

to use each. Explains warmup strategies, trial averaging, and how to interpret results across different hardware configurations for reliable performance comparisons.

- Read + Read

Subgroup Strategy

@@ -106,7 +106,7 @@

Subgroup Strategy

operations. Covers GridWise's approach to subgroup detection, optional subgroup acceleration, and fallback strategies for hardware without subgroup support to maintain broad device compatibility.

- Read + Read

Built-ins Strategy

@@ -115,7 +115,7 @@

Built-ins Strategy

aggregation patterns, and data movement. Covers vendor-specific optimizations and how to identify when built-in usage versus hand-tuned WGSL code provides the best performance on different hardware.

- Read + Read

WebGPU Object Caching Strategy

@@ -124,7 +124,7 @@

WebGPU Object Caching Strategy

improves throughput for repeated operations. Covers caching strategies for different primitive configurations, memory management of cached objects, and invalidation patterns for long-running applications.

- Read + Read

Writing a WebGPU WGSL Workgroup Reduce Function

@@ -133,7 +133,7 @@

Writing a WebGPU WGSL Workgroup Reduce Function

workgroup sizes, and optimization techniques using subgroups where available. Includes complete code examples and validation strategies for custom reduce operations.

- Read + Read


\ No newline at end of file From d996066259d4a9e4e72d569f753dde14e95af30d Mon Sep 17 00:00:00 2001 From: jayshah1819 Date: Mon, 24 Nov 2025 19:42:11 -0500 Subject: [PATCH 13/17] Fix Jekyll workflow and remove spaces from relative URLs --- Gemfile.lock | 314 +++++++++++++++ _site/about/index.html | 223 +++++++---- _site/architecture/index.html | 211 ++++++++++ _site/{gridwise => }/binop/index.html | 227 +++++++---- _site/{gridwise => }/buffer/index.html | 229 +++++++---- _site/builtins-strategy/index.html | 181 +++++++++ .../index.html | 103 ++--- _site/docs/2025/08/12/architecture/index.html | 103 ++--- .../webgpu-object-caching-strategy/index.html | 103 ++--- .../2025/08/18/subgroup-strategy/index.html | 103 ++--- .../2025/08/22/builtins-strategy/index.html | 103 ++--- _site/docs/2025/09/16/binop/index.html | 103 ++--- _site/docs/2025/09/16/buffer/index.html | 103 ++--- .../2025/09/16/scan-and-reduce/index.html | 103 ++--- _site/docs/2025/09/16/sort/index.html | 103 ++--- _site/docs/Gemfile | 32 -- README.md => _site/docs/README.md | 0 _site/docs/index.html | 274 +++++++------ _site/example/index.html | 202 ---------- _site/examples-guide/index.html | 316 +++++++++++++++ _site/feed.xml | 2 +- _site/gridwise/architecture/index.html | 134 ------- _site/gridwise/builtins-strategy/index.html | 104 ----- _site/gridwise/index.html | 104 ----- _site/gridwise/primitive-design/index.html | 182 --------- _site/index.html | 377 ++++++++++++------ _site/primitive-design/index.html | 259 ++++++++++++ _site/primitive.mjs | 15 +- .../{gridwise => }/scan-and-reduce/index.html | 229 +++++++---- _site/scandldf.mjs | 11 +- _site/{gridwise => }/sort/index.html | 229 +++++++---- .../subgroup-strategy/index.html | 229 +++++++---- .../{gridwise => }/timing-strategy/index.html | 229 +++++++---- .../webgpu-object-caching-strategy/index.html | 229 +++++++---- .../index.html | 229 +++++++---- {_site => docs}/README.md | 0 docs/_includes/header.html | 6 +- 37 files changed, 3452 insertions(+), 2252 deletions(-) create mode 100644 Gemfile.lock create mode 100644 _site/architecture/index.html rename _site/{gridwise => }/binop/index.html (64%) rename _site/{gridwise => }/buffer/index.html (68%) create mode 100644 _site/builtins-strategy/index.html delete mode 100644 _site/docs/Gemfile rename README.md => _site/docs/README.md (100%) delete mode 100644 _site/example/index.html create mode 100644 _site/examples-guide/index.html delete mode 100644 _site/gridwise/architecture/index.html delete mode 100644 _site/gridwise/builtins-strategy/index.html delete mode 100644 _site/gridwise/index.html delete mode 100644 _site/gridwise/primitive-design/index.html create mode 100644 _site/primitive-design/index.html rename _site/{gridwise => }/scan-and-reduce/index.html (72%) rename _site/{gridwise => }/sort/index.html (68%) rename _site/{gridwise => }/subgroup-strategy/index.html (64%) rename _site/{gridwise => }/timing-strategy/index.html (50%) rename _site/{gridwise => }/webgpu-object-caching-strategy/index.html (66%) rename _site/{gridwise => }/writing-a-webgpu-wgsl-workgroup-reduce-function/index.html (76%) rename {_site => docs}/README.md (100%) diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..c26f9cd --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,314 @@ +GEM + remote: https://rubygems.org/ + specs: + activesupport (8.1.1) + base64 + bigdecimal + concurrent-ruby (~> 1.0, >= 1.3.1) + connection_pool (>= 2.2.5) + drb + i18n (>= 1.6, < 2) + json + logger (>= 1.4.2) + minitest (>= 5.1) + securerandom (>= 0.3) + tzinfo (~> 2.0, >= 2.0.5) + uri (>= 0.13.1) + addressable (2.8.7) + public_suffix (>= 2.0.2, < 7.0) + base64 (0.3.0) + bigdecimal (3.3.1) + coffee-script (2.4.1) + coffee-script-source + execjs + coffee-script-source (1.12.2) + colorator (1.1.0) + commonmarker (0.23.12) + concurrent-ruby (1.3.5) + connection_pool (2.5.4) + csv (3.3.5) + dnsruby (1.73.1) + base64 (>= 0.2) + logger (~> 1.6) + simpleidn (~> 0.2.1) + drb (2.2.3) + em-websocket (0.5.3) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0) + ethon (0.15.0) + ffi (>= 1.15.0) + eventmachine (1.2.7) + execjs (2.10.0) + faraday (2.14.0) + faraday-net_http (>= 2.0, < 3.5) + json + logger + faraday-net_http (3.4.2) + net-http (~> 0.5) + ffi (1.17.2-aarch64-linux-gnu) + ffi (1.17.2-aarch64-linux-musl) + ffi (1.17.2-arm-linux-gnu) + ffi (1.17.2-arm-linux-musl) + ffi (1.17.2-arm64-darwin) + ffi (1.17.2-x86_64-darwin) + ffi (1.17.2-x86_64-linux-gnu) + ffi (1.17.2-x86_64-linux-musl) + forwardable-extended (2.6.0) + gemoji (4.1.0) + github-pages (232) + github-pages-health-check (= 1.18.2) + jekyll (= 3.10.0) + jekyll-avatar (= 0.8.0) + jekyll-coffeescript (= 1.2.2) + jekyll-commonmark-ghpages (= 0.5.1) + jekyll-default-layout (= 0.1.5) + jekyll-feed (= 0.17.0) + jekyll-gist (= 1.5.0) + jekyll-github-metadata (= 2.16.1) + jekyll-include-cache (= 0.2.1) + jekyll-mentions (= 1.6.0) + jekyll-optional-front-matter (= 0.3.2) + jekyll-paginate (= 1.1.0) + jekyll-readme-index (= 0.3.0) + jekyll-redirect-from (= 0.16.0) + jekyll-relative-links (= 0.6.1) + jekyll-remote-theme (= 0.4.3) + jekyll-sass-converter (= 1.5.2) + jekyll-seo-tag (= 2.8.0) + jekyll-sitemap (= 1.4.0) + jekyll-swiss (= 1.0.0) + jekyll-theme-architect (= 0.2.0) + jekyll-theme-cayman (= 0.2.0) + jekyll-theme-dinky (= 0.2.0) + jekyll-theme-hacker (= 0.2.0) + jekyll-theme-leap-day (= 0.2.0) + jekyll-theme-merlot (= 0.2.0) + jekyll-theme-midnight (= 0.2.0) + jekyll-theme-minimal (= 0.2.0) + jekyll-theme-modernist (= 0.2.0) + jekyll-theme-primer (= 0.6.0) + jekyll-theme-slate (= 0.2.0) + jekyll-theme-tactile (= 0.2.0) + jekyll-theme-time-machine (= 0.2.0) + jekyll-titles-from-headings (= 0.5.3) + jemoji (= 0.13.0) + kramdown (= 2.4.0) + kramdown-parser-gfm (= 1.1.0) + liquid (= 4.0.4) + mercenary (~> 0.3) + minima (= 2.5.1) + nokogiri (>= 1.16.2, < 2.0) + rouge (= 3.30.0) + terminal-table (~> 1.4) + webrick (~> 1.8) + github-pages-health-check (1.18.2) + addressable (~> 2.3) + dnsruby (~> 1.60) + octokit (>= 4, < 8) + public_suffix (>= 3.0, < 6.0) + typhoeus (~> 1.3) + html-pipeline (2.14.3) + activesupport (>= 2) + nokogiri (>= 1.4) + http_parser.rb (0.8.0) + i18n (1.14.7) + concurrent-ruby (~> 1.0) + jekyll (3.10.0) + addressable (~> 2.4) + colorator (~> 1.0) + csv (~> 3.0) + em-websocket (~> 0.5) + i18n (>= 0.7, < 2) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 2.0) + kramdown (>= 1.17, < 3) + liquid (~> 4.0) + mercenary (~> 0.3.3) + pathutil (~> 0.9) + rouge (>= 1.7, < 4) + safe_yaml (~> 1.0) + webrick (>= 1.0) + jekyll-avatar (0.8.0) + jekyll (>= 3.0, < 5.0) + jekyll-coffeescript (1.2.2) + coffee-script (~> 2.2) + coffee-script-source (~> 1.12) + jekyll-commonmark (1.4.0) + commonmarker (~> 0.22) + jekyll-commonmark-ghpages (0.5.1) + commonmarker (>= 0.23.7, < 1.1.0) + jekyll (>= 3.9, < 4.0) + jekyll-commonmark (~> 1.4.0) + rouge (>= 2.0, < 5.0) + jekyll-default-layout (0.1.5) + jekyll (>= 3.0, < 5.0) + jekyll-feed (0.17.0) + jekyll (>= 3.7, < 5.0) + jekyll-gist (1.5.0) + octokit (~> 4.2) + jekyll-github-metadata (2.16.1) + jekyll (>= 3.4, < 5.0) + octokit (>= 4, < 7, != 4.4.0) + jekyll-include-cache (0.2.1) + jekyll (>= 3.7, < 5.0) + jekyll-mentions (1.6.0) + html-pipeline (~> 2.3) + jekyll (>= 3.7, < 5.0) + jekyll-optional-front-matter (0.3.2) + jekyll (>= 3.0, < 5.0) + jekyll-paginate (1.1.0) + jekyll-readme-index (0.3.0) + jekyll (>= 3.0, < 5.0) + jekyll-redirect-from (0.16.0) + jekyll (>= 3.3, < 5.0) + jekyll-relative-links (0.6.1) + jekyll (>= 3.3, < 5.0) + jekyll-remote-theme (0.4.3) + addressable (~> 2.0) + jekyll (>= 3.5, < 5.0) + jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) + rubyzip (>= 1.3.0, < 3.0) + jekyll-sass-converter (1.5.2) + sass (~> 3.4) + jekyll-seo-tag (2.8.0) + jekyll (>= 3.8, < 5.0) + jekyll-sitemap (1.4.0) + jekyll (>= 3.7, < 5.0) + jekyll-swiss (1.0.0) + jekyll-theme-architect (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-cayman (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-dinky (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-hacker (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-leap-day (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-merlot (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-midnight (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-minimal (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-modernist (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-primer (0.6.0) + jekyll (> 3.5, < 5.0) + jekyll-github-metadata (~> 2.9) + jekyll-seo-tag (~> 2.0) + jekyll-theme-slate (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-tactile (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-time-machine (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-titles-from-headings (0.5.3) + jekyll (>= 3.3, < 5.0) + jekyll-watch (2.2.1) + listen (~> 3.0) + jemoji (0.13.0) + gemoji (>= 3, < 5) + html-pipeline (~> 2.2) + jekyll (>= 3.0, < 5.0) + json (2.16.0) + kramdown (2.4.0) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.4) + listen (3.9.0) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) + logger (1.7.0) + mercenary (0.3.6) + minima (2.5.1) + jekyll (>= 3.5, < 5.0) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + minitest (5.26.2) + net-http (0.8.0) + uri (>= 0.11.1) + nokogiri (1.18.10-aarch64-linux-gnu) + racc (~> 1.4) + nokogiri (1.18.10-aarch64-linux-musl) + racc (~> 1.4) + nokogiri (1.18.10-arm-linux-gnu) + racc (~> 1.4) + nokogiri (1.18.10-arm-linux-musl) + racc (~> 1.4) + nokogiri (1.18.10-arm64-darwin) + racc (~> 1.4) + nokogiri (1.18.10-x86_64-darwin) + racc (~> 1.4) + nokogiri (1.18.10-x86_64-linux-gnu) + racc (~> 1.4) + nokogiri (1.18.10-x86_64-linux-musl) + racc (~> 1.4) + octokit (4.25.1) + faraday (>= 1, < 3) + sawyer (~> 0.9) + pathutil (0.16.2) + forwardable-extended (~> 2.6) + public_suffix (5.1.1) + racc (1.8.1) + rb-fsevent (0.11.2) + rb-inotify (0.11.1) + ffi (~> 1.0) + rexml (3.4.4) + rouge (3.30.0) + rubyzip (2.4.1) + safe_yaml (1.0.5) + sass (3.7.4) + sass-listen (~> 4.0.0) + sass-listen (4.0.0) + rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + sawyer (0.9.3) + addressable (>= 2.3.5) + faraday (>= 0.17.3, < 3) + securerandom (0.4.1) + simpleidn (0.2.3) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) + typhoeus (1.5.0) + ethon (>= 0.9.0, < 0.16.0) + tzinfo (2.0.6) + concurrent-ruby (~> 1.0) + unicode-display_width (1.8.0) + uri (1.1.1) + webrick (1.9.1) + +PLATFORMS + aarch64-linux-gnu + aarch64-linux-musl + arm-linux-gnu + arm-linux-musl + arm64-darwin + x86_64-darwin + x86_64-linux-gnu + x86_64-linux-musl + +DEPENDENCIES + github-pages (~> 232) + http_parser.rb (~> 0.6.0) + jekyll-feed + jekyll-remote-theme + tzinfo (>= 1, < 3) + tzinfo-data + wdm (~> 0.1) + +BUNDLED WITH + 2.7.2 diff --git a/_site/about/index.html b/_site/about/index.html index 1eef0e3..72b534f 100644 --- a/_site/about/index.html +++ b/_site/about/index.html @@ -1,8 +1,8 @@ - - - + + + About | Gridwise @@ -18,89 +18,166 @@ - - - -
-
+
+ + + + +
+
+

About

+
-
-

About

-
- -
-

Gridwise is a JavaScript library for WebGPU compute primitives including scan, reduce, and sort.

+
+

Gridwise is a JavaScript library for WebGPU compute primitives including scan, reduce, and sort.

View the source code on GitHub.

-
- -
- -
-
+ + + + -
- + -
+
+
+ - +
+ - + + +
-
+ - +
diff --git a/_site/architecture/index.html b/_site/architecture/index.html new file mode 100644 index 0000000..6e9a3ff --- /dev/null +++ b/_site/architecture/index.html @@ -0,0 +1,211 @@ + + + + + +Gridwise Architecture | Gridwise + + + + + + + + + + + + + + + + +
+
+
+ + + + +
+
+
+

Gridwise Architecture

+
+ +
+

The primary goal of Gridwise is to deliver best-in-class performance on WebGPU compute primitives while minimizing the amount of code that must be written by the library user. Ideally, a Gridwise user will declare and then execute a primitive and Gridwise will handle all low-level details of setting up and calling the necessary WebGPU primitives.

+ +

Gridwise Abstraction

+ +

Primitive

+ +

The primary abstraction in Gridwise is a Primitive. Primitives are instances of a primitive-specific subclass of a JavaScript Primitive class. They have an execute member function, and the typical usage is to instantiate a primitive using new() and then call execute() on that primitive. Both instantiation and execution have numerous options. As an example, let’s look at a scan primitive, which is an instance of the DLDFScan class (“decoupled-lookback, decoupled-fallback scan”):

+ +
const datatype = "u32";
+const dldfscanPrimitive = new DLDFScan({
+  device,
+  binop: new BinOpAdd({ datatype }),
+  type: "exclusive", // "exclusive" is the default
+  datatype,
+});
+
+await dldfscanPrimitive.execute({
+  inputBuffer,
+  outputBuffer,
+});
+
+ +

This particular primitive is parameterized by its datatype (in this case, “u32”), by the binary operation (“binop”) performed by the scan (in this case, addition on u32 data), and by the scan operation (exclusive or inclusive).

+ +

When the scan is actually executed, its arguments are buffers that store its input and output. This particular primitive has named arguments of an input buffer named inputBuffer and an output buffer named outputBuffer. These buffers can be WebGPU buffers of type GPUBuffer but can also be Buffers, described next.

+ +

The primitive performs all necessary WebGPU operations, including (optionally) setting up an encoder, building up and setting WebGPU layouts and pipelines, running the pipeline, and optionally recording GPU-side or CPU-side timing. It also caches WebGPU layouts and pipelines to avoid the expense of recreating them if they have already been created.

+ +

Buffer

+ +

One of the challenges of writing a primitive library is handling data, which may be stored on the CPU (in a JavaScript typed array) or on the GPU (as a WebGPU GPUBuffer). Gridwise’s Buffer class attempts to abstract away the details of separately managing CPU and GPU buffer data structures with one unified data structure that stores, and moves data between, both. This data structure has grown organically to handle many use cases and deserves more focus by future developers as a principled data structure in WebGPU programming.

+ +
+
+
+
+ + + +
+
+ + +
+ + + + +
+ +
+ + diff --git a/_site/gridwise/binop/index.html b/_site/binop/index.html similarity index 64% rename from _site/gridwise/binop/index.html rename to _site/binop/index.html index a31180c..2c5d63e 100644 --- a/_site/gridwise/binop/index.html +++ b/_site/binop/index.html @@ -1,64 +1,118 @@ - - - + + + Gridwise’s Binary Operator Class | Gridwise - - + + +{"@context":"https://schema.org","@type":"WebPage","description":"Understanding the BinOp class that represents monoids with binary operations, datatypes, and identity elements for use in primitives.","headline":"Gridwise’s Binary Operator Class","url":"http://localhost:4000/gridwise/binop/"} - - - -
-
+ +
+
+

Gridwise's Binary Operator Class

+
-
-

Gridwise's Binary Operator Class

-
- -
-

Gridwise’s binary operator class is called binop. This class represents a monoid, which has as its constituent parts a binary operation, a datatype for the data on which the operator is applied, and an identity element. (If we call the identity element I and the operator op, then x = I op x. For instance, addition’s identity is zero, and multiplication’s is one.) In Gridwise, we package these elements into an instance of a JS class, BinOp. This class then defines a number of objects that are used in WGSL code generation and CPU correctness checking.

+
+

Gridwise’s binary operator class is called binop. This class represents a monoid, which has as its constituent parts a binary operation, a datatype for the data on which the operator is applied, and an identity element. (If we call the identity element I and the operator op, then x = I op x. For instance, addition’s identity is zero, and multiplication’s is one.) In Gridwise, we package these elements into an instance of a JS class, BinOp. This class then defines a number of objects that are used in WGSL code generation and CPU correctness checking.

BinOp is implemented in the source file binop.md. We specialize BinOp to particular operations (e.g., Add) and then further specialize it with a datatype. Many Gridwise primitives require a BinOp argument and the common use will be something like:

@@ -135,45 +189,68 @@

What does a BinOp p }

-
- - - - -
+ + + + -
- + -
+
+
+ - +
+ - + + +
-
+ - +
diff --git a/_site/gridwise/buffer/index.html b/_site/buffer/index.html similarity index 68% rename from _site/gridwise/buffer/index.html rename to _site/buffer/index.html index 9897dda..f91944e 100644 --- a/_site/gridwise/buffer/index.html +++ b/_site/buffer/index.html @@ -1,64 +1,118 @@ - - - + + + Gridwise’s Buffer Class | Gridwise - - + + +{"@context":"https://schema.org","@type":"WebPage","description":"The Buffer class encapsulates data that spans both CPU (typed arrays) and GPU (GPUBuffer), providing a unified interface for data management.","headline":"Gridwise’s Buffer Class","url":"http://localhost:4000/gridwise/buffer/"} - - - -
-
+
+ + + + +
+
+

Gridwise's Buffer Class

+
-
-

Gridwise's Buffer Class

-
- -
-

During Gridwise’s development, we found a need to encapsulate the concept of a single wad of data that spans CPU and GPU. We call this class Buffer. It contains both a CPU-side JS typed array and a GPU-side GPUBuffer. The abstraction is that these two objects are (roughly) consistent with each other (they are not meant to store two logically different objects).

+
+

During Gridwise’s development, we found a need to encapsulate the concept of a single wad of data that spans CPU and GPU. We call this class Buffer. It contains both a CPU-side JS typed array and a GPU-side GPUBuffer. The abstraction is that these two objects are (roughly) consistent with each other (they are not meant to store two logically different objects).

We believe this is an object whose design could be revisited and improved, because it is generally useful in WebGPU primitive development and more generally across WebGPU development. We welcome a redesign. For that purpose, we list our use cases:

@@ -200,45 +254,68 @@

device
  • get: Returns the associated GPUDevice.
  • -

    - -
    - -
    -
    + + + + -
    - + -
    +
    +
    + - +
    + - + + +
    -
    + - +
    diff --git a/_site/builtins-strategy/index.html b/_site/builtins-strategy/index.html new file mode 100644 index 0000000..d193aff --- /dev/null +++ b/_site/builtins-strategy/index.html @@ -0,0 +1,181 @@ + + + + + +Gridwise WebGPU @builtins Strategy | Gridwise + + + + + + + + + + + + + + + + +
    +
    + + + + +
    +
    + + +
    + + + + +
    + +
    + + diff --git a/_site/docs/2025/04/08/writing-a-webgpu-wgsl-workgroup-reduce-function/index.html b/_site/docs/2025/04/08/writing-a-webgpu-wgsl-workgroup-reduce-function/index.html index f3fbd13..a2b71e8 100644 --- a/_site/docs/2025/04/08/writing-a-webgpu-wgsl-workgroup-reduce-function/index.html +++ b/_site/docs/2025/04/08/writing-a-webgpu-wgsl-workgroup-reduce-function/index.html @@ -1,8 +1,8 @@ - - - + + + Abstraction Challenges in Writing a WebGPU/WGSL Workgroup Reduce Function | Gridwise @@ -19,38 +19,31 @@ - - -