Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
d0ac473
refactor(gemma4): delete all Gemma 2/3/3n legacy code
cofin Apr 2, 2026
614f6f3
feat(gemma4): add Gemma4Variant enum and config.json-based detection
cofin Apr 2, 2026
7bed754
feat(gemma4): update config defaults and hub for Gemma 4
cofin Apr 2, 2026
9b318f8
feat(gemma4): new chat template system with system prompt support
cofin Apr 2, 2026
dfb965c
chore(gemma4): clean all remaining Gemma 2/3/3n references
cofin Apr 2, 2026
68e567a
test(gemma4): add comprehensive test suite for Chapter 1
cofin Apr 2, 2026
3ee7366
feat(gemma4): replace GCS download backend with HuggingFace HTTP
cofin Apr 2, 2026
ab17c27
feat(gemma4): add bf16-to-f32 conversion in safetensors loader
cofin Apr 2, 2026
71561c8
feat(gemma4): tokenizer resolution and config.json validation
cofin Apr 2, 2026
177ebe5
test(gemma4): add comprehensive test suite for Chapter 2
cofin Apr 2, 2026
8fd00fe
chore(checkpoint): Chapter 2 (gemma4-delivery) complete
cofin Apr 2, 2026
b55c987
feat(gemma4): hybrid sliding-window + full global attention engine
cofin Apr 2, 2026
c6f32fe
chore(checkpoint): Chapter 3 (gemma4-attention) complete
cofin Apr 2, 2026
6b6e57c
feat(gemma4): rewrite FFI boundary for Gemma 4 forward path (Ch4)
cofin Apr 3, 2026
ca11e58
fix(tests): remove deleted Mojo test files from test runner
cofin Apr 3, 2026
577942c
chore(checkpoint): Chapter 4 (gemma4-dense-forward) complete
cofin Apr 3, 2026
8658969
feat(gemma4): add VisionLayerWeights + VisionModelWeights structs (Ch…
cofin Apr 3, 2026
b14cbbc
feat(gemma4): add gelu + average_pool_2d ops for vision encoder (Ch5 …
cofin Apr 3, 2026
daa050f
feat(gemma4): variable-resolution image preprocessing for SigLIP (Ch5…
cofin Apr 3, 2026
cb3abbe
feat(gemma4): video frame extraction via ffmpeg subprocess (Ch5 Task …
cofin Apr 3, 2026
545fb59
feat(gemma4): bidirectional vision attention in layers.mojo (Ch5 Task…
cofin Apr 3, 2026
cec2e6e
feat(gemma4): vision encoder forward pass with GELU MLP and pooling (…
cofin Apr 3, 2026
c00625c
feat(gemma4): vision weight building in core.mojo (Ch5 Task 5.2)
cofin Apr 3, 2026
5569744
feat(gemma4): process_image + step_with_embedding FFI entrypoints (Ch…
cofin Apr 3, 2026
0ec1e31
feat(gemma4): wire vision config from config.json (Ch5 Task 5.10)
cofin Apr 3, 2026
959c163
feat(gemma4): token merging — inject vision embeddings during prefill…
cofin Apr 3, 2026
f8a6d33
fix(gemma4): make process_image concrete in CoreBackend (Ch5 Task 5.11)
cofin Apr 3, 2026
e26ea62
chore(checkpoint): Chapter 5 (gemma4-vision) complete
cofin Apr 3, 2026
379e65d
feat(gemma4): Ch6 PLE/shared-KV/double-wide + Ch7 MoE structs/forward…
cofin Apr 3, 2026
c26cdef
chore(checkpoint): Chapters 5-7 (vision + E2B/E4B + MoE) complete
cofin Apr 3, 2026
bdf0d64
feat(gemma4): mel spectrogram extraction with numpy.fft only (Ch6 Tas…
cofin Apr 3, 2026
c969f0d
feat(gemma4): AudioHydrator + AudioInput in hydration.py (Ch6 Task 6.9)
cofin Apr 3, 2026
66af24c
feat(gemma4): AudioTowerWeights + forward_audio_encoder (Ch6 Task 6.10)
cofin Apr 3, 2026
d7b94c2
feat(gemma4): audio placeholder merging + process_audio FFI (Ch6 Task…
cofin Apr 3, 2026
f3a67eb
chore(checkpoint): Audio tower complete (Ch6 Tasks 6.8-6.11)
cofin Apr 3, 2026
74fc44f
fix(gemma4): wire PLE/MoE/audio in core.mojo — weight building + step…
cofin Apr 3, 2026
a9b2834
chore(gemma4): sync flow state — mark Ch5/Ch6/Ch7 complete
cofin Apr 3, 2026
ff88919
fix: remove .agents/ from git tracking (should be gitignored)
cofin Apr 3, 2026
e1cfc38
fix(gemma4): Mojo 0.26.3 nightly compatibility
cofin Apr 3, 2026
c829a46
fix(gemma4): update mblack and mojo versions to nightly builds
cofin Apr 3, 2026
2b18c09
chore(release): bump version to 0.4.0 and update dependencies
cofin Apr 3, 2026
584664a
feat: Refactor embedding models and update imports
cofin Apr 3, 2026
76ae965
fix: update type hints and improve type safety across multiple modules
cofin Apr 3, 2026
2fd0681
Refactor function definitions from 'fn' to 'def' across multiple files
cofin Apr 3, 2026
7ba1c29
feat: update references from EmbeddingModel to SyncEmbeddingModel acr…
cofin Apr 3, 2026
dbe1a68
feat(gpu): add GPUContext struct with DeviceContext lifecycle
cofin Apr 3, 2026
7a97f3d
feat(gpu): add WeightStage for layer-by-layer weight streaming
cofin Apr 3, 2026
537b6af
feat(gpu): add PersistentBuffers for session-resident embed/lm_head/norm
cofin Apr 3, 2026
aa9bc29
feat(gpu): add GPUKVCache with device-resident K/V buffers
cofin Apr 3, 2026
16ede03
feat(gpu): add GPUScratch for device-resident scratch buffer
cofin Apr 3, 2026
258f818
feat(gpu): wire GPU init path into core.mojo model initialization
cofin Apr 3, 2026
b34cf13
feat(gpu): add GPU cleanup path in free_arena_mojo
cofin Apr 3, 2026
27259bc
feat(gpu): add layer weight upload functions for dense, MoE, and vision
cofin Apr 3, 2026
4416fb1
chore(checkpoint): Chapter 1 gpu-memory-infra complete
cofin Apr 3, 2026
0dd7f07
feat(gpu): add ComputeBackend trait and CPUBackend struct
cofin Apr 3, 2026
ea3b498
feat(gpu): add element-wise GPU kernels (gelu, geglu, rope)
cofin Apr 4, 2026
fed667e
feat(gpu): add reduction GPU kernels (softmax, rms_norm)
cofin Apr 4, 2026
b6da203
feat(gpu): add matmul GPU kernels (vec, batched, int8)
cofin Apr 4, 2026
6b90ac0
feat(gpu): add specialized GPU kernels (average_pool_2d, top_k)
cofin Apr 4, 2026
16a6567
feat(gpu): add GPUBackend struct with kernel launch methods
cofin Apr 4, 2026
3cb9133
chore(checkpoint): Chapter 2 gpu-compute-kernels complete
cofin Apr 4, 2026
286f253
fix: use std.testing import instead of deprecated bare import
cofin Apr 4, 2026
a232fcf
chore(revise): gemma4-attention - modernization for Mojo Nightly 24.x
cofin Apr 4, 2026
396b9f6
chore(mojo): modernize syntax to Mojo 24.x standards
cofin Apr 4, 2026
011f3dc
feat: implement GPU-accelerated attention layers and add correspondin…
cofin Apr 5, 2026
2fbf18c
refactor: implement ComputeBackend trait for GPUBackend to enable uni…
cofin Apr 5, 2026
f17d1be
refactor: decouple persistent GPU buffer management by introducing a …
cofin Apr 5, 2026
32082de
feat: complete GPU forward paths with weight streaming and MoE expert…
cofin Apr 5, 2026
036cff0
feat(gpu): add weight streaming to vision/audio encoders and copy_sta…
cofin Apr 5, 2026
e4529f8
refactor: reformat GPU operation kernel calls and method signatures f…
cofin Apr 5, 2026
b3867ad
chore(checkpoint): Chapter 4 gpu-forward-paths complete
cofin Apr 5, 2026
31a525d
feat(gpu): add GPU dispatch for encoders, embeddings, and cache reset
cofin Apr 6, 2026
17ff5f8
feat: GPU integration for Python telemetry, CI guards, benchmarks, an…
cofin Apr 6, 2026
1aea5d1
chore(checkpoint): Chapter 5 gpu-integration-dispatch complete
cofin Apr 6, 2026
94b16bc
feat(ci): add perf-benchmark workflow for automated throughput tracking
cofin Apr 6, 2026
dce5c77
docs: note automated CI benchmark workflow in performance-baselines
cofin Apr 6, 2026
0b97850
fix: resolve ruff lint warnings and apply mojo format
cofin Apr 6, 2026
4e35e1f
chore: pin python version to 3.12 in makefile installation process
cofin Apr 6, 2026
45721b9
fix(mo): resolve mojo test failures after nightly upgrade
cofin Apr 6, 2026
29212ba
fix(mo): gate GPU branches with comptime if has_accelerator() to fix CI
cofin Apr 7, 2026
e601963
fix(mo): resolve GPU context key mismatches in core.mojo
cofin Apr 7, 2026
File filter

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
39 changes: 39 additions & 0 deletions .github/workflows/perf-benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: Performance Benchmark

on:
push:
branches:
- main
pull_request:

jobs:
benchmark:
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- uses: actions/checkout@v5

- name: Install uv
uses: astral-sh/setup-uv@v7

- name: Set up Python
run: uv python install 3.12

- name: Install dependencies
run: uv sync --all-extras --dev

- name: Build Mojo core
run: make build

- name: Run benchmark (generation + embedding)
run: |
make benchmark
uv run python tools/benchmark.py --mode generation --rounds 20 --max-new-tokens 64 > benchmark_generation.json
uv run python tools/benchmark.py --mode embedding --rounds 20 > benchmark_embedding.json

- name: Upload benchmark artifacts
uses: actions/upload-artifact@v7
with:
name: benchmark-results
path: benchmark_*.json
retention-days: 90
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ CLAUDE.md
GEMINI.md
docs/*.json
benchmark-model/
.agents
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ help: ## Display this help
install: clean ## Install everything (Python, Mojo, Beads)
@echo "${INFO} Installing..."
@if ! command -v uv >/dev/null 2>&1; then curl -LsSf https://astral.sh/uv/install.sh | sh; fi
@uv python pin 3.12 >/dev/null 2>&1
@uv venv
@$(MAKE) py-install
@$(MAKE) beads-install
Expand Down
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ asyncio.run(main())
Generate dense vector embeddings natively through Mojo's optimized batched kernel operations. Pass a single string or a list of strings to process them in parallel.

```python
from mogemma import EmbeddingModel
from mogemma import SyncEmbeddingModel

model = EmbeddingModel()
model = SyncEmbeddingModel()
embeddings = model.embed(["Hello, world!", "Mojo runs Gemma inference."])
print(embeddings.shape) # (2, 768)
```
Expand Down Expand Up @@ -126,7 +126,7 @@ Current runtime status:
- `gpu` / `gpu:N` execute via a mathematically verified runtime polyfill

```python
from mogemma import EmbeddingConfig, EmbeddingModel, GenerationConfig, SyncGemmaModel
from mogemma import EmbeddingConfig, SyncEmbeddingModel, GenerationConfig, SyncGemmaModel

generation = SyncGemmaModel(
GenerationConfig(
Expand All @@ -135,14 +135,18 @@ generation = SyncGemmaModel(
)
)

embeddings = EmbeddingModel(
embeddings = SyncEmbeddingModel(
EmbeddingConfig(
model_path="gemma3-1b-it",
device="cpu",
)
)
```

> **GPU Requirements:** GPU acceleration requires Mojo nightly with GPU support,
> compatible GPU drivers (NVIDIA CUDA, AMD ROCm, or Apple Metal), and sufficient VRAM
> for model weights and KV cache.

## Runtime Requirements

MoGemma leverages the latest Mojo features for maximum performance.
Expand Down
2 changes: 1 addition & 1 deletion docs/architecture/backend-architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Current Runtime Shape

- Python model classes (`SyncGemmaModel`, `EmbeddingModel`) resolve a backend and cache it at construction time.
- Python model classes (`SyncGemmaModel`, `SyncEmbeddingModel`) resolve a backend and cache it at construction time.
- `cpu` is implemented by `CPUCoreBackend`, which delegates into `mogemma._core` (`init_model`, `step`, `generate_embeddings`).
- Backend IDs are normalized in one place (`resolve_backend_id`) and support:
- `cpu`
Expand Down
4 changes: 2 additions & 2 deletions docs/architecture/device-selection.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ cpu_model = SyncGemmaModel(
```

```python
from mogemma import EmbeddingConfig, EmbeddingModel
from mogemma import EmbeddingConfig, SyncEmbeddingModel

embedding_model = EmbeddingModel(
embedding_model = SyncEmbeddingModel(
EmbeddingConfig(model_path="gemma3-270m-it", device="cpu")
)
```
Expand Down
4 changes: 4 additions & 0 deletions docs/performance-baselines.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ Different CI runners have different baselines. Baselines should be strictly sepa

CI/release validation now executes generation benchmarks in `.github/workflows/ci.yml` through the `check-release` job.

## Automated CI Benchmarks

Standard CPU baselines are captured automatically in GitHub Actions via `.github/workflows/perf-benchmark.yml`. This workflow runs on every push to `main` and on every pull request, executing both generation and embedding benchmarks with synthetic stubs. Results are uploaded as artifacts (retained for 90 days) and visible in the Actions tab. This is purely informational — the workflow does not gate or fail PRs based on variance.

## Baseline artifacts

Latest captured snapshots are stored in this directory:
Expand Down
22 changes: 15 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@ dependencies = [
"numpy",
"typing_extensions",
"safetensors",
"tensorstore",
]
description = "Python/Mojo interface for Google Gemma 3"
description = "Python/Mojo interface for Google Gemma 4"
license = { file = "LICENSE" }
name = "mogemma"
readme = "README.md"
requires-python = ">=3.10"
version = "0.3.0"
version = "0.4.0"

[project.urls]
Issue = "https://github.com/cofin/mogemma/issues/"
Expand Down Expand Up @@ -143,17 +142,26 @@ split-on-trailing-comma = false

[tool.ruff.lint.per-file-ignores]
"src/py/tests/test_*.py" = [
"ANN202",
"ARG001",
"D100",
"D101",
"D102",
"D103",
"D107",
"E402",
"ERA001",
"PLR2004",
"PT011",
"S101",
"S105",
"S106",
"S108",
"S110",
"S603",
"PLR2004",
"SIM105",
"PLC0415",
"SLF001",
"E402",
]
"src/py/mogemma/typing.py" = ["A005"]
"src/mo/tests/test_*.py" = [
Expand Down Expand Up @@ -188,7 +196,7 @@ dev = [
build = ["bump-my-version>=0.31.1", "mojo>=0.26.1a1"]
lint = ["mypy>=1.13.0", "pyright>=1.1.386", "ruff>=0.14.14"]
test = [
"obstore>=0.3.08",
"obstore>=0.4.08",
"pillow>=12.1.1",
"pytest>=9.0.2",
"pytest-cov>=4.0.0",
Expand All @@ -200,7 +208,7 @@ test = [
allow_dirty = true
commit = false
commit_args = "--no-verify"
current_version = "0.3.0"
current_version = "0.4.0"
ignore_missing_files = false
ignore_missing_version = false
message = "chore(release): bump to v{new_version}"
Expand Down
Loading