diff --git a/.github/workflows/elasticapp.yml b/.github/workflows/elasticapp.yml
new file mode 100644
index 00000000..8fbc24d5
--- /dev/null
+++ b/.github/workflows/elasticapp.yml
@@ -0,0 +1,61 @@
+name: elasticapp (Elastica++ based backend) tests
+
+# trigger run only on changes to the backend folder.
+on:
+  push:
+    paths:
+      - backend/**
+  pull_request:
+    paths:
+      - backend/**
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: ["3.11"] #, "3.12"]
+        os: [ubuntu-latest] # , macos-latest]
+        include:
+          - os: ubuntu-latest
+            path: ~/.cache/pip
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          uv-version: latest
+      - name: Compile OpenMP
+        env:
+          OMP_NUM_THREADS: 2
+        run: |
+          sudo apt-get update; sudo apt-get install -y libomp5 libomp-dev
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Set up cache
+        uses: actions/cache@v5
+        with:
+          path: ${{ matrix.path }}
+          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('uv.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('uv.lock') }}
+            ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
+
+      - name: Install PyElastica and dependencies
+        run: |
+          make install-dev-deps PYTHON_VERSION=${{ matrix.python-version }}
+          uv cache prune --ci
+
+      - name: Run elasticapp tests
+        run: |
+          source .venv/bin/activate
+          cd backend
+          make clean-build
+          make test
+          # pytest backend/tests/py
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 1317163a..8acf2e14 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -44,7 +44,7 @@ jobs:
         run: python -c "import sys; print(sys.version)"
       # Set up cache
       - name: Set up cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
         with:
           path: ${{ matrix.path }}
           key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('uv.lock') }}
diff --git a/.gitignore b/.gitignore
index 61a79d21..cba0be5f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -219,6 +219,7 @@ sample_prog.py
 
 # txt files
 *.txt
+!CMakelists.txt
 
 # movie or video file formats
 *.mp4
@@ -241,3 +242,6 @@ outcmaes/*
 
 # csv files
 *.csv
+
+# ./backend dependencies
+deps
diff --git a/Makefile b/Makefile
index 50b180f0..7be633ef 100644
--- a/Makefile
+++ b/Makefile
@@ -34,7 +34,7 @@ pre-commit-install:
 .PHONY: black
 black:
 	uv run black --version
-	uv run black --config pyproject.toml elastica tests examples
+	uv run black --config pyproject.toml elastica tests examples backend
 
 .PHONY: black-check
 black-check:
@@ -54,7 +54,7 @@ autoflake-check:
 .PHONY: autoflake-format
 autoflake-format:
 	uv run autoflake --version
-	uv run autoflake --in-place $(AUTOFLAKE_ARGS) elastica tests examples
+	uv run autoflake --in-place $(AUTOFLAKE_ARGS) elastica tests examples backend
 
 .PHONY: format-codestyle
 format-codestyle: black autoflake-format
@@ -71,15 +71,15 @@ mypy:
 
 .PHONY: test
 test:
-	uv run pytest -c pyproject.toml
+	uv run pytest -c pyproject.toml tests
 
 .PHONY: test_coverage
 test_coverage:
-	NUMBA_DISABLE_JIT=1 uv run pytest --cov=elastica -c pyproject.toml
+	NUMBA_DISABLE_JIT=1 uv run pytest --cov=elastica -c pyproject.toml tests
 
 .PHONY: test_coverage_xml
 test_coverage_xml:
-	NUMBA_DISABLE_JIT=1 uv run pytest --cov=elastica --cov-report=xml -c pyproject.toml
+	NUMBA_DISABLE_JIT=1 uv run pytest --cov=elastica --cov-report=xml -c pyproject.toml tests
 
 .PHONY: check-codestyle
 check-codestyle: black-check flake8 autoflake-check
diff --git a/backend/.clang-format b/backend/.clang-format
new file mode 100644
index 00000000..48d4ec71
--- /dev/null
+++ b/backend/.clang-format
@@ -0,0 +1,17 @@
+---
+# We'll use defaults from the Google style.
+# See http://clang.llvm.org/docs/ClangFormat.html for help.
+Language: Cpp
+BasedOnStyle: Google
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+PointerAlignment: Left
+DerivePointerAlignment: false
+FixNamespaceComments: true
+IncludeCategories:
+  - Regex: "^<.*"
+    Priority: 1
+  - Regex: ".*"
+    Priority: 2
+NamespaceIndentation: All
+SortIncludes: false
diff --git a/backend/.gitignore b/backend/.gitignore
new file mode 100644
index 00000000..4689dcf2
--- /dev/null
+++ b/backend/.gitignore
@@ -0,0 +1,12 @@
+CMakeLists.txt.user
+CMakeCache.txt
+CMakeFiles
+CMakeScripts
+Testing
+Makefile
+cmake_install.cmake
+install_manifest.txt
+compile_commands.json
+CTestTestfile.cmake
+_deps
+
diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt
new file mode 100644
index 00000000..be937721
--- /dev/null
+++ b/backend/CMakeLists.txt
@@ -0,0 +1,290 @@
+cmake_minimum_required(VERSION 3.20)
+project(elasticapp VERSION 0.0.4 LANGUAGES CXX)
+
+# Set C++ standard
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Compiler flags
+add_compile_options(-Wno-unused -fdiagnostics-color=always -Wall -Wextra -pedantic -flto)
+
+# Find Python and pybind11 (temp)
+# pybind11 will find Python and NumPy automatically
+# Set Python interpreter to ../.venv
+set(Python3_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.venv")
+set(PYTHON_EXECUTABLE "${Python3_ROOT_DIR}/bin/python")
+
+# Find pybind11 and ensure it uses the Python interpreter from our venv
+set(PYBIND11_FINDPYTHON "ON")
+set(PYBIND11_PYTHON_EXECUTABLE "${PYTHON_EXECUTABLE}" CACHE FILEPATH "" FORCE)
+find_package(pybind11 REQUIRED)
+
+# ------------------------------------------------------------ #
+# External dependency management #
+# Include FetchContent for Catch2 and Eigen3
+include(FetchContent)
+
+# Save current BUILD_TESTING state and disable it for dependencies
+set(BUILD_TESTING_SAVED ${BUILD_TESTING})
+set(BUILD_TESTING OFF)
+
+# Fetch Eigen3 for numerical computations
+FetchContent_Declare(
+    Eigen3
+    GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
+    GIT_TAG 3.4.0
+)
+# Disable Eigen3 tests and documentation
+set(EIGEN_BUILD_DOC OFF)
+# Set Eigen's default storage order to row-major
+# This ensures Eigen's default behavior matches our explicit MatrixType template parameter
+set(EIGEN_DEFAULT_TO_ROW_MAJOR ON CACHE BOOL "Use row-major as default matrix storage order" FORCE)
+FetchContent_MakeAvailable(Eigen3)
+
+# Restore BUILD_TESTING state for our tests
+set(BUILD_TESTING ${BUILD_TESTING_SAVED})
+
+# ------------------------------------------------------------ #
+
+# Define directories #
+set(TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}/tests")
+set(PYTHON_PACKAGE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/py")
+set(SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
+
+option(ELASTICAPP_VECTORIZATION_REPORTS "Enable compiler vectorization reports"            OFF)
+option(ELASTICAPP_BUILD_TESTS           "Build C++ unit tests"                             OFF)
+option(ELASTICAPP_GIL_RELEASE           "Release Python GIL around compute-heavy bindings" ON )
+
+# Global CMake option to enable/disable for unittests
+if (DEFINED ENV{ELASTICAPP_BUILD_TESTS})
+    string(TOUPPER "$ENV{ELASTICAPP_BUILD_TESTS}" ELASTICAPP_BUILD_TESTS_ENV_VALUE)
+    if ("${ELASTICAPP_BUILD_TESTS_ENV_VALUE}" MATCHES "^(ON|TRUE|1)$")
+        set(ELASTICAPP_BUILD_TESTS ON)
+        message(STATUS "ELASTICAPP_BUILD_TESTS set to TRUE from environment variable.")
+    elseif ("${ELASTICAPP_BUILD_TESTS_ENV_VALUE}" MATCHES "^(OFF|FALSE|0)$")
+        set(ELASTICAPP_BUILD_TESTS OFF)
+        message(STATUS "ELASTICAPP_BUILD_TESTS set to OFF from environment variable.")
+    else()
+        message(WARNING "Environment variable ELASTICAPP_BUILD_TESTS has an unrecognized value. It will not be used to set the CMake option.")
+    endif()
+endif()
+
+# Option to set number of threads per component in update_dynamics parallel sections
+# This controls how many threads are used for each spatial component (x, y, z) in the parallel sections
+# Default is 3 (one thread per component), but can be adjusted for different hardware configurations
+set(ELASTICAPP_COMPONENT_THREADS "2" CACHE STRING "Number of threads per component in update_dynamics parallel sections (default: 3)")
+mark_as_advanced(ELASTICAPP_COMPONENT_THREADS)
+
+# Configure threading support (OpenMP) - check early but apply per-target
+# On macOS with Homebrew, help CMake find libomp
+# This is needed for both Makefile builds and scikit-build-core builds
+if(APPLE)
+    # Try to find Homebrew-installed libomp
+    execute_process(
+        COMMAND brew --prefix libomp
+        OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        ERROR_QUIET
+    )
+    if(HOMEBREW_LIBOMP_PREFIX)
+        message(STATUS "Found Homebrew libomp at: ${HOMEBREW_LIBOMP_PREFIX}")
+        # Set OpenMP variables for clang on macOS
+        if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+            set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
+            set(OpenMP_CXX_LIB_NAMES "omp")
+            set(OpenMP_omp_LIBRARY "${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib")
+            set(OpenMP_CXX_LIBRARIES "${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib")
+            message(STATUS "Configured OpenMP for clang with Homebrew libomp")
+        endif()
+    endif()
+endif()
+
+find_package(OpenMP COMPONENTS CXX REQUIRED)
+
+# Helper function to apply optimization flags to a target
+# This function must be defined before targets are created
+function(apply_optimization_flags target_name)
+    if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
+        target_compile_options(${target_name} PRIVATE -O3 -march=native)
+        target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+        target_compile_options(${target_name} PRIVATE /O2)
+    endif()
+    # Add NDEBUG flag to disable assertions in release builds
+    target_compile_definitions(${target_name} PRIVATE NDEBUG)
+
+    # Enable Eigen vectorization
+    target_compile_definitions(${target_name} PRIVATE EIGEN_VECTORIZE)
+
+    # Add architecture-specific optimization flags
+    if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
+        # AVX2 and FMA are only available on x86_64, not ARM
+        if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
+            target_compile_options(${target_name} PRIVATE -mavx2 -mfma)
+        endif()
+
+        # Add vectorization report flags if requested
+        if(ELASTICAPP_VECTORIZATION_REPORTS)
+            if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+                # Clang vectorization reports
+                target_compile_options(${target_name} PRIVATE
+                    -Rpass=loop-vectorize
+                    -Rpass-missed=loop-vectorize
+                    -Rpass-analysis=loop-vectorize
+                )
+                message(STATUS "Clang vectorization reports enabled for ${target_name}")
+            elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+                # GCC vectorization reports
+                target_compile_options(${target_name} PRIVATE
+                    -fopt-info-vec
+                    -fopt-info-vec-missed
+                    -fopt-info-vec-all
+                )
+                message(STATUS "GCC vectorization reports enabled for ${target_name}")
+            endif()
+        endif()
+    elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+        # MSVC flags for AVX2
+        target_compile_options(${target_name} PRIVATE /arch:AVX2)
+    endif()
+
+    if(OpenMP_CXX_FOUND)
+        target_link_libraries(${target_name} PRIVATE OpenMP::OpenMP_CXX)
+        message(STATUS "Threading support enabled for ${target_name}")
+    endif()
+endfunction()
+
+# Extension module: _memory_block (Python module name)
+pybind11_add_module(_memory_block
+    "${SRC_DIR}/_api.cpp"
+)
+target_compile_definitions(_memory_block PRIVATE VERSION_INFO=${PROJECT_VERSION})
+target_include_directories(_memory_block PRIVATE ${SRC_DIR})
+target_link_libraries(_memory_block PRIVATE Eigen3::Eigen)
+
+if(APPLE)
+    target_link_options(_memory_block PRIVATE -Wl,-dead_strip)
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+    target_link_options(_memory_block PRIVATE -Wl,--gc-sections)
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    target_link_options(_memory_block PRIVATE -Wl,--gc-sections)
+endif()
+
+
+# Apply component threads option to _memory_block (only if explicitly set and non-empty)
+# If not set, the code will use OpenMP's default thread count
+if(ELASTICAPP_COMPONENT_THREADS AND NOT ELASTICAPP_COMPONENT_THREADS STREQUAL "" AND NOT ELASTICAPP_COMPONENT_THREADS STREQUAL "0")
+    target_compile_definitions(_memory_block PRIVATE ELASTICAPP_COMPONENT_THREADS=${ELASTICAPP_COMPONENT_THREADS})
+endif()
+
+if(ELASTICAPP_GIL_RELEASE)
+    target_compile_definitions(_memory_block PRIVATE ELASTICAPP_GIL_RELEASE)
+endif()
+
+# Apply optimization flags to _memory_block
+apply_optimization_flags(_memory_block)
+
+# Install _memory_block module
+install(TARGETS _memory_block
+        LIBRARY DESTINATION "elasticapp"
+        COMPONENT python)
+
+# Extension module: version (Python module name)
+pybind11_add_module(version
+    "${SRC_DIR}/_version.cpp"
+)
+target_compile_definitions(version PRIVATE VERSION_INFO=${PROJECT_VERSION})
+target_include_directories(version PRIVATE ${SRC_DIR})
+
+# Install version module
+install(TARGETS version
+        LIBRARY DESTINATION "elasticapp"
+        COMPONENT python)
+
+# Extension module: _collision (Python module name)
+pybind11_add_module(_collision
+    "${SRC_DIR}/environment/_api.cpp"
+    "${SRC_DIR}/environment/collision/course_detection/hash_grid.cpp"
+    "${SRC_DIR}/environment/collision/fine_detection/max_contacts.cpp"
+    "${SRC_DIR}/environment/collision/batching/union_find.cpp"
+    "${SRC_DIR}/environment/collision/physics/linear_spring_dashpot.cpp"
+    "${SRC_DIR}/environment/collision/physics/no_interaction.cpp"
+    "${SRC_DIR}/math/node_element_mapping.cpp"
+)
+target_compile_definitions(_collision PRIVATE VERSION_INFO=${PROJECT_VERSION})
+target_include_directories(_collision PRIVATE ${SRC_DIR})
+target_link_libraries(_collision PRIVATE Eigen3::Eigen)
+
+if(APPLE)
+    target_link_options(_collision PRIVATE -Wl,-dead_strip)
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+    target_link_options(_collision PRIVATE -Wl,--gc-sections)
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    target_link_options(_collision PRIVATE -Wl,--gc-sections)
+endif()
+
+if(ELASTICAPP_GIL_RELEASE)
+    target_compile_definitions(_collision PRIVATE ELASTICAPP_GIL_RELEASE)
+endif()
+
+# Apply optimization flags to _collision
+apply_optimization_flags(_collision)
+
+# Install _collision module
+install(TARGETS _collision
+        LIBRARY DESTINATION "elasticapp"
+        COMPONENT python)
+
+# Install Python package files
+install(FILES
+        "${PYTHON_PACKAGE_DIR}/__init__.py"
+        "${PYTHON_PACKAGE_DIR}/typing_alias.py"
+        "${PYTHON_PACKAGE_DIR}/memory_block_rod.py"
+        "${PYTHON_PACKAGE_DIR}/collision_physics.py"
+        "${PYTHON_PACKAGE_DIR}/module_collision.py"
+        DESTINATION "elasticapp"
+        COMPONENT python)
+
+# C++ unit tests with Catch2
+if(ELASTICAPP_BUILD_TESTS)
+    enable_testing()
+    include(CTest)
+
+    # Fetch Catch2 for C++ unit testing
+    FetchContent_Declare(
+        Catch2
+        GIT_REPOSITORY https://github.com/catchorg/Catch2.git
+        GIT_TAG v3.5.4
+    )
+    set(CATCH_BUILD_TESTING OFF CACHE BOOL "" FORCE)
+    set(CATCH_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
+    FetchContent_MakeAvailable(Catch2)
+
+    # Test executable
+    add_executable(cpp_tests
+        "${TEST_DIR}/cpp/test_version.cpp"
+        "${TEST_DIR}/cpp/test_block.cpp"
+        "${TEST_DIR}/cpp/test_traits.cpp"
+        "${TEST_DIR}/cpp/test_system.cpp"
+        "${TEST_DIR}/cpp/test_system_variable_validation.cpp"
+        "${TEST_DIR}/cpp/test_block_view.cpp"
+        "${TEST_DIR}/cpp/test_block_operations.cpp"
+        "${TEST_DIR}/cpp/collision/test_collision_system.cpp"
+    )
+
+    target_compile_definitions(cpp_tests PRIVATE VERSION_INFO=${PROJECT_VERSION})
+    target_include_directories(cpp_tests PRIVATE ${SRC_DIR} ${TEST_DIR}/cpp/mock)
+    target_link_libraries(cpp_tests PRIVATE Catch2::Catch2WithMain Eigen3::Eigen)
+
+    # Apply optimization flags to cpp_tests
+    apply_optimization_flags(cpp_tests)
+
+    # Use Catch2's test discovery to automatically register tests with CTest
+    # Add Catch2 extras directory to module path (FetchContent uses uppercase)
+    list(APPEND CMAKE_MODULE_PATH ${Catch2_SOURCE_DIR}/extras)
+    include(Catch)
+    catch_discover_tests(cpp_tests)
+else()
+    message(STATUS "C++ tests disabled (ELASTICAPP_BUILD_TESTS=OFF)")
+endif()
diff --git a/backend/Makefile b/backend/Makefile
new file mode 100644
index 00000000..9e94afa7
--- /dev/null
+++ b/backend/Makefile
@@ -0,0 +1,62 @@
+#* Variables
+PYTHON := python3
+PYTHONPATH := `pwd`/..
+PYTHON_VERSION :=
+
+#* Installation
+.PHONY: install
+install:
+	uv pip install "." -v
+
+#* C++ Build (direct CMake, no pip)
+.PHONY: build-cpp
+build-cpp:
+	@echo "Building C++ code with CMake..." && \
+	VENV_PYTHON=$$(which python) && \
+	PYBIND11_DIR=$$(python -c "import pybind11; import os; print(os.path.join(os.path.dirname(pybind11.__file__), 'share', 'cmake', 'pybind11'))") && \
+	mkdir -p build && \
+	cd build && \
+		LIBOMP_PREFIX=$$(brew --prefix libomp 2>/dev/null || echo "") && \
+		LLVM_PREFIX=$$(brew --prefix llvm 2>/dev/null || echo "") && \
+		cmake .. \
+			-DCMAKE_BUILD_TYPE=Release \
+			-DCMAKE_C_COMPILER=$$LLVM_PREFIX/bin/clang \
+			-DCMAKE_CXX_COMPILER=$$LLVM_PREFIX/bin/clang++ \
+			-DPython3_ROOT_DIR=$$(cd .. && pwd)/../.venv \
+			-DPython3_EXECUTABLE="$$VENV_PYTHON" \
+			-DPYTHON_EXECUTABLE="$$VENV_PYTHON" \
+			-DPython_EXECUTABLE="$$VENV_PYTHON" \
+			-Dpybind11_DIR="$$PYBIND11_DIR" \
+			-DOpenMP_CXX_FLAGS="-Xpreprocessor -fopenmp -I$$LIBOMP_PREFIX/include" \
+			-DOpenMP_CXX_LIB_NAMES="omp" \
+			-DOpenMP_omp_LIBRARY="$$LIBOMP_PREFIX/lib/libomp.dylib" \
+			-DOpenMP_CXX_LIBRARIES="$$LIBOMP_PREFIX/lib/libomp.dylib" \
+			-DELASTICAPP_VECTORIZATION_REPORTS=ON \
+			-DELASTICAPP_SAVE_ASSEMBLY=ON \
+			-DELASTICAPP_BUILD_TESTS=OFF; \
+		cmake --build . --parallel 2>&1 | tee ../vectorization_report.txt
+
+#* C++ Tests (build and run, no pip)
+.PHONY: test-cpp-direct
+test-cpp-direct: build-cpp
+	@echo "Running C++ tests..."
+	@cd build && ./cpp_tests
+
+#* Testing
+.PHONY: test
+test:
+	@echo "Running C++ and Python tests..."
+	ELASTICAPP_BUILD_TESTS=ON make install && cd build && ./cpp_tests
+	pytest tests/py -v
+
+#* Cleaning
+.PHONY: clean
+clean: clean-build
+	rm -rf dist/ *.egg-info
+	find . -type d -name __pycache__ -exec rm -r {} + 2>/dev/null || true
+	find . -type f -name "*.pyc" -delete
+	find . -type f -name "*.pyo" -delete
+
+.PHONY: clean-build
+clean-build:
+	rm -rf build/
diff --git a/backend/README.md b/backend/README.md
new file mode 100644
index 00000000..a8828bc6
--- /dev/null
+++ b/backend/README.md
@@ -0,0 +1,61 @@
+# C++ for PyElastica
+
+This directory includes basic C++ backend for experimental purposes.
+The full functionality is not yet moved to this directory.
+The purpose of this update is to provide a way to implement functionality in C++, such as multithreading and more controlled vectorization.
+For large-number of rods and complex environment, computation with contact and self-interactions requires higher control over parallelization.
+
+> Note: manual tuning of vectorization and threading is required for optimal performance, which may vary depending on the hardware.
+
+> Note: expected performance improvement depends on the vector size. It is expected to be faster for large rod systems, mostly due to the overhead created by the python-binding and interpreter. In most of the case with less than 1e5 elements, the performance of `numba` could be better.
+
+## Usage
+
+```python
+import elastica as ea
+import elasticapp as epp
+
+class SystemSimulator(
+    ea.BaseSystemCollection,
+    ...
+):
+    pass
+
+simulator = SystemSimulator()
+# Replace C++ block module for CosseratRod
+simulator.enable_block_supports(ea.CosseratRod, epp.MemoryBlockCosseratRod)
+```
+
+## Installation
+
+From the `backend` directory, run:
+
+```bash
+cd backend
+make install  # or run pip install "."
+```
+> Make sure you install the package from _PyElastica source tree_.
+
+## Testing
+
+Test includes both `cpp` and `python` testing.
+
+```bash
+make test
+```
+
+## File structure
+
+- All cpp files are in `src` directory. `src/py` directory includes python bindings for C++ classes.
+- All test files are in `tests` directory. `cpp` tests are in `tests/cpp` directory, and `python` tests are in `tests/py` directory.
+- All benchmark files are in `benchmarking` directory.
+
+
+## Contributed By
+
+- Tejaswin Parthasarathy (Teja)
+- [Seung Hyun Kim](https://github.com/skim0119)
+- Ankith Pai
+- [Yashraj Bhosale](https://github.com/bhosale2)
+- Arman Tekinalp
+- Songyuan Cui
diff --git a/backend/benchmarking/PDE_benchmark.ipynb b/backend/benchmarking/PDE_benchmark.ipynb
new file mode 100644
index 00000000..f76de55b
--- /dev/null
+++ b/backend/benchmarking/PDE_benchmark.ipynb
@@ -0,0 +1,231 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "ea3e0b99",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import Any\n",
+    "import timeit\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import time\n",
+    "from tqdm import tqdm\n",
+    "import os\n",
+    "# os.environ[\"NUMBA_DISABLE_JIT\"] = \"1\"\n",
+    "\n",
+    "import elastica as ea\n",
+    "from elasticapp import MemoryBlockCosseratRod\n",
+    "\n",
+    "# Try to import set_num_threads if threading is enabled\n",
+    "try:\n",
+    "    from elasticapp._memory_block import set_num_threads, get_max_threads\n",
+    "    THREADING_AVAILABLE = True\n",
+    "except ImportError:\n",
+    "    THREADING_AVAILABLE = False\n",
+    "    raise RuntimeError(\"Threading not available (ELASTICAPP_USE_THREADING not enabled)\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "bc221bc9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 50000/50000 [00:24<00:00, 2016.30it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create 50000 rods with 6 elements each, iterate 100 operations\n",
+    "N = 100\n",
+    "n_rods = 50000\n",
+    "n_elems_per_rod = 6\n",
+    "\n",
+    "# Create rods\n",
+    "rods = [\n",
+    "    ea.CosseratRod.straight_rod(\n",
+    "        n_elements=n_elems_per_rod,\n",
+    "        start=np.zeros(3),\n",
+    "        direction=np.array([0.0, 0.0, 1.0]),\n",
+    "        normal=np.array([1.0, 0.0, 0.0]),\n",
+    "        base_length=1.0,\n",
+    "        base_radius=0.01,\n",
+    "        density=3000,\n",
+    "        youngs_modulus=1e6,\n",
+    "    )\n",
+    "    for _ in tqdm(range(n_rods))\n",
+    "]\n",
+    "\n",
+    "def jitter_rod(rod):\n",
+    "    rng = np.random.default_rng(42)\n",
+    "    rod.external_forces[:] = rng.uniform(-1e-6, 1e-6, rod.external_forces.shape)\n",
+    "    rod.external_torques[:] = rng.uniform(-1e-6, 1e-6, rod.external_torques.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "35e3d633",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Time per call: 42.549 ms (compute_internal_forces_and_torques - inverse rotation, equations)\n",
+      "Time per call: 5.615 ms (update_accelerations - block addition)\n",
+      "Time per call: 15.882 ms (update_kinematics - rodrigues)\n",
+      "Time per call: 0.974 ms (update_dynamics)\n"
+     ]
+    }
+   ],
+   "source": [
+    "block_rod = ea.MemoryBlockCosseratRod(rods, range(n_rods))\n",
+    "jitter_rod(block_rod)\n",
+    "\n",
+    "py_result = {}\n",
+    "\n",
+    "for _ in range(10): # Pre-run\n",
+    "    block_rod.compute_internal_forces_and_torques(0.0)\n",
+    "T = timeit.timeit(lambda: block_rod.compute_internal_forces_and_torques(0.0), number=N)\n",
+    "py_result['compute_internal_forces_and_torques'] = T/N*1000\n",
+    "print(f\"Time per call: {T/N*1000:.3f} ms (compute_internal_forces_and_torques - inverse rotation, equations)\")\n",
+    "for _ in range(10): # Pre-run\n",
+    "    block_rod.update_accelerations(0.0)\n",
+    "T = timeit.timeit(lambda: block_rod.update_accelerations(0.0), number=N)\n",
+    "py_result['update_accelerations'] = T/N*1000\n",
+    "print(f\"Time per call: {T/N*1000:.3f} ms (update_accelerations - block addition)\")\n",
+    "for _ in range(10): # Pre-run\n",
+    "    block_rod.update_kinematics(0.0, 1e-4)\n",
+    "T = timeit.timeit(lambda: block_rod.update_kinematics(0.0, 1e-4), number=N)\n",
+    "py_result['update_kinematics'] = T/N*1000\n",
+    "print(f\"Time per call: {T/N*1000:.3f} ms (update_kinematics - rodrigues)\")\n",
+    "for _ in range(10): # Pre-run\n",
+    "    block_rod.update_dynamics(0.0, 1e-4)\n",
+    "T = timeit.timeit(lambda: block_rod.update_dynamics(0.0, 1e-4), number=N)\n",
+    "py_result['update_dynamics'] = T/N*1000\n",
+    "print(f\"Time per call: {T/N*1000:.3f} ms (update_dynamics)\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "31e6d7a1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 50000/50000 [00:24<00:00, 2057.62it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Time per call: 21.076 ms (compute_internal_forces_and_torques - inverse rotation, equations)\n",
+      "Time per call: 3.041 ms (update_accelerations - block addition)\n",
+      "Time per call: 1.601 ms (update_kinematics - rodrigues)\n",
+      "Time per call: 0.756 ms (update_dynamics)\n",
+      "------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "assert THREADING_AVAILABLE\n",
+    "\n",
+    "\n",
+    "# Benchmark parameters\n",
+    "# thread_counts = [1, 2, 4, 8, 16]\n",
+    "\n",
+    "# Storage for results\n",
+    "cpp_results_for_threads = []\n",
+    "\n",
+    "# Run benchmark for each thread count\n",
+    "# for num_threads in thread_counts:\n",
+    "    # set_num_threads(num_threads)\n",
+    "    # print(f\"Threading available: {num_threads}\")\n",
+    "\n",
+    "rods = [\n",
+    "    ea.CosseratRod.straight_rod(\n",
+    "        n_elements=n_elems_per_rod,\n",
+    "        start=np.zeros(3),\n",
+    "        direction=np.array([0.0, 0.0, 1.0]),\n",
+    "        normal=np.array([1.0, 0.0, 0.0]),\n",
+    "        base_length=1.0,\n",
+    "        base_radius=0.01,\n",
+    "        density=3000,\n",
+    "        youngs_modulus=1e6,\n",
+    "    )\n",
+    "    for _ in tqdm(range(n_rods))\n",
+    "]\n",
+    "\n",
+    "block_rod = MemoryBlockCosseratRod(rods, range(n_rods))\n",
+    "jitter_rod(block_rod)\n",
+    "\n",
+    "cpp_result = {}\n",
+    "cpp_results_for_threads.append(cpp_result)\n",
+    "\n",
+    "for _ in range(10): # Pre-run\n",
+    "    block_rod.compute_internal_forces_and_torques(0.0)\n",
+    "T = timeit.timeit(lambda: block_rod.compute_internal_forces_and_torques(0.0), number=N)\n",
+    "cpp_result['compute_internal_forces_and_torques'] = T/N*1000\n",
+    "print(f\"Time per call: {T/N*1000:.3f} ms (compute_internal_forces_and_torques - inverse rotation, equations)\")\n",
+    "for _ in range(10): # Pre-run\n",
+    "    block_rod.update_accelerations(0.0)\n",
+    "T = timeit.timeit(lambda: block_rod.update_accelerations(0.0), number=N)\n",
+    "cpp_result['update_accelerations'] = T/N*1000\n",
+    "print(f\"Time per call: {T/N*1000:.3f} ms (update_accelerations - block addition)\")\n",
+    "for _ in range(10): # Pre-run\n",
+    "    block_rod.update_kinematics(0.0, 1e-4)\n",
+    "T = timeit.timeit(lambda: block_rod.update_kinematics(0.0, 1e-4), number=N)\n",
+    "cpp_result['update_kinematics'] = T/N*1000\n",
+    "print(f\"Time per call: {T/N*1000:.3f} ms (update_kinematics - rodrigues)\")\n",
+    "for _ in range(10): # Pre-run\n",
+    "    block_rod.update_dynamics(0.0, 1e-4)\n",
+    "T = timeit.timeit(lambda: block_rod.update_dynamics(0.0, 1e-4), number=N)\n",
+    "cpp_result['update_dynamics'] = T/N*1000\n",
+    "print(f\"Time per call: {T/N*1000:.3f} ms (update_dynamics)\")\n",
+    "\n",
+    "print(\"-\"*60)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bb8950dc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/backend/benchmarking/memory_block_integrity_check.py b/backend/benchmarking/memory_block_integrity_check.py
new file mode 100644
index 00000000..98e4e696
--- /dev/null
+++ b/backend/benchmarking/memory_block_integrity_check.py
@@ -0,0 +1,111 @@
+"""
+Example script to benchmark C++ operations with timing done entirely in C++.
+
+This script demonstrates how to use the benchmark_cpp function to measure
+performance without Python loop or pybind11 call overhead.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import elastica as epy
+import elasticapp as epp
+
+# %%
+# Create test rods
+n_rods = 50
+n_elems_per_rod = 200
+
+print(f"Creating {n_rods} rods with {n_elems_per_rod} elements each...")
+keys = list(epp.memory_block_rod.PY2CPP_VARNAMES.keys())
+rods_cpp = [
+    epy.CosseratRod.straight_rod(
+        n_elements=n_elems_per_rod,
+        start=np.zeros(3),
+        direction=np.array([0.0, 0.0, 1.0]),
+        normal=np.array([1.0, 0.0, 0.0]),
+        base_length=1.0,
+        base_radius=0.01,
+        density=3000,
+        youngs_modulus=1e6,
+    )
+    for _ in range(n_rods)
+]
+rods_py = [
+    epy.CosseratRod.straight_rod(
+        n_elements=n_elems_per_rod,
+        start=np.zeros(3),
+        direction=np.array([0.0, 0.0, 1.0]),
+        normal=np.array([1.0, 0.0, 0.0]),
+        base_length=1.0,
+        base_radius=0.01,
+        density=3000,
+        youngs_modulus=1e6,
+    )
+    for _ in range(n_rods)
+]
+rng = np.random.default_rng(43)
+for i in range(n_rods):
+    for key in ["rest_kappa", "rest_sigma"]:
+        shape = getattr(rods_py[i], key).shape
+        values = rng.random(size=shape)
+        getattr(rods_py[i], key)[...] = values.copy()
+        getattr(rods_cpp[i], key)[...] = values.copy()
+
+# Create block rod system
+block_cpp = epp.MemoryBlockCosseratRod(rods_cpp, range(n_rods))
+block_py = epy.MemoryBlockCosseratRod(rods_py, range(n_rods))
+
+# %%
+# Cross-check ghost indices
+# -------------------------
+np.testing.assert_array_equal(block_cpp.ghost_nodes_idx, block_py.ghost_nodes_idx)
+np.testing.assert_array_equal(block_cpp.ghost_elems_idx[:-1], block_py.ghost_elems_idx)
+np.testing.assert_array_equal(
+    block_cpp.ghost_voronoi_idx[:-2], block_py.ghost_voronoi_idx
+)
+
+
+# %%
+# Cross-check block memory
+# ------------------------
+def cross_check_block_memory():
+    for key in keys:
+        cpp_value = getattr(block_cpp, key)
+        py_value = getattr(block_py, key)
+        assert cpp_value.shape == py_value.shape
+        assert np.allclose(cpp_value, py_value), f"{key} is not equal"
+        print(f"{key} |  values and shapes checked")
+
+
+cross_check_block_memory()
+
+# %%
+# Cross-check computing internal forces and torques
+# -------------------------------------------------
+block_cpp.compute_internal_forces_and_torques(0.0)
+block_py.compute_internal_forces_and_torques(0.0)
+
+cross_check_block_memory()
+
+# %%
+# Cross-check updating accelerations
+# ----------------------------------
+block_cpp.update_accelerations(0.0)
+block_py.update_accelerations(0.0)
+
+cross_check_block_memory()
+# %%
+# Cross-check updating kinematics
+# -----------------------------
+block_cpp.update_kinematics(0.0, 1.4)
+block_py.update_kinematics(0.0, 1.4)
+
+cross_check_block_memory()
+# %%
+# Cross-check updating dynamics
+# -----------------------------
+block_cpp.update_dynamics(0.0, 1.6)
+block_py.update_dynamics(0.0, 1.6)
+
+cross_check_block_memory()
+# %%
diff --git a/backend/examples/run_timoshenko.py b/backend/examples/run_timoshenko.py
new file mode 100644
index 00000000..c4603bcc
--- /dev/null
+++ b/backend/examples/run_timoshenko.py
@@ -0,0 +1,176 @@
+"""
+Test case for many-rod simulation with CPP memory block in the back.
+"""
+
+import numpy as np
+
+pass
+from tqdm import tqdm
+
+import elastica as ea
+import elasticapp as epp
+
+# %%
+# Now that we have imported all the necessary classes, we want to create our beam system. We do this by combining all the modules we need to represent the physics that we to include in the simulation. In this case, that is the ``BaseSystemCollection``, ``Constraint``, ``Forcings`` and ``Damping`` because the simulation will consider a rod that is fixed in place on one end, and subject to an applied force on the other end.
+
+
+class TimoshenkoBeamSimulator(
+    ea.BaseSystemCollection, ea.Constraints, ea.Forcing, ea.CallBacks, ea.Damping
+):
+    pass
+
+
+timoshenko_sim = TimoshenkoBeamSimulator()
+timoshenko_sim.enable_block_supports(ea.CosseratRod, epp.MemoryBlockCosseratRod)
+
+# %%
+# Creating Rods
+# -------------
+# With our simulator set up, we can now define the numerical, material, and geometric properties.
+#
+# First we define the number of elements in the rod. Next, the material properties are defined for every rod. These are the Young's modulus, the Poisson ratio, the density and the viscous damping coefficient. Finally, the geometry of the rod also needs to be defined by specifying the location of the rod and its orientation, length and radius.
+#
+# All of the values defined here are done in SI units, though this is not strictly necessary. You can rescale properties however you want, as long as you use consistent units throughout the simulation. See `here <https://info.simuleon.com/blog/units-in-abaqus>`_ for an example of consistent units.
+#
+# In order to make the difference between a shearable and unshearable rod more clear, we are using a Poisson ratio of 99. This is an unphysical value, as Poisson ratios can not exceed 0.5, however, it is used here for demonstration purposes.
+
+# setting up test params
+simulation_time = 5
+
+n_elem = 100
+start = np.zeros((3,))
+direction = np.array([0.0, 0.0, 1.0])
+normal = np.array([0.0, 1.0, 0.0])
+base_length = 3.0
+base_radius = 0.25
+base_area = np.pi * base_radius**2
+density = 5000
+nu = 0.1 / 7 / density / base_area
+E = 1e6
+# For shear modulus of 1e4, nu is 99!
+poisson_ratio = 99
+shear_modulus = E / (poisson_ratio + 1.0)
+
+# %%
+# With all of the rod's parameters set, we can now create a rod with the specificed properties and add the rod to the simulator system. **Important:** Make sure that any rods you create get added to the simulator system (``timoshenko_sim``), otherwise they will not be included in your simulation.
+
+n_rods = 10_000_000
+for _ in range(n_rods):
+    shearable_rod = ea.CosseratRod.straight_rod(
+        n_elem,
+        start,
+        direction,
+        normal,
+        base_length,
+        base_radius,
+        density,
+        youngs_modulus=E,
+        shear_modulus=shear_modulus,
+    )
+    timoshenko_sim.append(shearable_rod)
+
+# %%
+# Adding Damping
+# --------------
+# With the rod added to the simulator, we can add damping to the rod. We do this using the ``.dampen()`` option and the ``AnalyticalLinearDamper``. We are modifying ``timoshenko_sim`` simulator to ``dampen`` the ``shearable_rod`` object using ``AnalyticalLinearDamper`` type of dissipation (damping) model.
+#
+# We also need to define ``damping_constant`` and simulation ``time_step`` and pass in ``.using()`` method.
+
+dl = base_length / n_elem
+dt = 0.07 * dl
+timoshenko_sim.dampen(shearable_rod).using(
+    ea.AnalyticalLinearDamper,
+    damping_constant=nu,
+    time_step=dt,
+)
+
+# %%
+# Adding Boundary Conditions
+# --------------------------
+# With the rod added to the system, we need to apply boundary conditions. The first condition we will apply is fixing the location of one end of the rod. We do this using the ``.constrain()`` option and the ``OneEndFixedRod`` boundary condition. We are modifying the ``timoshenko_sim`` simulator to ``constrain`` the ``shearable_rod`` object using the ``OneEndFixedRod`` type of constraint.
+#
+# We also need to define which node of the rod is being constrained. We do this by passing the index of the nodes that we want to constrain to ``constrained_position_idx``. Here we are fixing the first node in the rod. In order to keep the rod from rotating around the fixed node, we also need to constrain an element between two nodes. This fixes the orientation of the rod. We do this by passing the index of the element that we want to fix to ``constrained_director_idx``. Like with the position, we are fixing the first element of the rod. Together, this constrains the position and orientation of the rod at the origin.
+
+# One end of the rod is now fixed in place
+timoshenko_sim.constrain(shearable_rod).using(
+    ea.OneEndFixedBC, constrained_position_idx=(0,), constrained_director_idx=(0,)
+)
+
+# %%
+# The next boundary condition that we want to apply is the endpoint force. Similarly to how we constrained one of the points, we want the ``timoshenko_sim`` simulator to ``add_forcing_to`` the ``shearable_rod`` object using the ``EndpointForces`` type of forcing. This ``EndpointForces`` applies forces to both ends of the rod. We want to apply a negative force in the :math:`d_1` direction, but only at the end of the rod. We do this by specifying the force vector to be applied at each end as ``origin_force`` and ``end_force``. We also want to ramp up the force over time, so we make the force take some ``ramp_up_time`` to reach its steady-state value. This helps avoid numerical errors due to discontinuities in the applied force.
+
+# Forces added to the rod
+end_force = np.array([-15.0, 0.0, 0.0])
+timoshenko_sim.add_forcing_to(shearable_rod).using(
+    ea.EndpointForces, 0.0 * end_force, end_force, ramp_up_time=simulation_time / 2.0
+)
+
+# %%
+# Add Unshearable Rod
+# -------------------
+#
+# Along with the shearable rod, we also want to add an unshearable rod to be able to compare the difference between the two. We do this the same way we did for the first rod, however, because this rod is unsherable, we need to change the Poisson ratio to make the rod unsherable. For a truely unsheraable rod, you would need a Poisson ratio of -1.0, however, this causes the system to be numerically unstable, so instead we make the system nearly unshearable by using a Poisson ratio of -0.85.
+
+# Start into the plane
+unshearable_start = np.array([0.0, -1.0, 0.0])
+shear_modulus = E / (-0.7 + 1.0)
+unshearable_rod = ea.CosseratRod.straight_rod(
+    n_elem,
+    unshearable_start,
+    direction,
+    normal,
+    base_length,
+    base_radius,
+    density,
+    youngs_modulus=E,
+    # Unshearable rod needs G -> inf, which is achievable with -ve poisson ratio
+    shear_modulus=shear_modulus,
+)
+
+timoshenko_sim.append(unshearable_rod)
+
+# add damping
+timoshenko_sim.dampen(unshearable_rod).using(
+    ea.AnalyticalLinearDamper,
+    damping_constant=nu,
+    time_step=dt,
+)
+# add boundary conditions
+timoshenko_sim.constrain(unshearable_rod).using(
+    ea.OneEndFixedBC, constrained_position_idx=(0,), constrained_director_idx=(0,)
+)
+timoshenko_sim.add_forcing_to(unshearable_rod).using(
+    ea.EndpointForces, 0.0 * end_force, end_force, ramp_up_time=simulation_time / 2.0
+)
+
+# %%
+# System Finalization
+# -------------------
+# We have now added all the necessary rods and boundary conditions to our system. The last thing we need to do is finalize the system. This goes through the system, rearranges things, and precomputes useful quantities to prepare the system for simulation.
+#
+# As a note, if you make any changes to the rod after calling finalize, you will need to re-setup the system. This requires rerunning all cells above this point.
+
+timoshenko_sim.finalize()
+
+# %%
+# Define Simulation Time
+# ----------------------
+# The last thing we need to do decide how long we want the simulation to run for and what timestepping method to use. Currently, the PositionVerlet algorithim is suggested default method.
+#
+# In this example, we are trying to match a steady-state solution by temporally evolving our system to reach equilibrium. As such, there is a tradeoff between letting the simulation run long enough to reach the equilibrium and waiting around for the simulation to be done. Here we are running the simulation for 10 seconds, this produces reasonable agreement with the analytical solution without taking to long to finish. If you run the simulation for longer, you will get better agreement with the analytical solution.
+
+timestepper = ea.PositionVerlet()
+# timestepper = PEFRL()
+
+total_steps = int(simulation_time / dt)
+print("Total steps", total_steps)
+
+# %%
+# Run Simulation
+# --------------
+#
+# We are now ready to perform the simulation. To run the simulation, we ``integrate`` the ``timoshenko_sim`` system using the ``timestepper`` method until ``final_time`` by taking ``total_steps``. As currently setup, the beam simulation takes about 1 minute to run.
+
+time = 0.0
+for i in tqdm(range(total_steps)):
+    time = timestepper.step(timoshenko_sim, time, dt)
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
new file mode 100644
index 00000000..5ebedb6e
--- /dev/null
+++ b/backend/pyproject.toml
@@ -0,0 +1,78 @@
+[project]
+name = "elasticapp"
+version = "0.0.4"
+description = "A CPP accelerated backend for PyElastica kernels"
+requires-python = ">=3.10"
+license = {text = "MIT License"}
+dependencies = [
+    "pybind11",
+    "pytest",
+    "pytest-rng",
+    "pytest-mock",
+]
+
+# this is a list that can be expanded
+authors = [{name = "Seung Hyun Kim", email = "skim0119@gmail.com"}]
+
+# classifiers can be added here, later
+classifiers = [
+]
+
+[project.urls]
+"Homepage" = "https://github.com/GazzolaLab/PyElastica/"
+"Bug Reports" = "https://github.com/GazzolaLab/PyElastica/issues"
+"Source" = "https://github.com/GazzolaLab/PyElastica/"
+"Documentation" = "https://docs.cosseratrods.org/"
+
+[build-system]
+requires = ["scikit-build-core", "pybind11"]
+build-backend = "scikit_build_core.build"
+
+[tool.scikit-build]
+build-dir = "build"
+
+[tool.scikit-build.cmake]
+version = ">=3.20"
+build-type = "Release"
+args = []
+
+# [tool.mypy]
+# files = "setup.py"
+# python_version = "3.7"
+# strict = true
+# show_error_codes = true
+# enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]
+# warn_unreachable = true
+#
+# [[tool.mypy.overrides]]
+# module = ["ninja"]
+# ignore_missing_imports = true
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
+xfail_strict = true
+filterwarnings = [
+    "error",
+    "ignore:(ast.Str|Attribute s|ast.NameConstant|ast.Num) is deprecated:DeprecationWarning:_pytest",
+]
+testpaths = ["tests/py"]
+
+[tool.cibuildwheel]
+test-command = "pytest {project}/tests"
+test-extras = ["test"]
+test-skip = ["*universal2:arm64"]
+# Setuptools bug causes collision between pypy and cpython artifacts
+before-build = "rm -rf {project}/build"
+
+[tool.ruff]
+target-version = "py37"
+
+[tool.ruff.lint]
+extend-select = [
+  "B",    # flake8-bugbear
+  "I",    # isort
+  "PGH",  # pygrep-hooks
+  "RUF",  # Ruff-specific
+  "UP",   # pyupgrade
+]
diff --git a/backend/src/_api.cpp b/backend/src/_api.cpp
new file mode 100644
index 00000000..1e3bb93d
--- /dev/null
+++ b/backend/src/_api.cpp
@@ -0,0 +1,606 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <pybind11/stl.h>
+#include "block.h"
+#include "block_view.h"
+#include "traits.h"
+#include "api.h"
+#include "operations.h"
+#include "cosserat_rod_system.h"
+#include <iostream>
+#include <string>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+#include <chrono>
+#include <Eigen/Core>
+
+#include <omp.h>
+
+namespace py = pybind11;
+
+namespace elasticapp {
+
+#ifdef ELASTICAPP_GIL_RELEASE
+#define ELASTICAPP_GIL_RELEASE_SCOPE() py::gil_scoped_release gil_release;
+#else
+#define ELASTICAPP_GIL_RELEASE_SCOPE()
+#endif
+
+// Helper to find a variable type by name at runtime
+// Iterates through SystemType::Variables tuple and finds matching name
+template<SystemModel SystemType, typename VariablesTuple, std::size_t Index>
+auto get_variable_by_name_impl(BlockView<SystemType>& view, const std::string& var_name) {
+    using CurrentVar = std::tuple_element_t<Index, VariablesTuple>;
+
+    // Check if current variable's name matches
+    if (var_name == std::string(CurrentVar::name)) {
+        return view.template get<CurrentVar>();
+    }
+
+    // Recurse to next variable if not last
+    if constexpr (Index + 1 < std::tuple_size_v<VariablesTuple>) {
+        return get_variable_by_name_impl<SystemType, VariablesTuple, Index + 1>(view, var_name);
+    }
+
+    // If we've exhausted all variables, throw error
+    throw std::runtime_error("Unknown variable name: " + var_name);
+}
+
+// Helper function to map string variable names to types and call get<VariableType>()
+// This allows Python to use block.at(index).get("position") syntax
+template<SystemModel SystemType>
+auto get_variable_by_name(BlockView<SystemType>& view, const std::string& var_name) {
+    using VariablesTuple = typename SystemType::Variables;
+
+    if constexpr (std::tuple_size_v<VariablesTuple> > 0) {
+        return get_variable_by_name_impl<SystemType, VariablesTuple, 0>(view, var_name);
+    } else {
+        throw std::runtime_error("System has no variables");
+    }
+}
+
+// Helper to find a variable type by name at runtime for Block
+// Iterates through SystemType::Variables tuple and finds matching name
+template<typename BlockType, typename VariablesTuple, std::size_t Index>
+auto get_block_variable_by_name_impl(BlockType& block, const std::string& var_name) {
+    using CurrentVar = std::tuple_element_t<Index, VariablesTuple>;
+
+    // Check if current variable's name matches
+    if (var_name == std::string(CurrentVar::name)) {
+        return block.template get<CurrentVar>();
+    }
+
+    // Recurse to next variable if not last
+    if constexpr (Index + 1 < std::tuple_size_v<VariablesTuple>) {
+        return get_block_variable_by_name_impl<BlockType, VariablesTuple, Index + 1>(block, var_name);
+    }
+
+    // If we've exhausted all variables, throw error
+    throw std::runtime_error("Unknown variable name: " + var_name);
+}
+
+// Helper function to map string variable names to types and call get<VariableType>() for Block
+// This allows Python to use block.get("position") syntax
+template<typename BlockType>
+auto get_block_variable_by_name(BlockType& block, const std::string& var_name) {
+    using VariablesTuple = typename BlockType::Variables;
+
+    if constexpr (std::tuple_size_v<VariablesTuple> > 0) {
+        return get_block_variable_by_name_impl<BlockType, VariablesTuple, 0>(block, var_name);
+    } else {
+        throw std::runtime_error("System has no variables");
+    }
+}
+
+// Helper to reset ghost for a variable by name
+template<typename BlockType, typename VariablesTuple, std::size_t Index>
+void reset_ghost_for_variable_by_name_impl(BlockType& block, const std::string& var_name) {
+    using CurrentVar = std::tuple_element_t<Index, VariablesTuple>;
+
+    // Check if current variable's name matches
+    if (var_name == std::string(CurrentVar::name)) {
+        block.template reset_ghost_for_variable<CurrentVar>();
+        return;
+    }
+
+    // Recurse to next variable if not last
+    if constexpr (Index + 1 < std::tuple_size_v<VariablesTuple>) {
+        reset_ghost_for_variable_by_name_impl<BlockType, VariablesTuple, Index + 1>(block, var_name);
+    } else {
+        throw std::runtime_error("Unknown variable name: " + var_name);
+    }
+}
+
+// Helper function to reset ghost for a variable by name
+template<typename BlockType>
+void reset_ghost_for_variable_by_name(BlockType& block, const std::string& var_name) {
+    using VariablesTuple = typename BlockType::Variables;
+
+    if constexpr (std::tuple_size_v<VariablesTuple> > 0) {
+        reset_ghost_for_variable_by_name_impl<BlockType, VariablesTuple, 0>(block, var_name);
+    } else {
+        throw std::runtime_error("System has no variables");
+    }
+}
+
+// Helper to convert Eigen Block view to numpy array
+// For Scalar variables (rows == 1), returns a 1D array instead of 2D
+// For Matrix variables (rows == 9), returns a 3D array (3, 3, N) directly
+template<typename BlockExpr>
+py::object block_to_numpy(BlockExpr&& block_expr, py::object parent) {
+    // Evaluate the expression to get dimensions
+    auto rows = static_cast<py::ssize_t>(block_expr.rows());
+    auto cols = static_cast<py::ssize_t>(block_expr.cols());
+
+    // Get actual strides from the Eigen Block expression
+    // For Eigen Blocks, innerStride() is the stride between elements in the same row/column
+    // and outerStride() is the stride between rows/columns depending on storage order
+    // For column-major: innerStride() = 1 (between rows), outerStride() = underlying_rows (between columns)
+    // For row-major: innerStride() = 1 (between columns), outerStride() = underlying_cols (between rows)
+    auto inner_stride = static_cast<py::ssize_t>(block_expr.innerStride() * sizeof(double));
+    auto outer_stride = static_cast<py::ssize_t>(block_expr.outerStride() * sizeof(double));
+
+    // For Scalar variables (rows == 1), return as 1D array
+    if (rows == 1) {
+        // For 1D array, stride is the column stride
+        py::ssize_t stride;
+        if constexpr (IsColMajor) {
+            // Column-major: stride between columns is outer_stride
+            stride = outer_stride;
+        } else {
+            // Row-major: stride between columns is inner_stride
+            stride = inner_stride;
+        }
+
+        return py::array_t<double>(
+            {cols},
+            {stride},
+            const_cast<double*>(block_expr.data()),
+            parent  // Keep parent object alive
+        );
+    }
+
+    // For Matrix variables (rows == 9), return as 3D array (3, 3, N)
+    // Matrix variables represent 3x3 matrices stored as flattened 9-element vectors
+    // Storage order: [m00, m10, m20, m01, m11, m21, m02, m12, m22] (column-major)
+    if (rows == 9) {
+        // For (3, 3, N) view from (9, N) column-major:
+        // Mapping: arr_3d[a, b, c] -> arr_2d[a*3 + b, c]
+        // Strides:
+        //   - Page stride (a dimension): 3 * row_stride (to skip 3 rows)
+        //   - Row stride (b dimension): row_stride (to skip 1 row)
+        //   - Col stride (c dimension): col_stride (to skip 1 column)
+        py::ssize_t page_stride, row_stride_3d, col_stride_3d;
+        if constexpr (IsColMajor) {
+            // Column-major: row_stride = inner_stride, col_stride = outer_stride
+            page_stride = 3 * inner_stride;  // Stride for first 3x3 dimension (skip 3 rows)
+            row_stride_3d = inner_stride;     // Stride between rows in 3x3 (skip 1 row)
+            col_stride_3d = outer_stride;    // Stride between columns (N dimension)
+        } else {
+            // Row-major: row_stride = outer_stride, col_stride = inner_stride
+            page_stride = 3 * outer_stride;
+            row_stride_3d = outer_stride;
+            col_stride_3d = inner_stride;
+        }
+
+        // Use py::buffer_info for 3D arrays with custom strides
+        std::vector<py::ssize_t> shape = {3, 3, cols};
+        std::vector<py::ssize_t> strides = {page_stride, row_stride_3d, col_stride_3d};
+        py::buffer_info buf_info(
+            const_cast<double*>(block_expr.data()),
+            sizeof(double),
+            py::format_descriptor<double>::format(),
+            3,
+            shape,
+            strides
+        );
+        return py::array_t<double>(buf_info, parent);
+    }
+
+    // For Vector variables (rows == 3), return as 2D array
+    // For numpy, strides are in bytes and represent the step size for each dimension
+    // For column-major (Eigen default): row_stride = inner_stride, col_stride = outer_stride
+    // For row-major: row_stride = outer_stride, col_stride = inner_stride
+    py::ssize_t row_stride, col_stride;
+    if constexpr (IsColMajor) {
+        // Column-major: stride between rows is inner_stride, between columns is outer_stride
+        row_stride = inner_stride;
+        col_stride = outer_stride;
+    } else {
+        // Row-major: stride between rows is outer_stride, between columns is inner_stride
+        row_stride = outer_stride;
+        col_stride = inner_stride;
+    }
+
+    // Create numpy array view (non-owning) with correct strides
+    return py::array_t<double>(
+        {rows, cols},
+        {row_stride, col_stride},
+        const_cast<double*>(block_expr.data()),
+        parent  // Keep parent object alive
+    );
+}
+
+PYBIND11_MODULE(_memory_block, m) {
+    m.doc() = R"pbdoc(
+        Elasticapp BlockCosseratRod module
+        ----------------------------------------
+
+        Provides BlockCosseratRod class for 2D array management with Eigen backend.
+    )pbdoc";
+
+    // Forward declare BlockRodSystemView so it can be used as a return type
+    using BlockRodSystemViewType = BlockRodSystem::View;
+
+    // Thread management functions (only available when threading is enabled)
+    // Disable Eigen's internal threading to prevent oversubscription with OpenMP
+    // We use OpenMP for explicit parallelization, so Eigen should use single-threaded operations
+    Eigen::setNbThreads(1);
+
+    m.def("set_num_threads", [](int num_threads) {
+        if (num_threads > 0) {
+            omp_set_num_threads(num_threads);
+            // Ensure Eigen uses single thread to avoid oversubscription
+            Eigen::setNbThreads(1);
+        }
+    },
+    R"pbdoc(
+        Set the number of OpenMP threads to use for parallel operations.
+
+        Args:
+            num_threads: Number of threads to use (must be > 0).
+                        If 0 or negative, OpenMP will use its default
+                        (typically all available CPU cores).
+
+        Note:
+            This affects all subsequent parallel regions in the code.
+            The environment variable OMP_NUM_THREADS can also be used
+            to control thread count.
+    )pbdoc",
+    py::arg("num_threads"));
+
+    m.def("get_num_threads", []() {
+        return omp_get_num_threads();
+    },
+    R"pbdoc(
+        Get the current number of threads in the current parallel region.
+
+        Returns:
+            int: Number of threads (returns 1 if called outside a parallel region).
+    )pbdoc");
+
+    m.def("get_max_threads", []() {
+        return omp_get_max_threads();
+    },
+    R"pbdoc(
+        Get the maximum number of threads that can be used.
+
+        Returns:
+            int: Maximum number of threads available.
+    )pbdoc");
+
+    m.def("get_thread_num", []() {
+        return omp_get_thread_num();
+    },
+    R"pbdoc(
+        Get the current thread number (0 to num_threads-1).
+
+        Returns:
+            int: Current thread number (returns 0 if called outside a parallel region).
+    )pbdoc");
+
+    // BlockRodSystem class
+    py::class_<BlockRodSystem>(m, "BlockRodSystem")
+        .def(py::init([](py::object n_elems_per_rod_obj) {
+            std::vector<std::size_t> n_elems_per_rod;
+
+            // Handle list, tuple, or numpy array
+            if (py::isinstance<py::list>(n_elems_per_rod_obj)) {
+                py::list lst = n_elems_per_rod_obj.cast<py::list>();
+                for (auto item : lst) {
+                    n_elems_per_rod.push_back(item.cast<std::size_t>());
+                }
+            } else if (py::isinstance<py::tuple>(n_elems_per_rod_obj)) {
+                py::tuple tup = n_elems_per_rod_obj.cast<py::tuple>();
+                for (auto item : tup) {
+                    n_elems_per_rod.push_back(item.cast<std::size_t>());
+                }
+            } else if (py::isinstance<py::array>(n_elems_per_rod_obj)) {
+                py::array arr = n_elems_per_rod_obj.cast<py::array>();
+                auto buf = arr.request();
+                if (buf.ndim != 1) {
+                    throw std::runtime_error("numpy array must be 1-dimensional");
+                }
+                // Properly convert numpy array elements to std::size_t
+                // Handle different integer types safely
+                n_elems_per_rod.reserve(buf.size);
+                if (buf.itemsize == sizeof(std::int32_t)) {
+                    auto* ptr = static_cast<std::int32_t*>(buf.ptr);
+                    for (py::ssize_t i = 0; i < buf.size; ++i) {
+                        n_elems_per_rod.push_back(static_cast<std::size_t>(ptr[i]));
+                    }
+                } else if (buf.itemsize == sizeof(std::int64_t)) {
+                    auto* ptr = static_cast<std::int64_t*>(buf.ptr);
+                    for (py::ssize_t i = 0; i < buf.size; ++i) {
+                        n_elems_per_rod.push_back(static_cast<std::size_t>(ptr[i]));
+                    }
+                } else if (buf.itemsize == sizeof(std::size_t)) {
+                    auto* ptr = static_cast<std::size_t*>(buf.ptr);
+                    n_elems_per_rod.assign(ptr, ptr + buf.size);
+                } else {
+                    // Fallback: iterate and cast each element
+                    for (py::ssize_t i = 0; i < buf.size; ++i) {
+                        py::object item = arr.attr("__getitem__")(i);
+                        n_elems_per_rod.push_back(item.cast<std::size_t>());
+                    }
+                }
+            } else {
+                throw std::runtime_error("n_elems_per_rod must be a list, tuple, or numpy array");
+            }
+
+            return new BlockRodSystem(n_elems_per_rod);
+        }),
+        R"pbdoc(
+            Create a BlockRodSystem from list of element counts per rod.
+
+            Args:
+                n_elems_per_rod: List, tuple, or numpy array of integers representing
+                                 number of elements in each rod
+        )pbdoc",
+        py::arg("n_elems_per_rod"))
+        .def_property_readonly("n_systems", [](const BlockRodSystem& block) {
+            return block.n_systems();
+        },
+        R"pbdoc(
+            Number of systems (rods) in the block.
+        )pbdoc")
+        .def_property_readonly("shape", [](const BlockRodSystem& block) {
+            auto shape = block.shape();
+            return py::make_tuple(shape.first, shape.second);
+        },
+        R"pbdoc(
+            Get the shape of the block as (depth, width).
+
+            Returns:
+                tuple: (depth, width) tuple representing the block dimensions
+        )pbdoc")
+        .def("as_ref", [](const BlockRodSystem& block) {
+            return block_to_numpy(block.data(), py::cast(block));
+        },
+        R"pbdoc(
+            Get a numpy array view of the entire block data.
+
+            Returns:
+                numpy.ndarray: A writable numpy array view into the block's data.
+                The array does not own the data.
+        )pbdoc",
+        py::keep_alive<0, 1>())
+        .def("system_start_index", [](const BlockRodSystem& block, std::size_t index) {
+            return block.system_start_index(index);
+        },
+        R"pbdoc(
+            Get the starting column index for a specific rod.
+
+            Args:
+                index: Index of the rod
+
+            Returns:
+                int: Starting column index for the rod in the block
+        )pbdoc",
+        py::arg("index"))
+        .def("at", [](BlockRodSystem& block, std::size_t index) -> BlockRodSystemViewType {
+            return block.at(index);
+        },
+        R"pbdoc(
+            Get a view for a specific rod.
+
+            Args:
+                index: Index of the rod
+
+            Returns:
+                BlockRodSystemView: View object for accessing variables of this rod
+        )pbdoc",
+        py::arg("index"), py::return_value_policy::reference_internal)
+        .def("get", [](BlockRodSystem& block, const std::string& var_name) {
+            auto block_expr = get_block_variable_by_name(block, var_name);
+            return block_to_numpy(block_expr, py::cast(block));
+        },
+        R"pbdoc(
+            Get a variable by name as a numpy array view across all rods.
+
+            Args:
+                var_name: Name of the variable (e.g., "position", "velocity", "director")
+
+            Returns:
+                numpy.ndarray: A writable numpy array view into the variable's data
+                across all rods. The array does not own the data.
+        )pbdoc",
+        py::arg("var_name"), py::keep_alive<0, 1>())
+        .def("compute_internal_forces_and_torques", [](BlockRodSystem& block, double time) {
+            ELASTICAPP_GIL_RELEASE_SCOPE();
+            block.compute_internal_forces_and_torques(time);
+        },
+        R"pbdoc(
+            Compute internal forces and torques for all rods in the block.
+
+            This operation computes the internal forces and torques based on the
+            current state of the rods (positions, velocities, etc.).
+
+            Args:
+                time: Current simulation time.
+        )pbdoc",
+        py::arg("time"))
+        .def("compute_strains", [](BlockRodSystem& block, double time) {
+            ELASTICAPP_GIL_RELEASE_SCOPE();
+            block.compute_strains(time);
+        },
+        R"pbdoc(
+            Compute strains for all rods in the block.
+
+            This operation computes the shear/stretch strains and bending/twist strains
+            based on the current state of the rods.
+
+            Args:
+                time: Current simulation time (included for API compatibility, not used in implementation).
+        )pbdoc",
+        py::arg("time"))
+        .def("update_accelerations", [](BlockRodSystem& block, double time) {
+            ELASTICAPP_GIL_RELEASE_SCOPE();
+            block.update_accelerations(time);
+        },
+        R"pbdoc(
+            Update accelerations based on forces and torques.
+
+            This operation updates the acceleration variables based on the
+            computed forces and torques.
+
+            Args:
+                time: Current simulation time (included for API compatibility, not used in implementation).
+        )pbdoc",
+        py::arg("time"))
+        .def("zeroed_out_external_forces_and_torques", [](BlockRodSystem& block, double time) {
+            ELASTICAPP_GIL_RELEASE_SCOPE();
+            block.zeroed_out_external_forces_and_torques(time);
+        },
+        R"pbdoc(
+            Zero out external forces and torques for all rods.
+
+            This operation sets all external forces and torques to zero,
+            typically called at the beginning of each time step.
+
+            Args:
+                time: Current simulation time (included for API compatibility, not used in implementation).
+        )pbdoc",
+        py::arg("time"))
+        .def("update_kinematics", [](BlockRodSystem& block, double time, double prefac) {
+            ELASTICAPP_GIL_RELEASE_SCOPE();
+            block.update_kinematics(prefac);
+        },
+        R"pbdoc(
+            Update kinematics (position, director) for all rods.
+
+            This operation updates the kinematic variables based on velocity and omega.
+            Updates: position += prefac * velocity, director = R(prefac * omega) @ director
+
+            Args:
+                time: Current time (for compatibility with Python interface, not used in C++)
+                prefac: Integration prefactor (e.g., time step dt)
+        )pbdoc",
+        py::arg("time"), py::arg("prefac"))
+        .def("update_dynamics", [](BlockRodSystem& block, double time, double prefac) {
+            ELASTICAPP_GIL_RELEASE_SCOPE();
+            block.update_dynamics(prefac);
+        },
+        R"pbdoc(
+            Update dynamics (velocity, omega) for all rods.
+
+            This operation updates the dynamic variables based on acceleration and alpha.
+            Updates: velocity += prefac * acceleration, omega += prefac * alpha
+
+            Args:
+                time: Current time (for compatibility with Python interface, not used in C++)
+                prefac: Integration prefactor (e.g., time step dt)
+        )pbdoc",
+        py::arg("time"), py::arg("prefac"))
+        .def_property_readonly("ghost_nodes_idx", [](const BlockRodSystem& block) {
+            auto indices = block.ghost_nodes_idx();
+            // Convert to numpy array (pybind11 will handle the conversion automatically)
+            return py::cast(indices);
+        },
+        R"pbdoc(
+            Get indices of ghost nodes between rods.
+
+            Returns:
+                numpy.ndarray: An array of ghost node indices (length: n_rods - 1).
+                The array does not own the data.
+        )pbdoc",
+        py::keep_alive<0, 1>())
+        .def_property_readonly("ghost_elems_idx", [](const BlockRodSystem& block) {
+            auto indices = block.ghost_elems_idx();
+            // Convert to numpy array (pybind11 will handle the conversion automatically)
+            return py::cast(indices);
+        },
+        R"pbdoc(
+            Get indices of ghost elements between rods.
+
+            Returns:
+                numpy.ndarray: An array of ghost element indices (length: 2 * (n_rods - 1)).
+                The array does not own the data.
+        )pbdoc",
+        py::keep_alive<0, 1>())
+        .def_property_readonly("ghost_voronoi_idx", [](const BlockRodSystem& block) {
+            auto indices = block.ghost_voronoi_idx();
+            // Convert to numpy array (pybind11 will handle the conversion automatically)
+            return py::cast(indices);
+        },
+        R"pbdoc(
+            Get indices of ghost voronoi nodes between rods.
+
+            Returns:
+                numpy.ndarray: An array of ghost voronoi indices (length: 3 * (n_rods - 1)).
+                The array does not own the data.
+        )pbdoc",
+        py::keep_alive<0, 1>())
+        .def("reset_ghost_for_variable", [](BlockRodSystem& block, const std::string& var_name) {
+            // Helper to reset ghost for a variable by name
+            reset_ghost_for_variable_by_name(block, var_name);
+        },
+        R"pbdoc(
+            Reset ghost values for a specific variable by name.
+
+            Args:
+                var_name: Name of the variable (e.g., "position", "velocity", "director")
+        )pbdoc",
+        py::arg("var_name"))
+        .def("reset_ghost", [](BlockRodSystem& block) {
+            block.reset_ghost();
+        },
+        R"pbdoc(
+            Reset ghost values for all variables.
+
+            This operation sets all ghost node/element/voronoi values to their
+            default ghost_value as defined in each variable type.
+        )pbdoc");
+
+    // BlockRodSystemView class
+    py::class_<BlockRodSystemViewType>(m, "BlockRodSystemView")
+        .def_property_readonly("shape", [](const BlockRodSystemViewType& view) {
+            auto shape = view.shape();
+            return py::make_tuple(shape.first, shape.second);
+        },
+        R"pbdoc(
+            Get the shape of the view as (depth, width).
+
+            Returns:
+                tuple: (depth, width) tuple representing the view dimensions
+        )pbdoc")
+        .def("as_ref", [](const BlockRodSystemViewType& view) {
+            return block_to_numpy(view.data(), py::cast(view));
+        },
+        R"pbdoc(
+            Get a numpy array view of the entire view data.
+
+            Returns:
+                numpy.ndarray: A writable numpy array view into the view's data.
+                The array does not own the data.
+        )pbdoc",
+        py::keep_alive<0, 1>())
+        .def("get", [](BlockRodSystemViewType& view, const std::string& var_name) {
+            auto block_expr = get_variable_by_name(view, var_name);
+            return block_to_numpy(block_expr, py::cast(view));
+        },
+        R"pbdoc(
+            Get a variable by name as a numpy array view.
+
+            Args:
+                var_name: Name of the variable (e.g., "position", "velocity", "director")
+
+            Returns:
+                numpy.ndarray: A writable numpy array view into the variable's data.
+                The array does not own the data.
+        )pbdoc",
+        py::arg("var_name"), py::keep_alive<0, 1>());
+
+}
+} // namespace elasticapp
diff --git a/backend/src/_version.cpp b/backend/src/_version.cpp
new file mode 100644
index 00000000..e4e99d90
--- /dev/null
+++ b/backend/src/_version.cpp
@@ -0,0 +1,27 @@
+#include <pybind11/pybind11.h>
+#include "version.h"
+
+namespace py = pybind11;
+
+PYBIND11_MODULE(version, m) {
+    m.doc() = R"pbdoc(
+        Elasticapp version module
+        -------------------------
+
+        Provides version information for the elasticapp package.
+    )pbdoc";
+
+    // Version function
+    m.def("version", &elasticapp::version, R"pbdoc(
+        Return the current version of elasticapp.
+
+        Returns:
+            str: The version string
+    )pbdoc");
+
+#ifdef VERSION_INFO
+    m.attr("__version__") = elasticapp::version();
+#else
+    m.attr("__version__") = "dev";
+#endif
+}
diff --git a/backend/src/api.h b/backend/src/api.h
new file mode 100644
index 00000000..e17670b4
--- /dev/null
+++ b/backend/src/api.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "block.h"
+#include "cosserat_rod_system.h"
+#include "operations.h"
+
+namespace elasticapp {
+
+// BlockRodSystem is a CRTP mix of CosseratRodSystem, Block, and CosseratRodOperations
+// It combines System functionality with Block data storage and rod-specific operations
+// This allows Block to have access to all CosseratRodSystem variables and operations
+// Block<CosseratRodSystem, CosseratRodOperations> inherits from:
+// - CosseratRodSystem (system variables and methods)
+// - CosseratRodOperations<Block<...>> (rod-specific operations)
+// So it has:
+// - All CosseratRodSystem methods
+// - All Block methods
+// - All CosseratRodOperations methods (compute_internal_forces_and_torques, etc.)
+// - Access to CosseratRodSystem::Variables for template metaprogramming
+using BlockRodSystem = Block<CosseratRodSystem, CosseratRodOperations>;
+} // namespace elasticapp
diff --git a/backend/src/block.h b/backend/src/block.h
new file mode 100644
index 00000000..0f9cb451
--- /dev/null
+++ b/backend/src/block.h
@@ -0,0 +1,280 @@
+#pragma once
+
+#include <cstddef>
+#include <vector>
+#include <stdexcept>
+#include <string>
+#include "traits.h"
+#include "system.h"
+#include "block_get_impl.h"
+#include "block_view.h"
+#include "operations.h"
+
+namespace elasticapp {
+
+// Block class using CRTP with SystemModel and Operations
+// Block inherits from SystemModel, giving it access to all system variables and methods
+// Block also inherits from OperationsType using CRTP, allowing for extensible operations
+// SystemModel must be a System<ValidVariable...> type
+// OperationsType must be a template class that takes the Block type as a template parameter
+template<SystemModel SystemType, template<typename> class OperationsType = DefaultOperations>
+class Block : public SystemType, public OperationsType<Block<SystemType, OperationsType>> {
+public:
+
+    using System = SystemType;
+    using Variables = typename SystemType::Variables;
+    using View = BlockView<SystemType>;
+
+    constexpr static std::size_t GHOST_NODE_WIDTH = 1; // Size of ghost for node variables.
+
+    // Constructor: takes list of element counts per rod
+    explicit Block(const std::vector<std::size_t>& n_elems_per_rod) {
+        // Validate input: reject empty list or less than 6 total elements
+        if (n_elems_per_rod.empty()) {
+            throw std::invalid_argument("n_elems_per_rod cannot be empty");
+        }
+
+        std::size_t total_elements = 0;
+        for (std::size_t n_elems : n_elems_per_rod) {
+            total_elements += n_elems;
+        }
+        if (total_elements < 6) {
+            throw std::invalid_argument("Total number of elements must be at least 6, got " +
+                                       std::to_string(total_elements));
+        }
+
+        compute_width_and_indices(n_elems_per_rod);
+        depth_ = SystemType::get_depth();
+        data_ = MatrixType(static_cast<IndexType>(depth_), static_cast<IndexType>(width_));
+        data_.setZero();
+        initialize_ghost_indices();
+        reset_ghost();  // Initialize all ghost values
+    }
+
+    std::pair<std::size_t, std::size_t> shape() const {
+        return std::make_pair(
+            static_cast<std::size_t>(data_.rows()),
+            static_cast<std::size_t>(data_.cols())
+        );
+    }
+
+    // Accessors
+    std::size_t width() const { return width_; }
+    std::size_t depth() const { return depth_; }
+    std::size_t n_systems() const { return rod_start_indices_.size(); }
+
+    std::size_t system_start_index(std::size_t rod_index) const {
+        if (rod_index >= rod_start_indices_.size()) {
+            throw std::out_of_range("System index out of range");
+        }
+        return rod_start_indices_[rod_index];
+    }
+
+    MatrixType& data() { return data_; }
+    const MatrixType& data() const { return data_; }
+
+    // Get number of elements for a specific rod
+    std::size_t rod_n_elems(std::size_t rod_index) const {
+        if (rod_index >= rod_n_elems_.size()) {
+            throw std::out_of_range("System index out of range");
+        }
+        return rod_n_elems_[rod_index];
+    }
+
+    std::size_t rod_n_nodes(std::size_t rod_index) const {
+        return rod_n_elems(rod_index) + 1;
+    }
+
+    std::size_t rod_n_voronoi(std::size_t rod_index) const {
+        return rod_n_elems(rod_index) - 1;
+    }
+
+    // Get the n_elems_per_rod vector (for BlockView construction)
+    const std::vector<std::size_t>& get_n_elems_per_rod() const { return rod_n_elems_; }
+
+    // Get ghost indices (return const references for efficiency)
+    inline const std::vector<std::size_t>& ghost_nodes_idx() const {
+        return ghost_nodes_idx_;
+    }
+    inline const std::vector<std::size_t>& ghost_elems_idx() const {
+        return ghost_elems_idx_;
+    }
+    inline const std::vector<std::size_t>& ghost_voronoi_idx() const {
+        return ghost_voronoi_idx_;
+    }
+
+    // Reset ghost values for a specific variable
+    // Uses VariableTag::ghost_value and appropriate ghost indices based on placement
+    template<typename VariableTag>
+    void reset_ghost_for_variable() {
+        static_assert(tuple_contains_v<VariableTag, system_variables_t<SystemType>>,
+            "VariableTag is not a valid member of tuple SystemType::Variables");
+
+        // Compute row offset for this variable
+        constexpr std::size_t row_offset = compute_variable_offset<VariableTag, SystemType>();
+        constexpr std::size_t var_dimension = get_dimension_v<VariableTag>;
+
+        // Get appropriate ghost indices based on placement
+        std::vector<std::size_t> ghost_indices;
+        if constexpr (std::is_base_of_v<Placement::OnNode, VariableTag>) {
+            ghost_indices = ghost_nodes_idx();
+        } else if constexpr (std::is_base_of_v<Placement::OnElement, VariableTag>) {
+            ghost_indices = ghost_elems_idx();
+        } else if constexpr (std::is_base_of_v<Placement::OnVoronoi, VariableTag>) {
+            ghost_indices = ghost_voronoi_idx();
+        }
+
+        // Set ghost values at each ghost index
+        // Note: ghost indices are in the full width coordinate system
+        // For OnElement and OnVoronoi variables, we need to ensure ghost indices
+        // are within the adjusted width (since get() returns adjusted width views)
+        const auto& ghost_val = VariableTag::ghost_value;
+
+        // Optimized: Add simd hint for inner loop (var_dimension is compile-time constant)
+        for (std::size_t ghost_col : ghost_indices) {
+            // Only reset ghost values that are within the adjusted width
+            // (ghost indices beyond adjusted width are not accessible via get())
+            IndexType data_col = static_cast<IndexType>(ghost_col);
+            #pragma omp simd
+            for (std::size_t row = 0; row < var_dimension; ++row) {
+                IndexType data_row = static_cast<IndexType>(row_offset + row);
+                data_(data_row, data_col) = ghost_val(static_cast<IndexType>(row), 0);
+            }
+        }
+    }
+
+    // Reset ghost values for all variables
+    // Iterates over all variables and calls reset_ghost_for_variable for each
+    void reset_ghost() {
+        reset_ghost_impl<system_variables_t<SystemType>, 0>();
+    }
+
+    // Get a view for a specific variable across all rods
+    // Returns a view into the variable's data (rows) and adjusted columns based on placement
+    // - OnNode: full width
+    // - OnElement: width - 1
+    // - OnVoronoi: width - 2
+    // No data is copied - returns a reference to the same matrix
+    template<typename VariableTag>
+    auto get() {
+        // Assert VariableTag is a valid member of tuple SystemType::Variables
+        static_assert(tuple_contains_v<VariableTag, system_variables_t<SystemType>>,
+            "VariableTag is not a valid member of tuple SystemType::Variables");
+        // Compute row offset for this variable
+        constexpr std::size_t row_offset = compute_variable_offset<VariableTag, SystemType>();
+        constexpr std::size_t var_dimension = get_dimension_v<VariableTag>;
+
+        // Adjust width based on placement type
+        std::size_t adjusted_width = width_;
+        if constexpr (std::is_base_of_v<Placement::OnElement, VariableTag>) {
+            adjusted_width = width_ > 0 ? width_ - 1 : 0;
+        } else if constexpr (std::is_base_of_v<Placement::OnVoronoi, VariableTag>) {
+            adjusted_width = width_ > 1 ? width_ - 2 : 0;
+        }
+        // OnNode: no adjustment needed (full width)
+
+        // Return a view into the specific rows and adjusted columns
+        return get_block_slice(data_, row_offset, var_dimension, 0, adjusted_width);
+    }
+
+    template<typename VariableTag>
+    auto get() const {
+        // Assert VariableTag is a valid member of tuple SystemType::Variables
+        static_assert(tuple_contains_v<VariableTag, system_variables_t<SystemType>>,
+            "VariableTag is not a valid member of tuple SystemType::Variables");
+        // Compute row offset for this variable
+        constexpr std::size_t row_offset = compute_variable_offset<VariableTag, SystemType>();
+        constexpr std::size_t var_dimension = get_dimension_v<VariableTag>;
+
+        // Adjust width based on placement type
+        std::size_t adjusted_width = width_;
+        if constexpr (std::is_base_of_v<Placement::OnElement, VariableTag>) {
+            adjusted_width = width_ > 0 ? width_ - 1 : 0;
+        } else if constexpr (std::is_base_of_v<Placement::OnVoronoi, VariableTag>) {
+            adjusted_width = width_ > 1 ? width_ - 2 : 0;
+        }
+        // OnNode: no adjustment needed (full width)
+
+        // Return a view into the specific rows and adjusted columns
+        return get_block_slice(data_, row_offset, var_dimension, 0, adjusted_width);
+    }
+
+    // Create a view for a specific rod
+    View at(std::size_t rod_index);
+
+private:
+    MatrixType data_;
+    std::size_t width_;
+    std::size_t depth_;
+    std::vector<std::size_t> rod_start_indices_;
+    std::vector<std::size_t> rod_n_elems_;  // Store n_elems for each rod
+
+    std::vector<std::size_t> ghost_nodes_idx_; // Indices of ghost nodes between rods.
+    std::vector<std::size_t> ghost_elems_idx_; // Indices of ghost elements between rods.
+    std::vector<std::size_t> ghost_voronoi_idx_; // Indices of ghost voronoi nodes between rods.
+
+    void compute_width_and_indices(const std::vector<std::size_t>& n_elems_per_rod) {
+        width_ = 0;
+        rod_start_indices_.clear();
+        rod_n_elems_.clear();
+        rod_start_indices_.reserve(n_elems_per_rod.size());
+        rod_n_elems_.reserve(n_elems_per_rod.size());
+
+        for (std::size_t n_elems : n_elems_per_rod) {
+            rod_start_indices_.push_back(width_);
+            rod_n_elems_.push_back(n_elems);
+            // Each rod has n_elems + 1 nodes
+            width_ += n_elems + 1 + GHOST_NODE_WIDTH;
+        }
+        width_ -= GHOST_NODE_WIDTH; // Remove the last ghost node width.
+    }
+
+    void initialize_ghost_indices() {
+        ghost_nodes_idx_.reserve(rod_n_elems_.size() * GHOST_NODE_WIDTH);
+        ghost_elems_idx_.reserve(rod_n_elems_.size() * (1 + GHOST_NODE_WIDTH));
+        ghost_voronoi_idx_.reserve(rod_n_elems_.size() * (2 + GHOST_NODE_WIDTH));
+
+        std::size_t cumulative_nodes = 0;
+        for (std::size_t i = 0; i < rod_n_elems_.size(); ++i) {
+            cumulative_nodes += rod_n_elems_[i] + 1;  // n_elems + 1 = n_nodes
+            ghost_elems_idx_.push_back(cumulative_nodes - 1);
+            ghost_voronoi_idx_.push_back(cumulative_nodes - 2);
+            ghost_voronoi_idx_.push_back(cumulative_nodes - 1);
+            for (std::size_t j = 0; j < GHOST_NODE_WIDTH; ++j) {
+                ghost_nodes_idx_.push_back(cumulative_nodes + j);
+                ghost_elems_idx_.push_back(cumulative_nodes + j);
+                ghost_voronoi_idx_.push_back(cumulative_nodes + j);
+            }
+            cumulative_nodes += GHOST_NODE_WIDTH;
+        }
+
+        ghost_nodes_idx_.pop_back();
+        ghost_elems_idx_.pop_back();
+        ghost_voronoi_idx_.pop_back();
+    }
+
+    // Helper to iterate over all variables and reset ghost values
+    template<typename VariablesTuple, std::size_t Index>
+    void reset_ghost_impl() {
+        using CurrentVar = std::tuple_element_t<Index, VariablesTuple>;
+        reset_ghost_for_variable<CurrentVar>();
+
+        // Recurse to next variable if not last
+        if constexpr (Index + 1 < std::tuple_size_v<VariablesTuple>) {
+            reset_ghost_impl<VariablesTuple, Index + 1>();
+        }
+    }
+
+};
+
+
+// Implement at() after BlockView is fully defined
+template<SystemModel SystemType, template<typename> class OperationsType>
+typename Block<SystemType, OperationsType>::View Block<SystemType, OperationsType>::at(std::size_t rod_index) {
+    std::size_t rod_start_col = system_start_index(rod_index);
+    std::size_t rod_n_elems = rod_n_elems_[rod_index];
+    return BlockView<SystemType>(data_, rod_index, rod_start_col, rod_n_elems);
+}
+
+
+} // namespace elasticapp
diff --git a/backend/src/block_get_impl.h b/backend/src/block_get_impl.h
new file mode 100644
index 00000000..3b70341d
--- /dev/null
+++ b/backend/src/block_get_impl.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include "system.h"
+#include "variable_offsets.h"
+#include "traits.h"
+
+namespace elasticapp {
+
+// Uses C++17 fold expressions for a concise implementation
+// Helper to check if a type is in a tuple
+template<typename T, typename Tuple>
+struct tuple_contains_impl;
+
+template<typename T, typename... Ts>
+struct tuple_contains_impl<T, std::tuple<Ts...>> {
+    static constexpr bool value = (std::is_same_v<T, Ts> || ...);
+};
+
+template<typename T, typename Tuple>
+constexpr bool tuple_contains_v = tuple_contains_impl<T, Tuple>::value;
+
+
+// Helper function to get number of columns for a variable based on placement
+template<typename VariableTag, SystemModel SystemType>
+inline constexpr std::size_t get_variable_num_cols(std::size_t rod_n_nodes,
+                                            std::size_t rod_n_elems,
+                                            std::size_t rod_n_voronoi) {
+    if constexpr (std::is_base_of_v<Placement::OnNode, VariableTag>) {
+        return rod_n_nodes;
+    } else if constexpr (std::is_base_of_v<Placement::OnElement, VariableTag>) {
+        return rod_n_elems;
+    } else if constexpr (std::is_base_of_v<Placement::OnVoronoi, VariableTag>) {
+        return rod_n_voronoi;
+    } else {
+        static_assert(std::is_base_of_v<Placement::OnNode, VariableTag> ||
+                     std::is_base_of_v<Placement::OnElement, VariableTag> ||
+                     std::is_base_of_v<Placement::OnVoronoi, VariableTag>,
+                     "VariableTag must have a Placement tag");
+        return 0;  // Should never reach here
+    }
+}
+
+// Generic implementation helper for Block::get() and BlockRodSystemView::get()
+// Extracted to reduce code duplication between const and non-const versions
+// This can be used by both Block and BlockRodSystemView classes
+template<typename VariableTag, SystemModel SystemType, typename MatrixRef>
+inline auto get_impl(MatrixRef&& matrix,
+              std::size_t rod_n_nodes,
+              std::size_t rod_n_elems,
+              std::size_t rod_n_voronoi,
+              std::size_t rod_start_col) {
+    // Assert VariableTag is a valid member of tuple SystemType::Variables
+    static_assert(tuple_contains_v<VariableTag, system_variables_t<SystemType>>,
+        "VariableTag is not a valid member of tuple SystemType::Variables");
+    // Compute row offset for this variable
+    constexpr std::size_t row_offset = compute_variable_offset<VariableTag, SystemType>();
+    constexpr std::size_t var_dimension = get_dimension_v<VariableTag>;
+
+    // Determine number of columns based on placement
+    std::size_t num_cols = get_variable_num_cols<VariableTag, SystemType>(
+        rod_n_nodes, rod_n_elems, rod_n_voronoi);
+
+    // Return a view into the specific rows and columns
+    return get_block_slice(matrix, row_offset, var_dimension, rod_start_col, num_cols);
+}
+
+} // namespace elasticapp
diff --git a/backend/src/block_view.h b/backend/src/block_view.h
new file mode 100644
index 00000000..2e98627f
--- /dev/null
+++ b/backend/src/block_view.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <cstddef>
+#include <vector>
+#include <stdexcept>
+#include "system.h"
+#include "block_get_impl.h"
+#include "traits.h"
+
+namespace elasticapp {
+
+// BlockView provides access to variables for a specific rod
+// This is templated on SystemType and uses template metaprogramming to expose variables
+template<SystemModel SystemType>
+class BlockView {
+public:
+    // using Variables = typename SystemType::Variables; // tuple of variables
+    constexpr static std::size_t depth = SystemType::get_depth();
+
+    BlockView(MatrixType& data, std::size_t rod_index,
+              std::size_t rod_start_col, std::size_t rod_n_elems);
+
+    std::pair<std::size_t, std::size_t> shape() const {
+        return std::make_pair(depth, rod_n_nodes_);
+    }
+
+    // Return a view into the submatrix (columns for this rod)
+    // Uses traits helper function to encapsulate Eigen-specific operations
+    auto data() {
+        return get_column_slice(data_, rod_start_col_, rod_n_nodes_);
+    }
+
+    auto data() const {
+        return get_column_slice(data_, rod_start_col_, rod_n_nodes_);
+    }
+
+    // Get a view for a specific variable
+    // Returns a view into the variable's data (rows) and this rod's columns
+    // No data is copied - returns a reference to the same matrix
+    template<typename VariableTag>
+    auto get() {
+        return get_impl<VariableTag, SystemType>(
+            data_, rod_n_nodes_, rod_n_elems_, rod_n_voronoi_, rod_start_col_);
+    }
+
+    template<typename VariableTag>
+    auto get() const {
+        return get_impl<VariableTag, SystemType>(
+            data_, rod_n_nodes_, rod_n_elems_, rod_n_voronoi_, rod_start_col_);
+    }
+
+protected:
+    MatrixType& data_;
+    std::size_t rod_index_;
+    std::size_t rod_n_elems_;
+    std::size_t rod_n_nodes_;
+    std::size_t rod_n_voronoi_;
+    std::size_t rod_start_col_;
+};
+
+// Note: block.h is included after BlockView definition to break circular dependency
+// The BlockView constructor implementation is in block.h after Block is fully defined
+template<SystemModel SystemType>
+BlockView<SystemType>::BlockView(MatrixType& data, std::size_t rod_index,
+                                 std::size_t rod_start_col, std::size_t rod_n_elems)
+    : data_(data), rod_index_(rod_index),
+      rod_n_elems_(rod_n_elems),
+      rod_n_nodes_(rod_n_elems + 1),
+      rod_n_voronoi_((rod_n_elems > 0) ? rod_n_elems - 1 : 0),
+      rod_start_col_(rod_start_col)
+{
+}
+
+} // namespace elasticapp
diff --git a/backend/src/cosserat_equations.h b/backend/src/cosserat_equations.h
new file mode 100644
index 00000000..75a62f14
--- /dev/null
+++ b/backend/src/cosserat_equations.h
@@ -0,0 +1,607 @@
+#pragma once
+
+#include <cstddef>
+#include <cmath>
+#include "cosserat_rod_system.h"
+#include "traits.h"
+#include "math/eigen_detail/eigen_linear_algebra.hpp"
+#include "math/eigen_detail/eigen_calculus.hpp"
+#include <omp.h>
+
+namespace elasticapp {
+
+// Forward declaration
+template<SystemModel SystemType, template<typename> class OperationsType>
+class Block;
+
+// Compute geometry from state (lengths, tangents, radius)
+// Updates: lengths, tangents, radius
+template<typename BlockType>
+inline void compute_geometry_from_state(BlockType& block) {
+    // Get variable views
+    auto&& position = block.template get<system::cosserat_rod::Position>();
+    auto&& volume = block.template get<system::cosserat_rod::Volume>();
+    auto&& lengths = block.template get<system::cosserat_rod::Lengths>();
+    auto&& tangents = block.template get<system::cosserat_rod::Tangents>();
+    auto&& radius = block.template get<system::cosserat_rod::Radius>();
+
+    // Compute position differences using difference_kernel
+    // position_diff = position[:, 1:] - position[:, :-1]
+    // This is equivalent to: difference_kernel(position)
+    auto position_diff = difference_kernel(position);
+
+    // Compute lengths = norm(position_diff) + 1e-14
+    // FIXME: 1e-14 is added to fix ghost lengths, which is 0, and causes division by zero error!
+    auto lengths_vec = batch_norm(position_diff);
+    const IndexType n_elems = lengths.cols();
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        lengths(0, k) = lengths_vec(k) + 1e-14;
+    }
+
+    // Compute tangents = position_diff / lengths
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        const double len = lengths(0, k);
+        tangents(0, k) = position_diff(0, k) / len;
+        tangents(1, k) = position_diff(1, k) / len;
+        tangents(2, k) = position_diff(2, k) / len;
+    }
+
+    // Compute radius from volume conservation: radius = sqrt(volume / (lengths * pi))
+    const double pi = 3.14159265358979323846;
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        radius(0, k) = std::sqrt(volume(0, k) / (lengths(0, k) * pi));
+    }
+}
+
+// Compute all dilatations (element and voronoi)
+// Updates: lengths, tangents, radius, dilatation, voronoi_dilatation
+template<typename BlockType>
+inline void compute_all_dilatations(BlockType& block) {
+    // Get variable views
+    auto&& lengths = block.template get<system::cosserat_rod::Lengths>();
+    auto&& rest_lengths = block.template get<system::cosserat_rod::RestLengths>();
+    auto&& dilatation = block.template get<system::cosserat_rod::Dilatation>();
+    auto&& rest_voronoi_lengths = block.template get<system::cosserat_rod::RestVoronoiLengths>();
+    auto&& voronoi_dilatation = block.template get<system::cosserat_rod::VoronoiDilatation>();
+
+    // Compute dilatation = lengths / rest_lengths
+    const IndexType n_elems = lengths.cols();
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        dilatation(0, k) = lengths(0, k) / rest_lengths(0, k);
+    }
+
+    // Compute voronoi_lengths from average of lengths
+    // voronoi_lengths = 0.5 * (lengths[k+1] + lengths[k]) for k in [0, n_voronoi-1]
+    auto voronoi_lengths = average_kernel(lengths);
+
+    // Compute voronoi_dilatation = voronoi_lengths / rest_voronoi_lengths
+    const IndexType n_voronoi = voronoi_dilatation.cols();
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_voronoi; ++k) {
+        voronoi_dilatation(0, k) = voronoi_lengths(0, k) / rest_voronoi_lengths(0, k);
+    }
+}
+
+// Compute shear/stretch strains (sigma)
+// Updates: lengths, tangents, radius, dilatation, voronoi_dilatation, sigma
+template<typename BlockType>
+inline void compute_shear_stretch_strains(BlockType& block) {
+    // Get variable views
+    auto&& dilatation = block.template get<system::cosserat_rod::Dilatation>();
+    auto&& director = block.template get<system::cosserat_rod::Director>();
+    auto&& tangents = block.template get<system::cosserat_rod::Tangents>();
+    auto&& sigma = block.template get<system::cosserat_rod::Sigma>();
+
+    // Compute sigma = dilatation * batch_matvec(director_collection, tangents) - z_vector
+    // director is stored as (9, n_elems) - flattened 3x3 matrices
+    // Storage order: [d00, d10, d20, d01, d11, d21, d02, d12, d22] (column-major)
+    // tangents is (3, n_elems)
+    // sigma is (3, n_elems)
+    const IndexType n_elems = sigma.cols();
+
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        // Extract 3x3 director matrix for element k
+        // director[:, k] is (row-wise flattened 3x3 matrix)
+        //  [d00, d01, d02]
+        //  [d10, d11, d12]
+        //  [d20, d21, d22]
+        double d00 = director(0, k);
+        double d01 = director(1, k);
+        double d02 = director(2, k);
+        double d10 = director(3, k);
+        double d11 = director(4, k);
+        double d12 = director(5, k);
+        double d20 = director(6, k);
+        double d21 = director(7, k);
+        double d22 = director(8, k);
+
+        // Compute director @ tangents for element k
+        // result = director_matrix * tangents[:, k]
+        double result0 = d00 * tangents(0, k) + d01 * tangents(1, k) + d02 * tangents(2, k);
+        double result1 = d10 * tangents(0, k) + d11 * tangents(1, k) + d12 * tangents(2, k);
+        double result2 = d20 * tangents(0, k) + d21 * tangents(1, k) + d22 * tangents(2, k);
+
+        // Compute sigma = dilatation * result - z_vector
+        const double dil = dilatation(0, k);
+        sigma(0, k) = dil * result0;
+        sigma(1, k) = dil * result1;
+        sigma(2, k) = dil * result2 - 1.0;
+    }
+}
+
+// Compute internal shear/stretch stresses from model
+// Updates: lengths, tangents, radius, dilatation, voronoi_dilatation, sigma, internal_stress
+template<typename BlockType>
+inline void compute_internal_shear_stretch_stresses_from_model(BlockType& block) {
+    // Get variable views
+    auto&& shear_matrix = block.template get<system::cosserat_rod::ShearMatrix>();
+    auto&& sigma = block.template get<system::cosserat_rod::Sigma>();
+    auto&& rest_sigma = block.template get<system::cosserat_rod::RestSigma>();
+    auto&& internal_stress = block.template get<system::cosserat_rod::InternalStress>();
+
+    // Compute sigma_diff = sigma - rest_sigma
+    // sigma is (3, n_elems), rest_sigma is (3, n_elems)
+    const IndexType n_elems = sigma.cols();
+
+    // Compute internal_stress = batch_matvec(shear_matrix, sigma - rest_sigma)
+    // shear_matrix is stored as (9, n_elems) - flattened 3x3 matrices
+    // Storage order: [m00, m10, m20, m01, m11, m21, m02, m12, m22] (column-major)
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        // Compute sigma_diff = sigma - rest_sigma for element k
+        double sigma_diff0 = sigma(0, k) - rest_sigma(0, k);
+        double sigma_diff1 = sigma(1, k) - rest_sigma(1, k);
+        double sigma_diff2 = sigma(2, k) - rest_sigma(2, k);
+
+        // Extract 3x3 shear_matrix for element k
+        // shear_matrix[:, k] is [m00, m10, m20, m01, m11, m21, m02, m12, m22]
+        double m00 = shear_matrix(0, k);
+        double m10 = shear_matrix(1, k);
+        double m20 = shear_matrix(2, k);
+        double m01 = shear_matrix(3, k);
+        double m11 = shear_matrix(4, k);
+        double m21 = shear_matrix(5, k);
+        double m02 = shear_matrix(6, k);
+        double m12 = shear_matrix(7, k);
+        double m22 = shear_matrix(8, k);
+
+        // Compute shear_matrix @ sigma_diff for element k
+        // result = shear_matrix * sigma_diff
+        internal_stress(0, k) = m00 * sigma_diff0 + m01 * sigma_diff1 + m02 * sigma_diff2;
+        internal_stress(1, k) = m10 * sigma_diff0 + m11 * sigma_diff1 + m12 * sigma_diff2;
+        internal_stress(2, k) = m20 * sigma_diff0 + m21 * sigma_diff1 + m22 * sigma_diff2;
+    }
+}
+
+// Compute internal forces for Cosserat rod
+// This is a template function that will be implemented later
+template<typename BlockType>
+inline void compute_internal_forces(BlockType& block) {
+    // Get variable views
+    auto&& director = block.template get<system::cosserat_rod::Director>();
+    auto&& internal_stress = block.template get<system::cosserat_rod::InternalStress>();
+    auto&& dilatation = block.template get<system::cosserat_rod::Dilatation>();
+    auto&& internal_forces = block.template get<system::cosserat_rod::InternalForces>();
+    auto&& cosserat_internal_stress = block.template get<system::cosserat_rod::ScratchVectorA>();
+
+    // Compute cosserat_internal_stress = director^T @ internal_stress / dilatation
+    // director is stored as (9, n_elems) - flattened 3x3 matrices
+    // Storage order: [d00, d10, d20, d01, d11, d21, d02, d12, d22] (column-major)
+    // internal_stress is (3, n_elems) where n_elems includes ghost elements
+    // cosserat_internal_stress is (3, n_elems)
+    // Note: internal_stress has adjusted width (width - 1 for OnElement), but we need full width
+    // Get the full width from director which also has adjusted width
+    const IndexType n_elems = director.cols();
+
+    // Temporary matrix for cosserat_internal_stress
+    // MatrixType cosserat_internal_stress(3, n_elems);
+
+    // Compute cosserat_internal_stress = director^T @ internal_stress
+    // Python: cosserat_internal_stress[i, k] = sum_j(director_collection[j, i, k] * internal_stress[j, k])
+    // In C++: director_collection[j, i, k] = director(j*3 + i, k)
+    for (IndexType i = 0; i < 3; ++i) {
+        #pragma omp parallel for simd schedule(static)
+        for (IndexType k = 0; k < n_elems; ++k) {
+            double sum = 0.0;
+            for (IndexType j = 0; j < 3; ++j) {
+                // director_collection[j, i, k] = director(j*3 + i, k)
+                IndexType director_idx = j * 3 + i;
+                sum += director(director_idx, k) * internal_stress(j, k);
+            }
+            cosserat_internal_stress(i, k) = sum / dilatation(0, k);
+        }
+    }
+
+    // Reset ghost values for cosserat_internal_stress (OnElement)
+    block.template reset_ghost_for_variable<system::cosserat_rod::ScratchVectorA>();
+
+    // Compute internal_forces = two_point_difference_kernel(cosserat_internal_stress)
+    // internal_forces is OnNode (3, n_nodes) where n_nodes = n_elems + 1
+    two_point_difference_kernel(internal_forces, cosserat_internal_stress);
+}
+
+// Compute bending/twist strains (kappa)
+// Updates: kappa
+template<typename BlockType>
+inline void compute_bending_twist_strains(BlockType& block) {
+    // Get variable views
+    auto&& director = block.template get<system::cosserat_rod::Director>();
+    auto&& rest_voronoi_lengths = block.template get<system::cosserat_rod::RestVoronoiLengths>();
+    auto&& kappa = block.template get<system::cosserat_rod::Kappa>();
+
+    // director is stored as (9, n_elems) - flattened 3x3 matrices
+    // Storage order: [d00, d10, d20, d01, d11, d21, d02, d12, d22] (column-major)
+    // rest_voronoi_lengths is (1, n_voronoi) where n_voronoi = n_elems - 1
+    // kappa is (3, n_voronoi)
+    const IndexType n_voronoi = kappa.cols();
+
+    // Compute temp = inv_rotate(director_collection)
+    // inv_rotate computes relative rotation between consecutive director frames
+    // Python: temp = _inv_rotate(director_collection)
+    // temp has shape (3, n_voronoi) where n_voronoi = n_elems - 1
+    double temp_x, temp_y, temp_z;
+
+    // Compute inv_rotate manually: relative rotation between consecutive directors
+    // Python implementation computes cross products between consecutive director rows
+    #pragma omp parallel for schedule(static)
+    for (IndexType k = 0; k < n_voronoi; ++k) {
+        // Extract director matrices for k and k+1
+        // director[:, k] is (row-wise flattened 3x3 matrix)
+        //  [d00, d01, d02]
+        //  [d10, d11, d12]
+        //  [d20, d21, d22]
+
+        // For director[k]:
+        double d00_k = director(0, k);
+        double d01_k = director(1, k);
+        double d02_k = director(2, k);
+        double d10_k = director(3, k);
+        double d11_k = director(4, k);
+        double d12_k = director(5, k);
+        double d20_k = director(6, k);
+        double d21_k = director(7, k);
+        double d22_k = director(8, k);
+
+        // For director[k+1]:
+        double d00_k1 = director(0, k + 1);
+        double d01_k1 = director(1, k + 1);
+        double d02_k1 = director(2, k + 1);
+        double d10_k1 = director(3, k + 1);
+        double d11_k1 = director(4, k + 1);
+        double d12_k1 = director(5, k + 1);
+        double d20_k1 = director(6, k + 1);
+        double d21_k1 = director(7, k + 1);
+        double d22_k1 = director(8, k + 1);
+
+        // Compute inv_rotate: cross product between consecutive director frames
+        temp_x = (d20_k1 * d10_k + d21_k1 * d11_k + d22_k1 * d12_k) -
+                     (d10_k1 * d20_k + d11_k1 * d21_k + d12_k1 * d22_k);
+        temp_y = (d00_k1 * d20_k + d01_k1 * d21_k + d02_k1 * d22_k) -
+                     (d20_k1 * d00_k + d21_k1 * d01_k + d22_k1 * d02_k);
+        temp_z = (d10_k1 * d00_k + d11_k1 * d01_k + d12_k1 * d02_k) -
+                     (d00_k1 * d10_k + d01_k1 * d11_k + d02_k1 * d12_k);
+
+        double trace = (d00_k1 * d00_k + d01_k1 * d01_k + d02_k1 * d02_k) +
+                (d10_k1 * d10_k + d11_k1 * d11_k + d12_k1 * d12_k) +
+                (d20_k1 * d20_k + d21_k1 * d21_k + d22_k1 * d22_k);
+
+        // Clip the trace to between -1 and 3.
+        // Any deviation beyond this is numerical error
+        trace = std::clamp(trace, -1.0, 3.0);
+        double cos = 0.5 * trace - 0.5;
+        double theta = std::acos(cos) + 1e-14;
+        double magnitude = -0.5 * theta / std::sin(theta) / rest_voronoi_lengths(0, k);
+        kappa(0, k) = temp_x * magnitude;
+        kappa(1, k) = temp_y * magnitude;
+        kappa(2, k) = temp_z * magnitude;
+    }
+}
+
+// Compute internal bending/twist stresses from model
+// Updates: kappa, internal_couple
+template<typename BlockType>
+inline void compute_internal_bending_twist_stresses_from_model(BlockType& block) {
+    // Get variable views
+    auto&& kappa = block.template get<system::cosserat_rod::Kappa>();
+    auto&& rest_kappa = block.template get<system::cosserat_rod::RestKappa>();
+    auto&& bend_matrix = block.template get<system::cosserat_rod::BendMatrix>();
+    auto&& internal_couple = block.template get<system::cosserat_rod::InternalCouple>();
+
+    // kappa is (3, n_voronoi), rest_kappa is (3, n_voronoi)
+    // bend_matrix is stored as (9, n_voronoi) - flattened 3x3 matrices
+    // internal_couple is (3, n_voronoi)
+    const IndexType n_voronoi = kappa.cols();
+
+    // Compute diff_kappa = kappa - rest_kappa
+    // MatrixType diff_kappa(3, n_voronoi);
+    auto&& diff_kappa = block.template get<system::cosserat_rod::ScratchVectorA>();
+
+    for (IndexType i = 0; i < 3; ++i) {
+        #pragma omp parallel for simd schedule(static)
+        for (IndexType k = 0; k < n_voronoi; ++k) {
+            diff_kappa(i, k) = kappa(i, k) - rest_kappa(i, k);
+        }
+    }
+
+    // Compute internal_couple = batch_matvec(bend_matrix, diff_kappa)
+    // bend_matrix is stored as (9, n_voronoi) - flattened 3x3 matrices
+    // Storage order: [m00, m10, m20, m01, m11, m21, m02, m12, m22] (column-major)
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_voronoi; ++k) {
+        // Extract 3x3 bend_matrix for voronoi k
+        // bend_matrix[:, k] is [m00, m10, m20, m01, m11, m21, m02, m12, m22]
+        double m00 = bend_matrix(0, k);
+        double m10 = bend_matrix(1, k);
+        double m20 = bend_matrix(2, k);
+        double m01 = bend_matrix(3, k);
+        double m11 = bend_matrix(4, k);
+        double m21 = bend_matrix(5, k);
+        double m02 = bend_matrix(6, k);
+        double m12 = bend_matrix(7, k);
+        double m22 = bend_matrix(8, k);
+
+        // Compute bend_matrix @ diff_kappa for voronoi k
+        // result = bend_matrix * diff_kappa[:, k]
+        internal_couple(0, k) = m00 * diff_kappa(0, k) + m01 * diff_kappa(1, k) + m02 * diff_kappa(2, k);
+        internal_couple(1, k) = m10 * diff_kappa(0, k) + m11 * diff_kappa(1, k) + m12 * diff_kappa(2, k);
+        internal_couple(2, k) = m20 * diff_kappa(0, k) + m21 * diff_kappa(1, k) + m22 * diff_kappa(2, k);
+    }
+}
+
+// Compute dilatation rate
+// Updates: dilatation_rate
+template<typename BlockType>
+inline void compute_dilatation_rate(BlockType& block) {
+    // Get variable views
+    auto&& position = block.template get<system::cosserat_rod::Position>();
+    auto&& velocity = block.template get<system::cosserat_rod::Velocity>();
+    auto&& lengths = block.template get<system::cosserat_rod::Lengths>();
+    auto&& rest_lengths = block.template get<system::cosserat_rod::RestLengths>();
+    auto&& dilatation_rate = block.template get<system::cosserat_rod::DilatationRate>();
+
+    // position is (3, n_nodes), velocity is (3, n_nodes)
+    // lengths is (1, n_elems), rest_lengths is (1, n_elems)
+    // dilatation_rate is (1, n_elems)
+    const IndexType n_nodes = position.cols();
+    const IndexType n_elems = lengths.cols();
+
+    // Compute r_dot_v = batch_dot(position, velocity)
+    // This is the dot product of position and velocity at each node
+    // MatrixType r_dot_v(1, n_nodes);
+    auto&& r_dot_v = block.template get<system::cosserat_rod::ScratchScalarA>();
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_nodes; ++k) {
+        r_dot_v(0, k) = position(0, k) * velocity(0, k) +
+                       position(1, k) * velocity(1, k) +
+                       position(2, k) * velocity(2, k);
+    }
+
+    // Compute r_plus_one_dot_v = batch_dot(position[..., 1:], velocity[..., :-1])
+    // Dot product of position[1:] and velocity[:-1] (both have n_elems elements)
+    // MatrixType r_plus_one_dot_v(1, n_elems);
+    auto&& r_plus_one_dot_v = block.template get<system::cosserat_rod::ScratchScalarB>();
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        r_plus_one_dot_v(0, k) = position(0, k + 1) * velocity(0, k) +
+                                position(1, k + 1) * velocity(1, k) +
+                                position(2, k + 1) * velocity(2, k);
+    }
+
+    // Compute r_dot_v_plus_one = batch_dot(position[..., :-1], velocity[..., 1:])
+    // Dot product of position[:-1] and velocity[1:] (both have n_elems elements)
+    // MatrixType r_dot_v_plus_one(1, n_elems);
+    auto&& r_dot_v_plus_one = block.template get<system::cosserat_rod::ScratchScalarC>();
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        r_dot_v_plus_one(0, k) = position(0, k) * velocity(0, k + 1) +
+                                 position(1, k) * velocity(1, k + 1) +
+                                 position(2, k) * velocity(2, k + 1);
+    }
+
+    // Compute dilatation_rate for each element
+    // dilatation_rate[k] = (r_dot_v[k] + r_dot_v[k + 1] - r_dot_v_plus_one[k] - r_plus_one_dot_v[k])
+    //                     / lengths[k] / rest_lengths[k]
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        dilatation_rate(0, k) = (r_dot_v(0, k) + r_dot_v(0, k + 1) -
+                                 r_dot_v_plus_one(0, k) - r_plus_one_dot_v(0, k)) /
+                                lengths(0, k) / rest_lengths(0, k);
+    }
+}
+
+// Compute internal torques for Cosserat rod
+// This is a template function that will be implemented later
+template<typename BlockType>
+inline void compute_internal_torques(BlockType& block) {
+    // Get variable views
+    auto&& voronoi_dilatation = block.template get<system::cosserat_rod::VoronoiDilatation>();
+    auto&& internal_couple = block.template get<system::cosserat_rod::InternalCouple>();
+    auto&& kappa = block.template get<system::cosserat_rod::Kappa>();
+    auto&& rest_voronoi_lengths = block.template get<system::cosserat_rod::RestVoronoiLengths>();
+    auto&& director = block.template get<system::cosserat_rod::Director>();
+    auto&& tangents = block.template get<system::cosserat_rod::Tangents>();
+    auto&& internal_stress = block.template get<system::cosserat_rod::InternalStress>();
+    auto&& rest_lengths = block.template get<system::cosserat_rod::RestLengths>();
+    auto&& mass_second_moment_of_inertia = block.template get<system::cosserat_rod::MassSecondMomentOfInertia>();
+    auto&& omega = block.template get<system::cosserat_rod::Omega>();
+    auto&& dilatation = block.template get<system::cosserat_rod::Dilatation>();
+    auto&& dilatation_rate = block.template get<system::cosserat_rod::DilatationRate>();
+    auto&& internal_torques = block.template get<system::cosserat_rod::InternalTorques>();
+
+    // voronoi_dilatation is (1, n_voronoi), internal_couple is (3, n_voronoi)
+    // kappa is (3, n_voronoi), rest_voronoi_lengths is (1, n_voronoi)
+    // director is (9, n_elems), tangents is (3, n_elems)
+    // internal_stress is (3, n_elems), rest_lengths is (1, n_elems)
+    // mass_second_moment_of_inertia is (9, n_elems), omega is (3, n_elems)
+    // dilatation is (1, n_elems), dilatation_rate is (1, n_elems)
+    // internal_torques is (3, n_elems)
+    const IndexType n_voronoi = voronoi_dilatation.cols();
+    const IndexType n_elems = internal_torques.cols();
+    const IndexType n_nodes = n_elems + 1;
+
+    // Scratch buffers
+    auto&& voronoi_dilatation_inv_cube_cached = block.template get<system::cosserat_rod::ScratchScalarC>();
+    auto&& bend_twist_couple_2D = block.template get<system::cosserat_rod::InternalTorques>();
+    auto&& J_omega_upon_e = block.template get<system::cosserat_rod::ScratchVectorA>();
+    auto&& scratch_vec_b_voronoi = block.template get<system::cosserat_rod::ScratchVectorB>();
+    auto&& bend_twist_couple_3D = block.template get<system::cosserat_rod::ScratchVectorC>();
+    auto&& shear_stretch_couple = block.template get<system::cosserat_rod::ScratchVectorD>();
+    auto&& lagrangian_transport = block.template get<system::cosserat_rod::ScratchVectorE>();
+    auto&& unsteady_dilatation = block.template get<system::cosserat_rod::ScratchVectorF>();
+
+    // Compute voronoi_dilatation_inv_cube_cached = 1.0 / voronoi_dilatation^3
+    // MatrixType voronoi_dilatation_inv_cube_cached(1, n_voronoi);
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_voronoi; ++k) {
+        double voronoi_dil = voronoi_dilatation(0, k);
+        voronoi_dilatation_inv_cube_cached(0, k) = 1.0 / (voronoi_dil * voronoi_dil * voronoi_dil);
+    }
+
+    // Compute bend_twist_couple_2D = difference_kernel(internal_couple * voronoi_dilatation_inv_cube_cached, ghost_voronoi_idx)
+    // First compute the product
+    // MatrixType internal_couple_scaled(3, n_voronoi);
+    auto internal_couple_scaled = scratch_vec_b_voronoi;
+    for (IndexType i = 0; i < 3; ++i) {
+        #pragma omp parallel for simd schedule(static)
+        for (IndexType k = 0; k < n_voronoi; ++k) {
+            internal_couple_scaled(i, k) = internal_couple(i, k) * voronoi_dilatation_inv_cube_cached(0, k);
+        }
+    }
+
+    // Reset ghost values for internal_couple_scaled (OnVoronoi)
+    // reset_ghost :: internal_couple_scaled
+    block.template reset_ghost_for_variable<system::cosserat_rod::ScratchVectorB>();
+
+    // Apply difference_kernel (two_point_difference_kernel)
+    // MatrixType bend_twist_couple_2D(3, n_nodes);
+    two_point_difference_kernel(bend_twist_couple_2D, internal_couple_scaled);
+
+    // Compute bend_twist_couple_3D = quadrature_kernel((kappa x internal_couple) * rest_voronoi_lengths * voronoi_dilatation_inv_cube_cached, ghost_voronoi_idx)
+    // First compute kappa x internal_couple
+    // MatrixType kappa_cross_internal_couple(3, n_voronoi);
+    auto kappa_cross_internal_couple = scratch_vec_b_voronoi;
+    batch_cross(kappa_cross_internal_couple, kappa, internal_couple);
+
+    // Multiply by rest_voronoi_lengths and voronoi_dilatation_inv_cube_cached
+    // MatrixType bend_twist_couple_3D_input(3, n_voronoi);
+    auto bend_twist_couple_3D_input = scratch_vec_b_voronoi;
+    for (IndexType i = 0; i < 3; ++i) {
+        #pragma omp parallel for simd schedule(static)
+        for (IndexType k = 0; k < n_voronoi; ++k) {
+            bend_twist_couple_3D_input(i, k) = kappa_cross_internal_couple(i, k) *
+                                              rest_voronoi_lengths(0, k) *
+                                              voronoi_dilatation_inv_cube_cached(0, k);
+        }
+    }
+
+    // Reset ghost values
+    // reset_ghost:: bend_twist_couple_3D_input;
+    block.template reset_ghost_for_variable<system::cosserat_rod::ScratchVectorB>();
+
+    // Apply quadrature_kernel (trapezoidal)
+    // MatrixType bend_twist_couple_3D(3, n_nodes);
+    quadrature_kernel(bend_twist_couple_3D, bend_twist_couple_3D_input);
+
+    // Compute shear_stretch_couple = (Q^T * tangents) x internal_stress * rest_lengths
+    // First compute Q^T * tangents (same as director^T @ tangents)
+    // {  // DEPRECATED BLOCK: REPLACED WITH BELOW OPS
+    // MatrixType director_tangents(3, n_elems);
+    // for (IndexType i = 0; i < 3; ++i) {
+    //     #pragma omp parallel for simd schedule(static)
+    //     for (IndexType k = 0; k < n_elems; ++k) {
+    //         double sum = 0.0;
+    //         for (IndexType j = 0; j < 3; ++j) {
+    //             IndexType director_idx = j * 3 + i;
+    //             sum += director(director_idx, k) * tangents(j, k);
+    //         }
+    //         director_tangents(i, k) = sum;
+    //     }
+    // }
+
+    // // Compute cross product: (Q^T * tangents) x internal_stress
+    // // MatrixType shear_stretch_couple(3, n_elems);
+    // batch_cross(shear_stretch_couple, director_tangents, internal_stress);
+
+    // // Multiply by rest_lengths
+    // for (IndexType i = 0; i < 3; ++i) {
+    //     #pragma omp parallel for simd schedule(static)
+    //     for (IndexType k = 0; k < n_elems; ++k) {
+    //         shear_stretch_couple(i, k) *= rest_lengths(0, k);
+    //     }
+    // }
+    // }
+
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        // Compute Q^T * tangents
+
+        double a0 = director(0, k) * tangents(0, k) + director(3, k) * tangents(1, k) + director(6, k) * tangents(2, k);
+        double a1 = director(1, k) * tangents(0, k) + director(4, k) * tangents(1, k) + director(7, k) * tangents(2, k);
+        double a2 = director(2, k) * tangents(0, k) + director(5, k) * tangents(1, k) + director(8, k) * tangents(2, k);
+
+        // internal_stress alias
+        double i0 = internal_stress(0, k);
+        double i1 = internal_stress(1, k);
+        double i2 = internal_stress(2, k);
+
+        // cross-product (Q*t x n) * l_hat
+        shear_stretch_couple(0, k) += (a1 * i2 - a2 * i1) * rest_lengths(0, k);
+        shear_stretch_couple(1, k) += (a2 * i0 - a0 * i2) * rest_lengths(0, k);
+        shear_stretch_couple(2, k) += (a0 * i1 - a1 * i0) * rest_lengths(0, k);
+    }
+
+    // Compute J_omega_upon_e = batch_matvec(mass_second_moment_of_inertia, omega) / dilatation
+    // MatrixType J_omega_upon_e(3, n_elems);
+    #pragma omp parallel for simd schedule(static)
+    for (IndexType k = 0; k < n_elems; ++k) {
+        // Extract 3x3 mass_second_moment_of_inertia for element k
+        double m00 = mass_second_moment_of_inertia(0, k);
+        double m10 = mass_second_moment_of_inertia(1, k);
+        double m20 = mass_second_moment_of_inertia(2, k);
+        double m01 = mass_second_moment_of_inertia(3, k);
+        double m11 = mass_second_moment_of_inertia(4, k);
+        double m21 = mass_second_moment_of_inertia(5, k);
+        double m02 = mass_second_moment_of_inertia(6, k);
+        double m12 = mass_second_moment_of_inertia(7, k);
+        double m22 = mass_second_moment_of_inertia(8, k);
+
+        // Compute mass_second_moment_of_inertia @ omega
+        J_omega_upon_e(0, k) = (m00 * omega(0, k) + m01 * omega(1, k) + m02 * omega(2, k)) / dilatation(0, k);
+        J_omega_upon_e(1, k) = (m10 * omega(0, k) + m11 * omega(1, k) + m12 * omega(2, k)) / dilatation(0, k);
+        J_omega_upon_e(2, k) = (m20 * omega(0, k) + m21 * omega(1, k) + m22 * omega(2, k)) / dilatation(0, k);
+    }
+
+    // Compute lagrangian_transport = (J * omega / dilatation) x omega
+    // MatrixType lagrangian_transport(3, n_elems);
+    batch_cross(lagrangian_transport, J_omega_upon_e, omega);
+
+    // Compute unsteady_dilatation = J_omega_upon_e * dilatation_rate / dilatation
+    // MatrixType unsteady_dilatation(3, n_elems);
+    for (IndexType i = 0; i < 3; ++i) {
+        #pragma omp parallel for simd schedule(static)
+        for (IndexType k = 0; k < n_elems; ++k) {
+            unsteady_dilatation(i, k) = J_omega_upon_e(i, k) * dilatation_rate(0, k) / dilatation(0, k);
+        }
+    }
+
+    // Compute internal_torques = sum of all components
+    // Note: bend_twist_couple_2D and bend_twist_couple_3D are (3, n_nodes), but we need (3, n_elems)
+    // So we need to take the element values (columns 0 to n_elems-1)
+    for (IndexType i = 0; i < 3; ++i) {
+        #pragma omp parallel for simd schedule(static)
+        for (IndexType k = 0; k < n_elems; ++k) {
+            internal_torques(i, k) += // bend_twist_couple_2D(i, k) +  // Note: internal_torques share bend_twist_couple_2D
+                                     bend_twist_couple_3D(i, k) +
+                                     shear_stretch_couple(i, k) +
+                                     lagrangian_transport(i, k) +
+                                     unsteady_dilatation(i, k);
+        }
+    }
+}
+
+} // namespace elasticapp
diff --git a/backend/src/cosserat_rod_system.h b/backend/src/cosserat_rod_system.h
new file mode 100644
index 00000000..11d6875e
--- /dev/null
+++ b/backend/src/cosserat_rod_system.h
@@ -0,0 +1,181 @@
+#pragma once
+
+#include "system.h"
+
+namespace elasticapp {
+
+// CosseratRod-specific variable tags
+// These variable types are now made internal to this translation unit
+namespace system::cosserat_rod {
+    // Node variables
+    struct Position : Placement::OnNode, DataType::Vector {
+        static constexpr std::string_view name = "position";
+    };
+    struct Velocity : Placement::OnNode, DataType::Vector {
+        static constexpr std::string_view name = "velocity";
+    };
+    struct Acceleration : Placement::OnNode, DataType::Vector {
+        static constexpr std::string_view name = "acceleration";
+    };
+    struct Mass : Placement::OnNode, DataType::Scalar {
+        static constexpr std::string_view name = "mass";
+        inline static MatrixType ghost_value = MatrixType::Constant(1, 1, 1.0);
+    };
+    struct InternalForces : Placement::OnNode, DataType::Vector {
+        static constexpr std::string_view name = "internal_forces";
+    };
+    struct ExternalForces : Placement::OnNode, DataType::Vector {
+        static constexpr std::string_view name = "external_forces";
+    };
+
+    // Element variables
+    struct Director : Placement::OnElement, DataType::Matrix {
+        static constexpr std::string_view name = "director";
+    };
+    struct Omega : Placement::OnElement, DataType::Vector {
+        static constexpr std::string_view name = "omega";
+    };
+    struct Alpha : Placement::OnElement, DataType::Vector {
+        static constexpr std::string_view name = "alpha";
+    };
+    struct RestLengths : Placement::OnElement, DataType::Scalar {
+        static constexpr std::string_view name = "rest_lengths";
+        // Note: Not constexpr because Eigen dynamic matrices don't have constexpr constructors
+        // For dynamic matrices, Constant requires dimensions: Constant(rows, cols, value)
+        inline static MatrixType ghost_value = MatrixType::Constant(1, 1, 1.0);
+    };
+    struct Density : Placement::OnElement, DataType::Scalar {
+        static constexpr std::string_view name = "density";
+    };
+    struct Volume : Placement::OnElement, DataType::Scalar {
+        static constexpr std::string_view name = "volume";
+    };
+    struct MassSecondMomentOfInertia : Placement::OnElement, DataType::Matrix {
+        static constexpr std::string_view name = "mass_second_moment_of_inertia";
+    };
+    struct InvMassSecondMomentOfInertia : Placement::OnElement, DataType::Matrix {
+        static constexpr std::string_view name = "inv_mass_second_moment_of_inertia";
+    };
+    struct InternalTorques : Placement::OnElement, DataType::Vector {
+        static constexpr std::string_view name = "internal_torques";
+    };
+    struct ExternalTorques : Placement::OnElement, DataType::Vector {
+        static constexpr std::string_view name = "external_torques";
+    };
+    struct Lengths : Placement::OnElement, DataType::Scalar {
+        static constexpr std::string_view name = "lengths";
+    };
+    struct Tangents : Placement::OnElement, DataType::Vector {
+        static constexpr std::string_view name = "tangents";
+    };
+    struct Radius : Placement::OnElement, DataType::Scalar {
+        static constexpr std::string_view name = "radius";
+    };
+    struct Dilatation : Placement::OnElement, DataType::Scalar {
+        static constexpr std::string_view name = "dilatation";
+    };
+    struct DilatationRate : Placement::OnElement, DataType::Scalar {
+        static constexpr std::string_view name = "dilatation_rate";
+    };
+    struct Sigma : Placement::OnElement, DataType::Vector {
+        static constexpr std::string_view name = "sigma";
+    };
+    struct RestSigma : Placement::OnElement, DataType::Vector {
+        static constexpr std::string_view name = "rest_sigma";
+    };
+    struct InternalStress : Placement::OnElement, DataType::Vector {
+        static constexpr std::string_view name = "internal_stress";
+    };
+    struct ShearMatrix : Placement::OnElement, DataType::Matrix {
+        static constexpr std::string_view name = "shear_matrix";
+    };
+
+    // Voronoi variables
+    struct RestVoronoiLengths : Placement::OnVoronoi, DataType::Scalar {
+        static constexpr std::string_view name = "rest_voronoi_lengths";
+        // Note: Not constexpr because Eigen dynamic matrices don't have constexpr constructors
+        // For dynamic matrices, Constant requires dimensions: Constant(rows, cols, value)
+        inline static MatrixType ghost_value = MatrixType::Constant(1, 1, 1.0);
+    };
+    struct VoronoiDilatation : Placement::OnVoronoi, DataType::Scalar {
+        static constexpr std::string_view name = "voronoi_dilatation";
+    };
+    struct Kappa : Placement::OnVoronoi, DataType::Vector {
+        static constexpr std::string_view name = "kappa";
+    };
+    struct RestKappa : Placement::OnVoronoi, DataType::Vector {
+        static constexpr std::string_view name = "rest_kappa";
+    };
+    struct InternalCouple : Placement::OnVoronoi, DataType::Vector {
+        static constexpr std::string_view name = "internal_couple";
+    };
+    struct BendMatrix : Placement::OnVoronoi, DataType::Matrix {
+        static constexpr std::string_view name = "bend_matrix";
+    };
+
+    // Dummy variables for computation
+   struct ScratchVectorA : Placement::OnElement, DataType::Vector { static constexpr std::string_view name = "scratch_vector_a"; };
+   struct ScratchVectorB : Placement::OnVoronoi, DataType::Vector { static constexpr std::string_view name = "scratch_vector_b"; };
+   struct ScratchVectorC : Placement::OnNode, DataType::Vector { static constexpr std::string_view name = "scratch_vector_c"; };
+   struct ScratchVectorD : Placement::OnNode, DataType::Vector { static constexpr std::string_view name = "scratch_vector_d"; };
+   struct ScratchVectorE : Placement::OnNode, DataType::Vector { static constexpr std::string_view name = "scratch_vector_e"; };
+   struct ScratchVectorF : Placement::OnNode, DataType::Vector { static constexpr std::string_view name = "scratch_vector_f"; };
+
+   struct ScratchScalarA : Placement::OnNode, DataType::Scalar { static constexpr std::string_view name = "scratch_scalar_a"; };
+   struct ScratchScalarB : Placement::OnNode, DataType::Scalar { static constexpr std::string_view name = "scratch_scalar_b"; };
+   struct ScratchScalarC : Placement::OnNode, DataType::Scalar { static constexpr std::string_view name = "scratch_scalar_c"; };
+}
+
+// CosseratRodSystem is a System with all variables from CosseratRod
+// Variables are organized by placement (Node, Element, Voronoi)
+using CosseratRodSystem = System<
+    // Node variables
+    system::cosserat_rod::Position,
+    system::cosserat_rod::Velocity,
+    system::cosserat_rod::Acceleration,
+    system::cosserat_rod::Mass,
+    system::cosserat_rod::InternalForces,
+    system::cosserat_rod::ExternalForces,
+
+    // Element variables
+    system::cosserat_rod::Omega,
+    system::cosserat_rod::Alpha,
+    system::cosserat_rod::Director,
+    system::cosserat_rod::RestLengths,
+    system::cosserat_rod::Density,
+    system::cosserat_rod::Volume,
+    system::cosserat_rod::MassSecondMomentOfInertia,
+    system::cosserat_rod::InvMassSecondMomentOfInertia,
+    system::cosserat_rod::InternalTorques,
+    system::cosserat_rod::ExternalTorques,
+    system::cosserat_rod::Lengths,
+    system::cosserat_rod::Tangents,
+    system::cosserat_rod::Radius,
+    system::cosserat_rod::Dilatation,
+    system::cosserat_rod::DilatationRate,
+    system::cosserat_rod::Sigma,
+    system::cosserat_rod::RestSigma,
+    system::cosserat_rod::InternalStress,
+    system::cosserat_rod::ShearMatrix,
+
+    // Voronoi variables
+    system::cosserat_rod::RestVoronoiLengths,
+    system::cosserat_rod::VoronoiDilatation,
+    system::cosserat_rod::Kappa,
+    system::cosserat_rod::RestKappa,
+    system::cosserat_rod::InternalCouple,
+    system::cosserat_rod::BendMatrix,
+
+    // Dummy variables
+    system::cosserat_rod::ScratchVectorA,
+    system::cosserat_rod::ScratchVectorB,
+    system::cosserat_rod::ScratchVectorC,
+    system::cosserat_rod::ScratchVectorD,
+    system::cosserat_rod::ScratchVectorE,
+    system::cosserat_rod::ScratchVectorF,
+    system::cosserat_rod::ScratchScalarA,
+    system::cosserat_rod::ScratchScalarB,
+    system::cosserat_rod::ScratchScalarC
+>;
+
+} // namespace elasticapp
diff --git a/backend/src/environment/_api.cpp b/backend/src/environment/_api.cpp
new file mode 100644
index 00000000..419df981
--- /dev/null
+++ b/backend/src/environment/_api.cpp
@@ -0,0 +1,142 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <pybind11/stl.h>
+#include "../api.h"
+#include "api.h"
+#include "collision/collision_system.h"
+#include "collision/physics/no_interaction.h"
+#include "collision/physics/linear_spring_dashpot.h"
+
+namespace py = pybind11;
+
+PYBIND11_MODULE(_collision, m) {
+    m.doc() = R"pbdoc(
+        Elasticapp Collision module
+        ---------------------------
+
+        Provides collision detection and resolution for Cosserat rods using
+        the Discrete Element Method (DEM).
+    )pbdoc";
+
+    using namespace elasticapp;
+    using namespace elasticapp::environment::collision;
+    using namespace elasticapp::environment::collision::physics;
+
+    // NoInteraction model class (for testing)
+    py::class_<NoInteraction>(m, "NoInteraction")
+        .def(py::init<>(),
+            R"pbdoc(
+                Initialize a NoInteraction collision physics model.
+
+                This model returns zero force for all contacts, useful for testing
+                collision detection without applying forces.
+
+                Args:
+                    None (no parameters required)
+            )pbdoc");
+
+    // LinearSpringDashpot model class
+    py::class_<LinearSpringDashpot>(m, "LinearSpringDashpot")
+        .def(py::init<double, double, double>(),
+            R"pbdoc(
+                Initialize a LinearSpringDashpot collision physics model.
+
+                Args:
+                    k_normal: Normal spring constant for repulsion force
+                    eta_normal: Normal damping coefficient (also used for tangential damping)
+                    friction: Static friction coefficient
+            )pbdoc",
+            py::arg("k_normal") = 1.0,
+            py::arg("eta_normal") = 0.1,
+            py::arg("friction") = 0.5)
+        .def(py::init<double, double, double, double>(),
+            R"pbdoc(
+                Initialize a LinearSpringDashpot collision physics model with explicit tangential damping.
+
+                Args:
+                    k_normal: Normal spring constant for repulsion force
+                    eta_normal: Normal damping coefficient
+                    eta_tangential: Tangential damping coefficient
+                    friction: Static friction coefficient
+            )pbdoc",
+            py::arg("k_normal"),
+            py::arg("eta_normal"),
+            py::arg("eta_tangential"),
+            py::arg("friction"))
+        .def(py::init<double, double, double, double, double>(),
+            R"pbdoc(
+                Initialize a LinearSpringDashpot collision physics model with explicit tangential spring and damping.
+
+                Args:
+                    k_normal: Normal spring constant for repulsion force
+                    eta_normal: Normal damping coefficient
+                    k_tangential: Tangential spring constant
+                    eta_tangential: Tangential damping coefficient
+                    friction: Static friction coefficient
+            )pbdoc",
+            py::arg("k_normal"),
+            py::arg("eta_normal"),
+            py::arg("k_tangential"),
+            py::arg("eta_tangential"),
+            py::arg("friction"));
+
+    // CollisionSystem class (using DefaultCollisionSystem)
+    // Support both LinearSpringDashpot and NoInteraction models
+    py::class_<DefaultCollisionSystem>(m, "CollisionSystem")
+        .def(py::init<const LinearSpringDashpot&, std::size_t>(),
+            R"pbdoc(
+                Initialize a CollisionSystem with a LinearSpringDashpot physics model.
+
+                Uses default policies: HashGrid (coarse), MaxContacts (fine), UnionFind (batching).
+
+                Args:
+                    model: The LinearSpringDashpot collision physics model
+                    detect_every: Perform coarse detection every N steps (default: 1, meaning every step)
+            )pbdoc",
+            py::arg("model"),
+            py::arg("detect_every") = 1)
+        .def(py::init<const NoInteraction&, std::size_t>(),
+            R"pbdoc(
+                Initialize a CollisionSystem with a NoInteraction physics model.
+
+                Uses default policies: HashGrid (coarse), MaxContacts (fine), UnionFind (batching).
+                Useful for testing collision detection without applying forces.
+
+                Args:
+                    model: The NoInteraction collision physics model
+                    detect_every: Perform coarse detection every N steps (default: 1, meaning every step)
+            )pbdoc",
+            py::arg("model"),
+            py::arg("detect_every") = 1)
+        .def("detect_every", &DefaultCollisionSystem::detect_every,
+            R"pbdoc(
+                Get the detect_every parameter.
+
+                Returns:
+                    int: Number of steps between coarse detection calls
+            )pbdoc")
+        .def("set_detect_every", &DefaultCollisionSystem::set_detect_every,
+            R"pbdoc(
+                Set the detect_every parameter.
+
+                Args:
+                    detect_every: Number of steps between coarse detection calls
+            )pbdoc",
+            py::arg("detect_every"))
+        .def("resolve", &DefaultCollisionSystem::resolve,
+            R"pbdoc(
+                Resolve collisions for a BlockRodSystem.
+
+                This method performs the full collision detection and resolution pipeline:
+                1. Data extraction from Block
+                2. Coarse collision detection (HashGrid)
+                3. Fine collision detection (MaxContacts)
+                4. Contact batching (UnionFind)
+                5. Contact resolution (LinearSpringDashpot)
+                6. Force application to ExternalForces
+
+                Args:
+                    system: The BlockRodSystem to resolve collisions for
+            )pbdoc",
+            py::arg("system"));
+}
diff --git a/backend/src/environment/api.h b/backend/src/environment/api.h
new file mode 100644
index 00000000..24f356cf
--- /dev/null
+++ b/backend/src/environment/api.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "../api.h"  // For BlockRodSystem
+#include "collision/collision_system.h"
+#include "collision/course_detection/hash_grid.h"
+#include "collision/fine_detection/max_contacts.h"
+#include "collision/batching/union_find.h"
+
+namespace elasticapp::environment {
+
+// Re-export CollisionSystem from collision namespace for convenience
+namespace collision {
+    using DefaultCollisionSystem = CollisionSystem<HashGrid, MaxContacts, UnionFind>;
+}
+
+} // namespace elasticapp::environment
diff --git a/backend/src/environment/collision/batching/union_find.cpp b/backend/src/environment/collision/batching/union_find.cpp
new file mode 100644
index 00000000..702cec50
--- /dev/null
+++ b/backend/src/environment/collision/batching/union_find.cpp
@@ -0,0 +1,105 @@
+#include "union_find.h"
+#include <algorithm>
+#include <unordered_map>
+
+namespace elasticapp::environment {
+namespace collision {
+
+//==============================================================================
+// UnionFind::UnionFindDS Implementation
+//==============================================================================
+
+UnionFind::UnionFindDS::UnionFindDS(std::size_t max_node)
+    : parent_(max_node + 1), rank_(max_node + 1, 0) {
+    // Initialize: each node is its own parent (root)
+    for (std::size_t i = 0; i <= max_node; ++i) {
+        parent_[i] = i;
+    }
+}
+
+std::size_t UnionFind::UnionFindDS::find_root(std::size_t x) {
+    // Path compression: make parent point directly to root
+    if (parent_[x] != x) {
+        parent_[x] = find_root(parent_[x]);
+    }
+    return parent_[x];
+}
+
+void UnionFind::UnionFindDS::union_nodes(std::size_t x, std::size_t y) {
+    std::size_t root_x = find_root(x);
+    std::size_t root_y = find_root(y);
+
+    if (root_x == root_y) {
+        return;  // Already in same component
+    }
+
+    // Union by rank: attach smaller tree under larger tree
+    if (rank_[root_x] < rank_[root_y]) {
+        parent_[root_x] = root_y;
+    } else if (rank_[root_x] > rank_[root_y]) {
+        parent_[root_y] = root_x;
+    } else {
+        // Same rank: attach one to the other and increment rank
+        parent_[root_y] = root_x;
+        rank_[root_x]++;
+    }
+}
+
+//==============================================================================
+// UnionFind Implementation
+//==============================================================================
+
+std::vector<std::vector<std::size_t>> UnionFind::batch(
+    std::vector<Contact>& contacts
+) const {
+    const std::size_t n_contacts = contacts.size();
+
+    if (n_contacts == 0) {
+        return {};
+    }
+
+    // Step 1: Find maximum node index to size the Union-Find structure
+    std::size_t max_node = 0;
+    for (const auto& contact : contacts) {
+        max_node = std::max(max_node, std::max(contact.node1_idx, contact.node2_idx));
+    }
+
+    // Step 2: Initialize Union-Find data structure
+    UnionFindDS uf(max_node);
+
+    // Step 3: Union all nodes connected by contacts
+    // This builds the connected components: nodes connected by contacts are in the same component
+    for (const auto& contact : contacts) {
+        uf.union_nodes(contact.node1_idx, contact.node2_idx);
+    }
+
+    // Step 4: Group contacts by their root component and set their indices
+    // Contacts whose nodes share the same root are in the same batch
+    std::unordered_map<std::size_t, std::vector<std::size_t>> batches_map;
+    batches_map.reserve(n_contacts);  // Reserve space (worst case: one batch per contact)
+
+    for (std::size_t i = 0; i < n_contacts; ++i) {
+        auto& contact = contacts[i];
+        // Use root of node1 (both nodes in a contact have the same root after union)
+        std::size_t root = uf.find_root(contact.node1_idx);
+        auto& batch = batches_map[root];
+
+        // Set contact index to its position within the batch (matches reference implementation)
+        contact.set_index(batch.size());
+
+        // Add contact index to batch
+        batch.push_back(i);
+    }
+
+    // Step 5: Convert map to vector of batches
+    std::vector<std::vector<std::size_t>> batches;
+    batches.reserve(batches_map.size());
+    for (auto& [root, batch] : batches_map) {
+        batches.push_back(std::move(batch));
+    }
+
+    return batches;
+}
+
+} // namespace collision
+} // namespace elasticapp::environment
diff --git a/backend/src/environment/collision/batching/union_find.h b/backend/src/environment/collision/batching/union_find.h
new file mode 100644
index 00000000..5a4412be
--- /dev/null
+++ b/backend/src/environment/collision/batching/union_find.h
@@ -0,0 +1,86 @@
+#pragma once
+
+#include "../types.h"
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace elasticapp::environment {
+namespace collision {
+
+/**
+ * UnionFind batching policy.
+ *
+ * Groups contacts into batches using a Union-Find data structure.
+ * Based on the Elastica UnionFind batching implementation.
+ *
+ * Algorithm:
+ * 1. Build Union-Find structure on nodes: For each contact, union the two nodes it connects
+ * 2. Identify root nodes: Each connected component has a unique root
+ * 3. Group contacts by root: Contacts whose nodes share the same root are in the same batch
+ *
+ * This creates independent batches of contacts that can be processed in parallel,
+ * as contacts in different batches don't share any nodes and thus don't interfere.
+ *
+ * This is a CRTP policy class that will be mixed into CollisionSystem.
+ */
+class UnionFind {
+public:
+    /**
+     * Default constructor.
+     */
+    UnionFind() = default;
+
+    /**
+     * Destructor.
+     */
+    ~UnionFind() = default;
+
+    /**
+     * Group contacts into batches.
+     *
+     * Uses Union-Find to identify connected components of contacts.
+     * Contacts are connected if they share a common node (directly or transitively).
+     *
+     * Algorithm:
+     * 1. Initialize Union-Find: Each node is its own root initially
+     * 2. Union nodes: For each contact, union node1 and node2
+     * 3. Group contacts: Contacts whose nodes have the same root are grouped together
+     * 4. Set contact indices: Each contact's index is set to its position within its batch
+     *
+     * @param contacts Vector of all contacts (non-const to allow setting index)
+     * @return Vector of batches, where each batch is a vector of contact indices
+     */
+    std::vector<std::vector<std::size_t>> batch(
+        std::vector<Contact>& contacts
+    ) const;
+
+private:
+    /**
+     * Union-Find data structure for finding connected components.
+     */
+    class UnionFindDS {
+    public:
+        /**
+         * Constructor with maximum node index.
+         */
+        explicit UnionFindDS(std::size_t max_node);
+
+        /**
+         * Find root of a node with path compression.
+         */
+        std::size_t find_root(std::size_t x);
+
+        /**
+         * Union two nodes.
+         */
+        void union_nodes(std::size_t x, std::size_t y);
+
+    private:
+        std::vector<std::size_t> parent_;  // parent[i] = parent of node i
+        std::vector<std::size_t> rank_;    // rank[i] = approximate tree height (for union by rank)
+    };
+};
+
+} // namespace collision
+} // namespace elasticapp::environment
diff --git a/backend/src/environment/collision/collision_system.h b/backend/src/environment/collision/collision_system.h
new file mode 100644
index 00000000..3d46be63
--- /dev/null
+++ b/backend/src/environment/collision/collision_system.h
@@ -0,0 +1,275 @@
+#pragma once
+
+#include "../../api.h"  // For BlockRodSystem
+#include "../../cosserat_rod_system.h"  // For system::cosserat_rod namespace
+#include "../../utility/hash.h"
+#include "concepts.hpp"
+#include "physics/linear_spring_dashpot.h"
+#include "physics/no_interaction.h"
+#include "types.h"
+#include <variant>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>  // For std::pair
+#include <functional>  // For std::hash
+
+namespace elasticapp::environment {
+namespace collision {
+
+/**
+ * CollisionSystem class for collision detection and resolution.
+ *
+ * This class orchestrates the collision detection and resolution pipeline:
+ * 1. Data extraction from Block
+ * 2. Coarse detection (HashGrid)
+ * 3. Fine detection (MaxContacts)
+ * 4. Contact batching (UnionFind)
+ * 5. Contact resolution (PhysicsModel - runtime selectable)
+ * 6. Force application to ExternalForces
+ *
+ * Uses CRTP (Curiously Recurring Template Pattern) to mix in policy classes
+ * for coarse detection, fine detection, and batching strategies.
+ *
+ * Physics models are stored in a std::variant for runtime selection without
+ * virtual function overhead.
+ *
+ * @tparam CoarseDetectionPolicy Policy for coarse collision detection
+ * @tparam FineDetectionPolicy Policy for fine collision detection
+ * @tparam BatchingPolicy Policy for contact batching
+ */
+template<CoarseDetectionPolicy CoarseDetection, FineDetectionPolicy FineDetection, BatchingPolicy Batching>
+class CollisionSystem : public CoarseDetection, public FineDetection, public Batching {
+public:
+    /**
+     * Variant type for physics models.
+     * Currently supports LinearSpringDashpot, but can be extended with additional models.
+     * This syntax is used to avoid using virtual functions and CRTP hierarchies.
+     * Different structures can be considered once a more compositional approach to the physics model is needed.
+     *
+     * To add a new physics model:
+     * 1. Implement the model class with a compute_force(Contact&, double&) method
+     * 2. Add it to this variant: using PhysicsModel = std::variant<...>;
+     */
+
+    using PhysicsModel = std::variant<physics::LinearSpringDashpot, physics::NoInteraction, std::nullptr_t>;
+
+    /**
+     * Constructor takes a physics model and detection frequency.
+     *
+     * @param model The collision physics model (e.g., LinearSpringDashpot instance)
+     * @param detect_every Perform coarse detection every N steps (default: 1, meaning every step)
+     */
+    CollisionSystem(const PhysicsModel& model, std::size_t detect_every = 1) : model_(model), detect_every_(detect_every), step_counter_(0) {}
+
+    /**
+     * Resolve collisions for a BlockRodSystem.
+     *
+     * This method performs the full collision detection and resolution pipeline.
+     */
+    void resolve(BlockRodSystem& block);
+
+    /**
+     * Get the contact cache (non-const).
+     *
+     * @return Reference to the contact cache vector
+     */
+    std::vector<std::pair<std::size_t, std::size_t>>& contact_cache() {
+        return contact_cache_;
+    }
+
+    /**
+     * Get the contact cache (const).
+     *
+     * @return Const reference to the contact cache vector
+     */
+    const std::vector<std::pair<std::size_t, std::size_t>>& contact_cache() const {
+        return contact_cache_;
+    }
+
+    /**
+     * Get the detect_every parameter.
+     *
+     * @return The number of steps between coarse detection calls
+     */
+    std::size_t detect_every() const {
+        return detect_every_;
+    }
+
+    /**
+     * Set the detect_every parameter.
+     *
+     * @param detect_every Number of steps between coarse detection calls
+     */
+    void set_detect_every(std::size_t detect_every) {
+        detect_every_ = detect_every;
+    }
+
+private:
+    PhysicsModel model_;
+
+    /**
+     * Number of steps between coarse detection calls.
+     *
+     * Coarse detection (HashGrid) is only performed every detect_every_ steps.
+     * Fine detection, batching, and resolution still occur every step using
+     * the cached contact pairs from the last coarse detection.
+     *
+     * Default: 1 (detect every step)
+     */
+    std::size_t detect_every_;
+
+    /**
+     * Current step counter for tracking when to perform coarse detection.
+     */
+    mutable std::size_t step_counter_;
+
+    /**
+     * Contact cache for storing candidate pairs from coarse detection.
+     *
+     * This cache stores the candidate pairs identified by coarse detection,
+     * allowing fine detection to reuse them without re-running coarse detection.
+     * The cache is cleared when coarse detection is performed.
+     */
+    mutable std::vector<std::pair<std::size_t, std::size_t>> contact_cache_;
+
+    /**
+     * Previous contacts for tracking tangential displacement over time.
+     *
+     * Stores contacts from the previous resolve() call, keyed by normalized node pair.
+     * This allows accumulation of tangential displacement between calls.
+     * Key: Normalized node pair (min(node1, node2), max(node1, node2))
+     * Value: Contact from previous call with accumulated displacement
+     */
+    std::unordered_map<std::pair<std::size_t, std::size_t>, Contact, utility::PairHash> previous_contacts_;
+
+    /**
+     * Normalize a node pair to ensure consistent ordering.
+     *
+     * Returns (min(n1, n2), max(n1, n2)) to ensure the same pair
+     * always maps to the same key regardless of order.
+     *
+     * @param n1 First node index
+     * @param n2 Second node index
+     * @return Normalized pair (min, max)
+     */
+    static std::pair<std::size_t, std::size_t> normalize_pair(
+        std::size_t n1,
+        std::size_t n2
+    ) {
+        return n1 < n2 ? std::make_pair(n1, n2) : std::make_pair(n2, n1);
+    }
+};
+
+
+template<CoarseDetectionPolicy CoarseDetection, FineDetectionPolicy FineDetection, BatchingPolicy Batching>
+void CollisionSystem<CoarseDetection, FineDetection, Batching>::resolve(
+    BlockRodSystem& block) {
+
+    // Get data from BlockRodSystem
+    namespace variable_tags = ::elasticapp::system::cosserat_rod;
+    auto&& positions = block.template get<variable_tags::Position>();
+    auto&& velocities = block.template get<variable_tags::Velocity>();
+    auto&& radii_elem = block.template get<variable_tags::Radius>();
+    auto&& external_forces = block.template get<variable_tags::ExternalForces>();
+
+    const std::size_t n_nodes = positions.cols();
+    const std::size_t n_elems = radii_elem.cols();
+
+    /*
+
+    // Convert element radii to node radii
+    // Treat entire block as a single rod - ghost nodes handle edge cases
+    // Mapping strategy for a rod with n_elems elements and n_nodes nodes:
+    // - Node 0 -> Element 0
+    // - Node i (1 to n_elems-1) -> Element i-1
+    // - Node i >= n_elems -> Element n_elems-1 (ghost nodes use last element)
+    Eigen::MatrixXd radii(1, n_nodes);
+    for (std::size_t i = 0; i < n_nodes; ++i) {
+        std::size_t elem_idx;
+        if (i == 0) {
+            elem_idx = 0;
+        } else if (i < n_elems) {
+            elem_idx = i - 1;
+        } else {
+            // For ghost nodes (i >= n_elems), use the last element
+            elem_idx = n_elems - 1;
+        }
+        radii(0, i) = radii_elem(0, elem_idx);
+    }
+
+    // Step 1: Coarse detection (only every detect_every_ steps)
+    bool should_detect = (step_counter_ % detect_every_ == 0);
+
+    if (should_detect) {
+        // Clear contact cache and perform coarse detection
+        contact_cache_.clear();
+        // Call coarse detection via CRTP (inherited from CoarseDetection)
+        contact_cache_ = static_cast<CoarseDetection&>(*this).detect(positions, radii);
+    }
+    // Otherwise, reuse cached contact pairs from previous detection
+
+    // Step 2: Fine detection (always performed, uses cached pairs if detection was skipped)
+    std::vector<Contact> contacts;
+    if (!contact_cache_.empty()) {
+        // Extract physics parameters from model and call fine detection via CRTP
+        std::visit([&](const auto& physics_model) {
+            contacts = static_cast<FineDetection&>(*this).detect(contact_cache_, positions, velocities, radii, physics_model);
+        }, model_);
+    }
+
+    // Step 3: Contact batching (via CRTP)
+    std::vector<std::vector<std::size_t>> batches = static_cast<Batching&>(*this).batch(contacts);
+
+    // Step 4 & 5: Contact resolution and force application
+    for (const auto& batch : batches) {
+        for (std::size_t contact_idx : batch) {
+            Contact& contact = contacts[contact_idx];
+
+            // Update tangential displacement from previous contact if exists
+            auto pair_key = normalize_pair(contact.node1_idx, contact.node2_idx);
+            auto prev_it = previous_contacts_.find(pair_key);
+            if (prev_it != previous_contacts_.end()) {
+                // Accumulate tangential displacement
+                const Eigen::Vector3d& prev_pos = prev_it->second.position;
+                const Eigen::Vector3d& curr_pos = contact.position;
+                const Eigen::Vector3d& prev_normal = prev_it->second.normal;
+
+                // Project displacement onto tangent plane
+                Eigen::Vector3d displacement = curr_pos - prev_pos;
+                Eigen::Vector3d normal_displacement = displacement.dot(prev_normal) * prev_normal;
+                contact.tangential_displacement = prev_it->second.tangential_displacement +
+                                                 (displacement - normal_displacement);
+            }
+
+            // Compute force using physics model
+            double penetration_depth;
+            Eigen::Vector3d force;
+            std::visit([&](const auto& physics_model) {
+                force = physics_model.compute_force(contact, penetration_depth);
+            }, model_);
+
+            // Apply forces to external_forces
+            if (penetration_depth > 0.0) {
+                // Force on node1
+                external_forces(0, contact.node1_idx) += force(0);
+                external_forces(1, contact.node1_idx) += force(1);
+                external_forces(2, contact.node1_idx) += force(2);
+
+                // Force on node2 (opposite direction)
+                external_forces(0, contact.node2_idx) -= force(0);
+                external_forces(1, contact.node2_idx) -= force(1);
+                external_forces(2, contact.node2_idx) -= force(2);
+            }
+
+            // Store contact for next iteration
+            previous_contacts_[pair_key] = contact;
+        }
+    }
+
+    // Increment step counter
+    step_counter_++;
+    */
+}
+
+} // namespace collision
+} // namespace elasticapp::environment
diff --git a/backend/src/environment/collision/concepts.hpp b/backend/src/environment/collision/concepts.hpp
new file mode 100644
index 00000000..de464a64
--- /dev/null
+++ b/backend/src/environment/collision/concepts.hpp
@@ -0,0 +1,90 @@
+#pragma once
+
+#include <concepts>
+#include <vector>
+#include <utility>
+#include <cstddef>
+#include <Eigen/Dense>
+#include "types.h"
+#include "physics/linear_spring_dashpot.h"
+
+namespace elasticapp::environment {
+namespace collision {
+
+/**
+ * Concept for CoarseDetectionPolicy.
+ *
+ * Coarse detection policies perform broad-phase collision detection to identify
+ * candidate contact pairs. They should be fast but may produce false positives
+ * that will be filtered by fine detection.
+ *
+ * Requirements:
+ * - Must have a detect() method that takes positions and radii matrices
+ * - Must return a vector of candidate node pairs (indices)
+ * - Positions and radii are Eigen::MatrixXd
+ */
+template<typename T>
+concept CoarseDetectionPolicy = requires(
+    T policy,
+    const Eigen::MatrixXd& positions,
+    const Eigen::MatrixXd& radii
+) {
+    // Must have a detect method that takes positions and radii
+    { policy.detect(positions, radii) } -> std::convertible_to<std::vector<std::pair<std::size_t, std::size_t>>>;
+};
+
+/**
+ * Concept for FineDetectionPolicy.
+ *
+ * Fine detection policies perform narrow-phase collision detection on candidate
+ * pairs from coarse detection. They determine actual contact geometry and properties.
+ *
+ * Requirements:
+ * - Must have a detect() method that takes candidate pairs, positions, velocities,
+ *   radii, and a physics model (any type)
+ * - Must return a vector of Contact objects with full contact information
+ * - Positions, velocities, and radii are Eigen::MatrixXd
+ */
+template<typename T, typename PhysicsModel>
+concept FineDetectionPolicyFor = requires(
+    T policy,
+    const std::vector<std::pair<std::size_t, std::size_t>>& candidate_pairs,
+    const Eigen::MatrixXd& positions,
+    const Eigen::MatrixXd& velocities,
+    const Eigen::MatrixXd& radii,
+    const PhysicsModel& physics_model
+) {
+    { policy.detect(candidate_pairs, positions, velocities, radii, physics_model) }
+        -> std::convertible_to<std::vector<Contact>>;
+};
+
+/**
+ * Concept for FineDetectionPolicy (unconstrained version).
+ *
+ * A policy satisfies FineDetectionPolicy if it can work with any physics model type.
+ * This is checked by requiring it to work with at least one model type (LinearSpringDashpot).
+ */
+template<typename T>
+concept FineDetectionPolicy = FineDetectionPolicyFor<T, physics::LinearSpringDashpot>;
+
+/**
+ * Concept for BatchingPolicy.
+ *
+ * Batching policies group contacts into batches for efficient parallel processing.
+ * Contacts in the same batch can be processed independently.
+ *
+ * Requirements:
+ * - Must have a batch() method that takes a vector of contacts
+ * - Must return a vector of batches, where each batch is a vector of contact indices
+ */
+template<typename T>
+concept BatchingPolicy = requires(
+    T policy,
+    std::vector<Contact>& contacts
+) {
+    // Must have a batch method that takes contacts and returns batches
+    { policy.batch(contacts) } -> std::convertible_to<std::vector<std::vector<std::size_t>>>;
+};
+
+} // namespace collision
+} // namespace elasticapp::environment
diff --git a/backend/src/environment/collision/course_detection/hash_grid.cpp b/backend/src/environment/collision/course_detection/hash_grid.cpp
new file mode 100644
index 00000000..96e6c570
--- /dev/null
+++ b/backend/src/environment/collision/course_detection/hash_grid.cpp
@@ -0,0 +1,439 @@
+#include "hash_grid.h"
+#include <unordered_set>
+#include <cmath>
+#include <algorithm>
+#include <limits>
+
+namespace elasticapp::environment {
+namespace collision {
+
+//==============================================================================
+// HashGrid::SingleHashGrid Implementation
+//==============================================================================
+
+HashGrid::SingleHashGrid::SingleHashGrid(double cell_span, const Parameters& params)
+    : params_(params),
+      cell_span_(cell_span),
+      inverse_cell_span_(1.0 / cell_span),
+      x_cell_count_(params.initial_x_cell_count),
+      y_cell_count_(params.initial_y_cell_count),
+      z_cell_count_(params.initial_z_cell_count) {
+
+    // Ensure grid dimensions are powers of 2 for bitwise operations
+    x_cell_count_ = round_up_to_power_of_2(x_cell_count_);
+    y_cell_count_ = round_up_to_power_of_2(y_cell_count_);
+    z_cell_count_ = round_up_to_power_of_2(z_cell_count_);
+
+    // Compute hash masks (for bitwise modulo: x & mask instead of x % size)
+    x_hash_mask_ = x_cell_count_ - 1;
+    y_hash_mask_ = y_cell_count_ - 1;
+    z_hash_mask_ = z_cell_count_ - 1;
+
+    xy_cell_count_ = x_cell_count_ * y_cell_count_;
+    xyz_cell_count_ = xy_cell_count_ * z_cell_count_;
+
+    // Allocate cells
+    cells_.resize(xyz_cell_count_);
+
+    // Initialize neighbor offsets
+    initialize_neighbor_offsets();
+}
+
+HashGrid::SingleHashGrid::~SingleHashGrid() {
+    clear_neighbor_offsets();
+}
+
+void HashGrid::SingleHashGrid::clear_neighbor_offsets() {
+    for (auto& cell : cells_) {
+        if (cell.neighbor_offsets != nullptr && cell.neighbor_offsets != std_neighbor_offsets_) {
+            delete[] cell.neighbor_offsets;
+            cell.neighbor_offsets = nullptr;
+        }
+    }
+}
+
+void HashGrid::SingleHashGrid::initialize_neighbor_offsets() {
+    // Initialize standard neighbor offsets (for inner cells)
+    // 27 neighbors in 3D: (-1,-1,-1) to (1,1,1)
+    long xc = static_cast<long>(x_cell_count_);
+    long yc = static_cast<long>(y_cell_count_);
+    long zc = static_cast<long>(z_cell_count_);
+    long xyc = static_cast<long>(xy_cell_count_);
+
+    std::size_t i = 0;
+    for (long zz = -xyc; zz <= xyc; zz += xyc) {
+        for (long yy = -xc; yy <= xc; yy += xc) {
+            for (long xx = -1; xx <= 1; ++xx, ++i) {
+                std_neighbor_offsets_[i] = xx + yy + zz;
+            }
+        }
+    }
+
+    // Set up neighbor offsets for each cell
+    for (std::size_t z = 0; z < z_cell_count_; ++z) {
+        for (std::size_t y = 0; y < y_cell_count_; ++y) {
+            for (std::size_t x = 0; x < x_cell_count_; ++x) {
+                std::size_t cell_idx = x + y * x_cell_count_ + z * xy_cell_count_;
+                Cell& cell = cells_[cell_idx];
+
+                // Check if border cell
+                bool is_border = (x == 0 || x == x_cell_count_ - 1 ||
+                                 y == 0 || y == y_cell_count_ - 1 ||
+                                 z == 0 || z == z_cell_count_ - 1);
+                cell.is_border_cell = is_border;
+
+                if (is_border) {
+                    // Border cells need custom offsets (wrapping or clamping)
+                    cell.neighbor_offsets = new long[27];
+
+                    i = 0;
+                    for (long zz = -xyc; zz <= xyc; zz += xyc) {
+                        long zo = zz;
+                        // Handle z wrapping
+                        if (z == 0 && zz == -xyc) {
+                            zo = static_cast<long>(xyz_cell_count_) - xyc;
+                        } else if (z == z_cell_count_ - 1 && zz == xyc) {
+                            zo = xyc - static_cast<long>(xyz_cell_count_);
+                        }
+
+                        for (long yy = -xc; yy <= xc; yy += xc) {
+                            long yo = yy;
+                            // Handle y wrapping
+                            if (y == 0 && yy == -xc) {
+                                yo = xyc - xc;
+                            } else if (y == y_cell_count_ - 1 && yy == xc) {
+                                yo = xc - xyc;
+                            }
+
+                            for (long xx = -1; xx <= 1; ++xx, ++i) {
+                                long xo = xx;
+                                // Handle x wrapping
+                                if (x == 0 && xx == -1) {
+                                    xo = xc - 1;
+                                } else if (x == x_cell_count_ - 1 && xx == 1) {
+                                    xo = 1 - xc;
+                                }
+
+                                cell.neighbor_offsets[i] = xo + yo + zo;
+                            }
+                        }
+                    }
+                } else {
+                    // Inner cells use standard offsets
+                    cell.neighbor_offsets = std_neighbor_offsets_;
+                }
+            }
+        }
+    }
+}
+
+std::size_t HashGrid::SingleHashGrid::hash(const Eigen::Vector3d& position) const {
+    const double x = position.x();
+    const double y = position.y();
+    const double z = position.z();
+
+    std::size_t x_hash, y_hash, z_hash;
+
+    // Use inverse multiplication and bitwise AND for efficiency
+    if (x < 0) {
+        double i = (-x) * inverse_cell_span_;
+        x_hash = x_cell_count_ - 1 - (static_cast<std::size_t>(i) & x_hash_mask_);
+    } else {
+        double i = x * inverse_cell_span_;
+        x_hash = static_cast<std::size_t>(i) & x_hash_mask_;
+    }
+
+    if (y < 0) {
+        double i = (-y) * inverse_cell_span_;
+        y_hash = y_cell_count_ - 1 - (static_cast<std::size_t>(i) & y_hash_mask_);
+    } else {
+        double i = y * inverse_cell_span_;
+        y_hash = static_cast<std::size_t>(i) & y_hash_mask_;
+    }
+
+    if (z < 0) {
+        double i = (-z) * inverse_cell_span_;
+        z_hash = z_cell_count_ - 1 - (static_cast<std::size_t>(i) & z_hash_mask_);
+    } else {
+        double i = z * inverse_cell_span_;
+        z_hash = static_cast<std::size_t>(i) & z_hash_mask_;
+    }
+
+    return x_hash + y_hash * x_cell_count_ + z_hash * xy_cell_count_;
+}
+
+void HashGrid::SingleHashGrid::add_node(const NodeInfo& node) {
+    nodes_.push_back(node);
+    std::size_t node_idx = nodes_.size() - 1;
+
+    // Compute cell index
+    std::size_t cell_idx = hash(node.position);
+
+    // Add to cell
+    Cell& cell = cells_[cell_idx];
+    if (cell.node_indices.empty()) {
+        cell.node_indices.reserve(params_.initial_cell_vector_size);
+    }
+    cell.node_indices.push_back(node_idx);
+
+    // Store mapping
+    node_to_cell_[node.index] = cell_idx;
+}
+
+void HashGrid::SingleHashGrid::detect_collisions(
+    std::vector<std::pair<std::size_t, std::size_t>>& candidate_pairs,
+    const std::vector<NodeInfo>& finer_grid_nodes
+) const {
+    // Use set to avoid duplicates
+    std::unordered_set<std::size_t> processed_pairs;
+
+    auto make_pair_key = [](std::size_t i, std::size_t j) -> std::size_t {
+        if (i > j) std::swap(i, j);
+        return i * 4294967291ULL + j;  // Large prime for hashing
+    };
+
+    // Process collisions within this grid
+    for (std::size_t cell_idx = 0; cell_idx < cells_.size(); ++cell_idx) {
+        const Cell& cell = cells_[cell_idx];
+        if (cell.node_indices.empty()) continue;
+
+        const auto& node_indices = cell.node_indices;
+
+        // Check pairs within the same cell
+        for (std::size_t i = 0; i < node_indices.size(); ++i) {
+            for (std::size_t j = i + 1; j < node_indices.size(); ++j) {
+                std::size_t idx1 = nodes_[node_indices[i]].index;
+                std::size_t idx2 = nodes_[node_indices[j]].index;
+
+                std::size_t pair_key = make_pair_key(idx1, idx2);
+                if (processed_pairs.find(pair_key) == processed_pairs.end()) {
+                    candidate_pairs.push_back({idx1, idx2});
+                    processed_pairs.insert(pair_key);
+                }
+            }
+        }
+
+        // Check pairs with neighboring cells
+        constexpr std::size_t hnn = get_half_number_of_neighbors();
+        for (std::size_t i = 0; i < hnn; ++i) {
+            long offset = cell.neighbor_offsets[i];
+            std::size_t neighbor_idx = cell_idx + offset;
+
+            // Bounds check
+            if (neighbor_idx >= cells_.size()) {
+                continue;  // Out of bounds
+            }
+
+            const Cell& neighbor_cell = cells_[neighbor_idx];
+            if (neighbor_cell.node_indices.empty()) continue;
+
+            // Check all pairs between this cell and neighbor
+            for (std::size_t idx1 : node_indices) {
+                for (std::size_t idx2 : neighbor_cell.node_indices) {
+                    std::size_t n1 = nodes_[idx1].index;
+                    std::size_t n2 = nodes_[idx2].index;
+
+                    if (n1 < n2) {  // Only check once per pair
+                        std::size_t pair_key = make_pair_key(n1, n2);
+                        if (processed_pairs.find(pair_key) == processed_pairs.end()) {
+                            candidate_pairs.push_back({n1, n2});
+                            processed_pairs.insert(pair_key);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Check collisions with nodes from finer grids
+    if (!finer_grid_nodes.empty()) {
+        for (const auto& fine_node : finer_grid_nodes) {
+            std::size_t cell_idx = hash(fine_node.position);
+            const Cell& cell = cells_[cell_idx];
+
+            // Check all neighbors
+            for (std::size_t i = 0; i < get_number_of_neighbors(); ++i) {
+                long offset = cell.neighbor_offsets[i];
+                std::size_t neighbor_idx = cell_idx + offset;
+
+                // Bounds check
+                if (neighbor_idx >= cells_.size()) {
+                    continue;
+                }
+
+                const Cell& neighbor_cell = cells_[neighbor_idx];
+                for (std::size_t idx : neighbor_cell.node_indices) {
+                    std::size_t n1 = fine_node.index;
+                    std::size_t n2 = nodes_[idx].index;
+
+                    if (n1 < n2) {
+                        std::size_t pair_key = make_pair_key(n1, n2);
+                        if (processed_pairs.find(pair_key) == processed_pairs.end()) {
+                            candidate_pairs.push_back({n1, n2});
+                            processed_pairs.insert(pair_key);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+void HashGrid::SingleHashGrid::clear() {
+    nodes_.clear();
+    node_to_cell_.clear();
+    for (auto& cell : cells_) {
+        cell.node_indices.clear();
+    }
+}
+
+std::vector<HashGrid::NodeInfo> HashGrid::SingleHashGrid::get_all_nodes() const {
+    return nodes_;
+}
+
+//==============================================================================
+// HashGrid Implementation
+//==============================================================================
+
+HashGrid::HashGrid() : HashGrid(Parameters()) {}
+
+HashGrid::HashGrid(const Parameters& params) : params_(params) {}
+
+HashGrid::~HashGrid() {
+    clear();
+}
+
+std::size_t HashGrid::round_up_to_power_of_2(std::size_t n) {
+    if (n == 0) return 1;
+    n--;
+    n |= n >> 1;
+    n |= n >> 2;
+    n |= n >> 4;
+    n |= n >> 8;
+    n |= n >> 16;
+    if (sizeof(std::size_t) > 4) {
+        n |= n >> 32;
+    }
+    return n + 1;
+}
+
+double HashGrid::compute_cell_span_for_node(double node_diameter, const Parameters& params) {
+    // Cell span should be slightly larger than node diameter
+    // Use hierarchy factor to determine appropriate level
+    double base_span = node_diameter * std::sqrt(params.hierarchy_factor);
+
+    // Round to reasonable value
+    return base_span;
+}
+
+void HashGrid::clear() {
+    grids_.clear();
+}
+
+std::vector<std::pair<std::size_t, std::size_t>> HashGrid::detect(
+    const Eigen::MatrixXd& positions,
+    const Eigen::MatrixXd& radii
+) {
+    const Eigen::Index n_nodes = positions.cols();
+
+    if (n_nodes == 0) {
+        return std::vector<std::pair<std::size_t, std::size_t>>();
+    }
+
+    // Clear previous state
+    clear();
+
+    // Build node info
+    std::vector<NodeInfo> nodes;
+    nodes.reserve(n_nodes);
+    double max_diameter = 0.0;
+
+    for (Eigen::Index i = 0; i < n_nodes; ++i) {
+        Eigen::Vector3d pos = positions.col(i);
+        double radius = std::abs(radii(i));
+        double diameter = 2.0 * radius;
+        max_diameter = std::max(max_diameter, diameter);
+
+        nodes.emplace_back(static_cast<std::size_t>(i), pos, radius);
+    }
+
+    // If too few nodes, use simple approach
+    if (n_nodes < params_.hashgrid_activation_threshold) {
+        // Simple all-pairs (for very small systems)
+        std::vector<std::pair<std::size_t, std::size_t>> pairs;
+        for (std::size_t i = 0; i < nodes.size(); ++i) {
+            for (std::size_t j = i + 1; j < nodes.size(); ++j) {
+                pairs.push_back({nodes[i].index, nodes[j].index});
+            }
+        }
+        return pairs;
+    }
+
+    // Create initial grid with appropriate cell span
+    double initial_cell_span = compute_cell_span_for_node(max_diameter, params_);
+    auto initial_grid = std::make_unique<SingleHashGrid>(initial_cell_span, params_);
+    grids_.push_back(std::move(initial_grid));
+
+    // Assign nodes to appropriate grids
+    for (const auto& node : nodes) {
+        double required_cell_span = compute_cell_span_for_node(node.diameter, params_);
+
+        // Find or create appropriate grid
+        SingleHashGrid* target_grid = nullptr;
+        auto it = grids_.begin();
+
+        while (it != grids_.end()) {
+            double grid_span = (*it)->get_cell_span();
+
+            if (node.diameter < grid_span) {
+                // Check if next grid is too small
+                auto next_it = std::next(it);
+                if (next_it == grids_.end() || (*next_it)->get_cell_span() > required_cell_span) {
+                    // This grid is appropriate
+                    target_grid = it->get();
+                    break;
+                }
+            }
+            ++it;
+        }
+
+        // If no appropriate grid found, create a new one
+        if (target_grid == nullptr) {
+            // Create grid with larger cell span
+            double new_span = required_cell_span;
+            if (!grids_.empty()) {
+                double largest_span = grids_.back()->get_cell_span();
+                while (new_span <= largest_span) {
+                    new_span *= params_.hierarchy_factor;
+                }
+            }
+
+            auto new_grid = std::make_unique<SingleHashGrid>(new_span, params_);
+            target_grid = new_grid.get();
+            grids_.push_back(std::move(new_grid));
+        }
+
+        target_grid->add_node(node);
+    }
+
+    // Detect collisions hierarchically
+    std::vector<std::pair<std::size_t, std::size_t>> candidate_pairs;
+
+    // Process grids from finest to coarsest
+    for (auto it = grids_.begin(); it != grids_.end(); ++it) {
+        // Collect nodes from finer grids
+        std::vector<NodeInfo> finer_nodes;
+        for (auto prev_it = grids_.begin(); prev_it != it; ++prev_it) {
+            auto grid_nodes = (*prev_it)->get_all_nodes();
+            finer_nodes.insert(finer_nodes.end(), grid_nodes.begin(), grid_nodes.end());
+        }
+
+        // Detect collisions in this grid
+        (*it)->detect_collisions(candidate_pairs, finer_nodes);
+    }
+
+    return candidate_pairs;
+}
+
+} // namespace collision
+} // namespace elasticapp::environment
diff --git a/backend/src/environment/collision/course_detection/hash_grid.h b/backend/src/environment/collision/course_detection/hash_grid.h
new file mode 100644
index 00000000..d88d0f26
--- /dev/null
+++ b/backend/src/environment/collision/course_detection/hash_grid.h
@@ -0,0 +1,262 @@
+#pragma once
+
+#include "../types.h"
+#include <vector>
+#include <utility>
+#include <unordered_map>
+#include <unordered_set>
+#include <list>
+#include <memory>
+#include <Eigen/Dense>
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+
+namespace elasticapp::environment {
+namespace collision {
+
+/**
+ * HashGrid coarse collision detection policy.
+ *
+ * Implements hierarchical spatial hashing for O(N) broad-phase collision detection.
+ * Based on the Elastica HashGrids implementation, adapted for node-based systems.
+ *
+ * Key features:
+ * - Hierarchical grids: Multiple grids with different cell sizes for optimal performance
+ * - Precomputed neighbor offsets: Fast access to adjacent cells
+ * - Bitwise hash operations: Efficient cell coordinate computation (requires power-of-2 dimensions)
+ * - Automatic grid sizing: Adapts to node sizes at runtime
+ *
+ * Algorithm:
+ * 1. Creates a hierarchy of grids with different cell sizes
+ * 2. Assigns each node to the grid with smallest cells larger than the node's diameter
+ * 3. Uses spatial hashing to partition nodes into grid cells
+ * 4. For collision detection, checks nodes in same cell and 26 neighboring cells (3D)
+ *
+ * This is a CRTP policy class that will be mixed into CollisionSystem.
+ */
+class HashGrid {
+public:
+    /**
+     * Configuration parameters for the hash grid.
+     */
+    struct Parameters {
+        std::size_t initial_x_cell_count = 64;      // Initial grid size in x (must be power of 2)
+        std::size_t initial_y_cell_count = 64;      // Initial grid size in y (must be power of 2)
+        std::size_t initial_z_cell_count = 64;      // Initial grid size in z (must be power of 2)
+        std::size_t initial_cell_vector_size = 16;   // Initial capacity for node vectors in cells
+        std::size_t minimal_grid_density = 4;        // Minimum cells per node
+        std::size_t hashgrid_activation_threshold = 10; // Min nodes to use hash grid
+        double hierarchy_factor = 2.0;              // Factor between grid levels
+
+        Parameters() = default;
+    };
+
+    /**
+     * Default constructor.
+     */
+    HashGrid();
+
+    /**
+     * Constructor with parameters.
+     */
+    explicit HashGrid(const Parameters& params);
+
+    /**
+     * Destructor.
+     */
+    ~HashGrid();
+
+    /**
+     * Perform coarse collision detection.
+     *
+     * Identifies pairs of nodes that are potentially colliding using
+     * hierarchical spatial hashing. Returns candidate pairs for fine detection.
+     *
+     * @param positions Node positions (3 x n_nodes)
+     * @param radii Node radii (1 x n_nodes)
+     * @return Vector of pairs (node1_idx, node2_idx) that are potentially colliding
+     */
+    std::vector<std::pair<std::size_t, std::size_t>> detect(
+        const Eigen::MatrixXd& positions,
+        const Eigen::MatrixXd& radii
+    );
+
+    /**
+     * Clear all grids (for reuse).
+     */
+    void clear();
+
+private:
+    // Forward declaration
+    class SingleHashGrid;
+
+    /**
+     * Grid cell coordinates (3D integer coordinates).
+     */
+    struct CellCoord {
+        int x, y, z;
+
+        bool operator==(const CellCoord& other) const {
+            return x == other.x && y == other.y && z == other.z;
+        }
+    };
+
+    /**
+     * Hash function for CellCoord to use in unordered_map.
+     */
+    struct CellCoordHash {
+        std::size_t operator()(const CellCoord& coord) const {
+            const std::size_t h1 = std::hash<int>{}(coord.x);
+            const std::size_t h2 = std::hash<int>{}(coord.y);
+            const std::size_t h3 = std::hash<int>{}(coord.z);
+            return h1 ^ (h2 << 1) ^ (h3 << 2);
+        }
+    };
+
+    /**
+     * Node information for grid assignment.
+     */
+    struct NodeInfo {
+        std::size_t index;
+        Eigen::Vector3d position;
+        double radius;
+        double diameter;  // 2 * radius
+
+        NodeInfo(std::size_t idx, const Eigen::Vector3d& pos, double r)
+            : index(idx), position(pos), radius(r), diameter(2.0 * r) {}
+    };
+
+    /**
+     * Single hash grid in the hierarchy.
+     * Implements one level of the hierarchical grid system.
+     */
+    class SingleHashGrid {
+    public:
+        /**
+         * Constructor.
+         *
+         * @param cell_span Size of each grid cell (edge length)
+         * @param params Configuration parameters
+         */
+        SingleHashGrid(double cell_span, const Parameters& params);
+
+        /**
+         * Destructor.
+         */
+        ~SingleHashGrid();
+
+        /**
+         * Get cell span (cell size).
+         */
+        double get_cell_span() const noexcept { return cell_span_; }
+
+        /**
+         * Add a node to this grid.
+         */
+        void add_node(const NodeInfo& node);
+
+        /**
+         * Detect collisions within this grid and with nodes from finer grids.
+         *
+         * @param candidate_pairs Output vector for candidate pairs
+         * @param finer_grid_nodes Nodes from finer grids (smaller cell sizes) to check against
+         */
+        void detect_collisions(
+            std::vector<std::pair<std::size_t, std::size_t>>& candidate_pairs,
+            const std::vector<NodeInfo>& finer_grid_nodes = {}
+        ) const;
+
+        /**
+         * Clear all nodes from this grid.
+         */
+        void clear();
+
+        /**
+         * Get all nodes in this grid.
+         */
+        std::vector<NodeInfo> get_all_nodes() const;
+
+    private:
+        /**
+         * Cell structure for storing nodes.
+         */
+        struct Cell {
+            std::vector<std::size_t> node_indices;  // Indices into nodes_ vector
+            long* neighbor_offsets;                 // Offsets to neighboring cells (27 in 3D)
+            bool is_border_cell;                    // Whether this is a border cell
+
+            Cell() : neighbor_offsets(nullptr), is_border_cell(false) {}
+        };
+
+        /**
+         * Compute hash (cell index) from position.
+         * Uses bitwise operations for efficiency (requires power-of-2 grid dimensions).
+         */
+        std::size_t hash(const Eigen::Vector3d& position) const;
+
+        /**
+         * Initialize neighbor offsets for all cells.
+         */
+        void initialize_neighbor_offsets();
+
+        /**
+         * Clear neighbor offsets.
+         */
+        void clear_neighbor_offsets();
+
+        /**
+         * Get number of neighbors (27 in 3D).
+         */
+        static constexpr std::size_t get_number_of_neighbors() { return 27; }
+
+        /**
+         * Get half number of neighbors (13, excluding center).
+         */
+        static constexpr std::size_t get_half_number_of_neighbors() { return 13; }
+
+        // Member variables
+        Parameters params_;
+        double cell_span_;
+        double inverse_cell_span_;  // Precomputed 1.0 / cell_span_ for efficiency
+
+        // Grid dimensions (must be powers of 2 for bitwise operations)
+        std::size_t x_cell_count_;
+        std::size_t y_cell_count_;
+        std::size_t z_cell_count_;
+        std::size_t xy_cell_count_;   // x_cell_count_ * y_cell_count_
+        std::size_t xyz_cell_count_;  // Total number of cells
+
+        // Hash masks for bitwise modulo operations
+        std::size_t x_hash_mask_;
+        std::size_t y_hash_mask_;
+        std::size_t z_hash_mask_;
+
+        // Grid cells (linear array)
+        std::vector<Cell> cells_;
+
+        // Standard neighbor offsets (for inner cells)
+        long std_neighbor_offsets_[27];
+
+        // Node storage
+        std::vector<NodeInfo> nodes_;
+        std::unordered_map<std::size_t, std::size_t> node_to_cell_;  // node index -> cell index
+    };
+
+    /**
+     * Compute appropriate cell span for a node based on its diameter.
+     */
+    static double compute_cell_span_for_node(double node_diameter, const Parameters& params);
+
+    /**
+     * Round up to next power of 2.
+     */
+    static std::size_t round_up_to_power_of_2(std::size_t n);
+
+    // Member variables
+    Parameters params_;
+    mutable std::list<std::unique_ptr<SingleHashGrid>> grids_;  // Grids sorted by cell span (ascending)
+};
+
+} // namespace collision
+} // namespace elasticapp::environment
diff --git a/backend/src/environment/collision/fine_detection/max_contacts.cpp b/backend/src/environment/collision/fine_detection/max_contacts.cpp
new file mode 100644
index 00000000..bb37fb50
--- /dev/null
+++ b/backend/src/environment/collision/fine_detection/max_contacts.cpp
@@ -0,0 +1,78 @@
+#include "max_contacts.h"
+#include <algorithm>
+#include <cmath>
+
+namespace elasticapp::environment {
+namespace collision {
+
+std::vector<Contact> MaxContacts::detect_sphere_sphere(
+    std::size_t node1_idx,
+    std::size_t node2_idx,
+    const Eigen::Vector3d& pos1,
+    const Eigen::Vector3d& pos2,
+    const Eigen::Vector3d& vel1,
+    const Eigen::Vector3d& vel2,
+    double r1,
+    double r2,
+    double stiffness,
+    double normal_damping,
+    double tangential_damping,
+    double friction
+) const {
+    std::vector<Contact> contacts;
+
+    // Ordering: Normal from node2 to node1 always (matching reference implementation)
+    // This ensures consistent normal direction regardless of pair order
+    Eigen::Vector3d normal = pos1 - pos2;
+    const double normal_length = normal.norm();
+
+    // Avoid division by zero for coincident nodes
+    if (normal_length < 1e-12) {
+        return contacts;  // Nodes are coincident, skip
+    }
+
+    // Compute separation distance: dist = length(normal) - r1 - r2
+    // - dist < 0: penetrating (overlapping)
+    // - dist > 0: separated
+    // - dist = 0: touching
+    const double dist = normal_length - r1 - r2;
+
+    // Check if in contact (dist < contact_threshold)
+    // This allows contacts even when slightly separated for stability
+    if (dist < contact_threshold) {
+        // Normalize the normal vector
+        normal /= normal_length;
+
+        // Compute contact position
+        // Contact point lies on the line connecting centers, at the point of maximum overlap
+        // Formula from reference: gPos = pos2 + normal * (r2 + 0.5 * dist)
+        // When dist < 0 (penetrating), this places the contact point between the surfaces
+        const double k = r2 + 0.5 * dist;
+        const Eigen::Vector3d contact_pos = pos2 + normal * k;
+
+        // Create contact
+        // Note: distance is stored as dist (negative when penetrating, positive when separated)
+        // The Contact struct expects distance to be negative when penetrating
+        Contact contact(
+            node1_idx, node2_idx,
+            contact_pos,
+            normal,
+            dist,  // dist is already negative when penetrating
+            vel1,
+            vel2,
+            stiffness,
+            normal_damping,
+            tangential_damping,
+            friction
+        );
+
+        contacts.push_back(contact);
+    }
+
+    return contacts;
+}
+
+// Template implementation moved to header file (max_contacts.h) due to template nature
+
+} // namespace collision
+} // namespace elasticapp::environment
diff --git a/backend/src/environment/collision/fine_detection/max_contacts.h b/backend/src/environment/collision/fine_detection/max_contacts.h
new file mode 100644
index 00000000..a4e78c66
--- /dev/null
+++ b/backend/src/environment/collision/fine_detection/max_contacts.h
@@ -0,0 +1,186 @@
+#pragma once
+
+#include "../types.h"
+#include <vector>
+#include <Eigen/Dense>
+#include <cmath>
+
+#include <omp.h>
+
+namespace elasticapp::environment {
+namespace collision {
+
+/**
+ * MaxContacts fine collision detection policy.
+ *
+ * Finds points of maximum overlap between potentially colliding pairs.
+ * This is the default fine detection algorithm in the reference implementation.
+ *
+ * Based on the Elastica MaxContacts implementation, which creates the maximal
+ * number of contact points necessary to handle collisions between rigid bodies
+ * physically accurately.
+ *
+ * For sphere-sphere collisions (nodes), a single contact point is generated
+ * at the point of maximum overlap along the line connecting the two centers.
+ *
+ * This is a CRTP policy class that will be mixed into CollisionSystem.
+ */
+class MaxContacts {
+public:
+    /**
+     * Contact threshold for determining if two objects are in contact.
+     *
+     * Objects are considered in contact if their separation distance is less
+     * than this threshold. This allows contacts even when objects are slightly
+     * separated, which is important for stability in discrete time simulations.
+     *
+     * Value based on typical Elastica implementations (typically 1e-6 to 1e-9).
+     */
+    static constexpr double contact_threshold = 1e-8;
+
+    /**
+     * Default constructor.
+     */
+    MaxContacts() = default;
+
+    /**
+     * Destructor.
+     */
+    ~MaxContacts() = default;
+
+    /**
+     * Perform fine collision detection on candidate pairs.
+     *
+     * Performs precise intersection tests and finds points of maximum overlap.
+     * Returns detailed contact information for confirmed collisions.
+     *
+     * Algorithm (for sphere-sphere):
+     * 1. Compute normal vector from node2 to node1: normal = pos1 - pos2
+     * 2. Compute separation distance: dist = length(normal) - r1 - r2
+     *    - dist < 0: penetrating (overlapping)
+     *    - dist > 0: separated
+     * 3. If dist < contact_threshold, create contact:
+     *    - Normalize normal vector
+     *    - Contact position: pos2 + normal * (r2 + 0.5 * dist)
+     *    - Contact normal points from node2 to node1
+     *    - Distance stored as dist (negative when penetrating)
+     *
+     * Material properties (stiffness, damping, friction) are taken from
+     * the physics model and stored in each Contact for later force computation.
+     *
+     * @param candidate_pairs Pairs of node indices from coarse detection
+     * @param positions Node positions (3 x n_nodes)
+     * @param velocities Node velocities (3 x n_nodes)
+     * @param radii Node radii (1 x n_nodes)
+     * @param physics_model Physics model containing material properties
+     * @return Vector of Contact objects for confirmed collisions
+     */
+    template<typename PhysicsModel>
+    std::vector<Contact> detect(
+        const std::vector<std::pair<std::size_t, std::size_t>>& candidate_pairs,
+        const Eigen::MatrixXd& positions,
+        const Eigen::MatrixXd& velocities,
+        const Eigen::MatrixXd& radii,
+        const PhysicsModel& physics_model
+    ) const {
+        std::vector<Contact> contacts;
+        contacts.reserve(candidate_pairs.size());  // Reserve space for potential contacts
+
+        // Extract material properties from physics model
+        const double stiffness = physics_model.k_normal;
+        const double normal_damping = physics_model.eta_normal;
+        const double tangential_damping = physics_model.eta_tangential;
+        const double friction = physics_model.friction;
+
+        // Process each candidate pair (parallelized with OpenMP)
+        const std::size_t n_pairs = candidate_pairs.size();
+
+        // Thread-local storage for contacts (each thread collects its own contacts)
+        std::vector<std::vector<Contact>> thread_contacts(omp_get_max_threads());
+
+        #pragma omp parallel for
+        for (std::size_t idx = 0; idx < n_pairs; ++idx) {
+            const auto& pair = candidate_pairs[idx];
+            const std::size_t i1 = pair.first;
+            const std::size_t i2 = pair.second;
+
+            // Get node positions
+            const Eigen::Vector3d pos1 = positions.col(i1);
+            const Eigen::Vector3d pos2 = positions.col(i2);
+
+            // Get node velocities
+            const Eigen::Vector3d vel1 = velocities.col(i1);
+            const Eigen::Vector3d vel2 = velocities.col(i2);
+
+            // Get node radii
+            const double r1 = std::abs(radii(i1));
+            const double r2 = std::abs(radii(i2));
+
+            // Detect sphere-sphere collision
+            auto pair_contacts = detect_sphere_sphere(
+                i1, i2,
+                pos1, pos2,
+                vel1, vel2,
+                r1, r2,
+                stiffness,
+                normal_damping,
+                tangential_damping,
+                friction
+            );
+
+            // Add contacts to thread-local storage
+            const int thread_id = omp_get_thread_num();
+            thread_contacts[thread_id].insert(
+                thread_contacts[thread_id].end(),
+                pair_contacts.begin(),
+                pair_contacts.end()
+            );
+        }
+
+        // Combine thread-local contacts into final result
+        for (const auto& thread_contact_vec : thread_contacts) {
+            contacts.insert(contacts.end(), thread_contact_vec.begin(), thread_contact_vec.end());
+        }
+
+        return contacts;
+    }
+
+private:
+    /**
+     * Detect collision between two spheres (nodes).
+     *
+     * Implements the sphere-sphere collision detection algorithm from the
+     * reference implementation.
+     *
+     * @param node1_idx Index of first node
+     * @param node2_idx Index of second node
+     * @param pos1 Position of first node
+     * @param pos2 Position of second node
+     * @param vel1 Velocity of first node
+     * @param vel2 Velocity of second node
+     * @param r1 Radius of first node
+     * @param r2 Radius of second node
+     * @param stiffness Contact stiffness
+     * @param normal_damping Normal damping coefficient
+     * @param tangential_damping Tangential damping coefficient
+     * @param friction Friction coefficient
+     * @return Contact object if collision detected, or empty optional
+     */
+    std::vector<Contact> detect_sphere_sphere(
+        std::size_t node1_idx,
+        std::size_t node2_idx,
+        const Eigen::Vector3d& pos1,
+        const Eigen::Vector3d& pos2,
+        const Eigen::Vector3d& vel1,
+        const Eigen::Vector3d& vel2,
+        double r1,
+        double r2,
+        double stiffness,
+        double normal_damping,
+        double tangential_damping,
+        double friction
+    ) const;
+};
+
+} // namespace collision
+} // namespace elasticapp::environment
diff --git a/backend/src/environment/collision/physics/linear_spring_dashpot.cpp b/backend/src/environment/collision/physics/linear_spring_dashpot.cpp
new file mode 100644
index 00000000..0dd93b3f
--- /dev/null
+++ b/backend/src/environment/collision/physics/linear_spring_dashpot.cpp
@@ -0,0 +1,91 @@
+#include "linear_spring_dashpot.h"
+#include "../types.h"
+#include <algorithm>
+#include <cmath>
+
+namespace elasticapp::environment::collision {
+namespace physics {
+
+inline Eigen::Vector3d LinearSpringDashpot::compute_force(
+    const collision::Contact& contact,
+    double& penetration_depth
+) const {
+    // Check if contact is penetrating
+    if (!is_penetrating(contact)) {
+        penetration_depth = 0.0;
+        return Eigen::Vector3d::Zero();
+    }
+
+    // Penetration depth: positive when penetrating
+    // contact.distance is negative when penetrating (overlap)
+    // Matching CONTEXT: real_t delta(-contact.get_distance());
+    penetration_depth = -contact.distance;
+
+    // Get contact normal (unit vector pointing from body2 to body1)
+    const Eigen::Vector3d& normal = contact.normal;
+
+    // Calculate relative velocity matching CONTEXT/COLLISION.md documentation:
+    // tangential_velocity = (other_elem_velocity - this_elem_velocity) - normal_velocity * normal_direction
+    // where other_elem is node2 and this_elem is node1
+    // So: relative_velocity = velocity2 - velocity1
+    const Eigen::Vector3d rel_velocity = contact.velocity2 - contact.velocity1;
+
+    // Normal component of relative velocity
+    // Matching CONTEXT/COLLISION.md: normal_velocity = (other_elem_velocity - this_elem_velocity).dot(normal_direction)
+    const double rel_vel_normal = rel_velocity.dot(normal);
+
+    // Tangential component of relative velocity
+    // Matching CONTEXT/COLLISION.md: tangential_velocity = (other_elem_velocity - this_elem_velocity) - normal_velocity * normal_direction
+    const Eigen::Vector3d rel_vel_tangential = rel_velocity - rel_vel_normal * normal;
+
+    // Normal force magnitude (ReLU operation - no negative forces)
+    // Matching CONTEXT/COLLISION.md: normal_force = k_normal * penetration + eta_normal * normal_velocity
+    // Use physics parameters from LinearSpringDashpot instance, not from contact
+    // (Contact may have material properties, but we use the model's parameters)
+    const double f_normal_mag = std::max(0.0,
+        k_normal * penetration_depth +
+        eta_normal * rel_vel_normal
+    );
+
+    // Normal force vector
+    // Matching CONTEXT: Vec3 fN(fNabs * contact.get_normal());
+    const Eigen::Vector3d f_normal = f_normal_mag * normal;
+
+    // Tangential force calculation (full spring-dashpot model)
+    // Matching CONTEXT/COLLISION.md exactly:
+    // auto tangential_force = -k_tangential * tangential_displacement_vec;
+    // tangential_force += -eta_tangential * tangential_velocity;
+    // This enables positive k_tangential for full spring-dashpot friction model
+    Eigen::Vector3d f_tangential = -k_tangential * contact.tangential_displacement
+                                   - eta_tangential * rel_vel_tangential;
+
+    // Apply Coulomb friction limit
+    // Matching reference: regularized_tangential_force_mag = min(tangential_force_mag, mu_static * normal_force)
+    const double f_tangential_mag = f_tangential.norm();
+    const double regularized_f_tangential_mag = std::min(
+        f_tangential_mag,
+        friction * f_normal_mag
+    );
+
+    // Normalize tangential force if magnitude is significant
+    // Matching reference: if (tangential_force_mag > 1e-12) {
+    //     tangential_force *= regularized_tangential_force_mag / tangential_force_mag;
+    // }
+    if (f_tangential_mag > 1e-12) {
+        f_tangential *= regularized_f_tangential_mag / f_tangential_mag;
+    } else {
+        f_tangential = Eigen::Vector3d::Zero();
+    }
+
+    // Net force (to be applied to body1, body2 gets -net_force)
+    // Matching CONTEXT: Vec3 net_force = (fN + fT);
+    return f_normal + f_tangential;
+}
+
+inline bool LinearSpringDashpot::is_penetrating(const collision::Contact& contact) {
+    // Contact is penetrating if distance < 0 (overlap)
+    return contact.distance < 0.0;
+}
+
+} // namespace physics
+} // namespace elasticapp::environment::collision
diff --git a/backend/src/environment/collision/physics/linear_spring_dashpot.h b/backend/src/environment/collision/physics/linear_spring_dashpot.h
new file mode 100644
index 00000000..c23d0e66
--- /dev/null
+++ b/backend/src/environment/collision/physics/linear_spring_dashpot.h
@@ -0,0 +1,126 @@
+#pragma once
+
+#include <Eigen/Dense>
+#include <cstddef>
+#include "../types.h"
+
+namespace elasticapp::environment::collision {
+namespace physics {
+
+/**
+ * Linear spring-dashpot collision physics model.
+ *
+ * Implements the Haff and Werner model for DEM collision response.
+ * Computes normal (repulsive) and tangential (friction) forces based on:
+ * - Normal force: F_n = max(0, k_n * delta + eta_n * v_n)
+ * - Tangential force: F_t = -k_t * d_t - eta_t * v_t (regularized by Coulomb limit)
+ *
+ * where:
+ * - k_n: normal spring constant (stiffness)
+ * - k_t: tangential spring constant (default: 0.0 for velocity-only friction)
+ * - eta_n: normal damping coefficient
+ * - eta_t: tangential damping coefficient
+ * - mu: friction coefficient
+ * - delta: penetration depth
+ * - d_t: tangential displacement (accumulated over time)
+ * - v_n: normal relative velocity
+ * - v_t: tangential relative velocity
+ */
+struct LinearSpringDashpot {
+    // Physics parameters
+    double k_normal;      // Normal spring constant
+    double k_tangential;  // Tangential spring constant
+    double eta_normal;    // Normal damping coefficient
+    double eta_tangential; // Tangential damping coefficient
+    double friction;      // Static friction coefficient
+
+    /**
+     * Constructor with physics parameters (3 parameters - uses eta_normal for tangential).
+     *
+     * @param k_normal Normal spring constant for repulsion
+     * @param eta_normal Normal damping coefficient (also used for tangential)
+     * @param friction Static friction coefficient
+     */
+    LinearSpringDashpot(
+        double k_normal,
+        double eta_normal,
+        double friction
+    ) : k_normal(k_normal),
+        k_tangential(0.0),  // Default: no tangential spring (velocity-only friction)
+        eta_normal(eta_normal),
+        eta_tangential(eta_normal),  // Use same value for tangential
+        friction(friction) {}
+
+    /**
+     * Constructor with physics parameters (4 parameters - explicit tangential damping).
+     *
+     * @param k_normal Normal spring constant for repulsion
+     * @param eta_normal Normal damping coefficient
+     * @param eta_tangential Tangential damping coefficient
+     * @param friction Static friction coefficient
+     */
+    LinearSpringDashpot(
+        double k_normal,
+        double eta_normal,
+        double eta_tangential,
+        double friction
+    ) : k_normal(k_normal),
+        k_tangential(0.0),  // Default: no tangential spring (velocity-only friction)
+        eta_normal(eta_normal),
+        eta_tangential(eta_tangential),
+        friction(friction) {}
+
+    /**
+     * Constructor with physics parameters (5 parameters - explicit tangential spring and damping).
+     *
+     * @param k_normal Normal spring constant for repulsion
+     * @param eta_normal Normal damping coefficient
+     * @param k_tangential Tangential spring constant
+     * @param eta_tangential Tangential damping coefficient
+     * @param friction Static friction coefficient
+     */
+    LinearSpringDashpot(
+        double k_normal,
+        double eta_normal,
+        double k_tangential,
+        double eta_tangential,
+        double friction
+    ) : k_normal(k_normal),
+        k_tangential(k_tangential),
+        eta_normal(eta_normal),
+        eta_tangential(eta_tangential),
+        friction(friction) {}
+
+    /**
+     * Compute collision forces for a contact.
+     *
+     * This method implements the LinearSpringDashpot force model:
+     * 1. Calculates penetration depth
+     * 2. Computes normal force (spring + damping)
+     * 3. Computes tangential force (spring + damping, regularized by Coulomb limit)
+     * 4. Returns the net force vector
+     *
+     * Tangential force formula:
+     * F_t = -k_tangential * tangential_displacement - eta_tangential * tangential_velocity
+     * Then regularized: F_t = min(|F_t|, mu * F_n) * normalize(F_t)
+     *
+     * @param contact The contact information (position, normal, distance, velocities, tangential_displacement, etc.)
+     * @param penetration_depth Output: penetration depth (positive if penetrating)
+     * @return Net force vector to apply to the first body (force on second body is -net_force)
+     */
+    inline Eigen::Vector3d compute_force(
+        const collision::Contact& contact,
+        double& penetration_depth
+    ) const;
+
+    /**
+     * Check if contact is penetrating.
+     *
+     * @param contact The contact information
+     * @return True if bodies are penetrating (distance < 0)
+     */
+    inline static bool is_penetrating(const collision::Contact& contact);
+};
+
+} // namespace physics
+} // namespace elasticapp::environment::collision
diff --git a/backend/src/environment/collision/physics/no_interaction.cpp b/backend/src/environment/collision/physics/no_interaction.cpp
new file mode 100644
index 00000000..977b03a5
--- /dev/null
+++ b/backend/src/environment/collision/physics/no_interaction.cpp
@@ -0,0 +1,17 @@
+#include "no_interaction.h"
+#include "../types.h"
+
+namespace elasticapp::environment::collision {
+namespace physics {
+
+inline Eigen::Vector3d NoInteraction::compute_force(
+    const collision::Contact& contact,
+    double& penetration_depth
+) const {
+    // No interaction - always return zero force
+    penetration_depth = 0.0;
+    return Eigen::Vector3d::Zero();
+}
+
+} // namespace physics
+} // namespace elasticapp::environment::collision
diff --git a/backend/src/environment/collision/physics/no_interaction.h b/backend/src/environment/collision/physics/no_interaction.h
new file mode 100644
index 00000000..fff09ae1
--- /dev/null
+++ b/backend/src/environment/collision/physics/no_interaction.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <Eigen/Dense>
+#include <cstddef>
+#include "../types.h"
+
+namespace elasticapp::environment::collision {
+namespace physics {
+
+/**
+ * NoInteraction physics model for testing purposes.
+ *
+ * This model returns zero force for all contacts, allowing collision detection
+ * to be tested without applying any forces. Useful for:
+ * - Testing collision detection algorithms
+ * - Validating contact geometry
+ * - Debugging collision pipeline without force effects
+ */
+struct NoInteraction {
+    /**
+     * Default constructor (no parameters needed).
+     */
+    NoInteraction() = default;
+
+    /**
+     * Compute collision forces for a contact.
+     *
+     * Always returns zero force regardless of contact state.
+     *
+     * @param contact The contact information (unused, but required for interface compatibility)
+     * @param penetration_depth Output: penetration depth (set to 0.0)
+     * @return Zero force vector
+     */
+    inline Eigen::Vector3d compute_force(
+        const collision::Contact& contact,
+        double& penetration_depth
+    ) const;
+};
+
+} // namespace physics
+} // namespace elasticapp::environment::collision
diff --git a/backend/src/environment/collision/types.h b/backend/src/environment/collision/types.h
new file mode 100644
index 00000000..c68625e8
--- /dev/null
+++ b/backend/src/environment/collision/types.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#include <Eigen/Dense>
+#include <cstddef>
+
+namespace elasticapp::environment {
+namespace collision {
+
+/**
+ * Contact information between two nodes.
+ *
+ * Represents a collision contact point between two nodes (spheres)
+ * in the collision detection system.
+ */
+struct Contact {
+    // Node indices in the block
+    std::size_t node1_idx;  // Index of first node
+    std::size_t node2_idx;  // Index of second node
+
+    // Contact geometry
+    Eigen::Vector3d position;  // Contact point position (world coordinates)
+    Eigen::Vector3d normal;     // Contact normal (unit vector, pointing from node2 to node1)
+    double distance;            // Signed distance: negative when penetrating (overlap)
+
+    // Node velocities at contact point
+    Eigen::Vector3d velocity1;  // Velocity of node1
+    Eigen::Vector3d velocity2;  // Velocity of node2
+
+    // Physics parameters (from material properties)
+    double stiffness;           // Contact stiffness (k_normal)
+    double normal_damping;      // Normal damping coefficient (eta_normal)
+    double tangential_damping;  // Tangential damping coefficient (eta_tangential)
+    double friction;            // Static friction coefficient (mu)
+
+    // Tangential displacement tracking (for spring-dashpot model)
+    Eigen::Vector3d tangential_displacement;  // Accumulated tangential displacement
+
+    // Contact index within batch
+    std::size_t index;  // Index within batch (default: 0)
+
+    /**
+     * Default constructor.
+     */
+    Contact() : node1_idx(0), node2_idx(0),
+                position(Eigen::Vector3d::Zero()),
+                normal(Eigen::Vector3d::Zero()),
+                distance(0.0),
+                velocity1(Eigen::Vector3d::Zero()),
+                velocity2(Eigen::Vector3d::Zero()),
+                stiffness(1.0),
+                normal_damping(0.1),
+                tangential_damping(0.1),
+                friction(0.5),
+                tangential_displacement(Eigen::Vector3d::Zero()),
+                index(0) {}
+
+    /**
+     * Constructor with all parameters.
+     */
+    Contact(
+        std::size_t n1, std::size_t n2,
+        const Eigen::Vector3d& pos,
+        const Eigen::Vector3d& n,
+        double dist,
+        const Eigen::Vector3d& v1,
+        const Eigen::Vector3d& v2,
+        double k, double eta_n, double eta_t, double mu
+    ) : node1_idx(n1), node2_idx(n2),
+        position(pos), normal(n), distance(dist),
+        velocity1(v1), velocity2(v2),
+        stiffness(k), normal_damping(eta_n),
+        tangential_damping(eta_t), friction(mu),
+        tangential_displacement(Eigen::Vector3d::Zero()),
+        index(0) {}
+
+    /**
+     * Set the contact index within its batch.
+     *
+     * @param idx The index to set
+     */
+    void set_index(std::size_t idx) {
+        index = idx;
+    }
+};
+
+} // namespace collision
+} // namespace elasticapp::environment
diff --git a/backend/src/math/eigen_detail/eigen_calculus.hpp b/backend/src/math/eigen_detail/eigen_calculus.hpp
new file mode 100644
index 00000000..e87c6e9e
--- /dev/null
+++ b/backend/src/math/eigen_detail/eigen_calculus.hpp
@@ -0,0 +1,104 @@
+#pragma once
+
+#include <Eigen/Dense>
+#include "traits.h"
+#include <cassert>
+#include <omp.h>
+
+namespace elasticapp {
+
+//**************************************************************************
+/*!\brief Simple 2-point difference rule with zero at end points.
+//
+// Discrete 2-point difference in elasticapp of a function f:[a,b]-> R, i.e
+// D f[a,b] -> df[a,b] where f satisfies the conditions
+// f(a) = f(b) = 0.0. Operates from rod's elemental space to nodal space.
+//
+// \param[out] out_matrix(3, n_nodes) difference values
+// \param[in] in_matrix(3, n_elems) vector batch
+// where n_nodes = n_elems + 1
+*/
+template <typename MT1, typename MT2>
+inline void two_point_difference_kernel(MT1& out_matrix, const MT2& in_matrix) {
+    constexpr std::size_t dimension(3UL);
+    assert(in_matrix.rows() == dimension);
+    assert(out_matrix.rows() == dimension);
+    const std::size_t n_elems = in_matrix.cols();
+    const std::size_t n_nodes = n_elems + 1UL;
+    assert(out_matrix.cols() == n_nodes);
+
+    // First column: f(a) = in_matrix[:, 0]
+    out_matrix.col(0) = in_matrix.col(0);
+
+    // Last column: f(b) = -in_matrix[:, n_elems-1]
+    out_matrix.col(n_nodes - 1) = -in_matrix.col(n_elems - 1);
+
+    // Middle columns: difference between consecutive elements
+    if (n_elems > 1) {
+        out_matrix.block(0, 1, dimension, n_elems - 1) =
+            in_matrix.block(0, 1, dimension, n_elems - 1) -
+            in_matrix.block(0, 0, dimension, n_elems - 1);
+    }
+}
+
+//**************************************************************************
+/*!\brief Simple trapezoidal quadrature rule with zero at end points.
+//
+// Discrete integral of a function in elasticapp
+// f : [a,b] -> R, ∫[a,b] f -> R
+// where f satisfies the conditions f(a) = f(b) = 0.0.
+// Operates from rod's elemental space to nodal space.
+//
+// \param[out] out_matrix(3, n_nodes) quadrature values
+// \param[in] in_matrix(3, n_elems) vector batch
+// where n_nodes = n_elems + 1
+*/
+template <typename MT1, typename MT2>
+inline void quadrature_kernel(MT1& out_matrix, const MT2& in_matrix) {
+    constexpr std::size_t dimension(3UL);
+    const std::size_t n_elems = in_matrix.cols();
+    assert(in_matrix.rows() == dimension);
+    assert(out_matrix.rows() == dimension);
+    const std::size_t n_nodes = n_elems + 1UL;
+    assert(out_matrix.cols() == n_nodes);
+
+    using ValueType = typename MT1::Scalar;
+
+    // First column: 0.5 * in_matrix[:, 0]
+    out_matrix.col(0) = ValueType(0.5) * in_matrix.col(0);
+
+    // Last column: 0.5 * in_matrix[:, n_elems-1]
+    out_matrix.col(n_nodes - 1) = ValueType(0.5) * in_matrix.col(n_elems - 1);
+
+    // Middle columns: 0.5 * (in_matrix[:, k] + in_matrix[:, k-1])
+    out_matrix.block(0, 1, dimension, n_elems - 1) =
+        ValueType(0.5) *
+        (in_matrix.block(0, 1, dimension, n_elems - 1) +
+            in_matrix.block(0, 0, dimension, n_elems - 1));
+}
+
+//**************************************************************************
+/*!\brief Average between consecutive elements.
+//
+// Computes average of consecutive elements: average[k] = 0.5 * (vector[k+1] + vector[k])
+//
+// \param[in] in_vector(1, n_elems) or (n_elems,) scalar array
+//
+// \return output_vector(1, n_voronoi) where n_voronoi = n_elems - 1
+*/
+template <typename MT>
+auto average_kernel(const MT& in_vector) {
+    const std::size_t n_elems = in_vector.cols();
+    const std::size_t n_voronoi = n_elems > 0 ? n_elems - 1UL : 0UL;
+
+    MatrixType result(1, n_voronoi);
+
+    #pragma omp parallel for if(!omp_in_parallel())
+    for (std::size_t k = 0; k < n_voronoi; ++k) {
+        result(0, k) = 0.5 * (in_vector(0, k + 1) + in_vector(0, k));
+    }
+
+    return result;
+}
+
+} // namespace elasticapp
diff --git a/backend/src/math/eigen_detail/eigen_linear_algebra.hpp b/backend/src/math/eigen_detail/eigen_linear_algebra.hpp
new file mode 100644
index 00000000..3b8cce30
--- /dev/null
+++ b/backend/src/math/eigen_detail/eigen_linear_algebra.hpp
@@ -0,0 +1,197 @@
+#pragma once
+
+#include "traits.h"
+#include <Eigen/Dense>
+#include <cassert>
+#include <cmath>
+#include <omp.h>
+
+namespace elasticapp {
+
+//**************************************************************************
+/*!\brief Vector Difference
+//
+// \param[in] in_vector(3, n_nodes)
+//
+// \return output_vector(3, n_elems) where n_elems = n_nodes - 1
+*/
+template<typename V>
+inline auto difference_kernel(const V& in_vector) {
+    constexpr std::size_t dimension(3UL);
+    assert(in_vector.rows() == dimension);
+    const std::size_t n_nodes = in_vector.cols();
+    const std::size_t n_elems = n_nodes - 1UL;
+
+    MatrixType result(dimension, n_elems);
+    result = in_vector.block(0, 1, dimension, n_elems) -
+             in_vector.block(0, 0, dimension, n_elems);
+    return result;
+}
+
+//**************************************************************************
+/*!\brief Batchwise matrix-vector product.
+//
+// Computes: matvec_batch{ik} = matrix_batch{ijk} * vector_batch{jk}
+//
+// \param[out] matvec_batch(3, n_elems) matrix-vector product
+// \param[in] matrix_batch(3, 3, n_elems)
+// \param[in] vector_batch(3, n_elems)
+*/
+template <typename MT1, typename TT, typename MT2>
+inline void batch_matvec(MT1& matvec_batch,
+                  const TT& matrix_batch,
+                  const MT2& vector_batch) {
+    constexpr std::size_t dimension(3UL);
+    const std::size_t n_elems = matrix_batch.cols();
+
+    assert(matvec_batch.rows() == dimension);
+    assert(matvec_batch.cols() == n_elems);
+    assert(matrix_batch.pages() == dimension);
+    assert(matrix_batch.rows() == dimension);
+    assert(vector_batch.rows() == dimension);
+    assert(vector_batch.cols() == n_elems);
+
+    #pragma omp parallel for if(!omp_in_parallel())
+    for (std::size_t i = 0; i < dimension; ++i) {
+        for (std::size_t k = 0; k < n_elems; ++k) {
+            double sum = 0.0;
+            for (std::size_t j = 0; j < dimension; ++j) {
+                sum += matrix_batch(i, j, k) * vector_batch(j, k);
+            }
+            matvec_batch(i, k) = sum;
+        }
+    }
+}
+
+//**************************************************************************
+/*!\brief Batchwise matrix-matrix product.
+//
+// Computes: matmul_batch{ilk} = first_matrix_batch{ijk} * second_matrix_batch{jlk}
+//
+// \param[out] matmul_batch(3, 3, n_elems) matrix-matrix product
+// \param[in] first_matrix_batch(3, 3, n_elems)
+// \param[in] second_matrix_batch(3, 3, n_elems)
+*/
+template <typename TT1, typename TT2, typename TT3>
+inline void batch_matmul(TT1& matmul_batch,
+                  const TT2& first_matrix_batch,
+                  const TT3& second_matrix_batch) {
+    constexpr std::size_t dimension(3UL);
+    const std::size_t n_elems = first_matrix_batch.cols();
+
+    assert(matmul_batch.pages() == dimension);
+    assert(matmul_batch.rows() == dimension);
+    assert(matmul_batch.cols() == n_elems);
+    assert(first_matrix_batch.pages() == dimension);
+    assert(first_matrix_batch.rows() == dimension);
+    assert(second_matrix_batch.pages() == dimension);
+    assert(second_matrix_batch.rows() == dimension);
+    assert(second_matrix_batch.cols() == n_elems);
+
+    for (std::size_t i = 0; i < dimension; ++i) {
+        for (std::size_t l = 0; l < dimension; ++l) {
+            for (std::size_t k = 0; k < n_elems; ++k) {
+                double sum = 0.0;
+                for (std::size_t j = 0; j < dimension; ++j) {
+                    sum += first_matrix_batch(i, j, k) * second_matrix_batch(j, l, k);
+                }
+                matmul_batch(i, l, k) = sum;
+            }
+        }
+    }
+}
+
+//**************************************************************************
+/*!\brief Batchwise vector-vector cross product.
+//
+// Computes cross product for each column: cross_batch{ik} = first_vector_batch{ik} x second_vector_batch{ik}
+//
+// \param[out] cross_batch(3, n_elems) vector-vector cross product
+// \param[in] first_vector_batch(3, n_elems)
+// \param[in] second_vector_batch(3, n_elems)
+*/
+template <typename MT1, typename MT2, typename MT3>
+inline void batch_cross(MT1& cross_batch,
+                 const MT2& first_vector_batch,
+                 const MT3& second_vector_batch) {
+    constexpr std::size_t dimension(3UL);
+    const std::size_t n_elems = first_vector_batch.cols();
+
+    assert(cross_batch.rows() == dimension);
+    assert(cross_batch.cols() == n_elems);
+    assert(first_vector_batch.rows() == dimension);
+    assert(second_vector_batch.rows() == dimension);
+    assert(second_vector_batch.cols() == n_elems);
+
+    #pragma omp parallel for if(!omp_in_parallel())
+    for (std::size_t k = 0; k < n_elems; ++k) {
+        cross_batch(0, k) = first_vector_batch(1, k) * second_vector_batch(2, k) -
+                           first_vector_batch(2, k) * second_vector_batch(1, k);
+        cross_batch(1, k) = first_vector_batch(2, k) * second_vector_batch(0, k) -
+                           first_vector_batch(0, k) * second_vector_batch(2, k);
+        cross_batch(2, k) = first_vector_batch(0, k) * second_vector_batch(1, k) -
+                           first_vector_batch(1, k) * second_vector_batch(0, k);
+    }
+}
+
+//**************************************************************************
+/*!\brief Batchwise vector-vector dot product.
+//
+// Computes: dot_batch{j} = first_vector_batch{ij} * second_vector_batch{ij}
+//
+// \param[in] first_vector_batch(3, n_elems)
+// \param[in] second_vector_batch(3, n_elems)
+//
+// \return dot_batch(n_elems)
+*/
+template <typename MT1, typename MT2>
+inline auto batch_dot(const MT1& first_vector_batch,
+               const MT2& second_vector_batch) {
+    constexpr std::size_t dimension(3UL);
+    const std::size_t n_elems = first_vector_batch.cols();
+
+    assert(first_vector_batch.rows() == dimension);
+    assert(second_vector_batch.rows() == dimension);
+    assert(second_vector_batch.cols() == n_elems);
+
+    VectorType result(n_elems);
+    #pragma omp parallel for if(!omp_in_parallel())
+    for (std::size_t k = 0; k < n_elems; ++k) {
+        double sum = 0.0;
+        for (std::size_t i = 0; i < dimension; ++i) {
+            sum += first_vector_batch(i, k) * second_vector_batch(i, k);
+        }
+        result(k) = sum;
+    }
+    return result;
+}
+
+//**************************************************************************
+/*!\brief Batchwise vector L2 norm.
+//
+// Computes: norm_batch{j} = (vector_batch{ij} * vector_batch{ij})^0.5
+//
+// \param[in] vector_batch(3, n_elems)
+//
+// \return norm_batch(n_elems)
+*/
+template <typename MT>
+inline auto batch_norm(const MT& vector_batch) {
+    constexpr std::size_t dimension(3UL);
+    assert(vector_batch.rows() == dimension);
+
+    const std::size_t n_elems = vector_batch.cols();
+    VectorType result(n_elems);
+
+    #pragma omp parallel for if(!omp_in_parallel())
+    for (std::size_t k = 0; k < n_elems; ++k) {
+        double sum = 0.0;
+        for (std::size_t i = 0; i < dimension; ++i) {
+            sum += vector_batch(i, k) * vector_batch(i, k);
+        }
+        result(k) = std::sqrt(sum);
+    }
+    return result;
+}
+
+}  // namespace elasticapp
diff --git a/backend/src/math/eigen_detail/eigen_rotation.hpp b/backend/src/math/eigen_detail/eigen_rotation.hpp
new file mode 100644
index 00000000..6fba8817
--- /dev/null
+++ b/backend/src/math/eigen_detail/eigen_rotation.hpp
@@ -0,0 +1,118 @@
+#pragma once
+
+#include "traits.h"
+#include <Eigen/Dense>
+#include <cassert>
+#include <cmath>
+#include <omp.h>
+
+namespace elasticapp {
+
+//**************************************************************************
+/*!\brief Batchwise matrix logarithmic operator (inverse rotation).
+//
+// Batchwise for rotation matrix R computes the corresponding rotation
+// axis vector {theta (u)} using the matrix log() operator.
+//
+// \param[out] rot_axis_vector_batch(3, n_elems) rotation axis vector batch
+// \param[in] rot_matrix_batch(3, 3, n_elems) rotation matrix batch
+*/
+template <typename MT, typename TT>
+void batch_inv_rotate(MT& rot_axis_vector_batch, const TT& rot_matrix_batch) {
+    constexpr std::size_t dimension(3UL);
+    const std::size_t n_elems = rot_matrix_batch.cols();
+    using ValueType = double;
+
+    assert(rot_matrix_batch.pages() == dimension);
+    assert(rot_matrix_batch.rows() == dimension);
+    assert(rot_axis_vector_batch.rows() == dimension);
+    assert(rot_axis_vector_batch.cols() == n_elems);
+
+    #pragma omp parallel for if(!omp_in_parallel())
+    for (std::size_t k = 0; k < n_elems; ++k) {
+        // Compute trace: tr(R) = R[0,0] + R[1,1] + R[2,2]
+        double trace = rot_matrix_batch(0, 0, k) +
+                      rot_matrix_batch(1, 1, k) +
+                      rot_matrix_batch(2, 2, k);
+
+        // Clip trace to [-1, 3] for numerical stability
+        trace = std::max(-1.0, std::min(3.0, trace));
+
+        // theta = acos((tr(R) - 1) / 2)
+        double theta = std::acos(0.5 * (trace - 1.0) - 1e-12);
+
+        // Compute R - R^T (skew-symmetric part)
+        // Extract vector from skew-symmetric matrix
+        rot_axis_vector_batch(0, k) = rot_matrix_batch(2, 1, k) - rot_matrix_batch(1, 2, k);
+        rot_axis_vector_batch(1, k) = rot_matrix_batch(0, 2, k) - rot_matrix_batch(2, 0, k);
+        rot_axis_vector_batch(2, k) = rot_matrix_batch(1, 0, k) - rot_matrix_batch(0, 1, k);
+
+        // theta (u) = -theta * inv_skew([R - RT]) / (2 * sin(theta))
+        double sin_theta = std::sin(theta) + 1e-14;
+        double magnitude = -0.5 * theta / sin_theta;
+
+        rot_axis_vector_batch(0, k) *= magnitude;
+        rot_axis_vector_batch(1, k) *= magnitude;
+        rot_axis_vector_batch(2, k) *= magnitude;
+    }
+}
+
+//**************************************************************************
+/*!\brief Batchwise matrix exponential operator (Rodrigues formula).
+//
+// Batchwise for rotation axis vector {theta u} computes the corresponding
+// rotation matrix R using the matrix exp() operator (Rodrigues formula):
+// R = I + sin(theta) * U + (1 - cos(theta)) * U^2
+//
+// \param[out] rot_matrix_batch(3, 3, n_elems) rotation matrix batch
+// \param[in] rot_axis_vector_batch(3, n_elems) rotation axis vector batch
+*/
+template <typename TT, typename MT>
+void exp_batch(TT& rot_matrix_batch, const MT& rot_axis_vector_batch) {
+    constexpr std::size_t dimension(3UL);
+    using ValueType = double;
+
+    assert(rot_axis_vector_batch.rows() == dimension);
+    assert(rot_matrix_batch.pages() == dimension);
+    assert(rot_matrix_batch.rows() == dimension);
+
+    const std::size_t n_elems = rot_axis_vector_batch.cols();
+
+    #pragma omp parallel for if(!omp_in_parallel())
+    for (std::size_t k = 0; k < n_elems; ++k) {
+        // Compute theta = ||axis||
+        double v0 = rot_axis_vector_batch(0, k);
+        double v1 = rot_axis_vector_batch(1, k);
+        double v2 = rot_axis_vector_batch(2, k);
+        double theta = std::sqrt(v0 * v0 + v1 * v1 + v2 * v2);
+
+        // Normalize axis
+        double norm = theta + 1e-14;
+        v0 /= norm;
+        v1 /= norm;
+        v2 /= norm;
+
+        // Precompute sin and cos
+        double sin_theta = std::sin(theta);
+        double cos_theta = std::cos(theta);
+        double one_minus_cos = 1.0 - cos_theta;
+
+        // Build rotation matrix using Rodrigues formula
+        // Diagonal elements: R[ii] = cos(theta) + (1 - cos(theta)) * u[i]^2
+        rot_matrix_batch(0, 0, k) = cos_theta + one_minus_cos * v0 * v0;
+        rot_matrix_batch(1, 1, k) = cos_theta + one_minus_cos * v1 * v1;
+        rot_matrix_batch(2, 2, k) = cos_theta + one_minus_cos * v2 * v2;
+
+        // Off-diagonal elements
+        rot_matrix_batch(0, 1, k) = one_minus_cos * v0 * v1 + sin_theta * v2;
+        rot_matrix_batch(1, 0, k) = one_minus_cos * v0 * v1 - sin_theta * v2;
+
+        rot_matrix_batch(0, 2, k) = one_minus_cos * v0 * v2 - sin_theta * v1;
+        rot_matrix_batch(2, 0, k) = one_minus_cos * v0 * v2 + sin_theta * v1;
+
+        rot_matrix_batch(1, 2, k) = one_minus_cos * v1 * v2 + sin_theta * v0;
+        rot_matrix_batch(2, 1, k) = one_minus_cos * v1 * v2 - sin_theta * v0;
+    }
+}
+
+} // namespace elasticapp
diff --git a/backend/src/math/node_element_mapping.cpp b/backend/src/math/node_element_mapping.cpp
new file mode 100644
index 00000000..7852a0c2
--- /dev/null
+++ b/backend/src/math/node_element_mapping.cpp
@@ -0,0 +1,120 @@
+#include "node_element_mapping.h"
+#include <stdexcept>
+#include <algorithm>
+
+namespace elasticapp {
+namespace math {
+
+std::size_t node_to_element_index(
+    std::size_t node_idx,
+    const std::vector<std::size_t>& rod_start_indices,
+    const std::vector<std::size_t>& rod_n_elems
+) {
+    if (rod_start_indices.empty() || rod_n_elems.empty()) {
+        throw std::invalid_argument("rod_start_indices and rod_n_elems cannot be empty");
+    }
+
+    // Find which rod this node belongs to
+    std::size_t rod_index = 0;
+    for (std::size_t i = 0; i < rod_start_indices.size(); ++i) {
+        if (i + 1 < rod_start_indices.size()) {
+            // Not the last rod
+            if (node_idx >= rod_start_indices[i] && node_idx < rod_start_indices[i + 1]) {
+                rod_index = i;
+                break;
+            }
+        } else {
+            // Last rod
+            if (node_idx >= rod_start_indices[i]) {
+                rod_index = i;
+                break;
+            }
+        }
+    }
+
+    // Check if node_idx is valid for the found rod
+    std::size_t rod_start = rod_start_indices[rod_index];
+    std::size_t rod_n_nodes = rod_n_elems[rod_index] + 1;  // n_elements + 1 nodes
+    std::size_t local_node_idx = node_idx - rod_start;
+
+    if (local_node_idx >= rod_n_nodes) {
+        throw std::out_of_range("node_idx is beyond the last node of its rod");
+    }
+
+    // Map local node index to local element index
+    // Strategy:
+    // - Node 0 -> Element 0
+    // - Node i (1 to n_elements-1) -> Element i-1
+    // - Node n_elements (last) -> Element n_elements-1
+    std::size_t local_elem_idx;
+    if (local_node_idx == 0) {
+        local_elem_idx = 0;
+    } else if (local_node_idx < rod_n_elems[rod_index]) {
+        local_elem_idx = local_node_idx - 1;
+    } else {
+        // Last node
+        local_elem_idx = rod_n_elems[rod_index] - 1;
+    }
+
+    // Convert to global element index
+    // Need to compute element start indices
+    std::size_t elem_start = 0;
+    for (std::size_t i = 0; i < rod_index; ++i) {
+        elem_start += rod_n_elems[i];
+    }
+
+    return elem_start + local_elem_idx;
+}
+
+std::pair<std::size_t, std::size_t> node_to_rod_and_element(
+    std::size_t node_idx,
+    const std::vector<std::size_t>& rod_start_indices,
+    const std::vector<std::size_t>& rod_n_elems
+) {
+    if (rod_start_indices.empty() || rod_n_elems.empty()) {
+        throw std::invalid_argument("rod_start_indices and rod_n_elems cannot be empty");
+    }
+
+    // Find which rod this node belongs to
+    std::size_t rod_index = 0;
+    for (std::size_t i = 0; i < rod_start_indices.size(); ++i) {
+        if (i + 1 < rod_start_indices.size()) {
+            // Not the last rod
+            if (node_idx >= rod_start_indices[i] && node_idx < rod_start_indices[i + 1]) {
+                rod_index = i;
+                break;
+            }
+        } else {
+            // Last rod
+            if (node_idx >= rod_start_indices[i]) {
+                rod_index = i;
+                break;
+            }
+        }
+    }
+
+    // Check if node_idx is valid for the found rod
+    std::size_t rod_start = rod_start_indices[rod_index];
+    std::size_t rod_n_nodes = rod_n_elems[rod_index] + 1;  // n_elements + 1 nodes
+    std::size_t local_node_idx = node_idx - rod_start;
+
+    if (local_node_idx >= rod_n_nodes) {
+        throw std::out_of_range("node_idx is beyond the last node of its rod");
+    }
+
+    // Map local node index to local element index
+    std::size_t local_elem_idx;
+    if (local_node_idx == 0) {
+        local_elem_idx = 0;
+    } else if (local_node_idx < rod_n_elems[rod_index]) {
+        local_elem_idx = local_node_idx - 1;
+    } else {
+        // Last node
+        local_elem_idx = rod_n_elems[rod_index] - 1;
+    }
+
+    return std::make_pair(rod_index, local_elem_idx);
+}
+
+} // namespace math
+} // namespace elasticapp
diff --git a/backend/src/math/node_element_mapping.h b/backend/src/math/node_element_mapping.h
new file mode 100644
index 00000000..37ada32e
--- /dev/null
+++ b/backend/src/math/node_element_mapping.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+namespace elasticapp {
+namespace math {
+
+/**
+ * Map a global node index to the corresponding element index in a Block.
+ *
+ * For a rod with n_elements, there are n_elements+1 nodes.
+ * The mapping strategy:
+ * - For interior nodes (1 to n_elements-1): use the element before the node (element = node - 1)
+ * - For the first node (0): use element 0
+ * - For the last node (n_elements): use element n_elements-1
+ *
+ * This function handles multiple rods in a Block by using rod_start_indices
+ * to determine which rod a node belongs to.
+ *
+ * @param node_idx Global node index in the Block
+ * @param rod_start_indices Vector of starting node indices for each rod
+ * @param rod_n_elems Vector of number of elements for each rod
+ * @return Global element index corresponding to the node
+ * @throws std::out_of_range if node_idx is invalid
+ */
+std::size_t node_to_element_index(
+    std::size_t node_idx,
+    const std::vector<std::size_t>& rod_start_indices,
+    const std::vector<std::size_t>& rod_n_elems
+);
+
+/**
+ * Map a global node index to the corresponding element index within its rod.
+ *
+ * This is a helper function that first finds which rod the node belongs to,
+ * then maps it to the element index within that rod.
+ *
+ * @param node_idx Global node index in the Block
+ * @param rod_start_indices Vector of starting node indices for each rod
+ * @param rod_n_elems Vector of number of elements for each rod
+ * @return Pair of (rod_index, local_element_index)
+ * @throws std::out_of_range if node_idx is invalid
+ */
+std::pair<std::size_t, std::size_t> node_to_rod_and_element(
+    std::size_t node_idx,
+    const std::vector<std::size_t>& rod_start_indices,
+    const std::vector<std::size_t>& rod_n_elems
+);
+
+} // namespace math
+} // namespace elasticapp
diff --git a/backend/src/operations.h b/backend/src/operations.h
new file mode 100644
index 00000000..2b4d6fb8
--- /dev/null
+++ b/backend/src/operations.h
@@ -0,0 +1,379 @@
+#pragma once
+
+#include <cmath>
+#include <cstddef>
+#include "cosserat_rod_system.h"
+#include "cosserat_equations.h"
+#include "traits.h"
+
+#include <omp.h>
+
+namespace elasticapp {
+
+// Thread management utilities
+// Set the number of OpenMP threads to use
+// This affects all subsequent parallel regions
+// Args:
+//   num_threads: Number of threads to use (0 = use OpenMP default, typically all CPU cores)
+inline void set_num_threads(int num_threads) {
+    if (num_threads > 0) {
+        omp_set_num_threads(num_threads);
+    }
+    // If num_threads is 0 or negative, OpenMP will use its default
+}
+
+// Get the current number of threads in the current parallel region
+// Returns 1 if called outside a parallel region
+inline int get_num_threads() {
+    return omp_get_num_threads();
+}
+
+// Get the maximum number of threads that can be used
+inline int get_max_threads() {
+    return omp_get_max_threads();
+}
+
+// Get the current thread number (0 to num_threads-1)
+// Returns 0 if called outside a parallel region
+inline int get_thread_num() {
+    return omp_get_thread_num();
+}
+
+// Default empty operations class using CRTP pattern
+// This class can be extended with operations that work on the derived Block type
+//
+// Example usage with custom operations:
+//   template<typename Derived>
+//   class MyOperations {
+//   public:
+//       void my_operation() {
+//           auto& block = static_cast<Derived&>(*this);
+//           // Access block members and perform operations
+//       }
+//   };
+//
+//   using MyBlock = Block<CosseratRodSystem, MyOperations>;
+template<typename Derived>
+class DefaultOperations {
+public:
+    // Access to the derived class
+    Derived& derived() { return static_cast<Derived&>(*this); }
+    const Derived& derived() const { return static_cast<const Derived&>(*this); }
+
+protected:
+    // Protected constructor to prevent direct instantiation
+    DefaultOperations() = default;
+    ~DefaultOperations() = default;
+
+    // Prevent copying/moving (can be enabled in derived class if needed)
+    DefaultOperations(const DefaultOperations&) = default;
+    DefaultOperations(DefaultOperations&&) = default;
+    DefaultOperations& operator=(const DefaultOperations&) = default;
+    DefaultOperations& operator=(DefaultOperations&&) = default;
+};
+
+// Alias for backward compatibility
+template<typename Derived>
+using Operations = DefaultOperations<Derived>;
+
+// CosseratRodOperations class for Cosserat rod-specific operations
+// This class provides operations for computing forces, torques, accelerations, etc.
+template<typename Derived>
+class CosseratRodOperations {
+public:
+    // Compute internal forces and torques
+    inline void compute_internal_forces_and_torques(double time) {
+        (void)time;  // Suppress unused parameter warning
+        auto& block = static_cast<Derived&>(*this);
+        // Compute internal forces and torques separately
+        compute_geometry_from_state(block);
+        compute_all_dilatations(block);
+        compute_shear_stretch_strains(block);
+        compute_internal_shear_stretch_stresses_from_model(block);
+        compute_internal_forces(block);
+
+        compute_bending_twist_strains(block);
+        compute_internal_bending_twist_stresses_from_model(block);
+        compute_dilatation_rate(block);
+        compute_internal_torques(block);
+    }
+
+    // Compute strains
+    inline void compute_strains(double time) {
+        (void)time;  // Suppress unused parameter warning
+        auto& block = static_cast<Derived&>(*this);
+        // Compute internal forces and torques separately
+        compute_geometry_from_state(block);
+        compute_all_dilatations(block);
+        compute_shear_stretch_strains(block);
+        compute_bending_twist_strains(block);
+    }
+
+    // Update accelerations based on forces
+    inline void update_accelerations(double time) {
+        (void)time;  // Suppress unused parameter warning
+        auto& block = static_cast<Derived&>(*this);
+
+        // Get all required variables
+        auto&& acceleration = block.template get<system::cosserat_rod::Acceleration>();
+        auto&& internal_forces = block.template get<system::cosserat_rod::InternalForces>();
+        auto&& external_forces = block.template get<system::cosserat_rod::ExternalForces>();
+        auto&& mass = block.template get<system::cosserat_rod::Mass>();
+        auto&& alpha = block.template get<system::cosserat_rod::Alpha>();
+        auto&& inv_mass_second_moment_of_inertia = block.template get<system::cosserat_rod::InvMassSecondMomentOfInertia>();
+        auto&& internal_torques = block.template get<system::cosserat_rod::InternalTorques>();
+        auto&& external_torques = block.template get<system::cosserat_rod::ExternalTorques>();
+        auto&& dilatation = block.template get<system::cosserat_rod::Dilatation>();
+
+        // Update translational acceleration: a = (F_internal + F_external) / m
+        // acceleration is OnNode, Vector (3 rows, n_nodes cols)
+        const IndexType n_nodes = acceleration.cols();
+        for (IndexType i = 0; i < 3; ++i) {
+            #pragma omp parallel for simd schedule(static)
+            for (IndexType k = 0; k < n_nodes; ++k) {
+                acceleration(i, k) = (internal_forces(i, k) + external_forces(i, k)) / mass(0, k);
+            }
+        }
+
+        // Update angular acceleration: alpha = inv_J * (tau_internal + tau_external) * dilatation
+        // First zero out alpha
+        alpha.setZero();
+
+        // inv_mass_second_moment_of_inertia is OnElement, Matrix (9 rows, n_elems cols)
+        // Stored as 9xN where each column is a flattened 3x3 matrix: [I00, I01, I02, I10, I11, I12, I20, I21, I22]
+        // alpha is OnElement, Vector (3 rows, n_elems cols)
+        const IndexType n_elems = alpha.cols();
+        for (IndexType i = 0; i < 3; ++i) {
+            for (IndexType j = 0; j < 3; ++j) {
+                #pragma omp parallel for simd schedule(static)
+                for (IndexType k = 0; k < n_elems; ++k) {
+                    // Map 3x3 matrix index (i, j) to flattened 9x1 index: i*3 + j
+                    IndexType inv_J_idx = i * 3 + j;
+                    alpha(i, k) += inv_mass_second_moment_of_inertia(inv_J_idx, k)
+                                 * (internal_torques(j, k) + external_torques(j, k))
+                                 * dilatation(0, k);
+                }
+            }
+        }
+    }
+
+    // Zero out external forces and torques
+    // time parameter is included to match Python signature, but not used in implementation
+    inline void zeroed_out_external_forces_and_torques(double time) {
+        (void)time;  // Suppress unused parameter warning
+
+        auto& block = static_cast<Derived&>(*this);
+        auto&& external_forces = block.template get<system::cosserat_rod::ExternalForces>();
+        auto&& external_torques = block.template get<system::cosserat_rod::ExternalTorques>();
+
+        external_forces.setZero();
+        external_torques.setZero();
+        return;
+
+        // Zero out all external forces (OnNode, Vector: 3 rows, n_nodes columns)
+        // Use explicit loop to ensure values are set (matching Python implementation)
+        const IndexType n_nodes = external_forces.cols();
+        for (IndexType i = 0; i < 3; ++i) {
+            #pragma omp parallel for simd schedule(static)
+            for (IndexType k = 0; k < n_nodes; ++k) {
+                external_forces(i, k) = 0.0;
+            }
+        }
+
+        // Zero out all external torques (OnElement, Vector: 3 rows, n_elems columns)
+        const IndexType n_elems = external_torques.cols();
+        for (IndexType i = 0; i < 3; ++i) {
+            #pragma omp parallel for simd schedule(static)
+            for (IndexType k = 0; k < n_elems; ++k) {
+                external_torques(i, k) = 0.0;
+            }
+        }
+    }
+
+    // Update kinematics (position, director) using velocity and omega
+    // Equivalent to Python: update_kinematics(time, prefac)
+    inline void update_kinematics(double prefac) {
+        auto& block = static_cast<Derived&>(*this);
+
+        // Get variable views from block using accessible type names
+        auto&& position = block.template get<system::cosserat_rod::Position>();
+        auto&& velocity = block.template get<system::cosserat_rod::Velocity>();
+        auto&& director = block.template get<system::cosserat_rod::Director>();
+        auto&& omega = block.template get<system::cosserat_rod::Omega>();
+
+        // Update position: x += prefac * v
+        // position is (3, n_nodes), velocity is (3, n_nodes)
+        const IndexType n_nodes = position.cols();
+        for (IndexType i = 0; i < 3; ++i) {
+            #pragma omp parallel for simd schedule(static) // num_threads(2)
+            for (IndexType k = 0; k < n_nodes; ++k) {
+                position(i, k) += prefac * velocity(i, k);
+            }
+        }
+        // position += prefac * velocity;
+
+        // Update director using rotation matrix from omega
+        // director is stored as (9, n_elems) - flattened 3x3 matrices
+        // omega is (3, n_elems)
+        const std::size_t n_elems = director.cols();
+        constexpr std::size_t dim = 3;
+
+        // Parallelize loop if threading is enabled
+        #pragma omp parallel for simd schedule(static)
+        for (std::size_t k = 0; k < n_elems; ++k) {
+            // Match Python implementation: _get_rotation_matrix in _rotations.py
+            // Step 1: Get unscaled omega components
+            double v0 = omega(0, k);
+            double v1 = omega(1, k);
+            double v2 = omega(2, k);
+
+            // Step 2: Compute theta = ||omega|| (magnitude before scaling)
+            double theta = std::sqrt(v0 * v0 + v1 * v1 + v2 * v2);
+
+            // Step 3: Normalize axis (add epsilon to prevent division by zero, matching Python)
+            double norm = theta + 1e-14;
+            v0 /= norm;
+            v1 /= norm;
+            v2 /= norm;
+
+            // Step 4: Scale theta by prefac (matching Python: theta *= scale)
+            theta *= prefac;
+
+            // Step 5: Precompute sin and cos (matching Python: u_prefix = sin(theta), u_sq_prefix = 1.0 - cos(theta))
+            double u_prefix = std::sin(theta);
+            double u_sq_prefix = 1.0 - std::cos(theta);
+
+            // Step 6: Build rotation matrix using exact Python formulas
+            // Python: rot_mat[0, 0, k] = 1.0 - u_sq_prefix * (v1 * v1 + v2 * v2)
+            // This is equivalent to: cos(theta) + (1 - cos(theta)) * v0^2
+            // but we use Python's formula for exact numerical match
+            double R00 = 1.0 - u_sq_prefix * (v1 * v1 + v2 * v2);
+            double R11 = 1.0 - u_sq_prefix * (v0 * v0 + v2 * v2);
+            double R22 = 1.0 - u_sq_prefix * (v0 * v0 + v1 * v1);
+
+            // Off-diagonal elements (matching Python exactly)
+            double R01 = u_prefix * v2 + u_sq_prefix * v0 * v1;
+            double R10 = -u_prefix * v2 + u_sq_prefix * v0 * v1;
+            double R02 = -u_prefix * v1 + u_sq_prefix * v0 * v2;
+            double R20 = u_prefix * v1 + u_sq_prefix * v0 * v2;
+            double R12 = u_prefix * v0 + u_sq_prefix * v1 * v2;
+            double R21 = -u_prefix * v0 + u_sq_prefix * v1 * v2;
+
+            // Extract current director (3x3 matrix stored as 9 elements)
+            // Storage order: [d00, d10, d20, d01, d11, d21, d02, d12, d22]
+            // (column-major order for 3x3 matrix)
+            double d00 = director(0, k);
+            double d10 = director(1, k);
+            double d20 = director(2, k);
+            double d01 = director(3, k);
+            double d11 = director(4, k);
+            double d21 = director(5, k);
+            double d02 = director(6, k);
+            double d12 = director(7, k);
+            double d22 = director(8, k);
+
+            // Apply rotation: R @ director (matrix multiplication)
+            // New director = R * old_director
+            director(0, k) = R00 * d00 + R01 * d01 + R02 * d02;
+            director(1, k) = R10 * d00 + R11 * d01 + R12 * d02;
+            director(2, k) = R20 * d00 + R21 * d01 + R22 * d02;
+            director(3, k) = R00 * d10 + R01 * d11 + R02 * d12;
+            director(4, k) = R10 * d10 + R11 * d11 + R12 * d12;
+            director(5, k) = R20 * d10 + R21 * d11 + R22 * d12;
+            director(6, k) = R00 * d20 + R01 * d21 + R02 * d22;
+            director(7, k) = R10 * d20 + R11 * d21 + R12 * d22;
+            director(8, k) = R20 * d20 + R21 * d21 + R22 * d22;
+        }
+    }
+
+    // Update dynamics (velocity, omega) using acceleration and alpha
+    // Equivalent to Python: update_dynamics(time, prefac)
+    // Optimized: Use direct loop instead of Eigen operations for better performance
+    // This matches the Python Numba implementation which uses simple nested loops
+    inline void update_dynamics(double prefac) {
+        auto& block = static_cast<Derived&>(*this);
+
+        // Get variable views once (these are lightweight Eigen block expressions)
+        // The get<>() calls are template functions that should be fully inlined
+        // However, to minimize overhead, we compute dimensions once and reuse
+        auto&& velocity = block.template get<system::cosserat_rod::Velocity>();
+        auto&& acceleration = block.template get<system::cosserat_rod::Acceleration>();
+        auto&& omega = block.template get<system::cosserat_rod::Omega>();
+        auto&& alpha = block.template get<system::cosserat_rod::Alpha>();
+
+        // Cache dimensions (avoid repeated .cols() calls)
+        const IndexType n_nodes = velocity.cols();
+        const IndexType n_elems = omega.cols();
+
+        #pragma omp parallel sections
+        {
+            #pragma omp section
+            {
+                #ifdef ELASTICAPP_COMPONENT_THREADS
+                #pragma omp parallel for simd schedule(static) num_threads(ELASTICAPP_COMPONENT_THREADS)
+                #endif
+                for (IndexType k = 0; k < n_nodes; ++k) {
+                    velocity(0, k) += prefac * acceleration(0, k);
+                }
+            }
+            #pragma omp section
+            {
+                #ifdef ELASTICAPP_COMPONENT_THREADS
+                #pragma omp parallel for simd schedule(static) num_threads(ELASTICAPP_COMPONENT_THREADS)
+                #endif
+                for (IndexType k = 0; k < n_nodes; ++k) {
+                    velocity(1, k) += prefac * acceleration(1, k);
+                }
+            }
+            #pragma omp section
+            {
+                #ifdef ELASTICAPP_COMPONENT_THREADS
+                #pragma omp parallel for simd schedule(static) num_threads(ELASTICAPP_COMPONENT_THREADS)
+                #endif
+                for (IndexType k = 0; k < n_nodes; ++k) {
+                    velocity(2, k) += prefac * acceleration(2, k);
+                }
+            }
+            #pragma omp section
+            {
+                #ifdef ELASTICAPP_COMPONENT_THREADS
+                #pragma omp parallel for simd schedule(static) num_threads(ELASTICAPP_COMPONENT_THREADS)
+                #endif
+                for (IndexType k = 0; k < n_nodes; ++k) {
+                    omega(0, k) += prefac * alpha(0, k);
+                }
+            }
+            #pragma omp section
+            {
+                #ifdef ELASTICAPP_COMPONENT_THREADS
+                #pragma omp parallel for simd schedule(static) num_threads(ELASTICAPP_COMPONENT_THREADS)
+                #endif
+                for (IndexType k = 0; k < n_nodes; ++k) {
+                    omega(1, k) += prefac * alpha(1, k);
+                }
+            }
+            #pragma omp section
+            {
+                #ifdef ELASTICAPP_COMPONENT_THREADS
+                #pragma omp parallel for simd schedule(static) num_threads(ELASTICAPP_COMPONENT_THREADS)
+                #endif
+                for (IndexType k = 0; k < n_nodes; ++k) {
+                    omega(2, k) += prefac * alpha(2, k);
+                }
+            }
+        }
+    }
+
+protected:
+    // Protected constructor to prevent direct instantiation
+    CosseratRodOperations() = default;
+    ~CosseratRodOperations() = default;
+
+    // Prevent copying/moving (can be enabled in derived class if needed)
+    CosseratRodOperations(const CosseratRodOperations&) = default;
+    CosseratRodOperations(CosseratRodOperations&&) = default;
+    CosseratRodOperations& operator=(const CosseratRodOperations&) = default;
+    CosseratRodOperations& operator=(CosseratRodOperations&&) = default;
+};
+
+} // namespace elasticapp
diff --git a/backend/src/py/__init__.py b/backend/src/py/__init__.py
new file mode 100644
index 00000000..f4c2086e
--- /dev/null
+++ b/backend/src/py/__init__.py
@@ -0,0 +1,31 @@
+"""Elasticapp: A CPP accelerated backend for PyElastica kernels."""
+
+# Import version function from version module
+from elasticapp.version import version
+
+# Import BlockRodSystem from C++ module
+from elasticapp._memory_block import BlockRodSystem, BlockRodSystemView
+
+# Import MemoryBlockCosseratRod after BlockRodSystem is defined to avoid circular import
+from .module_collision import CollisionEnvironment
+from .collision_physics import LinearSpringDashpot
+from .memory_block_rod import MemoryBlockCosseratRod
+
+
+__all__ = [
+    "BlockRodSystem",
+    "BlockRodSystemView",
+    "MemoryBlockCosseratRod",
+    "version",
+]
+
+# import submodules so that they can be easily accessed
+# import elasticapp._linalg
+# import elasticapp._rotations
+
+# Note: These imports are commented out as they depend on blaze,
+# which is being removed. They will be refactored in later tasks.
+# import elasticapp._PyTags
+# import elasticapp._PyArrays
+# import elasticapp._PyCosseratRods
+# import elasticapp._PyExamples
diff --git a/backend/src/py/collision_physics.py b/backend/src/py/collision_physics.py
new file mode 100644
index 00000000..0f617085
--- /dev/null
+++ b/backend/src/py/collision_physics.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from elasticapp._memory_block import BlockRodSystem
+from elasticapp._collision import CollisionSystem
+
+from .typing_alias import CoarseDetectionType, FineDetectionType, BatchingType
+
+
+class CollisionPhysics(ABC):
+    """
+    Abstract base class for block-wise collision physics.
+    """
+
+    def __init__(
+        self,
+        coarse_detection: CoarseDetectionType = "hash_grid",
+        fine_detection: FineDetectionType = "sphere_sphere",
+        batching: BatchingType = "union_find",
+    ) -> None:
+        super().__init__()
+        self.coarse_detection = coarse_detection
+        self.fine_detection = fine_detection
+        self.batching = batching
+
+    @abstractmethod
+    def resolve_collision(self, system: "BlockRodSystem") -> None:
+        pass
+
+
+class LinearSpringDashpot(CollisionPhysics):
+    """
+    Linear spring-dashpot collision physics.
+    """
+
+    def __init__(
+        self,
+        *args,
+        k_normal: float = 1.0,
+        eta_normal: float = 0.1,
+        k_tangential: float = 0.0,
+        eta_tangential: float | None = None,
+        friction: float = 0.5,
+        detect_every: int = 1,
+        **kwargs,
+    ) -> None:
+        super().__init__(*args, **kwargs)
+
+        from elasticapp._collision import LinearSpringDashpot as Model
+
+        if eta_tangential is None and k_tangential == 0.0:
+            # Use 3-parameter constructor (eta_tangential = eta_normal, k_tangential = 0.0)
+            model = Model(k_normal=k_normal, eta_normal=eta_normal, friction=friction)
+        elif k_tangential == 0.0:
+            # Use 4-parameter constructor (explicit eta_tangential, k_tangential = 0.0)
+            model = Model(
+                k_normal=k_normal,
+                eta_normal=eta_normal,
+                eta_tangential=eta_tangential,
+                friction=friction,
+            )
+        else:
+            # Use 5-parameter constructor (explicit k_tangential and eta_tangential)
+            if eta_tangential is None:
+                eta_tangential = eta_normal  # Default to eta_normal if not provided
+            model = Model(
+                k_normal=k_normal,
+                eta_normal=eta_normal,
+                k_tangential=k_tangential,
+                eta_tangential=eta_tangential,
+                friction=friction,
+            )
+        self.collision_system = CollisionSystem(model, detect_every=detect_every)
+
+    def resolve_collision(self, system: "BlockRodSystem") -> None:
+        self.collision_system.resolve(system)
+
+
+class NoInteraction(CollisionPhysics):
+    """
+    NoInteraction collision physics model for testing purposes.
+
+    This model returns zero force for all contacts, allowing collision detection
+    to be tested without applying any forces. Useful for:
+    - Testing collision detection algorithms
+    - Validating contact geometry
+    - Debugging collision pipeline without force effects
+    """
+
+    def __init__(self, *args, detect_every: int = 1, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+
+        from elasticapp._collision import NoInteraction as Model
+
+        model = Model()
+        self.collision_system = CollisionSystem(model, detect_every=detect_every)
+
+    def resolve_collision(self, system: "BlockRodSystem") -> None:
+        self.collision_system.resolve(system)
diff --git a/backend/src/py/memory_block_rod.py b/backend/src/py/memory_block_rod.py
new file mode 100644
index 00000000..737b032b
--- /dev/null
+++ b/backend/src/py/memory_block_rod.py
@@ -0,0 +1,167 @@
+"""Create block-structure class for collection of Cosserat rod systems."""
+
+import numpy as np
+
+from elastica.rod.cosserat_rod import CosseratRod
+from elastica.rod.data_structures import _RodSymplecticStepperMixin
+from elastica.typing import RodType, SystemIdxType
+
+from elasticapp._memory_block import BlockRodSystem
+
+# Mapping python CosseratRod attribute to C++ tag that will be block memory allocated.
+# Tags are defined in cosserat_rod_system.h
+PY2CPP_VARNAMES: dict[str, str] = {
+    # Node variables
+    "mass": "mass",
+    "position_collection": "position",
+    "velocity_collection": "velocity",
+    "acceleration_collection": "acceleration",
+    "internal_forces": "internal_forces",
+    "external_forces": "external_forces",
+    # Element variables
+    "omega_collection": "omega",
+    "alpha_collection": "alpha",
+    "director_collection": "director",
+    "rest_lengths": "rest_lengths",
+    "density": "density",
+    "volume": "volume",
+    "mass_second_moment_of_inertia": "mass_second_moment_of_inertia",
+    "inv_mass_second_moment_of_inertia": "inv_mass_second_moment_of_inertia",
+    "internal_torques": "internal_torques",
+    "external_torques": "external_torques",
+    "lengths": "lengths",
+    "tangents": "tangents",
+    "radius": "radius",
+    "dilatation": "dilatation",
+    "dilatation_rate": "dilatation_rate",
+    "sigma": "sigma",
+    "rest_sigma": "rest_sigma",
+    "internal_stress": "internal_stress",
+    "shear_matrix": "shear_matrix",
+    # Voronoi variables
+    "rest_voronoi_lengths": "rest_voronoi_lengths",
+    "voronoi_dilatation": "voronoi_dilatation",
+    "kappa": "kappa",
+    "rest_kappa": "rest_kappa",
+    "internal_couple": "internal_couple",
+    "bend_matrix": "bend_matrix",
+}
+
+
+class MemoryBlockCosseratRod(CosseratRod, _RodSymplecticStepperMixin):
+    """
+    Memory block class for Cosserat rod equations.
+
+    This class is derived from CosseratRod to inherit all rod methods while providing
+    a memory-efficient block structure for multiple rod systems. It uses the C++ backend
+    BlockRodSystem for efficient memory management.
+
+    Parameters
+    ----------
+    systems : list[RodType]
+        List of CosseratRod objects to be included in the memory block structure.
+        Currently only straight rods are supported (ring rods are not yet implemented).
+    system_idx_list : list[SystemIdxType]
+        List of system indices corresponding to each rod in the `systems` list.
+        These indices are used to map rods back to their original positions in
+        the simulator's system collection.
+
+    Attributes
+    ----------
+    n_systems : int
+        Total number of rod systems in the memory block.
+    n_rods : int
+        Total number of rods (same as n_systems).
+    n_elems : int
+        Total number of elements across all rods in the block structure.
+    n_nodes : int
+        Total number of nodes across all rods (n_elems + 1).
+    n_voronoi : int
+        Total number of Voronoi points across all rods (n_elems - 1).
+    system_idx_list : numpy.ndarray
+        Array of system indices mapping rods to their original positions.
+    ghost_nodes_idx : numpy.ndarray
+        Indices of ghost nodes used for boundary conditions.
+    ghost_elems_idx : numpy.ndarray
+        Indices of ghost elements used for boundary conditions.
+    ghost_voronoi_idx : numpy.ndarray
+        Indices of ghost Voronoi points used for boundary conditions.
+
+    Notes
+    -----
+    - Currently only straight rods are supported. Ring rod support is planned for future.
+    - All rod data (positions, directors, velocities, etc.) is stored in contiguous
+      memory blocks for efficient computation.
+    """
+
+    def __init__(
+        self, systems: list[RodType], system_idx_list: list[SystemIdxType]
+    ) -> None:
+        self.n_systems = len(systems)
+
+        # Sorted systems (only straight rods for now)
+        self.system_idx_list = np.array(system_idx_list, dtype=np.int32)
+
+        n_elems_straight_rods = np.array([x.n_elems for x in systems], dtype=np.int32)
+
+        # Create C++ block with element counts
+        # BlockRodSystem accepts numpy arrays directly (as well as lists/tuples)
+        self._block = BlockRodSystem(n_elems_straight_rods)
+        for py_key, cpp_key in PY2CPP_VARNAMES.items():
+            full_block_memory = self._block.get(cpp_key)
+            setattr(self, py_key, full_block_memory)
+
+        # Get ghost indices from C++ block
+        self.ghost_nodes_idx = np.array(self._block.ghost_nodes_idx, dtype=np.int32)
+        self.ghost_elems_idx = np.array(self._block.ghost_elems_idx, dtype=np.int32)
+        self.ghost_voronoi_idx = np.array(self._block.ghost_voronoi_idx, dtype=np.int32)
+
+        # Compute metadata from block and element counts
+        # n_elems is total elements including ghost elements
+        # For n rods, there are (n-1) ghost nodes, and 2*(n-1) ghost elements
+        self.n_elems = int(np.sum(n_elems_straight_rods) + 2 * (len(systems) - 1))
+        self.n_nodes = self.n_elems + 1
+        self.n_voronoi = self.n_elems - 1
+        self.n_rods = len(systems)
+
+        for idx, system in enumerate(systems):
+            self.relink_system_properties_to_block_memory(system, idx)
+
+        self._block.reset_ghost()
+
+        # Compute strains for the block
+        self.compute_strains(0.0)
+
+    def relink_system_properties_to_block_memory(
+        self,
+        system: RodType,
+        system_idx: int,
+    ) -> None:
+        """Relink system properties to block memory."""
+        for py_key, cpp_key in PY2CPP_VARNAMES.items():
+            assert hasattr(
+                system, py_key
+            ), f"System {system} does not have attribute {py_key}."
+            value = getattr(system, py_key).copy()
+            block_memory = self._block.at(system_idx).get(cpp_key)
+            block_memory[...] = value
+            setattr(system, py_key, block_memory)
+
+    # # Override python implementation
+    def compute_strains(self, time: np.float64) -> None:
+        self._block.compute_strains(time)
+
+    def compute_internal_forces_and_torques(self, time: np.float64) -> None:
+        self._block.compute_internal_forces_and_torques(time)
+
+    def zeroed_out_external_forces_and_torques(self, time: np.float64) -> None:
+        self._block.zeroed_out_external_forces_and_torques(time)
+
+    def update_accelerations(self, time: np.float64) -> None:
+        self._block.update_accelerations(time)
+
+    def update_kinematics(self, time: np.float64, prefac: np.float64) -> None:
+        self._block.update_kinematics(time, prefac)
+
+    def update_dynamics(self, time: np.float64, prefac: np.float64) -> None:
+        self._block.update_dynamics(time, prefac)
diff --git a/backend/src/py/module_collision.py b/backend/src/py/module_collision.py
new file mode 100644
index 00000000..d0153230
--- /dev/null
+++ b/backend/src/py/module_collision.py
@@ -0,0 +1,188 @@
+from __future__ import annotations
+
+__doc__ = """
+CollisionEnvironment
+--------------------
+
+Provides the collision environment interface to configure collision detection and
+detection resolution for Cosserat rods in the simulation.
+"""
+__all__ = ["CollisionEnvironment"]
+from typing import Any, Type
+import functools
+
+from elastica.external_forces import NoForces
+from elastica.typing import SystemIdxType
+from elastica.modules.protocol import SystemCollectionProtocol, ModuleProtocol
+
+from .memory_block_rod import MemoryBlockCosseratRod
+from .collision_physics import CollisionPhysics
+
+from .typing_alias import CoarseDetectionType, FineDetectionType, BatchingType
+
+
+class CollisionEnvironment(SystemCollectionProtocol):
+    """
+    The CollisionEnvironment class enables collision detection and resolution for Cosserat rods in the simulation.
+    By including CollisionEnvironment as a mixin, the simulator gains support for discrete element method (DEM)-based contact handling.
+    Use this environment together with C++ backend support to activate and configure collision pipeline stages.
+
+    Notes
+    -----
+    Currently, the CollisionEnvironment is only compatible when used with the
+    C++ backend.
+    Currently, the feature does not support variations in the collision detection
+    methods, as it requires re-compilation of the C++ backend. This must be done
+    by the user.
+
+    Examples
+    --------
+    User can include the CollisionEnvironment in the simulator class to enable
+    collision detection and reaction-forces for all Cosserat rods.
+
+    >>> import elastica as ea
+    >>> import elasticapp as epp
+    ...
+    >>> # Include the CollisionEnvironment in the simulator class
+    >>> class Simulator(ea.BaseSystemCollection, epp.CollisionEnvironment, ...):
+    ...     pass
+    ...
+    >>> simulator = Simulator()
+    >>> # Enable the C++ block supports for the Cosserat rods
+    >>> simulator.enable_block_supports(ea.CosseratRod, epp.MemoryBlockCosseratRod)
+
+    To change the configuration of the collision physics, use `.configure_collision`.
+
+    >>> simulator \
+        .configure_collision_detection() \
+        .using(
+            epp.LinearSpringDashpot,
+            k_normal=1.0,
+            eta_normal=0.1,
+            friction=0.5
+        )
+
+    Attributes
+    ----------
+    _ext_forces_torques: list
+        List of forcing class defined for rod-like objects.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._feature_group_finalize.append(self._finalize_block_collision)
+
+        # Initial configuration
+        self._collision_coarse_detection: CoarseDetectionType
+        self._collision_fine_detection: FineDetectionType
+        self._collision_batching: BatchingType
+        self._collision_controller: "_CollisionController"
+        self.configure_collision_detection()
+
+    def configure_collision_detection(
+        self,
+        coarse_detection: CoarseDetectionType = "hash_grid",
+        fine_detection: FineDetectionType = "sphere_sphere",
+        batching: BatchingType = "union_find",
+    ) -> ModuleProtocol:
+        """
+        This method applies external forces and torques on the relevant
+        user-defined system or rod-like object. You must input the system
+        or rod-like object that you want to apply external forces and torques on.
+
+        Parameters
+        ----------
+        coarse_detection: CoarseDetectionType
+            The coarse detection algorithm to use.
+        fine_detection: FineDetectionType
+            The fine detection algorithm to use.
+        batching: BatchingType
+            The batching algorithm to use.
+        """
+        self._collision_coarse_detection = coarse_detection
+        self._collision_fine_detection = fine_detection
+        self._collision_batching = batching
+
+        # Create _Constraint object, cache it and return to user
+        self._collision_controller = _CollisionController()
+        self._feature_group_synchronize.append_id(self._collision_controller)
+
+        return self._collision_controller
+
+    def _finalize_block_collision(self) -> None:
+        controller = self._collision_controller.instantiate()
+        del self._collision_controller
+        # Find block rods in the system
+        for sys in self.final_systems():
+            if isinstance(sys, MemoryBlockCosseratRod):
+                block = sys._block
+                break
+        else:
+            raise ValueError(
+                "No block rods found in the system. Collision requies at least one rod in the simulator."
+            )
+
+        compute_collision = functools.partial(
+            controller.resolve_collision, system=block
+        )
+
+        self._feature_group_synchronize.add_operators(controller, [compute_collision])
+
+        return controller
+
+
+class _CollisionController:
+    """
+    Collision controller module private class
+    """
+
+    _forcing_cls: Type[NoForces]
+    _args: Any
+    _kwargs: Any
+
+    def using(self, cls: Type[NoForces], *args: Any, **kwargs: Any) -> None:
+        """
+        This method sets which forcing class is used to apply forcing
+        to user defined rod-like objects.
+
+        Parameters
+        ----------
+        cls: Type[Any]
+            User defined forcing class.
+        *args: Any
+            Variable length argument list.
+        **kwargs: Any
+            Arbitrary keyword arguments.
+
+        Returns
+        -------
+
+        """
+        assert issubclass(
+            cls, CollisionPhysics
+        ), "{} is not a valid collision physics. Did you forget to derive from CollisionPhysics?".format(
+            cls
+        )
+        self._cls = cls
+        self._args = args
+        self._kwargs = kwargs
+
+    def id(self) -> SystemIdxType:
+        return None  # type: ignore
+
+    def instantiate(self) -> "CollisionPhysics":
+        """Constructs a constraint after checks"""
+        if not hasattr(self, "_cls"):
+            raise RuntimeError(
+                "No collision physics provided to act on block rod"
+                "but a collision physics was registered. Did you forget to call"
+                "the `using` method"
+            )
+
+        try:
+            return self._cls(*self._args, **self._kwargs)
+        except (TypeError, IndexError):
+            raise TypeError(
+                r"Unable to construct collision physics class.\n"
+                r"Did you provide all necessary collision physics parameters?"
+            )
diff --git a/backend/src/py/typing_alias.py b/backend/src/py/typing_alias.py
new file mode 100644
index 00000000..982093ee
--- /dev/null
+++ b/backend/src/py/typing_alias.py
@@ -0,0 +1,5 @@
+from typing import Literal
+
+CoarseDetectionType = Literal["hash_grid"]
+FineDetectionType = Literal["sphere_sphere"]
+BatchingType = Literal["union_find"]  # , "single_batch", "hybrid_batch"]
diff --git a/backend/src/system.h b/backend/src/system.h
new file mode 100644
index 00000000..8e772a4b
--- /dev/null
+++ b/backend/src/system.h
@@ -0,0 +1,141 @@
+#pragma once
+
+#include <cstddef>
+#include <type_traits>
+#include <concepts>
+#include <tuple>
+#include "traits.h"
+
+namespace elasticapp {
+
+//
+// System<Ts...> is a minimal header-only class representing a collection of physical variables
+// and their properties for a mechanical or simulation system. It is used solely for compile-
+// time type information and does not allocate memory or hold state. Its purpose is to define,
+// via template arguments, which variables are present in a system and to specify their placement
+// (e.g., on nodes, elements, voronoi) and data type (e.g., scalar, vector, matrix) using tags.
+// System enables static reflection and type-driven logic for Block, BlockView, and other components.
+//
+// Example:
+//   using MySystem = System<Displacement, Force, BendingMoment>;
+//
+//   - Displacement, Force, BendingMoment are structs inheriting correct placement and data type tags.
+//   - MySystem::Variables is a type list of the variables, usable in metaprogramming.
+//   - System does not itself allocate memory or store real data.
+//
+
+// ********************************************************
+// Placement tags
+// ********************************************************
+
+// Placement tags - indicate where variables are stored
+namespace Placement {
+    struct OnNode {};
+    struct OnElement {};
+    struct OnVoronoi {};
+} // namespace Placement
+
+template<typename T>
+concept HasPlacementTag =
+    std::is_base_of_v<Placement::OnNode, T> ||
+    std::is_base_of_v<Placement::OnElement, T> ||
+    std::is_base_of_v<Placement::OnVoronoi, T>;
+
+// ********************************************************
+// DataType tags
+// ********************************************************
+
+namespace DataType {
+    // Dimension tags - indicate the size of variables
+    struct Scalar {
+        static constexpr std::size_t dimension = 1;
+        // Note: Not constexpr because Eigen dynamic matrices don't have constexpr constructors
+        // Using inline static (C++17) to define in header
+        inline static MatrixType ghost_value = MatrixType::Zero(1, 1);
+    };
+    struct Vector {
+        static constexpr std::size_t dimension = 3;
+        // Note: Not constexpr because Eigen dynamic matrices don't have constexpr constructors
+        // Using inline static (C++17) to define in header
+        inline static MatrixType ghost_value = MatrixType::Zero(3, 1);
+    };
+    struct Matrix {
+        static constexpr std::size_t dimension = 9;
+        // Note: Not constexpr because Eigen dynamic matrices don't have constexpr constructors
+        // Using inline static (C++17) to define in header
+        inline static MatrixType ghost_value = MatrixType::Zero(9, 1);
+    };
+} // namespace DataType
+
+template<typename T>
+concept HasDataTypeTag =
+    std::is_base_of_v<DataType::Scalar, T> ||
+    std::is_base_of_v<DataType::Vector, T> ||
+    std::is_base_of_v<DataType::Matrix, T>;
+
+template<typename T>
+concept ValidVariable =
+    HasPlacementTag<T> &&
+    HasDataTypeTag<T> &&
+    requires {
+        { T::name } -> std::convertible_to<std::string_view>;
+        { T::ghost_value };
+    };
+
+// Helper to extract dimension from a variable type
+template<typename T>
+constexpr std::size_t get_dimension_v =
+    std::is_base_of_v<DataType::Scalar, T> ? DataType::Scalar::dimension :
+    std::is_base_of_v<DataType::Vector, T> ? DataType::Vector::dimension :
+    std::is_base_of_v<DataType::Matrix, T> ? DataType::Matrix::dimension : 0;
+
+// Helper to sum dimensions across a parameter pack
+template<typename... Variables>
+constexpr std::size_t sum_dimensions_v = (0 + ... + get_dimension_v<Variables>);
+
+// ********************************************************
+// System class
+// ********************************************************
+
+// System class - collection of Variables
+// Template parameter pack for variable types
+// Each Variable must derive from one Placement tag and one DataType tag
+template<ValidVariable... Vars>
+class System {
+public:
+    // Expose the variable types for template metaprogramming
+    using Variables = std::tuple<Vars...>;
+
+    static constexpr std::size_t get_depth() {
+        return sum_dimensions_v<Vars...>;
+    }
+
+};
+
+// Helper to extract variable types from a System type
+template<typename SystemType>
+struct system_variables;
+
+template<ValidVariable... Vars>
+struct system_variables<System<Vars...>> {
+    using type = std::tuple<Vars...>;
+};
+
+template<typename SystemType>
+using system_variables_t = typename system_variables<SystemType>::type;
+
+// Concept to check if a type is a System<ValidVariable...>
+// A type is a SystemModel if:
+// 1. It is not instantiable on its own.
+// 2. It has a Variables type alias (std::tuple<...>)
+// 3. It has a static get_depth() method
+// 4. system_variables<T> is specialized (i.e., T is System<...>)
+template<typename SystemType>
+concept SystemModel = requires {
+    typename SystemType::Variables;  // Must have Variables type alias
+    { SystemType::get_depth() } -> std::convertible_to<std::size_t>;  // Must have static get_depth()
+    typename system_variables<SystemType>::type;  // system_variables<T> must be specialized
+    requires std::is_same_v<typename system_variables<SystemType>::type, typename SystemType::Variables>;
+};
+
+} // namespace elasticapp
diff --git a/backend/src/traits.h b/backend/src/traits.h
new file mode 100644
index 00000000..6fafcd78
--- /dev/null
+++ b/backend/src/traits.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <Eigen/Dense>
+#include <cstddef>
+#include <type_traits>
+
+namespace elasticapp {
+
+using MatrixType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+using VectorType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
+using IndexType = Eigen::Index;
+constexpr bool IsRowMajor = true;
+constexpr bool IsColMajor = false;
+
+// Helper for static_asserts in templates
+template<typename T>
+struct dependent_false : std::false_type {};
+
+// Helper functions for computing strides for numpy array views
+inline std::pair<ptrdiff_t, ptrdiff_t> compute_strides(std::size_t rows, std::size_t cols) {
+    if constexpr (IsRowMajor) {
+        // Row-major: stride between rows is cols * sizeof(double)
+        // stride between columns is sizeof(double)
+        return {static_cast<ptrdiff_t>(cols * sizeof(double)),
+                static_cast<ptrdiff_t>(sizeof(double))};
+    } else {
+        // Column-major: stride between rows is sizeof(double)
+        // stride between columns is rows * sizeof(double)
+        return {static_cast<ptrdiff_t>(sizeof(double)),
+                static_cast<ptrdiff_t>(rows * sizeof(double))};
+    }
+}
+
+// Helper functions for matrix slicing operations
+// These encapsulate Eigen-specific operations
+
+// Get a view into a submatrix (columns for a rod)
+// Returns a lightweight view - no copy, just a reference to the columns
+inline auto get_column_slice(MatrixType& matrix, std::size_t start_col, std::size_t num_cols) {
+    return matrix.middleCols(static_cast<IndexType>(start_col),
+                             static_cast<IndexType>(num_cols));
+}
+
+inline auto get_column_slice(const MatrixType& matrix, std::size_t start_col, std::size_t num_cols) {
+    return matrix.middleCols(static_cast<IndexType>(start_col),
+                             static_cast<IndexType>(num_cols));
+}
+
+// Get a view into a submatrix (specific rows and columns)
+// Returns a lightweight view - no copy, just a reference to the submatrix
+inline auto get_block_slice(MatrixType& matrix,
+                            std::size_t start_row, std::size_t num_rows,
+                            std::size_t start_col, std::size_t num_cols) {
+    return matrix.block(static_cast<IndexType>(start_row),
+                        static_cast<IndexType>(start_col),
+                        static_cast<IndexType>(num_rows),
+                        static_cast<IndexType>(num_cols));
+}
+
+inline auto get_block_slice(const MatrixType& matrix,
+                            std::size_t start_row, std::size_t num_rows,
+                            std::size_t start_col, std::size_t num_cols) {
+    return matrix.block(static_cast<IndexType>(start_row),
+                        static_cast<IndexType>(start_col),
+                        static_cast<IndexType>(num_rows),
+                        static_cast<IndexType>(num_cols));
+}
+
+} // namespace elasticapp
diff --git a/backend/src/utility/hash.h b/backend/src/utility/hash.h
new file mode 100644
index 00000000..d7642529
--- /dev/null
+++ b/backend/src/utility/hash.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <cstddef>
+#include <utility>  // For std::pair
+#include <functional>  // For std::hash
+
+namespace elasticapp {
+namespace utility {
+
+/**
+ * Hash function for std::pair<std::size_t, std::size_t> (needed for unordered_map)
+ * C++11 doesn't provide std::hash for pairs by default
+ */
+struct PairHash {
+    std::size_t operator()(const std::pair<std::size_t, std::size_t>& p) const {
+        // Combine hashes of both elements
+        // Using a simple hash combination (can be improved with boost::hash_combine)
+        return std::hash<std::size_t>()(p.first) ^ (std::hash<std::size_t>()(p.second) << 1);
+    }
+};
+
+} // namespace utility
+} // namespace elasticapp
diff --git a/backend/src/variable_offsets.h b/backend/src/variable_offsets.h
new file mode 100644
index 00000000..7c2f5383
--- /dev/null
+++ b/backend/src/variable_offsets.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include "cosserat_rod_system.h"
+#include "system.h"
+#include <cstddef>
+#include <type_traits>
+#include <concepts>
+#include <tuple>
+
+namespace elasticapp {
+
+// Generic helpers for computing variable offsets in any System
+
+// Helper to find the index of a type in a parameter pack
+template<typename Target, std::size_t CurrentIndex, typename... Ts>
+struct find_index_impl; // Forward declaration
+
+// Found the type
+template<typename Target, std::size_t CurrentIndex, typename... Rest>
+struct find_index_impl<Target, CurrentIndex, Target, Rest...> {
+    static constexpr std::size_t value = CurrentIndex;
+};
+
+// Recurse to the next type in the pack
+template<typename Target, std::size_t CurrentIndex, typename First, typename... Rest>
+struct find_index_impl<Target, CurrentIndex, First, Rest...> {
+    static constexpr std::size_t value = find_index_impl<Target, CurrentIndex + 1, Rest...>::value;
+};
+
+// Type not found in the pack (empty pack) - this provides a clear error message
+template<typename Target, std::size_t CurrentIndex>
+struct find_index_impl<Target, CurrentIndex> {
+    static_assert(dependent_false<Target>::value,
+        "Undefined variable requested. Please ensure the variable is defined in the System's variable list.");
+};
+
+// Helper to sum dimensions of variables up to (but not including) a given index
+template<std::size_t Index, typename... Variables>
+struct sum_dimensions_up_to_index_impl;
+
+template<std::size_t Index, typename First, typename... Rest>
+struct sum_dimensions_up_to_index_impl<Index, First, Rest...> {
+    static constexpr std::size_t value =
+        (Index == 0) ? 0 :
+        (get_dimension_v<First> + sum_dimensions_up_to_index_impl<Index - 1, Rest...>::value);
+};
+
+template<std::size_t Index>
+struct sum_dimensions_up_to_index_impl<Index> {
+    static constexpr std::size_t value = 0;
+};
+
+// Helper to compute offset for a variable in a System
+// This finds the variable's position and sums dimensions of all variables before it
+template<typename Variable, typename... SystemVariables>
+struct compute_variable_offset_impl {
+    static constexpr std::size_t index = find_index_impl<Variable, 0, SystemVariables...>::value;
+    static constexpr std::size_t value = sum_dimensions_up_to_index_impl<index, SystemVariables...>::value;
+};
+
+// Helper to expand a tuple into a parameter pack for compute_variable_offset_impl
+template<typename Variable, typename VariablesTuple>
+struct compute_variable_offset_from_system_impl;
+
+template<typename Variable, typename... SystemVariables>
+struct compute_variable_offset_from_system_impl<Variable, std::tuple<SystemVariables...>> {
+    static constexpr std::size_t value = compute_variable_offset_impl<Variable, SystemVariables...>::value;
+};
+
+// Generic function to compute variable offset for any System type
+// This automatically extracts variables from SystemType::Variables and computes the offset
+// SystemType must be a System<...> type
+template<typename Variable, SystemModel SystemType>
+constexpr std::size_t compute_variable_offset() {
+    // Extract variables directly from SystemType's Variables type alias
+    using VariablesTuple = typename SystemType::Variables;
+    return compute_variable_offset_from_system_impl<Variable, VariablesTuple>::value;
+}
+
+} // namespace elasticapp
diff --git a/backend/src/version.h b/backend/src/version.h
new file mode 100644
index 00000000..304cf135
--- /dev/null
+++ b/backend/src/version.h
@@ -0,0 +1,18 @@
+#ifndef ELASTICAPP_VERSION_H
+#define ELASTICAPP_VERSION_H
+
+#include <string>
+
+#define STRINGIFY(x) #x
+#define MACRO_STRINGIFY(x) STRINGIFY(x)
+
+namespace elasticapp {
+
+inline std::string version() {
+    // VERSION_INFO is defined by CMake and stringified here
+    return std::string(MACRO_STRINGIFY(VERSION_INFO));
+}
+
+} // namespace elasticapp
+
+#endif // ELASTICAPP_VERSION_H
diff --git a/backend/tests/cpp/collision/test_collision_system.cpp b/backend/tests/cpp/collision/test_collision_system.cpp
new file mode 100644
index 00000000..33bbd9c5
--- /dev/null
+++ b/backend/tests/cpp/collision/test_collision_system.cpp
@@ -0,0 +1,190 @@
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <vector>
+#include <utility>
+#include <cstddef>
+#include <Eigen/Dense>
+#include "../../../src/environment/collision/collision_system.h"
+#include "../../../src/environment/collision/types.h"
+#include "../../../src/api.h"
+
+using Catch::Approx;
+
+namespace elasticapp::environment::collision {
+namespace testing {
+
+/**
+ * Null coarse detection policy for unit testing.
+ * Returns empty vector (no candidate pairs detected).
+ */
+struct NullCoarseDetection {
+    std::vector<std::pair<std::size_t, std::size_t>> detect(
+        const Eigen::MatrixXd& positions,
+        const Eigen::MatrixXd& radii
+    ) const {
+        (void)positions;
+        (void)radii;
+        return {};
+    }
+};
+
+/**
+ * Null fine detection policy for unit testing.
+ * Returns empty vector (no contacts detected).
+ */
+struct NullFineDetection {
+    template<typename PhysicsModel>
+    std::vector<Contact> detect(
+        const std::vector<std::pair<std::size_t, std::size_t>>& candidate_pairs,
+        const Eigen::MatrixXd& positions,
+        const Eigen::MatrixXd& velocities,
+        const Eigen::MatrixXd& radii,
+        const PhysicsModel& physics_model
+    ) const {
+        (void)candidate_pairs;
+        (void)positions;
+        (void)velocities;
+        (void)radii;
+        (void)physics_model;
+        return {};
+    }
+};
+
+/**
+ * Null batching policy for unit testing.
+ * Returns empty batches.
+ */
+struct NullBatching {
+    std::vector<std::vector<std::size_t>> batch(
+        std::vector<Contact>& contacts
+    ) const {
+        (void)contacts;
+        return {};
+    }
+};
+
+} // namespace testing
+} // namespace elasticapp::environment::collision
+
+// Type alias for test CollisionSystem with null policies
+using TestCollisionSystem = elasticapp::environment::collision::CollisionSystem<
+    elasticapp::environment::collision::testing::NullCoarseDetection,
+    elasticapp::environment::collision::testing::NullFineDetection,
+    elasticapp::environment::collision::testing::NullBatching
+>;
+
+TEST_CASE("CollisionSystem construction", "[collision]") {
+    SECTION("Can be constructed with nullptr_t physics model") {
+        std::nullptr_t null_model = nullptr;
+        TestCollisionSystem system(null_model);
+
+        // Should construct successfully
+        REQUIRE(true);
+    }
+
+    SECTION("Can be constructed with nullptr_t physics model and detect_every") {
+        std::nullptr_t null_model = nullptr;
+        TestCollisionSystem system(null_model, 5);
+
+        REQUIRE(system.detect_every() == 5);
+    }
+
+    SECTION("Default detect_every is 1") {
+        std::nullptr_t null_model = nullptr;
+        TestCollisionSystem system(null_model);
+
+        REQUIRE(system.detect_every() == 1);
+    }
+}
+
+TEST_CASE("CollisionSystem contact_cache", "[collision]") {
+    std::nullptr_t null_model = nullptr;
+    TestCollisionSystem system(null_model);
+
+    SECTION("contact_cache() returns non-const reference") {
+        auto& cache = system.contact_cache();
+        REQUIRE(cache.empty());
+
+        // Can modify cache
+        cache.push_back({0, 1});
+        cache.push_back({2, 3});
+
+        REQUIRE(cache.size() == 2);
+        REQUIRE(cache[0] == std::make_pair<std::size_t, std::size_t>(0, 1));
+        REQUIRE(cache[1] == std::make_pair<std::size_t, std::size_t>(2, 3));
+    }
+
+    SECTION("contact_cache() const returns const reference") {
+        const auto& system_const = system;
+        const auto& cache = system_const.contact_cache();
+
+        // Should be empty initially
+        REQUIRE(cache.empty());
+
+        // Modify via non-const reference
+        system.contact_cache().push_back({4, 5});
+
+        // Const reference should see the change
+        REQUIRE(cache.size() == 1);
+        REQUIRE(cache[0] == std::make_pair<std::size_t, std::size_t>(4, 5));
+    }
+
+    SECTION("contact_cache persists across calls") {
+        auto& cache1 = system.contact_cache();
+        cache1.push_back({10, 20});
+
+        auto& cache2 = system.contact_cache();
+        REQUIRE(cache2.size() == 1);
+        REQUIRE(cache2[0] == std::make_pair<std::size_t, std::size_t>(10, 20));
+    }
+}
+
+TEST_CASE("CollisionSystem detect_every", "[collision]") {
+    std::nullptr_t null_model = nullptr;
+    TestCollisionSystem system(null_model);
+
+    SECTION("get detect_every returns current value") {
+        REQUIRE(system.detect_every() == 1);
+    }
+
+    SECTION("set_detect_every updates the value") {
+        system.set_detect_every(3);
+        REQUIRE(system.detect_every() == 3);
+
+        system.set_detect_every(10);
+        REQUIRE(system.detect_every() == 10);
+    }
+
+    SECTION("set_detect_every can set to 0") {
+        system.set_detect_every(0);
+        REQUIRE(system.detect_every() == 0);
+    }
+
+    SECTION("set_detect_every can set to large values") {
+        system.set_detect_every(1000);
+        REQUIRE(system.detect_every() == 1000);
+    }
+}
+
+TEST_CASE("CollisionSystem with null policies", "[collision]") {
+    SECTION("System can be instantiated with all null policies") {
+        std::nullptr_t null_model = nullptr;
+        TestCollisionSystem system(null_model);
+
+        // Should construct successfully
+        REQUIRE(true);
+    }
+
+    SECTION("System methods work with null policies") {
+        std::nullptr_t null_model = nullptr;
+        TestCollisionSystem system(null_model);
+
+        // All public methods should work
+        auto& cache = system.contact_cache();
+        REQUIRE(cache.empty());
+
+        REQUIRE(system.detect_every() == 1);
+        system.set_detect_every(2);
+        REQUIRE(system.detect_every() == 2);
+    }
+}
diff --git a/backend/tests/cpp/mock/mock_block_system.h b/backend/tests/cpp/mock/mock_block_system.h
new file mode 100644
index 00000000..2aa9a97e
--- /dev/null
+++ b/backend/tests/cpp/mock/mock_block_system.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include "system.h"
+#include "block.h"
+
+namespace elasticapp::mock {
+
+// Mock system for testing BlockView with arbitrary variables
+// This demonstrates that BlockView can work with any System type
+
+// Simple test variables
+struct MockVar1 :
+    ::elasticapp::Placement::OnNode,
+    ::elasticapp::DataType::Vector {
+        static constexpr std::string_view name = "mock_var1";
+    };
+struct MockVar2 :
+    ::elasticapp::Placement::OnNode,
+    ::elasticapp::DataType::Scalar {
+        static constexpr std::string_view name = "mock_var2";
+    };
+struct MockVar3 :
+    ::elasticapp::Placement::OnElement,
+    ::elasticapp::DataType::Vector {
+        static constexpr std::string_view name = "mock_var3";
+    };
+struct MockVar4 :
+    ::elasticapp::Placement::OnElement,
+    ::elasticapp::DataType::Matrix {
+        static constexpr std::string_view name = "mock_var4";
+    };
+struct MockVar5 :
+    ::elasticapp::Placement::OnVoronoi,
+    ::elasticapp::DataType::Scalar {
+        static constexpr std::string_view name = "mock_var5";
+    };
+
+// MockSystem with a small set of variables for testing
+using MockSystem = ::elasticapp::System<
+    MockVar1,  // Node, Vector (3)
+    MockVar2,  // Node, Scalar (1)
+    MockVar3,  // Element, Vector (3)
+    MockVar4,  // Element, Matrix (9)
+    MockVar5   // Voronoi, Scalar (1)
+>;
+// Total depth: 3 + 1 + 3 + 9 + 1 = 17
+
+using MockBlockSystem = ::elasticapp::Block<MockSystem>;
+using MockBlockSystemView = ::elasticapp::BlockView<MockSystem>;
+
+} // namespace elasticapp::mock
diff --git a/backend/tests/cpp/mock/mock_block_system_with_operation.h b/backend/tests/cpp/mock/mock_block_system_with_operation.h
new file mode 100644
index 00000000..2db37f44
--- /dev/null
+++ b/backend/tests/cpp/mock/mock_block_system_with_operation.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "system.h"
+#include "block.h"
+#include "operations.h"
+#include "mock_block_system.h"  // For MockSystem and variable definitions
+
+namespace elasticapp::mock {
+
+// Mock operations class that implements addition of two variables
+// Adds MockVar1 (Vector) and MockVar2 (Scalar, broadcasted) and stores in MockVar1
+template<typename Derived>
+class AdditionOperations {
+public:
+    // Operation: result = var1 + var2 (where var2 is broadcasted if scalar)
+    // This adds MockVar1 (Vector) and MockVar2 (Scalar broadcasted to Vector)
+    // and stores the result back in MockVar1
+    void add_variables() {
+        auto& block = static_cast<Derived&>(*this);
+
+        // Get views of the variables
+        auto var1 = block.template get<MockVar1>();  // Vector on Node
+        auto var2 = block.template get<MockVar2>();  // Scalar on Node
+
+        // var1 is a matrix view: rows = 3 (vector dimension), cols = width
+        // var2 is a matrix view: rows = 1 (scalar dimension), cols = width
+
+        // Add var2 (scalar) to each component of var1 (vector)
+        // var1 has 3 rows, var2 has 1 row
+        for (Eigen::Index col = 0; col < var1.cols(); ++col) {
+            double scalar_value = var2(0, col);
+            for (Eigen::Index row = 0; row < var1.rows(); ++row) {
+                var1(row, col) = var1(row, col) + scalar_value;
+            }
+        }
+    }
+
+    // Alternative operation: add two vectors (MockVar1 + MockVar3)
+    // Note: MockVar1 is on Node, MockVar3 is on Element, so we need to handle different sizes
+    // For simplicity, we'll add MockVar1 to a scaled version of itself
+    void add_vector_to_itself() {
+        auto& block = static_cast<Derived&>(*this);
+        auto var1 = block.template get<MockVar1>();  // Vector on Node
+
+        // Add var1 to itself (multiply by 2)
+        var1 = var1 + var1;
+    }
+
+protected:
+    AdditionOperations() = default;
+    ~AdditionOperations() = default;
+
+    AdditionOperations(const AdditionOperations&) = default;
+    AdditionOperations(AdditionOperations&&) = default;
+    AdditionOperations& operator=(const AdditionOperations&) = default;
+    AdditionOperations& operator=(AdditionOperations&&) = default;
+};
+
+// MockBlockSystem with AdditionOperations
+using MockBlockSystemWithOperations = ::elasticapp::Block<MockSystem, AdditionOperations>;
+
+} // namespace elasticapp::mock
diff --git a/backend/tests/cpp/test_block.cpp b/backend/tests/cpp/test_block.cpp
new file mode 100644
index 00000000..e81c63ac
--- /dev/null
+++ b/backend/tests/cpp/test_block.cpp
@@ -0,0 +1,581 @@
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <vector>
+#include "block.h"
+#include "mock/mock_block_system.h"
+
+using Catch::Approx;
+
+using MockBlockSystem = elasticapp::mock::MockBlockSystem;
+
+// Type aliases for convenience in tests
+TEST_CASE("Block construction", "[block]") {
+    SECTION("Can be constructed with list of element counts") {
+        std::vector<std::size_t> n_elems_per_rod = {6};  // 6 elements -> 7 nodes (width)
+        MockBlockSystem block(n_elems_per_rod);
+        auto shape = block.shape();
+        REQUIRE(shape.first == 17);  // MockSystem depth
+        REQUIRE(shape.second == 7);   // 6+1 nodes + 0 ghost = 7 nodes
+    }
+
+    SECTION("Can be constructed with list of element counts (auto depth)") {
+        std::vector<std::size_t> n_elems_per_rod = {3, 5, 2};
+        MockBlockSystem block(n_elems_per_rod);
+
+        // Width = sum of (n_elems + 1) for each rod + 2 ghost = (4 + 6 + 3) + 2 = 15
+        // Depth = MockSystem block_depth = 17
+        REQUIRE(block.width() == 15);
+        REQUIRE(block.depth() == 17);
+    }
+
+    SECTION("Block stores correct starting indices for each rod") {
+        std::vector<std::size_t> n_elems_per_rod = {3, 5, 2};
+        MockBlockSystem block(n_elems_per_rod);
+
+        // Rod 0: starts at 0, has 3+1=4 nodes + 1 ghost = 5 total
+        REQUIRE(block.system_start_index(0) == 0);
+        // Rod 1: starts at 5, has 5+1=6 nodes + 1 ghost = 7 total
+        REQUIRE(block.system_start_index(1) == 5);
+        // Rod 2: starts at 5+7=12, has 2+1=3 nodes + 1 ghost = 4 total
+        REQUIRE(block.system_start_index(2) == 12);
+    }
+
+    SECTION("Block calculates width correctly for single rod") {
+        std::vector<std::size_t> n_elems_per_rod = {7};
+        MockBlockSystem block(n_elems_per_rod);
+
+        // Single rod: 8 nodes + 0 ghost = 8 nodes
+        REQUIRE(block.width() == 8);
+        REQUIRE(block.system_start_index(0) == 0);
+    }
+
+    SECTION("Block calculates width correctly for empty list") {
+        std::vector<std::size_t> n_elems_per_rod = {};
+        REQUIRE_THROWS_AS(MockBlockSystem(n_elems_per_rod), std::invalid_argument);
+    }
+
+    SECTION("Block rejects input with less than 6 total elements") {
+        // Test with 5 total elements (should fail)
+        std::vector<std::size_t> n_elems_per_rod = {5};
+        REQUIRE_THROWS_AS(MockBlockSystem(n_elems_per_rod), std::invalid_argument);
+
+        // Test with 6 total elements (should succeed)
+        n_elems_per_rod = {6};
+        REQUIRE_NOTHROW(MockBlockSystem(n_elems_per_rod));
+
+        // Test with multiple rods totaling less than 6 (should fail)
+        n_elems_per_rod = {2, 3};  // Total = 5
+        REQUIRE_THROWS_AS(MockBlockSystem(n_elems_per_rod), std::invalid_argument);
+
+        // Test with multiple rods totaling 6 (should succeed)
+        n_elems_per_rod = {3, 3};  // Total = 6
+        REQUIRE_NOTHROW(MockBlockSystem(n_elems_per_rod));
+    }
+
+    SECTION("Block automatically computes depth from System") {
+        std::vector<std::size_t> n_elems_per_rod = {6};
+        MockBlockSystem mock_block(n_elems_per_rod);
+        REQUIRE(mock_block.depth() == 17);  // MockSystem depth
+    }
+}
+
+TEST_CASE("Block shape", "[block]") {
+    SECTION("Returns correct shape") {
+        std::vector<std::size_t> n_elems_per_rod = {6};  // 6 elements -> 7 nodes (width)
+        MockBlockSystem block(n_elems_per_rod);
+        auto shape = block.shape();
+        REQUIRE(shape.first == 17);  // MockSystem depth
+        REQUIRE(shape.second == 7);   // 7 nodes + 0 ghost = 7 nodes
+    }
+}
+
+TEST_CASE("Block data access", "[block]") {
+    SECTION("Data can be accessed and modified") {
+        std::vector<std::size_t> n_elems_per_rod = {6};  // 6 elements -> 7 nodes (width)
+        MockBlockSystem block(n_elems_per_rod);
+        auto& data = block.data();
+
+        // Modify data
+        data(0, 0) = 1.5;
+        data(0, 1) = 2.5;
+        data(1, 0) = 3.5;
+
+        // Verify modifications
+        REQUIRE(data(0, 0) == 1.5);
+        REQUIRE(data(0, 1) == 2.5);
+        REQUIRE(data(1, 0) == 3.5);
+    }
+}
+
+TEST_CASE("Block CRTP - access to System methods", "[block]") {
+    SECTION("Block inherits System methods") {
+        std::vector<std::size_t> n_elems_per_rod = {6};
+        MockBlockSystem block(n_elems_per_rod);
+
+        // Block inherits from MockSystem, so it has System methods
+        REQUIRE(block.depth() == 17);  // MockSystem depth
+    }
+}
+
+TEST_CASE("Block get() method", "[block]") {
+    std::vector<std::size_t> n_elems_per_rod = {3, 5, 2};
+    MockBlockSystem block(n_elems_per_rod);
+    // Block has 3 rods: rod 0 has 3 elems (4 nodes), rod 1 has 5 elems (6 nodes), rod 2 has 2 elems (3 nodes)
+    // Total width = 4 + 6 + 3 + (2 ghost) = 15
+
+    SECTION("Block get() returns correct shapes for different variable types") {
+        using namespace elasticapp::mock;
+        auto& matrix = block.data();
+        // Block width = 4 + 6 + 3 + 2 (ghost) = 15
+
+        // Test MockVar1 (Node, Vector, offset=0, dimension=3)
+        auto var1_view = block.get<MockVar1>();
+        REQUIRE(var1_view.rows() == 3);
+        REQUIRE(var1_view.cols() == 15);  // OnNode: full width
+
+        // Test MockVar2 (Node, Scalar, offset=3, dimension=1)
+        auto var2_view = block.get<MockVar2>();
+        REQUIRE(var2_view.rows() == 1);
+        REQUIRE(var2_view.cols() == 15);  // OnNode: full width
+
+        // Test MockVar3 (Element, Vector, offset=4, dimension=3)
+        auto var3_view = block.get<MockVar3>();
+        REQUIRE(var3_view.rows() == 3);
+        REQUIRE(var3_view.cols() == 14);  // OnElement: width - 1
+
+        // Test MockVar4 (Element, Matrix, offset=7, dimension=9)
+        auto var4_view = block.get<MockVar4>();
+        REQUIRE(var4_view.rows() == 9);
+        REQUIRE(var4_view.cols() == 14);  // OnElement: width - 1
+
+        // Test MockVar5 (Voronoi, Scalar, offset=16, dimension=1)
+        auto var5_view = block.get<MockVar5>();
+        REQUIRE(var5_view.rows() == 1);
+        REQUIRE(var5_view.cols() == 13);  // OnVoronoi: width - 2
+    }
+
+    SECTION("Block get() returns writable views that modify underlying data") {
+        using namespace elasticapp::mock;
+        auto& matrix = block.data();
+
+        // Test MockVar1 (Node, Vector)
+        auto var1_view = block.get<MockVar1>();
+
+        // Modify through the view
+        var1_view(0, 0) = 10.0;
+        var1_view(1, 1) = 20.0;
+        var1_view(2, 2) = 30.0;
+
+        // Verify modifications are reflected in underlying matrix
+        REQUIRE(matrix(0, 0) == 10.0);
+        REQUIRE(matrix(1, 1) == 20.0);
+        REQUIRE(matrix(2, 2) == 30.0);
+
+        // Modify through matrix and verify view sees changes
+        matrix(0, 3) = 40.0;
+        REQUIRE(var1_view(0, 3) == 40.0);
+
+        // Get another view - should see the same data
+        auto var1_view2 = block.get<MockVar1>();
+        REQUIRE(var1_view2(0, 0) == 10.0);
+        REQUIRE(var1_view2(1, 1) == 20.0);
+        REQUIRE(var1_view2(2, 2) == 30.0);
+
+        // Modify through second view
+        var1_view2(0, 4) = 50.0;
+        REQUIRE(var1_view(0, 4) == 50.0);  // Should be reflected in first view
+        REQUIRE(matrix(0, 4) == 50.0);     // And in underlying matrix
+    }
+
+    SECTION("Block get() works for different variable types") {
+        using namespace elasticapp::mock;
+        auto& matrix = block.data();
+
+        // Test MockVar1 (OnNode, Vector)
+        auto var1_view = block.get<MockVar1>();
+        var1_view(0, 0) = 100.0;
+        REQUIRE(matrix(0, 0) == 100.0);
+
+        // Test MockVar2 (OnNode, Scalar)
+        auto var2_view = block.get<MockVar2>();
+        var2_view(0, 0) = 200.0;
+        REQUIRE(matrix(3, 0) == 200.0);  // offset = 3
+
+        // Test MockVar3 (OnElement, Vector)
+        auto var3_view = block.get<MockVar3>();
+        var3_view(0, 0) = 300.0;
+        REQUIRE(matrix(4, 0) == 300.0);  // offset = 4
+
+        // Test MockVar4 (OnElement, Matrix)
+        auto var4_view = block.get<MockVar4>();
+        var4_view(0, 0) = 400.0;
+        REQUIRE(matrix(7, 0) == 400.0);  // offset = 7
+
+        // Test MockVar5 (OnVoronoi, Scalar)
+        auto var5_view = block.get<MockVar5>();
+        var5_view(0, 0) = 500.0;
+        REQUIRE(matrix(16, 0) == 500.0);  // offset = 16
+    }
+
+    SECTION("Block get() works across multiple rods") {
+        using namespace elasticapp::mock;
+        auto& matrix = block.data();
+
+        // Rod 0: starts at column 0, has 4 nodes
+        // Rod 1: starts at column 4, has 6 nodes
+        // Rod 2: starts at column 10, has 3 nodes
+
+        auto var1_view = block.get<MockVar1>();
+
+        // Modify data for different rods
+        var1_view(0, 0) = 1.0;   // Rod 0, first node
+        var1_view(0, 3) = 2.0;   // Rod 0, last node
+        var1_view(0, 4) = 3.0;   // Rod 1, first node
+        var1_view(0, 9) = 4.0;   // Rod 1, last node
+        var1_view(0, 10) = 5.0;  // Rod 2, first node
+        var1_view(0, 12) = 6.0;  // Rod 2, last node
+
+        // Verify all modifications
+        REQUIRE(matrix(0, 0) == 1.0);
+        REQUIRE(matrix(0, 3) == 2.0);
+        REQUIRE(matrix(0, 4) == 3.0);
+        REQUIRE(matrix(0, 9) == 4.0);
+        REQUIRE(matrix(0, 10) == 5.0);
+        REQUIRE(matrix(0, 12) == 6.0);
+    }
+
+    SECTION("Block get() validates VariableTag is in System") {
+        using namespace elasticapp::mock;
+
+        // All valid variables from MockSystem should compile and work
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar1, typename MockSystem::Variables>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar2, typename MockSystem::Variables>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar3, typename MockSystem::Variables>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar4, typename MockSystem::Variables>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar5, typename MockSystem::Variables>);
+
+        // Test that non-MockVar is not in MockSystem::Variables
+        class NonMockVar : elasticapp::Placement::OnNode, elasticapp::DataType::Vector {};
+        STATIC_REQUIRE(!elasticapp::tuple_contains_v<NonMockVar, typename MockSystem::Variables>);
+
+        // Verify get() works for all valid variables (compile-time check)
+        auto v1 = block.get<MockVar1>();
+        auto v2 = block.get<MockVar2>();
+        auto v3 = block.get<MockVar3>();
+        auto v4 = block.get<MockVar4>();
+        auto v5 = block.get<MockVar5>();
+
+        // Use the variables to ensure they compile
+        (void)v1;
+        (void)v2;
+        (void)v3;
+        (void)v4;
+        (void)v5;
+    }
+
+    SECTION("Block get() const version works") {
+        using namespace elasticapp::mock;
+        const MockBlockSystem& const_block = block;
+        auto& matrix = block.data();
+
+        // Set some values
+        matrix(0, 0) = 42.0;
+        matrix(3, 1) = 43.0;
+        matrix(4, 2) = 44.0;
+
+        // Get const views
+        auto var1_view = const_block.get<MockVar1>();
+        auto var2_view = const_block.get<MockVar2>();
+        auto var3_view = const_block.get<MockVar3>();
+
+        // Verify we can read values
+        REQUIRE(var1_view(0, 0) == 42.0);
+        REQUIRE(var2_view(0, 1) == 43.0);
+        REQUIRE(var3_view(0, 2) == 44.0);
+
+        // Verify shapes (with width adjustment)
+        REQUIRE(var1_view.rows() == 3);
+        REQUIRE(var1_view.cols() == 15);  // OnNode: full width
+        REQUIRE(var2_view.rows() == 1);
+        REQUIRE(var2_view.cols() == 15);  // OnNode: full width
+        REQUIRE(var3_view.rows() == 3);
+        REQUIRE(var3_view.cols() == 14);  // OnElement: width - 1
+    }
+
+    SECTION("Block get() width adjustment based on placement type") {
+        using namespace elasticapp::mock;
+        // Block width = 4 + 6 + 3 + 2 (ghost) = 15
+
+        // Test OnNode variables get full width
+        auto var1_view = block.get<MockVar1>();  // OnNode
+        auto var2_view = block.get<MockVar2>();  // OnNode
+        REQUIRE(var1_view.cols() == 15);
+        REQUIRE(var2_view.cols() == 15);
+        REQUIRE(var1_view.cols() == block.width());
+        REQUIRE(var2_view.cols() == block.width());
+
+        // Test OnElement variables get width - 1
+        auto var3_view = block.get<MockVar3>();  // OnElement
+        auto var4_view = block.get<MockVar4>();  // OnElement
+        REQUIRE(var3_view.cols() == 14);  // 15 - 1
+        REQUIRE(var4_view.cols() == 14);  // 15 - 1
+        REQUIRE(var3_view.cols() == block.width() - 1);
+        REQUIRE(var4_view.cols() == block.width() - 1);
+
+        // Test OnVoronoi variables get width - 2
+        auto var5_view = block.get<MockVar5>();  // OnVoronoi
+        REQUIRE(var5_view.cols() == 13);  // 15 - 2
+        REQUIRE(var5_view.cols() == block.width() - 2);
+    }
+
+    SECTION("Block get() width adjustment with single rod") {
+        using namespace elasticapp::mock;
+        std::vector<std::size_t> single_rod = {8};  // 5 elems -> 6 nodes, width = 6
+        MockBlockSystem single_block(single_rod);
+
+        // OnNode: full width
+        auto var1_view = single_block.get<MockVar1>();
+        REQUIRE(var1_view.cols() == 9);
+
+        // OnElement: width - 1
+        auto var3_view = single_block.get<MockVar3>();
+        REQUIRE(var3_view.cols() == 8);  // 6 - 1
+
+        // OnVoronoi: width - 2
+        auto var5_view = single_block.get<MockVar5>();
+        REQUIRE(var5_view.cols() == 7);  // 6 - 2
+    }
+
+    SECTION("Block get() width adjustment edge cases") {
+        using namespace elasticapp::mock;
+        // Test with small width (minimum 6 elements required)
+        std::vector<std::size_t> small_rod = {6};  // 6 elem -> 7 nodes, width = 7
+        MockBlockSystem small_block(small_rod);
+
+        // OnNode: full width
+        auto var1_view = small_block.get<MockVar1>();
+        REQUIRE(var1_view.cols() == 7);
+
+        // OnElement: width - 1
+        auto var3_view = small_block.get<MockVar3>();
+        REQUIRE(var3_view.cols() == 6);  // 7 - 1
+
+        // OnVoronoi: width - 2
+        auto var5_view = small_block.get<MockVar5>();
+        REQUIRE(var5_view.cols() == 5);  // 7 - 2 = 5
+    }
+}
+
+TEST_CASE("Block ghost indices", "[block]") {
+    using namespace elasticapp::mock;
+
+    SECTION("Ghost nodes indices with multiple rods") {
+        std::vector<std::size_t> n_elems_per_rod = {3, 5, 2};
+        MockBlockSystem block(n_elems_per_rod);
+        // Rod 0: 3 elems -> 4 nodes (indices 0-3)
+        // Ghost node at 4
+        // Rod 1: 5 elems -> 6 nodes (indices 5-10)
+        // Ghost node at 11
+        // Rod 2: 2 elems -> 3 nodes (indices 12-14)
+
+        auto ghost_nodes = block.ghost_nodes_idx();
+        REQUIRE(ghost_nodes.size() == 2);  // n_rods - 1
+        REQUIRE(ghost_nodes[0] == 4);      // After rod 0
+        REQUIRE(ghost_nodes[1] == 11);      // After rod 1
+    }
+
+    SECTION("Ghost elements indices with multiple rods") {
+        std::vector<std::size_t> n_elems_per_rod = {3, 5, 2};
+        MockBlockSystem block(n_elems_per_rod);
+
+        auto ghost_elems = block.ghost_elems_idx();
+        REQUIRE(ghost_elems.size() == 5);   // 2 * (n_rods - 1)
+        // For ghost node at 4: elements at 3 and 4
+        REQUIRE(ghost_elems[0] == 3);
+        REQUIRE(ghost_elems[1] == 4);
+        // For ghost node at 11: elements at 10 and 11
+        REQUIRE(ghost_elems[2] == 10);
+        REQUIRE(ghost_elems[3] == 11);
+    }
+
+    SECTION("Ghost voronoi indices with multiple rods") {
+        std::vector<std::size_t> n_elems_per_rod = {3, 5, 2};
+        MockBlockSystem block(n_elems_per_rod);
+
+        auto ghost_voronoi = block.ghost_voronoi_idx();
+        REQUIRE(ghost_voronoi.size() == 8);  // 3 * (n_rods - 1)
+        // For ghost node at 4: voronoi at 2, 3, 4
+        REQUIRE(ghost_voronoi[0] == 2);
+        REQUIRE(ghost_voronoi[1] == 3);
+        REQUIRE(ghost_voronoi[2] == 4);
+        // For ghost node at 11: voronoi at 9, 10, 11
+        REQUIRE(ghost_voronoi[3] == 9);
+        REQUIRE(ghost_voronoi[4] == 10);
+        REQUIRE(ghost_voronoi[5] == 11);
+        REQUIRE(ghost_voronoi[6] == 13);
+        REQUIRE(ghost_voronoi[7] == 14);
+    }
+
+    SECTION("Ghost indices with single rod") {
+        std::vector<std::size_t> n_elems_per_rod = {6};
+        MockBlockSystem block(n_elems_per_rod);
+
+        auto ghost_nodes = block.ghost_nodes_idx();
+        auto ghost_elems = block.ghost_elems_idx();
+        auto ghost_voronoi = block.ghost_voronoi_idx();
+
+        REQUIRE(ghost_nodes.empty());
+        // Since we have rectangular block, ghost elements and voronoi should not be empty
+        REQUIRE(ghost_elems.size() == 1);
+        REQUIRE(ghost_elems[0] == 6);
+        REQUIRE(ghost_voronoi.size() == 2);
+        REQUIRE(ghost_voronoi[0] == 5);
+        REQUIRE(ghost_voronoi[1] == 6);
+    }
+
+    SECTION("Ghost indices with two rods") {
+        std::vector<std::size_t> n_elems_per_rod = {4, 3};
+        MockBlockSystem block(n_elems_per_rod);
+        // Rod 0: 2 elems -> 3 nodes (indices 0-2)
+        // Ghost node at 3
+        // Rod 1: 3 elems -> 4 nodes (indices 4-7)
+
+        auto ghost_nodes = block.ghost_nodes_idx();
+        REQUIRE(ghost_nodes.size() == 1);
+        REQUIRE(ghost_nodes[0] == 5);
+
+        auto ghost_elems = block.ghost_elems_idx();
+        REQUIRE(ghost_elems.size() == 3);
+        REQUIRE(ghost_elems[0] == 4);  // Element before ghost node
+        REQUIRE(ghost_elems[1] == 5);  // Element at ghost node
+        REQUIRE(ghost_elems[2] == 9);  // Element after ghost node
+
+        auto ghost_voronoi = block.ghost_voronoi_idx();
+        REQUIRE(ghost_voronoi.size() == 5);
+        REQUIRE(ghost_voronoi[0] == 3);  // Voronoi 2 before ghost node
+        REQUIRE(ghost_voronoi[1] == 4);  // Voronoi 1 before ghost node
+        REQUIRE(ghost_voronoi[2] == 5);  // Voronoi at ghost node
+        REQUIRE(ghost_voronoi[3] == 8);  // Voronoi after ghost node
+        REQUIRE(ghost_voronoi[4] == 9);  // Voronoi after ghost node
+    }
+
+    SECTION("Ghost indices with empty block") {
+        std::vector<std::size_t> n_elems_per_rod = {};
+        REQUIRE_THROWS_AS(MockBlockSystem(n_elems_per_rod), std::invalid_argument);
+    }
+}
+
+TEST_CASE("Block reset_ghost operations", "[block]") {
+    using namespace elasticapp::mock;
+
+    SECTION("reset_ghost_for_variable for OnNode variable") {
+        std::vector<std::size_t> n_elems_per_rod = {3, 5};
+        MockBlockSystem block(n_elems_per_rod);
+        // Ghost nodes at indices 4, 10
+
+        // Get variable view and modify ghost positions
+        auto var1_view = block.get<MockVar1>();  // OnNode, Vector
+        var1_view(0, 4) = 999.0;  // Modify ghost node
+        var1_view(1, 4) = 888.0;
+        var1_view(2, 4) = 777.0;
+
+        // Reset ghost for this variable
+        block.reset_ghost_for_variable<MockVar1>();
+
+        // Verify ghost values are reset (MockVar1 uses DataType::Vector, ghost_value = Zero(3,1))
+        REQUIRE(var1_view(0, 4) == Approx(0.0));
+        REQUIRE(var1_view(1, 4) == Approx(0.0));
+        REQUIRE(var1_view(2, 4) == Approx(0.0));
+    }
+
+    SECTION("reset_ghost_for_variable for OnElement variable") {
+        std::vector<std::size_t> n_elems_per_rod = {3, 5};
+        MockBlockSystem block(n_elems_per_rod);
+        // Ghost elements at indices 3, 4, 10, 11
+
+        // Get variable view and modify ghost positions
+        auto var3_view = block.get<MockVar3>();  // OnElement, Vector
+        var3_view(0, 3) = 999.0;  // Modify ghost element
+        var3_view(1, 3) = 888.0;
+        var3_view(2, 3) = 777.0;
+
+        // Reset ghost for this variable
+        block.reset_ghost_for_variable<MockVar3>();
+
+        // Verify ghost values are reset (MockVar3 uses DataType::Vector, ghost_value = Zero(3,1))
+        REQUIRE(var3_view(0, 3) == Approx(0.0));
+        REQUIRE(var3_view(1, 3) == Approx(0.0));
+        REQUIRE(var3_view(2, 3) == Approx(0.0));
+    }
+
+    SECTION("reset_ghost_for_variable for OnVoronoi variable") {
+        std::vector<std::size_t> n_elems_per_rod = {3, 5};
+        MockBlockSystem block(n_elems_per_rod);
+        // Ghost voronoi at indices 2, 3, 4, 9, 10, 11
+
+        // Get variable view and modify ghost positions
+        auto var5_view = block.get<MockVar5>();  // OnVoronoi, Scalar
+        var5_view(0, 2) = 999.0;  // Modify ghost voronoi
+
+        // Reset ghost for this variable
+        block.reset_ghost_for_variable<MockVar5>();
+
+        // Verify ghost values are reset (MockVar5 uses DataType::Scalar, ghost_value = Zero(1,1))
+        REQUIRE(var5_view(0, 2) == Approx(0.0));
+    }
+
+    SECTION("reset_ghost resets all variables") {
+        std::vector<std::size_t> n_elems_per_rod = {4, 3};
+        MockBlockSystem block(n_elems_per_rod);
+        // Ghost nodes at index 3
+
+        // Modify ghost positions for multiple variables
+        auto var1_view = block.get<MockVar1>();  // OnNode
+        auto var2_view = block.get<MockVar2>();  // OnNode
+        auto var3_view = block.get<MockVar3>();  // OnElement
+
+        var1_view(0, 5) = 999.0;
+        var2_view(0, 5) = 888.0;
+        var3_view(0, 4) = 777.0;  // Ghost element at 2 (before ghost node at 3)
+
+        // Reset all ghosts
+        block.reset_ghost();
+
+        // Verify all ghost values are reset
+        REQUIRE(var1_view(0, 5) == Approx(0.0));
+        REQUIRE(var2_view(0, 5) == Approx(0.0));
+        REQUIRE(var3_view(0, 4) == Approx(0.0));
+    }
+
+    SECTION("reset_ghost called in constructor") {
+        std::vector<std::size_t> n_elems_per_rod = {3, 5};
+        MockBlockSystem block(n_elems_per_rod);
+        // Constructor should have called reset_ghost()
+
+        // Check that ghost values are already initialized
+        auto var1_view = block.get<MockVar1>();
+        auto ghost_nodes = block.ghost_nodes_idx();
+
+        if (!ghost_nodes.empty()) {
+            std::size_t ghost_col = ghost_nodes[0];
+            // Ghost values should be zero (default ghost_value for Vector)
+            REQUIRE(var1_view(0, ghost_col) == Approx(0.0));
+            REQUIRE(var1_view(1, ghost_col) == Approx(0.0));
+            REQUIRE(var1_view(2, ghost_col) == Approx(0.0));
+        }
+    }
+
+    SECTION("reset_ghost with single rod (no ghosts)") {
+        std::vector<std::size_t> n_elems_per_rod = {6};
+        MockBlockSystem block(n_elems_per_rod);
+
+        // Should not crash even with no ghost nodes
+        REQUIRE_NOTHROW(block.reset_ghost());
+        REQUIRE_NOTHROW(block.reset_ghost_for_variable<MockVar1>());
+    }
+
+    SECTION("reset_ghost with empty block") {
+        std::vector<std::size_t> n_elems_per_rod = {};
+        REQUIRE_THROWS_AS(MockBlockSystem(n_elems_per_rod), std::invalid_argument);
+    }
+}
diff --git a/backend/tests/cpp/test_block_operations.cpp b/backend/tests/cpp/test_block_operations.cpp
new file mode 100644
index 00000000..5583964a
--- /dev/null
+++ b/backend/tests/cpp/test_block_operations.cpp
@@ -0,0 +1,123 @@
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <vector>
+#include <cmath>
+#include "block.h"
+#include "mock/mock_block_system_with_operation.h"
+
+using MockBlockSystemWithOps = elasticapp::mock::MockBlockSystemWithOperations;
+using namespace elasticapp::mock;
+using Catch::Approx;
+
+TEST_CASE("Block with Operations - Addition", "[block][operations]") {
+    std::vector<std::size_t> n_elems_per_rod = {3, 5};
+    MockBlockSystemWithOps block(n_elems_per_rod);
+
+    SECTION("Operations can be called on block") {
+        // Verify the block has the operations method
+        REQUIRE_NOTHROW(block.add_variables());
+    }
+
+    SECTION("Add variables operation - Vector + Scalar") {
+        // Get variable views
+        auto var1 = block.template get<MockVar1>();  // Vector (3 rows)
+        auto var2 = block.template get<MockVar2>();  // Scalar (1 row)
+
+        // Initialize test data
+        // Set var1 to some values
+        var1(0, 0) = 1.0;  // First component, first column
+        var1(1, 0) = 2.0;  // Second component, first column
+        var1(2, 0) = 3.0;  // Third component, first column
+
+        var1(0, 1) = 4.0;
+        var1(1, 1) = 5.0;
+        var1(2, 1) = 6.0;
+
+        // Set var2 (scalar) values
+        var2(0, 0) = 10.0;  // First column
+        var2(0, 1) = 20.0;  // Second column
+
+        // Perform addition operation
+        block.add_variables();
+
+        // Verify results: var1 should now be var1 + var2 (broadcasted)
+        REQUIRE(var1(0, 0) == Approx(11.0));  // 1.0 + 10.0
+        REQUIRE(var1(1, 0) == Approx(12.0));  // 2.0 + 10.0
+        REQUIRE(var1(2, 0) == Approx(13.0));  // 3.0 + 10.0
+
+        REQUIRE(var1(0, 1) == Approx(24.0));  // 4.0 + 20.0
+        REQUIRE(var1(1, 1) == Approx(25.0));  // 5.0 + 20.0
+        REQUIRE(var1(2, 1) == Approx(26.0));  // 6.0 + 20.0
+    }
+
+    SECTION("Add vector to itself operation") {
+        // Get variable view
+        auto var1 = block.template get<MockVar1>();  // Vector (3 rows)
+
+        // Initialize test data
+        var1(0, 0) = 1.0;
+        var1(1, 0) = 2.0;
+        var1(2, 0) = 3.0;
+
+        var1(0, 1) = 4.0;
+        var1(1, 1) = 5.0;
+        var1(2, 1) = 6.0;
+
+        // Perform addition operation (adds var1 to itself)
+        block.add_vector_to_itself();
+
+        // Verify results: var1 should now be 2 * original
+        REQUIRE(var1(0, 0) == Approx(2.0));  // 1.0 * 2
+        REQUIRE(var1(1, 0) == Approx(4.0));  // 2.0 * 2
+        REQUIRE(var1(2, 0) == Approx(6.0));  // 3.0 * 2
+
+        REQUIRE(var1(0, 1) == Approx(8.0));  // 4.0 * 2
+        REQUIRE(var1(1, 1) == Approx(10.0));  // 5.0 * 2
+        REQUIRE(var1(2, 1) == Approx(12.0));  // 6.0 * 2
+    }
+
+    SECTION("Operations work across multiple rods") {
+        // Block has 2 rods: rod 0 with 3 elems (4 nodes), rod 1 with 5 elems (6 nodes)
+        // Total width = 4 + 6 = 10
+
+        auto var1 = block.template get<MockVar1>();
+        auto var2 = block.template get<MockVar2>();
+
+        // Set values for rod 0 (columns 0-3)
+        var1(0, 0) = 1.0;
+        var1(1, 0) = 2.0;
+        var1(2, 0) = 3.0;
+        var2(0, 0) = 5.0;
+
+        // Set values for rod 1 (columns 4-9)
+        var1(0, 4) = 10.0;
+        var1(1, 4) = 20.0;
+        var1(2, 4) = 30.0;
+        var2(0, 4) = 50.0;
+
+        // Perform operation
+        block.add_variables();
+
+        // Verify rod 0 results
+        REQUIRE(var1(0, 0) == Approx(6.0));  // 1.0 + 5.0
+        REQUIRE(var1(1, 0) == Approx(7.0));  // 2.0 + 5.0
+        REQUIRE(var1(2, 0) == Approx(8.0));  // 3.0 + 5.0
+
+        // Verify rod 1 results
+        REQUIRE(var1(0, 4) == Approx(60.0));  // 10.0 + 50.0
+        REQUIRE(var1(1, 4) == Approx(70.0));  // 20.0 + 50.0
+        REQUIRE(var1(2, 4) == Approx(80.0));  // 30.0 + 50.0
+    }
+
+    SECTION("Operations can access block data and methods") {
+        // Verify that operations can access block's public interface
+        REQUIRE(block.width() > 0);
+        REQUIRE(block.depth() == 17);  // MockSystem depth
+        REQUIRE(block.n_systems() == 2);
+
+        // Operations should be able to call block methods
+        auto var1 = block.template get<MockVar1>();
+        REQUIRE(var1.rows() == 3);  // Vector dimension
+        REQUIRE(var1.cols() == block.width());
+    }
+}
diff --git a/backend/tests/cpp/test_block_view.cpp b/backend/tests/cpp/test_block_view.cpp
new file mode 100644
index 00000000..99505142
--- /dev/null
+++ b/backend/tests/cpp/test_block_view.cpp
@@ -0,0 +1,146 @@
+#include <catch2/catch_test_macros.hpp>
+#include "../../src/block.h"
+#include "mock/mock_block_system.h"
+#include <vector>
+#include <cmath>
+
+using MockBlockSystem = elasticapp::mock::MockBlockSystem;
+using MockBlockSystemView = typename MockBlockSystem::View;
+
+TEST_CASE("BlockView - Variable Retrieval", "[block_view]") {
+    std::vector<std::size_t> n_elems_per_rod = {3, 5, 2};
+    MockBlockSystem block(n_elems_per_rod);
+
+    SECTION("BlockView has correct shape") {
+        auto&& view1 = block.at(0);
+        auto&& view2 = block.at(1);
+        REQUIRE(view1.shape() == std::pair(17UL, 4UL));
+        REQUIRE(view2.shape() == std::pair(17UL, 6UL));
+        STATIC_REQUIRE(std::is_same_v<decltype(view1), MockBlockSystemView&&>);
+        STATIC_REQUIRE(std::is_same_v<decltype(view2), MockBlockSystemView&&>);
+    }
+
+    SECTION("BlockView data read/write access") {
+        for(size_t rod_index = 0; rod_index < block.n_systems(); ++rod_index) {
+            auto& matrix = block.data();
+            auto&& view = block.at(rod_index);
+            auto&& view_matrix = view.data();
+            size_t start_index = block.system_start_index(rod_index);
+
+            // Read access test
+            // start_index is a column index, so we access matrix(row, start_index + col)
+            matrix(0, start_index) = 1.2;
+            matrix(1, start_index + 1) = 2.3;
+            matrix(2, start_index + 2) = 3.4;
+            REQUIRE(matrix(0, start_index) == view_matrix(0, 0));
+            REQUIRE(matrix(1, start_index + 1) == view_matrix(1, 1));
+            REQUIRE(matrix(2, start_index + 2) == view_matrix(2, 2));
+
+            // Write access test
+            view_matrix(0, 0) = 4.2;
+            view_matrix(1, 1) = 5.3;
+            view_matrix(2, 2) = 6.4;
+            REQUIRE(matrix(0, start_index) == 4.2);
+            REQUIRE(matrix(1, start_index + 1) == 5.3);
+            REQUIRE(matrix(2, start_index + 2) == 6.4);
+        }
+    }
+
+    SECTION("BlockView get() method for specific variables") {
+        using namespace elasticapp::mock;
+        auto&& view = block.at(0);
+        auto& matrix = block.data();
+
+        // Test MockVar1 (Node, Vector, offset=0, dimension=3)
+        auto var1_view = view.get<MockVar1>();
+        REQUIRE(var1_view.rows() == 3);
+        REQUIRE(var1_view.cols() == 4);  // rod_n_nodes_ = 3 + 1 = 4
+
+        // Verify it's a view (no copy) - modify through view and check original
+        var1_view(0, 0) = 10.0;
+        var1_view(1, 1) = 20.0;
+        var1_view(2, 2) = 30.0;
+        size_t start_index = block.system_start_index(0);
+        REQUIRE(matrix(0, start_index) == 10.0);
+        REQUIRE(matrix(1, start_index + 1) == 20.0);
+        REQUIRE(matrix(2, start_index + 2) == 30.0);
+
+        // Test MockVar2 (Node, Scalar, offset=3, dimension=1)
+        auto var2_view = view.get<MockVar2>();
+        REQUIRE(var2_view.rows() == 1);
+        REQUIRE(var2_view.cols() == 4);  // rod_n_nodes_ = 4
+        var2_view(0, 0) = 100.0;
+        REQUIRE(matrix(3, start_index) == 100.0);
+
+        // Test MockVar3 (Element, Vector, offset=4, dimension=3)
+        auto var3_view = view.get<MockVar3>();
+        REQUIRE(var3_view.rows() == 3);
+        REQUIRE(var3_view.cols() == 3);  // rod_n_elems_ = 3
+        var3_view(0, 0) = 200.0;
+        REQUIRE(matrix(4, start_index) == 200.0);
+
+        // Test MockVar4 (Element, Matrix, offset=7, dimension=9)
+        auto var4_view = view.get<MockVar4>();
+        REQUIRE(var4_view.rows() == 9);
+        REQUIRE(var4_view.cols() == 3);  // rod_n_elems_ = 3
+        var4_view(0, 0) = 300.0;
+        REQUIRE(matrix(7, start_index) == 300.0);
+
+        // Test MockVar5 (Voronoi, Scalar, offset=16, dimension=1)
+        auto var5_view = view.get<MockVar5>();
+        REQUIRE(var5_view.rows() == 1);
+        REQUIRE(var5_view.cols() == 2);  // rod_n_voronoi_ = 3 - 1 = 2
+        var5_view(0, 0) = 400.0;
+        REQUIRE(matrix(16, start_index) == 400.0);
+    }
+
+    SECTION("BlockView get() validates VariableTag is in System") {
+        using namespace elasticapp::mock;
+        auto&& view = block.at(0);
+
+
+        // All valid variables from MockSystem should compile and work
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar1, typename MockSystem::Variables>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar2, typename MockSystem::Variables>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar3, typename MockSystem::Variables>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar4, typename MockSystem::Variables>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar5, typename MockSystem::Variables>);
+
+        // Test that non-MockVar is not in MockSystem::Variables
+        class NonMockVar : elasticapp::Placement::OnNode, elasticapp::DataType::Vector {};
+        STATIC_REQUIRE(!elasticapp::tuple_contains_v<NonMockVar, typename MockSystem::Variables>);
+
+        // Verify get() works for all valid variables (compile-time check)
+        auto v1 = view.get<MockVar1>();
+        auto v2 = view.get<MockVar2>();
+        auto v3 = view.get<MockVar3>();
+        auto v4 = view.get<MockVar4>();
+        auto v5 = view.get<MockVar5>();
+
+        // Use the variables to ensure they compile
+        (void)v1;
+        (void)v2;
+        (void)v3;
+        (void)v4;
+        (void)v5;
+    }
+
+    SECTION("tuple_contains_v trait works correctly") {
+        using namespace elasticapp::mock;
+        using MockVars = typename MockSystem::Variables;
+
+        // Test that tuple_contains_v correctly identifies members
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar1, MockVars>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar2, MockVars>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar3, MockVars>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar4, MockVars>);
+        STATIC_REQUIRE(elasticapp::tuple_contains_v<MockVar5, MockVars>);
+
+        // Test with invalid types (should be false)
+        struct InvalidVar1 : elasticapp::Placement::OnNode, elasticapp::DataType::Vector {};
+        struct InvalidVar2 : elasticapp::Placement::OnElement, elasticapp::DataType::Scalar {};
+
+        STATIC_REQUIRE(!elasticapp::tuple_contains_v<InvalidVar1, MockVars>);
+        STATIC_REQUIRE(!elasticapp::tuple_contains_v<InvalidVar2, MockVars>);
+    }
+}
diff --git a/backend/tests/cpp/test_system.cpp b/backend/tests/cpp/test_system.cpp
new file mode 100644
index 00000000..219a5e54
--- /dev/null
+++ b/backend/tests/cpp/test_system.cpp
@@ -0,0 +1,40 @@
+#include <catch2/catch_test_macros.hpp>
+#include <type_traits>
+#include "system.h"
+#include "cosserat_rod_system.h"
+#include "mock/mock_block_system.h"
+
+TEST_CASE("Variable tags - compile-time only", "[system]") {
+    // Variables should be tag types, not instantiable
+    // We can use them as template parameters
+
+    SECTION("Variable placement types exist") {
+        static_assert(std::is_same_v<elasticapp::Placement::OnNode, elasticapp::Placement::OnNode>);
+        static_assert(std::is_same_v<elasticapp::Placement::OnElement, elasticapp::Placement::OnElement>);
+        static_assert(std::is_same_v<elasticapp::Placement::OnVoronoi, elasticapp::Placement::OnVoronoi>);
+    }
+}
+
+TEST_CASE("Dimension types", "[system]") {
+    SECTION("Dimension types exist") {
+        static_assert(std::is_empty_v<elasticapp::DataType::Scalar>);
+        static_assert(std::is_empty_v<elasticapp::DataType::Vector>);
+        static_assert(std::is_empty_v<elasticapp::DataType::Matrix>);
+    }
+
+    SECTION("Dimension types have correct sizes") {
+        STATIC_REQUIRE(std::is_integral_v<decltype(elasticapp::DataType::Scalar::dimension)>);
+        STATIC_REQUIRE(std::is_integral_v<decltype(elasticapp::DataType::Vector::dimension)>);
+        STATIC_REQUIRE(std::is_integral_v<decltype(elasticapp::DataType::Matrix::dimension)>);
+    }
+}
+
+TEST_CASE("System block depth computation", "[system]") {
+
+    using MockSystem = elasticapp::mock::MockSystem;
+
+    SECTION("MockSystem has correct depth") {
+        REQUIRE(MockSystem::get_depth() == 17);
+    }
+
+}
diff --git a/backend/tests/cpp/test_system_variable_validation.cpp b/backend/tests/cpp/test_system_variable_validation.cpp
new file mode 100644
index 00000000..844df5de
--- /dev/null
+++ b/backend/tests/cpp/test_system_variable_validation.cpp
@@ -0,0 +1,78 @@
+#include <catch2/catch_test_macros.hpp>
+#include "../../src/system.h"
+#include <type_traits>
+#include <string_view>
+
+namespace elasticapp {
+
+// Valid variable examples
+struct ValidVar1 : Placement::OnNode, DataType::Vector {
+    static constexpr std::string_view name = "valid_var1";
+};
+struct ValidVar2 : Placement::OnElement, DataType::Scalar {
+    static constexpr std::string_view name = "valid_var2";
+};
+struct ValidVar3 : Placement::OnVoronoi, DataType::Matrix {
+    static constexpr std::string_view name = "valid_var3";
+};
+
+// Invalid variable examples (for testing compile-time checks)
+struct InvalidVarNoPlacement : DataType::Vector {};  // Missing Placement
+struct InvalidVarNoDataType : Placement::OnNode {};  // Missing DataType
+struct InvalidVarNeither {};  // Missing both
+
+// Dummy classes that use System as a mixin
+template<ValidVariable... Vars>
+class DummySystem : public System<Vars...> {
+public:
+    // Expose block_depth() method that uses System's static get_depth()
+    std::size_t block_depth() const {
+        return System<Vars...>::get_depth();
+    }
+};
+
+} // namespace elasticapp
+
+TEST_CASE("System - Variable Validation", "[system]") {
+    SECTION("Valid variables compile successfully") {
+        // These should compile without errors - using System as a mixin
+        elasticapp::DummySystem<elasticapp::ValidVar1> system1;
+        elasticapp::DummySystem<elasticapp::ValidVar1, elasticapp::ValidVar2> system2;
+        elasticapp::DummySystem<elasticapp::ValidVar1, elasticapp::ValidVar2, elasticapp::ValidVar3> system3;
+
+        REQUIRE(system1.block_depth() == 3);  // Vector = 3
+        REQUIRE(system2.block_depth() == 4);  // Vector(3) + Scalar(1) = 4
+        REQUIRE(system3.block_depth() == 13);  // Vector(3) + Scalar(1) + Matrix(9) = 13
+    }
+
+    SECTION("Variable validation concepts work correctly") {
+        // Test HasPlacementTag concept
+        static_assert(elasticapp::HasPlacementTag<elasticapp::ValidVar1>);
+        static_assert(elasticapp::HasPlacementTag<elasticapp::ValidVar2>);
+        static_assert(elasticapp::HasPlacementTag<elasticapp::ValidVar3>);
+        static_assert(!elasticapp::HasPlacementTag<elasticapp::InvalidVarNoPlacement>);
+
+        // Test HasDataTypeTag concept
+        static_assert(elasticapp::HasDataTypeTag<elasticapp::ValidVar1>);
+        static_assert(elasticapp::HasDataTypeTag<elasticapp::ValidVar2>);
+        static_assert(elasticapp::HasDataTypeTag<elasticapp::ValidVar3>);
+        static_assert(!elasticapp::HasDataTypeTag<elasticapp::InvalidVarNoDataType>);
+
+        // Test ValidVariable concept
+        static_assert(elasticapp::ValidVariable<elasticapp::ValidVar1>);
+        static_assert(elasticapp::ValidVariable<elasticapp::ValidVar2>);
+        static_assert(elasticapp::ValidVariable<elasticapp::ValidVar3>);
+        static_assert(!elasticapp::ValidVariable<elasticapp::InvalidVarNoPlacement>);
+        static_assert(!elasticapp::ValidVariable<elasticapp::InvalidVarNoDataType>);
+        static_assert(!elasticapp::ValidVariable<elasticapp::InvalidVarNeither>);
+    }
+}
+
+// Uncomment these to verify compile-time errors are caught:
+// TEST_CASE("System - Invalid Variables (should not compile)", "[system]") {
+//     // These should cause compile-time errors
+//     // elasticapp::System<elasticapp::InvalidVarNoPlacement> invalid1(10);
+//     // elasticapp::System<elasticapp::InvalidVarNoDataType> invalid2(10);
+//     // elasticapp::System<elasticapp::InvalidVarNeither> invalid3(10);
+//     // elasticapp::System<elasticapp::ValidVar1, elasticapp::InvalidVarNoPlacement> invalid4(10);
+// }
diff --git a/backend/tests/cpp/test_traits.cpp b/backend/tests/cpp/test_traits.cpp
new file mode 100644
index 00000000..5087a87d
--- /dev/null
+++ b/backend/tests/cpp/test_traits.cpp
@@ -0,0 +1,339 @@
+#include <catch2/catch_test_macros.hpp>
+#include "traits.h"
+#include "block.h"
+#include "mock/mock_block_system.h"
+
+using MockBlockSystem = elasticapp::mock::MockBlockSystem;
+
+TEST_CASE("Traits - Matrix storage order", "[traits]") {
+    SECTION("Matrix type is defined") {
+        std::vector<std::size_t> n_elems_per_rod = {8};  // 3 elements -> 4 nodes (width)
+        MockBlockSystem block(n_elems_per_rod);
+        auto& matrix = block.data();
+
+        // Verify matrix has correct dimensions
+        REQUIRE(matrix.rows() == 17);  // MockSystem depth
+        REQUIRE(matrix.cols() == 9);    // (3+1) nodes + 0 ghost = 4 nodes
+
+        // Verify we can access and modify data
+        matrix(0, 0) = 1.0;
+        REQUIRE(matrix(0, 0) == 1.0);
+    }
+
+    SECTION("Stride computation") {
+        auto strides = elasticapp::compute_strides(3, 4);
+
+        if constexpr (elasticapp::IsRowMajor) {
+            // Row-major: row stride = cols * sizeof(double), col stride = sizeof(double)
+            REQUIRE(strides.first == static_cast<ptrdiff_t>(4 * sizeof(double)));
+            REQUIRE(strides.second == static_cast<ptrdiff_t>(sizeof(double)));
+        } else {
+            // Column-major: row stride = sizeof(double), col stride = rows * sizeof(double)
+            REQUIRE(strides.first == static_cast<ptrdiff_t>(sizeof(double)));
+            REQUIRE(strides.second == static_cast<ptrdiff_t>(3 * sizeof(double)));
+        }
+    }
+}
+
+TEST_CASE("Traits - get_column_slice", "[traits]") {
+    SECTION("Basic column slice - non-const") {
+        elasticapp::MatrixType matrix(5, 10);  // 5 rows, 10 columns
+        matrix.setZero();
+
+        // Fill some columns with test data
+        for (std::size_t col = 0; col < 10; ++col) {
+            for (std::size_t row = 0; row < 5; ++row) {
+                matrix(row, col) = static_cast<double>(col * 10 + row);
+            }
+        }
+
+        // Get a slice of columns 2-5 (4 columns)
+        auto slice = elasticapp::get_column_slice(matrix, 2, 4);
+
+        // Verify dimensions
+        REQUIRE(slice.rows() == 5);
+        REQUIRE(slice.cols() == 4);
+
+        // Verify data matches original matrix
+        for (std::size_t col = 0; col < 4; ++col) {
+            for (std::size_t row = 0; row < 5; ++row) {
+                REQUIRE(slice(row, col) == matrix(row, col + 2));
+            }
+        }
+    }
+
+    SECTION("Column slice is a view, not a copy") {
+        elasticapp::MatrixType matrix(3, 6);
+        matrix.setZero();
+
+        // Get a slice
+        auto slice = elasticapp::get_column_slice(matrix, 1, 3);
+
+        // Modify through the slice
+        slice(0, 0) = 42.0;
+        slice(1, 1) = 99.0;
+        slice(2, 2) = 123.0;
+
+        // Verify original matrix is modified
+        REQUIRE(matrix(0, 1) == 42.0);   // slice(0,0) -> matrix(0,1)
+        REQUIRE(matrix(1, 2) == 99.0);    // slice(1,1) -> matrix(1,2)
+        REQUIRE(matrix(2, 3) == 123.0);   // slice(2,2) -> matrix(2,3)
+    }
+
+    SECTION("Basic column slice - const") {
+        elasticapp::MatrixType matrix(4, 8);
+        matrix.setZero();
+
+        // Fill with test data
+        for (std::size_t col = 0; col < 8; ++col) {
+            for (std::size_t row = 0; row < 4; ++row) {
+                matrix(row, col) = static_cast<double>(col * 100 + row);
+            }
+        }
+
+        // Get a const slice
+        const elasticapp::MatrixType& const_matrix = matrix;
+        auto const_slice = elasticapp::get_column_slice(const_matrix, 3, 2);
+
+        // Verify dimensions
+        REQUIRE(const_slice.rows() == 4);
+        REQUIRE(const_slice.cols() == 2);
+
+        // Verify data matches
+        REQUIRE(const_slice(0, 0) == matrix(0, 3));
+        REQUIRE(const_slice(3, 1) == matrix(3, 4));
+    }
+
+    SECTION("Single column slice") {
+        elasticapp::MatrixType matrix(5, 7);
+        matrix.setZero();
+
+        // Fill column 3 with test data
+        for (std::size_t row = 0; row < 5; ++row) {
+            matrix(row, 3) = static_cast<double>(row * 10);
+        }
+
+        auto slice = elasticapp::get_column_slice(matrix, 3, 1);
+
+        REQUIRE(slice.rows() == 5);
+        REQUIRE(slice.cols() == 1);
+
+        for (std::size_t row = 0; row < 5; ++row) {
+            REQUIRE(slice(row, 0) == static_cast<double>(row * 10));
+        }
+    }
+
+    SECTION("Full matrix slice") {
+        elasticapp::MatrixType matrix(6, 8);
+        matrix.setZero();
+
+        // Fill with test data
+        for (std::size_t col = 0; col < 8; ++col) {
+            for (std::size_t row = 0; row < 6; ++row) {
+                matrix(row, col) = static_cast<double>(row * 8 + col);
+            }
+        }
+
+        // Get slice of all columns
+        auto slice = elasticapp::get_column_slice(matrix, 0, 8);
+
+        REQUIRE(slice.rows() == 6);
+        REQUIRE(slice.cols() == 8);
+
+        // Verify all data matches
+        for (std::size_t col = 0; col < 8; ++col) {
+            for (std::size_t row = 0; row < 6; ++row) {
+                REQUIRE(slice(row, col) == matrix(row, col));
+            }
+        }
+    }
+
+    SECTION("Slice at end of matrix") {
+        elasticapp::MatrixType matrix(4, 10);
+        matrix.setZero();
+
+        // Fill last 3 columns
+        for (std::size_t col = 7; col < 10; ++col) {
+            for (std::size_t row = 0; row < 4; ++row) {
+                matrix(row, col) = static_cast<double>(col * 100 + row);
+            }
+        }
+
+        auto slice = elasticapp::get_column_slice(matrix, 7, 3);
+
+        REQUIRE(slice.rows() == 4);
+        REQUIRE(slice.cols() == 3);
+
+        // Verify data
+        REQUIRE(slice(0, 0) == matrix(0, 7));
+        REQUIRE(slice(3, 2) == matrix(3, 9));
+    }
+}
+
+TEST_CASE("Traits - get_block_slice", "[traits]") {
+    SECTION("Basic block slice - non-const") {
+        elasticapp::MatrixType matrix(8, 12);
+        matrix.setZero();
+
+        // Fill matrix with test data
+        for (std::size_t row = 0; row < 8; ++row) {
+            for (std::size_t col = 0; col < 12; ++col) {
+                matrix(row, col) = static_cast<double>(row * 100 + col);
+            }
+        }
+
+        // Get a block slice: rows 2-5 (4 rows), columns 3-7 (5 columns)
+        auto slice = elasticapp::get_block_slice(matrix, 2, 4, 3, 5);
+
+        // Verify dimensions
+        REQUIRE(slice.rows() == 4);
+        REQUIRE(slice.cols() == 5);
+
+        // Verify data matches original matrix
+        for (std::size_t row = 0; row < 4; ++row) {
+            for (std::size_t col = 0; col < 5; ++col) {
+                REQUIRE(slice(row, col) == matrix(row + 2, col + 3));
+            }
+        }
+    }
+
+    SECTION("Block slice is a view, not a copy") {
+        elasticapp::MatrixType matrix(6, 10);
+        matrix.setZero();
+
+        // Get a block slice
+        auto slice = elasticapp::get_block_slice(matrix, 1, 3, 2, 4);
+
+        // Modify through the slice
+        slice(0, 0) = 42.0;
+        slice(1, 1) = 99.0;
+        slice(2, 2) = 123.0;
+
+        // Verify original matrix is modified
+        REQUIRE(matrix(1, 2) == 42.0);   // slice(0,0) -> matrix(1,2)
+        REQUIRE(matrix(2, 3) == 99.0);    // slice(1,1) -> matrix(2,3)
+        REQUIRE(matrix(3, 4) == 123.0);   // slice(2,2) -> matrix(3,4)
+    }
+
+    SECTION("Basic block slice - const") {
+        elasticapp::MatrixType matrix(5, 8);
+        matrix.setZero();
+
+        // Fill with test data
+        for (std::size_t row = 0; row < 5; ++row) {
+            for (std::size_t col = 0; col < 8; ++col) {
+                matrix(row, col) = static_cast<double>(row * 50 + col);
+            }
+        }
+
+        // Get a const block slice
+        const elasticapp::MatrixType& const_matrix = matrix;
+        auto const_slice = elasticapp::get_block_slice(const_matrix, 2, 2, 3, 3);
+
+        // Verify dimensions
+        REQUIRE(const_slice.rows() == 2);
+        REQUIRE(const_slice.cols() == 3);
+
+        // Verify data matches
+        REQUIRE(const_slice(0, 0) == matrix(2, 3));
+        REQUIRE(const_slice(1, 2) == matrix(3, 5));
+    }
+
+    SECTION("Single element block slice") {
+        elasticapp::MatrixType matrix(5, 7);
+        matrix.setZero();
+
+        // Fill specific element
+        matrix(3, 4) = 42.0;
+
+        auto slice = elasticapp::get_block_slice(matrix, 3, 1, 4, 1);
+
+        REQUIRE(slice.rows() == 1);
+        REQUIRE(slice.cols() == 1);
+        REQUIRE(slice(0, 0) == 42.0);
+    }
+
+    SECTION("Full matrix block slice") {
+        elasticapp::MatrixType matrix(6, 8);
+        matrix.setZero();
+
+        // Fill with test data
+        for (std::size_t row = 0; row < 6; ++row) {
+            for (std::size_t col = 0; col < 8; ++col) {
+                matrix(row, col) = static_cast<double>(row * 10 + col);
+            }
+        }
+
+        // Get slice of entire matrix
+        auto slice = elasticapp::get_block_slice(matrix, 0, 6, 0, 8);
+
+        REQUIRE(slice.rows() == 6);
+        REQUIRE(slice.cols() == 8);
+
+        // Verify all data matches
+        for (std::size_t row = 0; row < 6; ++row) {
+            for (std::size_t col = 0; col < 8; ++col) {
+                REQUIRE(slice(row, col) == matrix(row, col));
+            }
+        }
+    }
+
+    SECTION("Block slice at corner of matrix") {
+        elasticapp::MatrixType matrix(7, 9);
+        matrix.setZero();
+
+        // Fill bottom-right corner
+        for (std::size_t row = 4; row < 7; ++row) {
+            for (std::size_t col = 6; col < 9; ++col) {
+                matrix(row, col) = static_cast<double>(row * 100 + col);
+            }
+        }
+
+        auto slice = elasticapp::get_block_slice(matrix, 4, 3, 6, 3);
+
+        REQUIRE(slice.rows() == 3);
+        REQUIRE(slice.cols() == 3);
+
+        // Verify data
+        REQUIRE(slice(0, 0) == matrix(4, 6));
+        REQUIRE(slice(2, 2) == matrix(6, 8));
+    }
+
+    SECTION("Block slice with single row") {
+        elasticapp::MatrixType matrix(5, 8);
+        matrix.setZero();
+
+        // Fill row 2
+        for (std::size_t col = 0; col < 8; ++col) {
+            matrix(2, col) = static_cast<double>(col * 10);
+        }
+
+        auto slice = elasticapp::get_block_slice(matrix, 2, 1, 0, 8);
+
+        REQUIRE(slice.rows() == 1);
+        REQUIRE(slice.cols() == 8);
+
+        for (std::size_t col = 0; col < 8; ++col) {
+            REQUIRE(slice(0, col) == static_cast<double>(col * 10));
+        }
+    }
+
+    SECTION("Block slice with single column") {
+        elasticapp::MatrixType matrix(6, 7);
+        matrix.setZero();
+
+        // Fill column 3
+        for (std::size_t row = 0; row < 6; ++row) {
+            matrix(row, 3) = static_cast<double>(row * 20);
+        }
+
+        auto slice = elasticapp::get_block_slice(matrix, 0, 6, 3, 1);
+
+        REQUIRE(slice.rows() == 6);
+        REQUIRE(slice.cols() == 1);
+
+        for (std::size_t row = 0; row < 6; ++row) {
+            REQUIRE(slice(row, 0) == static_cast<double>(row * 20));
+        }
+    }
+}
diff --git a/backend/tests/cpp/test_version.cpp b/backend/tests/cpp/test_version.cpp
new file mode 100644
index 00000000..8d7ccb56
--- /dev/null
+++ b/backend/tests/cpp/test_version.cpp
@@ -0,0 +1,16 @@
+#include <catch2/catch_test_macros.hpp>
+#include "version.h"
+#include <string>
+
+// Simple test to verify Catch2 is working
+TEST_CASE("Catch2 is working", "[basic]") {
+    REQUIRE(1 + 1 == 2);
+    REQUIRE(std::string("hello") == "hello");
+}
+
+// Test version string from version.cpp
+TEST_CASE("Version string from version.cpp", "[version]") {
+    std::string version = elasticapp::version();
+    REQUIRE(version.length() > 0);
+    REQUIRE(version.find('.') != std::string::npos);
+}
diff --git a/backend/tests/py/test_block.py b/backend/tests/py/test_block.py
new file mode 100644
index 00000000..078647af
--- /dev/null
+++ b/backend/tests/py/test_block.py
@@ -0,0 +1,197 @@
+import numpy as np
+import pytest
+
+from elasticapp import BlockRodSystem
+
+
+def test_block_construction():
+    """Test that Block can be constructed with shape."""
+    block = BlockRodSystem([3, 4])
+    # Rod 0: 3 elems -> 4 nodes, Rod 1: 4 elems -> 5 nodes
+    # Ghost nodes: 1 (between 2 rods)
+    # Total width = 4 + 5 + 1 = 10
+    shape = block.shape
+    assert shape[1] == 10
+
+
+def test_block_as_ref_returns_numpy_array():
+    """Test that as_ref() returns a numpy array."""
+    block = BlockRodSystem([3, 3])
+    # Rod 0: 3 elems -> 4 nodes, Rod 1: 3 elems -> 4 nodes
+    # Ghost nodes: 1 (between 2 rods)
+    # Total width = 4 + 4 + 1 = 9
+    arr = block.as_ref()
+    assert isinstance(arr, np.ndarray)
+    assert arr.shape[1] == 9
+    assert arr.flags["OWNDATA"] is False
+    assert arr.flags["WRITEABLE"] is True
+
+
+def test_block_as_ref_modifies_underlying_memory():
+    """Test that modifying the numpy array modifies the underlying C++ memory."""
+    block = BlockRodSystem([3, 3])
+    arr1 = block.as_ref()
+
+    # Try to modify
+    arr1[0, 0] = 1.5
+    assert arr1[0, 0] == 1.5
+    arr1[0, 0] = 2.5
+    assert arr1[0, 0] == 2.5
+
+
+def test_block_as_ref_bidirectional_modification():
+    """Test that modifications persist across multiple as_ref() calls."""
+    block = BlockRodSystem([3, 3])
+    arr1 = block.as_ref()
+
+    # Fill with some values
+    vals = np.empty(block.shape)
+    arr1[:] = vals
+    arr2 = block.as_ref()
+    np.testing.assert_array_equal(arr2, vals)
+
+    # Modify through second reference
+    arr2[0, 0] = 100.0
+    assert arr1[0, 0] == 100.0
+
+
+def test_block_rod_start_indices():
+    """Test that Block stores correct starting indices for each rod."""
+    block = BlockRodSystem([3, 5, 2])
+
+    # Rod 0: starts at 0, has 3+1=4 nodes + 1 ghost = 5 total
+    assert block.system_start_index(0) == 0
+    # Rod 1: starts at 5, has 5+1=6 nodes + 1 ghost = 7 total
+    assert block.system_start_index(1) == 5
+    # Rod 2: starts at 5+7=12, has 2+1=3 nodes
+    assert block.system_start_index(2) == 12
+
+
+def test_block_view_variable_query():
+    """Test that BlockRodSystemView can query variables."""
+    block = BlockRodSystem([3, 5, 2])
+    view = block.at(0)
+    assert view.get("position") is not None
+    assert view.get("position").shape == (3, 4)
+    assert view.get("velocity") is not None
+    assert view.get("velocity").shape == (3, 4)
+    assert view.get("director") is not None
+
+    print("")
+    print(type(view))
+    print(type(view.get("director")))
+    print(view.get("director"))
+    print(view.get("director").flags)
+    print(view.get("director").shape)
+    assert view.get("director").shape == (3, 3, 3)
+
+
+def test_block_get_shape():
+    """Test that Block.get() returns correct shapes for different variable types."""
+    block = BlockRodSystem([3, 4])
+    # Block has 2 rods: rod 0 has 3 elems (4 nodes), rod 1 has 4 elems (5 nodes)
+    # Ghost nodes: 1 (between 2 rods)
+    # Total width = 4 + 5 + 1 = 10
+
+    # OnNode variables: should have shape (dimension, full width)
+    position = block.get("position")
+    assert position.shape == (3, 10)  # Vector (3D) x total nodes (full width)
+    assert position.flags["OWNDATA"] is False
+    assert position.flags["WRITEABLE"] is True
+
+    velocity = block.get("velocity")
+    assert velocity.shape == (3, 10)  # OnNode: full width
+
+    mass = block.get("mass")
+    assert mass.shape == (10,)  # OnNode: full width
+
+    # OnElement variables: should have shape (dimension, width - 1)
+    director = block.get("director")
+    assert director.shape == (3, 3, 9)  # Matrix (9D) x (width - 1) = 10 - 1 = 9
+    assert director.flags["OWNDATA"] is False
+    assert director.flags["WRITEABLE"] is True
+
+    omega = block.get("omega")
+    assert omega.shape == (3, 9)  # Vector (3D) x (width - 1) = 10 - 1 = 9
+
+    # OnVoronoi variables: should have shape (dimension, width - 2)
+    kappa = block.get("kappa")
+    assert kappa.shape == (3, 8)  # Vector (3D) x (width - 2) = 10 - 2 = 8
+
+
+def test_block_get_contents():
+    """Test that Block.get() returns writable views that modify underlying data."""
+    block = BlockRodSystem([3, 3])
+    # Rod 0: 3 elems -> 4 nodes, Rod 1: 3 elems -> 4 nodes
+    # Ghost nodes: 1 (between 2 rods)
+    # Total width = 4 + 4 + 1 = 9
+
+    # Get position variable (OnNode: full width)
+    position = block.get("position")
+    assert position.shape == (3, 9)
+
+    # Modify through the view
+    position[0, 0] = 1.5
+    position[1, 0] = 2.5
+    position[2, 0] = 3.5
+
+    # Verify modifications persist
+    assert position[0, 0] == 1.5
+    assert position[1, 0] == 2.5
+    assert position[2, 0] == 3.5
+
+    # Get another view - should see the same data
+    position2 = block.get("position")
+    assert position2[0, 0] == 1.5
+    assert position2[1, 0] == 2.5
+    assert position2[2, 0] == 3.5
+
+    # Modify through second view
+    position2[0, 1] = 10.0
+    assert position[0, 1] == 10.0  # Should be reflected in first view
+
+
+def test_block_get_different_variable_types():
+    """Test that Block.get() works for different variable types (OnNode, OnElement, OnVoronoi)."""
+    block = BlockRodSystem([3, 3])
+    # Rod 0: 3 elems -> 4 nodes, Rod 1: 3 elems -> 4 nodes
+    # Ghost nodes: 1 (between 2 rods)
+    # Total width = 4 + 4 + 1 = 9
+
+    # OnNode variable (Vector) - full width
+    velocity = block.get("velocity")
+    assert velocity.shape == (3, 9)  # OnNode: full width
+    velocity[0, 0] = 100.0
+    assert velocity[0, 0] == 100.0
+
+    # OnNode variable (Scalar) - full width
+    mass = block.get("mass")
+    assert mass.shape == (9,)  # OnNode: full width
+    mass[0] = 5.0
+    assert mass[0] == 5.0
+
+    # OnElement variable (Matrix) - width - 1
+    director = block.get("director")
+    assert director.shape == (3, 3, 8)  # OnElement: width - 1 = 9 - 1 = 8
+    director[0, 0, 0] = 0.5
+    assert director[0, 0, 0] == 0.5
+
+    # OnElement variable (Vector)
+    omega = block.get("omega")
+    assert omega.shape == (3, 8)
+    omega[0, 0] = 1.0
+    assert omega[0, 0] == 1.0
+
+    # OnVoronoi variable (Vector) - width - 2
+    kappa = block.get("kappa")
+    assert kappa.shape == (3, 7)  # OnVoronoi: width - 2 = 9 - 2 = 7
+    kappa[0, 0] = 2.0
+    assert kappa[0, 0] == 2.0
+
+
+def test_block_get_invalid_variable():
+    """Test that Block.get() raises error for invalid variable names."""
+    block = BlockRodSystem([3, 4])
+
+    with pytest.raises(RuntimeError, match="Unknown variable name"):
+        block.get("invalid_variable")
diff --git a/backend/tests/py/test_block_ghost_indices.py b/backend/tests/py/test_block_ghost_indices.py
new file mode 100644
index 00000000..2a58a1d6
--- /dev/null
+++ b/backend/tests/py/test_block_ghost_indices.py
@@ -0,0 +1,73 @@
+import numpy as np
+import pytest
+
+from elasticapp import BlockRodSystem
+
+
+def test_ghost_nodes_idx():
+    """Test ghost nodes indices computation."""
+    block = BlockRodSystem([3, 5, 2])
+    # Rod 0: 3 elems -> 4 nodes (indices 0-3)
+    # Ghost node at 4
+    # Rod 1: 5 elems -> 6 nodes (indices 5-10)
+    # Ghost node at 11
+    # Rod 2: 2 elems -> 3 nodes (indices 12-14)
+
+    ghost_nodes = block.ghost_nodes_idx
+    assert ghost_nodes == [4, 11]
+
+
+def test_ghost_elems_idx():
+    """Test ghost elements indices computation."""
+    block = BlockRodSystem([3, 5, 2])
+
+    ghost_elems = block.ghost_elems_idx
+    # Rod 0: 3 elems -> elements 0-2, ghost elements at 3, 4
+    # Rod 1: 5 elems -> elements 5-9, ghost elements at 10, 11
+    # Rod 2: 2 elems -> elements 12-13, ghost element at 14 (last element)
+    assert ghost_elems == [3, 4, 10, 11, 14]
+
+
+def test_ghost_voronoi_idx():
+    """Test ghost voronoi indices computation."""
+    block = BlockRodSystem([3, 5, 2])
+
+    ghost_voronoi = block.ghost_voronoi_idx
+    # Rod 0: 3 elems -> voronoi 0-1, ghost voronoi at 2, 3, 4
+    # Rod 1: 5 elems -> voronoi 5-8, ghost voronoi at 9, 10, 11
+    # Rod 2: 2 elems -> voronoi 12, ghost voronoi at 13, 14 (includes last voronoi)
+    assert ghost_voronoi == [2, 3, 4, 9, 10, 11, 13, 14]
+
+
+def test_ghost_indices_single_rod():
+    """Test ghost indices with single rod (includes last element/voronoi)."""
+    block = BlockRodSystem([6])
+
+    ghost_nodes = block.ghost_nodes_idx
+    ghost_elems = block.ghost_elems_idx
+    ghost_voronoi = block.ghost_voronoi_idx
+
+    assert ghost_nodes == []
+    assert ghost_elems == [6]  # Last element is included as ghost
+    assert ghost_voronoi == [5, 6]  # Last two voronoi are included as ghosts
+
+
+def test_ghost_indices_two_rods():
+    """Test ghost indices with two rods."""
+    block = BlockRodSystem([3, 3])
+    # Rod 0: 3 elems -> 4 nodes (indices 0-3)
+    # Ghost node at 4
+    # Rod 1: 3 elems -> 4 nodes (indices 5-8)
+
+    ghost_nodes = block.ghost_nodes_idx
+    assert ghost_nodes == [4]
+    ghost_elems = block.ghost_elems_idx
+    assert ghost_elems == [3, 4, 8]  # Includes last element of rod 1
+    ghost_voronoi = block.ghost_voronoi_idx
+    assert ghost_voronoi == [2, 3, 4, 7, 8]  # Includes last voronoi of rod 1
+
+
+def test_ghost_indices_empty_block():
+    """Test ghost indices with empty block (should raise error)."""
+    with pytest.raises(ValueError, match="n_elems_per_rod cannot be empty"):
+        block = BlockRodSystem([])
diff --git a/backend/tests/py/test_block_reset_ghost.py b/backend/tests/py/test_block_reset_ghost.py
new file mode 100644
index 00000000..d4b1a34f
--- /dev/null
+++ b/backend/tests/py/test_block_reset_ghost.py
@@ -0,0 +1,151 @@
+import numpy as np
+import pytest
+
+from elasticapp import BlockRodSystem
+
+
+def test_reset_ghost_for_variable_onnode():
+    """Test reset_ghost_for_variable for OnNode variable."""
+    block = BlockRodSystem([3, 5])
+    # Ghost nodes at indices 4, 11
+
+    # Get position variable and modify ghost positions
+    position = block.get("position")
+    position[:] = 0.0
+    position[0, 4] = 999.0  # Modify ghost node
+    position[1, 4] = 888.0
+    position[2, 4] = 777.0
+
+    # Reset ghost for position variable
+    block.reset_ghost_for_variable("position")
+
+    # Verify ghost values are reset (position uses Vector, ghost_value = Zero(3,1))
+    assert position[0, 4] == 0.0
+    assert position[1, 4] == 0.0
+    assert position[2, 4] == 0.0
+
+
+def test_reset_ghost_for_variable_onelement():
+    """Test reset_ghost_for_variable for OnElement variable."""
+    block = BlockRodSystem([3, 5])
+    # Ghost elements at indices 3, 4, 10, 11
+
+    # Get director variable and modify ghost positions
+    director = block.get("director")
+    director[0, 0, 3] = 999.0  # Modify ghost element
+    director[1, 0, 3] = 888.0
+    director[2, 0, 3] = 777.0
+
+    # Reset ghost for director variable
+    block.reset_ghost_for_variable("director")
+
+    # Verify ghost values are reset (director uses Matrix, ghost_value = Zero(3,3))
+    print(director.flags)
+    assert director[0, 0, 3] == 0.0
+    assert director[1, 0, 3] == 0.0
+    assert director[2, 0, 3] == 0.0
+
+
+def test_reset_ghost_for_variable_onvoronoi():
+    """Test reset_ghost_for_variable for OnVoronoi variable."""
+    block = BlockRodSystem([3, 5])
+    # Ghost voronoi at indices 2, 3, 4, 9, 10, 11
+
+    # Get kappa variable and modify ghost positions
+    kappa = block.get("kappa")
+    kappa[:, 2] = 999.0  # Modify ghost voronoi
+
+    # Reset ghost for kappa variable
+    block.reset_ghost_for_variable("kappa")
+
+    # Verify ghost values are reset (kappa uses Vector, ghost_value = Zero(3,1))
+    assert kappa[0, 2] == 0.0
+    assert kappa[1, 2] == 0.0
+    assert kappa[2, 2] == 0.0
+
+
+def test_reset_ghost_resets_all_variables():
+    """Test that reset_ghost resets all variables."""
+    block = BlockRodSystem([3, 3])
+    # Ghost nodes at index 4
+
+    # Modify ghost positions for multiple variables
+    lengths = block.get("lengths")
+    position = block.get("position")
+    velocity = block.get("velocity")
+    director = block.get("director")
+
+    lengths[4] = 999.0
+    position[0, 4] = 999.0
+    velocity[0, 4] = 888.0
+    director[0, 0, 3] = 777.0  # Ghost element at 3 (before ghost node at 4)
+
+    # Reset all ghosts
+    block.reset_ghost()
+
+    # Verify all ghost values are reset
+    assert lengths[4] == 0.0
+    assert position[0, 4] == 0.0
+    assert velocity[0, 4] == 0.0
+    assert director[0, 0, 3] == 0.0
+
+
+def test_reset_ghost_called_in_constructor():
+    """Test that reset_ghost is called in constructor."""
+    block = BlockRodSystem([3, 5])
+    # Constructor should have called reset_ghost()
+
+    # Check that ghost values are already initialized
+    position = block.get("position")
+    ghost_nodes = block.ghost_nodes_idx
+
+    if len(ghost_nodes) > 0:
+        ghost_col = ghost_nodes[0]
+        # Ghost values should be zero (default ghost_value for Vector)
+        assert position[0, ghost_col] == 0.0
+        assert position[1, ghost_col] == 0.0
+        assert position[2, ghost_col] == 0.0
+
+
+def test_reset_ghost_with_single_rod():
+    """Test reset_ghost with single rod (no ghosts)."""
+    block = BlockRodSystem([6])
+
+    # Should not crash even with no ghost nodes
+    block.reset_ghost()
+    block.reset_ghost_for_variable("position")
+
+
+def test_reset_ghost_with_empty_block():
+    """Test reset_ghost with empty block."""
+    with pytest.raises(ValueError, match="n_elems_per_rod cannot be empty"):
+        block = BlockRodSystem([])
+
+
+def test_reset_ghost_for_variable_invalid_name():
+    """Test reset_ghost_for_variable with invalid variable name."""
+    block = BlockRodSystem([3, 5])
+
+    with pytest.raises(RuntimeError, match="Unknown variable name"):
+        block.reset_ghost_for_variable("invalid_variable")
+
+
+def test_reset_ghost_multiple_ghost_positions():
+    """Test reset_ghost with multiple rods (multiple ghost positions)."""
+    block = BlockRodSystem([3, 3, 3])
+    # Ghost nodes at indices 4, 8
+    # Rod 0: 3 elems -> 4 nodes (0-3), ghost at 4
+    # Rod 1: 3 elems -> 4 nodes (5-8), ghost at 9
+    # Rod 2: 3 elems -> 4 nodes (10-13)
+
+    # Modify multiple ghost positions
+    position = block.get("position")
+    position[:, 4] = 999.0  # First ghost node
+    position[:, 9] = 888.0  # Second ghost node
+
+    # Reset all ghosts
+    block.reset_ghost()
+
+    # Verify all ghost positions are reset
+    np.testing.assert_array_equal(position[:, 4], 0.0)
+    np.testing.assert_array_equal(position[:, 9], 0.0)
diff --git a/backend/tests/py/test_block_reshape.py b/backend/tests/py/test_block_reshape.py
new file mode 100644
index 00000000..a99ba591
--- /dev/null
+++ b/backend/tests/py/test_block_reshape.py
@@ -0,0 +1,297 @@
+"""
+Tests for Block variable reshaping behavior.
+
+This module tests that:
+- Scalar variables return 1D arrays
+- Vector variables return 2D arrays
+- Matrix variables return 3D arrays that are views (not copies)
+"""
+
+import numpy as np
+import pytest
+
+from elasticapp import BlockRodSystem
+
+
+class TestScalarVariableReshaping:
+    """Test that Scalar variables return 1D arrays."""
+
+    def test_scalar_variables_are_1d(self):
+        """Test that scalar variables return 1D arrays."""
+        block = BlockRodSystem([6, 6])
+        # Scalar variables: mass, density, volume, etc.
+        mass = block.get("mass")
+        density = block.get("density")
+        volume = block.get("volume")
+
+        # Should be 1D arrays
+        assert mass.ndim == 1
+        assert density.ndim == 1
+        assert volume.ndim == 1
+
+        # Check shapes (accounting for ghost nodes)
+        # 2 rods with 6 elements each = 7 nodes each = 14 nodes + 1 ghost = 15 total
+        assert mass.shape == (15,)
+        # OnElement variables: width - 1 = 15 - 1 = 14
+        assert density.shape == (14,)
+        assert volume.shape == (14,)
+
+    def test_scalar_variables_are_writable(self):
+        """Test that scalar variables are writable."""
+        block = BlockRodSystem([6])
+        mass = block.get("mass")
+
+        assert mass.flags.writeable is True
+        assert mass.flags.owndata is False  # Should be a view
+
+        # Modify and verify
+        original_value = mass[0]
+        mass[0] = 999.0
+        assert mass[0] == 999.0
+
+        # Verify persistence
+        mass_new = block.get("mass")
+        assert mass_new[0] == 999.0
+
+
+class TestVectorVariableReshaping:
+    """Test that Vector variables return 2D arrays."""
+
+    def test_vector_variables_are_2d(self):
+        """Test that vector variables return 2D arrays."""
+        block = BlockRodSystem([6, 6])
+        # Vector variables: position, velocity, acceleration, etc.
+        position = block.get("position")
+        velocity = block.get("velocity")
+        acceleration = block.get("acceleration")
+
+        # Should be 2D arrays
+        assert position.ndim == 2
+        assert velocity.ndim == 2
+        assert acceleration.ndim == 2
+
+        # Check shapes: (3, n_cols)
+        # 2 rods with 6 elements each = 7 nodes each = 14 nodes + 1 ghost = 15 total
+        assert position.shape == (3, 15)
+        assert velocity.shape == (3, 15)
+        assert acceleration.shape == (3, 15)
+
+    def test_vector_variables_are_writable(self):
+        """Test that vector variables are writable."""
+        block = BlockRodSystem([6])
+        position = block.get("position")
+
+        assert position.flags.writeable is True
+        assert position.flags.owndata is False  # Should be a view
+
+        # Modify and verify
+        original_value = position[0, 0].copy()
+        position[0, 0] = 999.0
+        assert position[0, 0] == 999.0
+
+        # Verify persistence
+        position_new = block.get("position")
+        assert position_new[0, 0] == 999.0
+
+
+class TestMatrixVariableReshaping:
+    """Test that Matrix variables return 3D arrays that are views."""
+
+    def test_matrix_variables_are_3d(self):
+        """Test that matrix variables return 3D arrays."""
+        block = BlockRodSystem([6, 6])
+        # Matrix variables: director, shear_matrix, bend_matrix, etc.
+        director = block.get("director")
+        shear_matrix = block.get("shear_matrix")
+        bend_matrix = block.get("bend_matrix")
+        mass_second_moment = block.get("mass_second_moment_of_inertia")
+
+        # Should be 3D arrays
+        assert director.ndim == 3
+        assert shear_matrix.ndim == 3
+        assert bend_matrix.ndim == 3
+        assert mass_second_moment.ndim == 3
+
+        # Check shapes: (3, 3, n_cols)
+        # OnElement variables: width - 1 = 15 - 1 = 14
+        assert director.shape == (3, 3, 14)
+        assert shear_matrix.shape == (3, 3, 14)
+        assert bend_matrix.shape == (3, 3, 13)  # OnVoronoi: width - 2 = 15 - 2 = 13
+        assert mass_second_moment.shape == (3, 3, 14)
+
+    def test_matrix_variables_are_views_not_copies(self):
+        """Test that matrix variables are views, not copies."""
+        block = BlockRodSystem([6])
+        director = block.get("director")
+
+        # Should be a view, not a copy
+        assert (
+            director.flags.owndata is False
+        ), "Matrix variables should be views, not copies"
+        assert director.flags.writeable is True
+        assert director.base is not None, "View should have a base array"
+
+    def test_matrix_variables_modifications_persist(self):
+        """Test that modifications to matrix variables persist."""
+        block = BlockRodSystem([6])
+        director = block.get("director")
+
+        # Modify a value
+        original_value = director[0, 0, 0].copy()
+        director[0, 0, 0] = 123.456
+
+        # Verify the modification
+        assert director[0, 0, 0] == 123.456
+
+        # Get a new view and verify it sees the same modification
+        director_new = block.get("director")
+        assert (
+            director_new[0, 0, 0] == 123.456
+        ), "Modifications should persist across views"
+
+    def test_matrix_variables_reference_same_memory(self):
+        """Test that matrix variables reference the same underlying memory."""
+        block = BlockRodSystem([6])
+        director1 = block.get("director")
+        director2 = block.get("director")
+
+        # Modify through first view
+        director1[0, 0, 0] = 789.0
+
+        # Check through second view
+        assert (
+            director2[0, 0, 0] == 789.0
+        ), "Both views should reference the same memory"
+
+    def test_matrix_variables_correct_strides(self):
+        """Test that matrix variables have correct strides for non-contiguous view."""
+        block = BlockRodSystem([6])
+        director = block.get("director")
+
+        # Strides should be non-contiguous (not C or F contiguous)
+        # For a (3, 3, 6) view of a (9, 6) column-major matrix:
+        # The underlying matrix is (9, 6) column-major
+        # - strides[0] (page stride): 3 * inner_stride = 3 * 8 = 24 bytes (distance between pages in 3x3)
+        # - strides[1] (row stride): inner_stride = 8 bytes (distance between rows)
+        # - strides[2] (col stride): outer_stride = 9 * 8 = 72 bytes (distance between columns in original)
+        assert len(director.strides) == 3
+
+        # Strides should be in bytes
+        # For column-major (9, 6) matrix viewed as (3, 3, 6):
+        # The actual stride calculation depends on the underlying matrix layout
+        # We just verify the strides are positive and the array is non-contiguous
+        assert director.strides[0] > 0, "Page stride should be positive"
+        assert director.strides[1] > 0, "Row stride should be positive"
+        assert director.strides[2] > 0, "Col stride should be positive"
+        # Verify it's not C or F contiguous (non-contiguous view)
+        assert not director.flags["C_CONTIGUOUS"], "Should not be C contiguous"
+        assert not director.flags["F_CONTIGUOUS"], "Should not be F contiguous"
+
+        # Page stride should be outer_stride (depends on underlying matrix)
+        assert director.strides[0] > 0, "Page stride should be positive"
+
+
+class TestBlockRodSystemViewReshaping:
+    """Test reshaping behavior for BlockRodSystemView."""
+
+    def test_blockview_scalar_is_1d(self):
+        """Test that BlockRodSystemView scalar variables are 1D."""
+        block = BlockRodSystem([6])
+        view = block.at(0)
+
+        mass = view.get("mass")
+        assert mass.ndim == 1
+        assert mass.shape == (7,)  # 6 elements + 1 = 7 nodes
+
+    def test_blockview_vector_is_2d(self):
+        """Test that BlockRodSystemView vector variables are 2D."""
+        block = BlockRodSystem([6])
+        view = block.at(0)
+
+        position = view.get("position")
+        assert position.ndim == 2
+        assert position.shape == (3, 7)  # 3 rows, 7 nodes
+
+    def test_blockview_matrix_is_3d(self):
+        """Test that BlockRodSystemView matrix variables are 3D views."""
+        block = BlockRodSystem([6])
+        view = block.at(0)
+
+        director = view.get("director")
+        assert director.ndim == 3
+        assert director.shape == (3, 3, 6)  # 3x3 matrices, 6 elements
+        assert director.flags.owndata is False, "Should be a view, not a copy"
+
+    def test_blockview_matrix_modifications_persist(self):
+        """Test that BlockRodSystemView matrix modifications persist."""
+        block = BlockRodSystem([6])
+        view = block.at(0)
+
+        director = view.get("director")
+        director[0, 0, 0] = 456.789
+
+        # Get new view
+        director_new = view.get("director")
+        assert director_new[0, 0, 0] == 456.789
+
+
+class TestReshapingEdgeCases:
+    """Test edge cases for reshaping."""
+
+    def test_single_rod_block(self):
+        """Test reshaping with a single rod."""
+        block = BlockRodSystem([6])
+
+        # Scalar
+        mass = block.get("mass")
+        assert mass.ndim == 1
+        assert mass.shape == (7,)  # 6 elements + 1 = 7 nodes
+
+        # Vector
+        position = block.get("position")
+        assert position.ndim == 2
+        assert position.shape == (3, 7)
+
+        # Matrix
+        director = block.get("director")
+        assert director.ndim == 3
+        assert director.shape == (3, 3, 6)  # 6 elements
+        assert director.flags.owndata is False
+
+    def test_multiple_rods_different_sizes(self):
+        """Test reshaping with multiple rods of different sizes."""
+        block = BlockRodSystem([3, 5, 2])
+
+        # Scalar (OnNode)
+        mass = block.get("mass")
+        assert mass.ndim == 1
+        # 3 elems -> 4 nodes, 5 elems -> 6 nodes, 2 elems -> 3 nodes
+        # Ghost nodes: 2 (between 3 rods)
+        # Total: 4 + 6 + 3 + 2 = 15
+        assert mass.shape == (15,)
+
+        # Vector (OnNode)
+        position = block.get("position")
+        assert position.ndim == 2
+        assert position.shape == (3, 15)
+
+        # Matrix (OnElement)
+        director = block.get("director")
+        assert director.ndim == 3
+        # OnElement: width - 1 = 15 - 1 = 14
+        assert director.shape == (3, 3, 14)
+        assert director.flags.owndata is False
+
+    def test_voronoi_variables(self):
+        """Test reshaping for OnVoronoi variables."""
+        block = BlockRodSystem([6, 6])
+
+        # OnVoronoi variables should have width - 2
+        kappa = block.get("kappa")
+        assert kappa.ndim == 2  # Vector, not Matrix
+        # OnVoronoi: width - 2 = 15 - 2 = 13
+        assert kappa.shape == (3, 13)
+
+        # OnVoronoi Matrix variable
+        # Note: There might not be OnVoronoi Matrix variables in the system
+        # This test verifies the width adjustment works correctly
diff --git a/backend/tests/py/test_block_view_rod.py b/backend/tests/py/test_block_view_rod.py
new file mode 100644
index 00000000..7344583a
--- /dev/null
+++ b/backend/tests/py/test_block_view_rod.py
@@ -0,0 +1,23 @@
+import numpy as np
+import pytest
+
+from elasticapp import BlockRodSystem
+
+
+def test_at_returns_block_view():
+    """Test that at() returns a BlockRodSystemView object."""
+    n_elems_per_system = [3, 5, 2]
+    block = BlockRodSystem(n_elems_per_system)
+    expected_depth = 124
+    # Rod 0: 3 elems -> 4 nodes, Rod 1: 5 elems -> 6 nodes, Rod 2: 2 elems -> 3 nodes
+    # Ghost nodes: 2 (between 3 rods)
+    # Total width = 4 + 6 + 3 + 2 = 15
+    expected_shape = (expected_depth, 15)
+
+    # assert block.n_systems == len(n_elems_per_system)
+    assert block.shape[1] == 15
+
+    for sys_index in range(block.n_systems):
+        block_view = block.at(sys_index)
+        # assert block_view.shape[0] == expected_depth  # depth may change with dummy variables
+        assert block_view.shape[1] == n_elems_per_system[sys_index] + 1
diff --git a/backend/tests/py/test_cosserat_rod_operation.py b/backend/tests/py/test_cosserat_rod_operation.py
new file mode 100644
index 00000000..0d80ee1a
--- /dev/null
+++ b/backend/tests/py/test_cosserat_rod_operation.py
@@ -0,0 +1,58 @@
+import numpy as np
+import pytest
+
+from elasticapp import BlockRodSystem
+
+
+def test_compute_internal_forces_and_torques():
+    """Test that compute_internal_forces_and_torques can be called."""
+    block = BlockRodSystem([3, 4])
+    # Should not raise an exception
+    block.compute_internal_forces_and_torques(0.0)
+
+
+def test_update_accelerations():
+    """Test that update_accelerations can be called."""
+    block = BlockRodSystem([3, 4])
+    # Should not raise an exception
+    block.update_accelerations(0.0)
+
+
+def test_zeroed_out_external_forces_and_torques():
+    """Test that zeroed_out_external_forces_and_torques can be called."""
+    block = BlockRodSystem([3, 4])
+    # Should not raise an exception
+    block.zeroed_out_external_forces_and_torques(0.0)
+
+
+def test_update_kinematics():
+    """Test that update_kinematics can be called."""
+    block = BlockRodSystem([3, 4])
+    # Should not raise an exception
+    block.update_kinematics(0.0, 1.0)
+
+
+def test_update_dynamics():
+    """Test that update_dynamics can be called."""
+    block = BlockRodSystem([3, 4])
+    # Should not raise an exception
+    block.update_dynamics(0.0, 1.0)
+
+
+def test_operations_are_callable():
+    """Test that all operation methods exist and are callable."""
+    block = BlockRodSystem([3, 4])
+
+    # Verify all methods exist
+    assert hasattr(block, "compute_internal_forces_and_torques")
+    assert hasattr(block, "update_accelerations")
+    assert hasattr(block, "zeroed_out_external_forces_and_torques")
+    assert hasattr(block, "update_kinematics")
+    assert hasattr(block, "update_dynamics")
+
+    # Verify they are callable
+    assert callable(block.compute_internal_forces_and_torques)
+    assert callable(block.update_accelerations)
+    assert callable(block.zeroed_out_external_forces_and_torques)
+    assert callable(block.update_kinematics)
+    assert callable(block.update_dynamics)
diff --git a/backend/tests/py/test_version.py b/backend/tests/py/test_version.py
new file mode 100644
index 00000000..cd7e7ba8
--- /dev/null
+++ b/backend/tests/py/test_version.py
@@ -0,0 +1,12 @@
+"""Test for elasticapp.version function."""
+
+import pytest
+import elasticapp
+import importlib.metadata
+
+
+def test_version():
+    """Test that elasticapp.version returns the current version."""
+    version = elasticapp.version()
+    assert isinstance(version, str)
+    assert version == importlib.metadata.version("elasticapp")
diff --git a/backend/tests/test_py_cpp_block_operation_match.py b/backend/tests/test_py_cpp_block_operation_match.py
new file mode 100644
index 00000000..55c1ec3d
--- /dev/null
+++ b/backend/tests/test_py_cpp_block_operation_match.py
@@ -0,0 +1,109 @@
+"""
+This script is an integrated test that ensures the C++ and Python
+implementations of Cosserat rod operations produce matching results.
+It was converted from the benchmarking script:
+`benchmarking/memory_block_integrity_check.py`.
+
+The test covers the following sequence of operations:
+1.  Creation of Python and C++ rod systems.
+2.  Initialization of memory blocks for both implementations.
+3.  Verification of ghost indices.
+4.  An initial check of memory block consistency.
+5.  Execution and cross-verification of:
+    - `compute_internal_forces_and_torques`
+    - `update_accelerations`
+    - `update_kinematics`
+    - `update_dynamics`
+"""
+
+import numpy as np
+
+import elastica as epy
+import elasticapp as epp
+
+
+def test_py_cpp_block_operation_match():
+    # Create test rods
+    n_rods = 5
+    n_elems_per_rod = 20
+
+    keys = list(epp.memory_block_rod.PY2CPP_VARNAMES.keys())
+    rods_cpp = [
+        epy.CosseratRod.straight_rod(
+            n_elements=n_elems_per_rod,
+            start=np.zeros(3),
+            direction=np.array([0.0, 0.0, 1.0]),
+            normal=np.array([1.0, 0.0, 0.0]),
+            base_length=1.0,
+            base_radius=0.01,
+            density=3000,
+            youngs_modulus=1e6,
+        )
+        for _ in range(n_rods)
+    ]
+    rods_py = [
+        epy.CosseratRod.straight_rod(
+            n_elements=n_elems_per_rod,
+            start=np.zeros(3),
+            direction=np.array([0.0, 0.0, 1.0]),
+            normal=np.array([1.0, 0.0, 0.0]),
+            base_length=1.0,
+            base_radius=0.01,
+            density=3000,
+            youngs_modulus=1e6,
+        )
+        for _ in range(n_rods)
+    ]
+    rng = np.random.default_rng(43)
+    for i in range(n_rods):
+        for key in ["rest_kappa", "rest_sigma"]:
+            shape = getattr(rods_py[i], key).shape
+            values = rng.random(size=shape)
+            getattr(rods_py[i], key)[...] = values.copy()
+            getattr(rods_cpp[i], key)[...] = values.copy()
+
+    # Create block rod system
+    block_cpp = epp.MemoryBlockCosseratRod(rods_cpp, range(n_rods))
+    block_py = epy.MemoryBlockCosseratRod(rods_py, range(n_rods))
+
+    # Cross-check ghost indices
+    np.testing.assert_array_equal(block_cpp.ghost_nodes_idx, block_py.ghost_nodes_idx)
+    np.testing.assert_array_equal(
+        block_cpp.ghost_elems_idx[:-1], block_py.ghost_elems_idx
+    )
+    np.testing.assert_array_equal(
+        block_cpp.ghost_voronoi_idx[:-2], block_py.ghost_voronoi_idx
+    )
+
+    def cross_check_block_memory():
+        for key in keys:
+            cpp_value = getattr(block_cpp, key)
+            py_value = getattr(block_py, key)
+            assert cpp_value.shape == py_value.shape
+            assert np.allclose(cpp_value, py_value), f"{key} is not equal"
+
+    cross_check_block_memory()
+
+    # Cross-check computing internal forces and torques
+    block_cpp.compute_internal_forces_and_torques(0.0)
+    block_py.compute_internal_forces_and_torques(0.0)
+
+    cross_check_block_memory()
+
+    # Cross-check updating accelerations
+    block_cpp.update_accelerations(0.0)
+    block_py.update_accelerations(0.0)
+
+    cross_check_block_memory()
+
+    # Cross-check updating kinematics
+    block_cpp.update_kinematics(0.0, 1.4)
+    block_py.update_kinematics(0.0, 1.4)
+
+    cross_check_block_memory()
+
+    # Cross-check updating dynamics
+    block_cpp.update_dynamics(0.0, 1.6)
+    block_py.update_dynamics(0.0, 1.6)
+
+    cross_check_block_memory()
diff --git a/docs/index.rst b/docs/index.rst
index 479320b1..8434cd51 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -69,8 +69,6 @@ If you are interested to contribute, please read `contribution-guide`_ first.
    api/simulator
    api/utility
 
-..   api/elastica++
-
 .. toctree::
    :maxdepth: 2
    :caption: Advanced Guide
diff --git a/pyproject.toml b/pyproject.toml
index 38b3e80e..c06f6608 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -139,6 +139,7 @@ warn_unused_ignores = false
 
 exclude = [
     "elastica/experimental/*",
+    "backend/*"
 ]
 
 [tool.coverage.report]
diff --git a/tests/test_restart.py b/tests/test_restart.py
index e625fb54..e538bff8 100644
--- a/tests/test_restart.py
+++ b/tests/test_restart.py
@@ -168,7 +168,7 @@ class BaseSimulatorClass(
 
         return np.concatenate(recorded_list)
 
-    @pytest.mark.parametrize("final_time", [0.2, 1.0])
+    @pytest.mark.parametrize("final_time", [0.002, 0.01])
     def test_save_restart_run_sim(self, tmp_path, final_time):
 
         # First half of simulation