Skip to content

Commit f2aa0df

Browse files
committed
Add optional GPU support with CuPy and fallback to CPU computation
1 parent a0c1254 commit f2aa0df

4 files changed

Lines changed: 88 additions & 78 deletions

File tree

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ https://docs.astral.sh/uv/#highlights
1818
uv sync
1919
```
2020

21+
### Add cupy if CUDA is available
22+
```bash
23+
uv pip install cupy-cuda12x
24+
```
25+
2126
### Activate the environment
2227
```bash
2328
source .venv/bin/activate

myogen/simulator/core/emg/surface_emg.py

Lines changed: 82 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,13 @@
7777
import warnings
7878
from datetime import datetime
7979

80-
import cupy
80+
try:
81+
import cupy as cp
82+
83+
HAS_CUPY = True
84+
except ImportError:
85+
HAS_CUPY = False
86+
8187
import joblib
8288
import matplotlib.patches as patches
8389
import matplotlib.pyplot as plt
@@ -1037,8 +1043,9 @@ def simulate_surface_emg(
10371043
convolution of spike trains with MUAP shapes, and summation across all active motor units
10381044
for each electrode channel.
10391045
1040-
The simulation uses GPU acceleration (via CuPy) for efficient parallel processing of
1046+
The simulation can optionally use GPU acceleration (via CuPy) for efficient parallel processing of
10411047
convolution operations across multiple electrode positions, motor units, and time points.
1048+
If CuPy is not available, the simulation automatically falls back to CPU computation with NumPy.
10421049
10431050
Scientific Background
10441051
--------------------
@@ -1067,11 +1074,13 @@ def simulate_surface_emg(
10671074
10681075
GPU Acceleration
10691076
---------------
1070-
The method automatically uses GPU processing for:
1077+
The method automatically uses GPU processing (if CuPy is available) for:
10711078
- Large-scale convolution operations across multiple dimensions
10721079
- Parallel processing of electrode positions and motor unit combinations
10731080
- Efficient memory management for large EMG datasets
1074-
- Automatic fallback to CPU if GPU unavailable
1081+
1082+
If CuPy is not installed or GPU is unavailable, the method automatically falls back
1083+
to CPU-based computation using NumPy with equivalent functionality.
10751084
10761085
Parameters
10771086
----------
@@ -1113,14 +1122,15 @@ def simulate_surface_emg(
11131122
between simulated motor units and available spike trains.
11141123
11151124
RuntimeError
1116-
If GPU memory is insufficient for large simulations (automatically falls back to CPU).
1125+
If GPU memory is insufficient for large simulations when using CuPy
1126+
(will automatically fall back to CPU-based computation).
11171127
11181128
Notes
11191129
-----
11201130
**Computational Complexity:**
11211131
- Time complexity: O(n_positions × n_pools × n_MUs × n_electrodes × n_time_samples)
11221132
- Space complexity: O(n_positions × n_pools × n_electrodes × n_time_samples)
1123-
- GPU acceleration provides 10-50x speedup for large simulations
1133+
- GPU acceleration (if available) provides 10-50x speedup for large simulations
11241134
11251135
**Memory Requirements:**
11261136
- Input MUAPs: n_positions × n_MUs × n_electrodes × n_time_samples × 8 bytes
@@ -1239,41 +1249,76 @@ def simulate_surface_emg(
12391249
mode="same",
12401250
)
12411251

1242-
# Perform the convolution and summation using GPU acceleration
1243-
spike_gpu = cupy.asarray(motor_neuron_pool.spike_trains)
1244-
muap_gpu = cupy.asarray(muap_shapes__tensor)
1245-
surface_emg_gpu = cupy.zeros(
1246-
(n_positions, n_pools, n_rows, n_cols, len(sample_conv))
1247-
)
1252+
# Perform the convolution and summation using GPU acceleration if available
1253+
if HAS_CUPY:
1254+
# Use GPU acceleration with CuPy
1255+
spike_gpu = cp.asarray(motor_neuron_pool.spike_trains)
1256+
muap_gpu = cp.asarray(muap_shapes__tensor)
1257+
surface_emg_gpu = cp.zeros(
1258+
(n_positions, n_pools, n_rows, n_cols, len(sample_conv))
1259+
)
12481260

1249-
for mu_pool_idx in tqdm(range(n_pools), desc="Simulating surface EMG"):
1250-
active_neuron_indices = set(
1251-
motor_neuron_pool.active_neuron_indices[mu_pool_idx]
1261+
for mu_pool_idx in tqdm(range(n_pools), desc="Simulating surface EMG (GPU)"):
1262+
active_neuron_indices = set(
1263+
motor_neuron_pool.active_neuron_indices[mu_pool_idx]
1264+
)
1265+
1266+
for position_idx in range(n_positions):
1267+
for row_idx in range(n_rows):
1268+
for col_idx in range(n_cols):
1269+
# Process all MUAPs for this combination on GPU
1270+
convolutions = cp.array(
1271+
[
1272+
cp.correlate(
1273+
spike_gpu[mu_pool_idx, muap_idx],
1274+
muap_gpu[position_idx, muap_idx, row_idx, col_idx],
1275+
mode="same",
1276+
)
1277+
for muap_idx in MUs_to_simulate.intersection(
1278+
active_neuron_indices
1279+
)
1280+
]
1281+
)
1282+
# Sum across MUAPs on GPU
1283+
surface_emg_gpu[position_idx, mu_pool_idx, row_idx, col_idx] = (
1284+
cp.sum(convolutions, axis=0)
1285+
)
1286+
1287+
# Transfer results back to CPU
1288+
self.surface_emg__tensor = cp.asnumpy(surface_emg_gpu)
1289+
else:
1290+
# Fallback to CPU computation with NumPy
1291+
surface_emg_cpu = np.zeros(
1292+
(n_positions, n_pools, n_rows, n_cols, len(sample_conv))
12521293
)
12531294

1254-
for position_idx in range(n_positions):
1255-
for row_idx in range(n_rows):
1256-
for col_idx in range(n_cols):
1257-
# Process all MUAPs for this combination on GPU
1258-
convolutions = cupy.array(
1259-
[
1260-
cupy.correlate(
1261-
spike_gpu[mu_pool_idx, muap_idx],
1262-
muap_gpu[position_idx, muap_idx, row_idx, col_idx],
1263-
mode="same",
1264-
)
1265-
for muap_idx in MUs_to_simulate.intersection(
1266-
active_neuron_indices
1267-
)
1268-
]
1269-
)
1270-
# Sum across MUAPs on GPU
1271-
surface_emg_gpu[position_idx, mu_pool_idx, row_idx, col_idx] = (
1272-
cupy.sum(convolutions, axis=0)
1273-
)
1295+
for mu_pool_idx in tqdm(range(n_pools), desc="Simulating surface EMG (CPU)"):
1296+
active_neuron_indices = set(
1297+
motor_neuron_pool.active_neuron_indices[mu_pool_idx]
1298+
)
12741299

1275-
# Transfer results back to CPU
1276-
self.surface_emg__tensor = cupy.asnumpy(surface_emg_gpu)
1300+
for position_idx in range(n_positions):
1301+
for row_idx in range(n_rows):
1302+
for col_idx in range(n_cols):
1303+
# Process all MUAPs for this combination on CPU
1304+
convolutions = np.array(
1305+
[
1306+
np.correlate(
1307+
motor_neuron_pool.spike_trains[mu_pool_idx, muap_idx],
1308+
muap_shapes__tensor[position_idx, muap_idx, row_idx, col_idx],
1309+
mode="same",
1310+
)
1311+
for muap_idx in MUs_to_simulate.intersection(
1312+
active_neuron_indices
1313+
)
1314+
]
1315+
)
1316+
# Sum across MUAPs on CPU
1317+
surface_emg_cpu[position_idx, mu_pool_idx, row_idx, col_idx] = (
1318+
np.sum(convolutions, axis=0)
1319+
)
1320+
1321+
self.surface_emg__tensor = surface_emg_cpu
12771322

12781323
return self.surface_emg__tensor
12791324

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ readme = "README.md"
66
requires-python = ">=3.12"
77
dependencies = [
88
"beartype>=0.21.0",
9-
"cupy-cuda12x>=13.4.1",
109
"impi-rt>=2021.15.0 ; sys_platform == 'win32'",
1110
"matplotlib>=3.10.1",
1211
"mpi4py>=4.0.3",
@@ -44,4 +43,4 @@ requires = ["setuptools>=61.0"]
4443
build-backend = "setuptools.build_meta"
4544

4645
[tool.setuptools]
47-
packages = ["myogen"]
46+
packages = ["myogen"]

uv.lock

Lines changed: 0 additions & 39 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)