33 commits
58fc9a5
Initial refactor attempt
daharoni Dec 13, 2025
e4a63d9
small fixes based on test errors
daharoni Dec 13, 2025
7686dea
add option to include legacy bug for comparing to old deconv code
daharoni Dec 13, 2025
0d3abd3
black formatting
daharoni Dec 13, 2025
f1dc38c
refactor pipeline
daharoni Dec 14, 2025
fc48a81
black format
daharoni Dec 14, 2025
2e20151
add yappi and snakeviz for function level profiling
daharoni Dec 14, 2025
3e6318e
precompute square of Wt matrix
daharoni Dec 15, 2025
8b3d122
optimize all instances of self.Wt.T @ self.Wt
daharoni Dec 15, 2025
d5151b8
add result plotting of pipeline after profile run
daharoni Dec 15, 2025
814bab6
optimize more by doing row-scaling of Mw
daharoni Dec 15, 2025
194f24f
run all tests, split plotting step to separate job, fix pdm cache
sneakers-the-rat Jan 22, 2026
badd841
actually ignore test outputs
sneakers-the-rat Jan 22, 2026
700fd3e
rm pytest args that are redundant with pyproject.toml config
sneakers-the-rat Jan 22, 2026
d32d635
match default for wt_trunc_thres
sneakers-the-rat Jan 22, 2026
563dd5d
do not default to verbose pytest output
sneakers-the-rat Jan 22, 2026
92ac854
Added InputParams and UpdateParams class and am in the process of inc…
kaghi Jan 29, 2026
68ae324
Added solver function and have addressed downstream changes incorpora…
kaghi Jan 30, 2026
102bfa3
Added a new file as part of deconvolution module that moves the spike…
kaghi Feb 7, 2026
7917fea
Fixing bugs with variable name assignment issues in DeconvBin class
kaghi Feb 11, 2026
12562d2
Fixing more bugs. Running into error with construction of G matrix wh…
kaghi Feb 13, 2026
0fb56e5
Fixed more bugs during testing. Still have some issues with more bugs.
kaghi Feb 16, 2026
5a11d6f
Fixed more bugs with passing variables. Profiling works now, working …
kaghi Feb 17, 2026
2176343
Fixed some elements to dask implementation but dask needs to be re-im…
kaghi Feb 17, 2026
00680fc
Attempt to fix error with passing InputParams in test_solve_scale method
kaghi Feb 18, 2026
793f57a
Fixed lone parentheses error in test_deconv_scale.py
kaghi Feb 18, 2026
bd39625
Debugging test_pipeline.py assertion error with F1 score
kaghi Feb 19, 2026
6da96f7
reintroduce fix from 0b67608
sneakers-the-rat Feb 20, 2026
5fb7f66
make last plot test do nothing like how the rest of the plot tests do…
sneakers-the-rat Feb 20, 2026
2c350bc
Fixed black formatting issues
kaghi Feb 20, 2026
06def20
Fixing test_deconv_G_matrix unit test error.
kaghi Feb 20, 2026
7598f41
Attempt 2 at fixing unit test error.
kaghi Feb 20, 2026
bf18437
Fixed formatting issue of deconv.py
kaghi Feb 21, 2026
64 changes: 42 additions & 22 deletions .github/workflows/ci.yml
@@ -41,13 +41,6 @@ jobs:
run: |
pdm run black --check .

- name: Help - Formatting
if: failure() && steps.black.outcome == 'failure'
run: |
echo "::error::❌ Code formatting check failed."
echo "To fix this, run the following command locally:"
echo " pdm run black ."

lint:
runs-on: ubuntu-latest
steps:
@@ -122,11 +115,9 @@ jobs:
uses: actions/cache@v4
with:
path: |
~/.cache/pdm
.venv
.pdm-build
key: ${{ runner.os }}-pdm-${{ matrix.python-version }}-${{ hashFiles('**/pdm.lock') }}
restore-keys: |
${{ runner.os }}-pdm-${{ matrix.python-version }}-

- name: Install PDM
run: |
@@ -138,25 +129,54 @@
pdm install --check --no-lock -G test

- name: Run Unit Tests
run: pdm run pytest tests/unit -v -n auto --cov=indeca --cov-report=xml
id: unit
run: pdm run pytest tests/unit --cov-report=xml

- name: Run Integration & Regression Tests
run: pdm run pytest tests/integration tests/regression -v -n auto -m "not slow" --cov=indeca --cov-report=xml --cov-append
id: integration
if: success() || steps.unit.conclusion == 'failure'
run: pdm run pytest tests/integration tests/regression -m "not slow" --cov-report=xml --cov-append

- name: Run Other Tests (Robustness, Validation, Demo)
run: pdm run pytest tests/robustness tests/validation tests/demo -v -n auto -m "not slow" --cov=indeca --cov-report=xml --cov-append

- name: Generate Plot Artifacts
run: pdm run plot_test

- uses: actions/upload-artifact@v4
if: always() # Upload artifacts even if tests fail
with:
name: test_output_${{ matrix.python-version }}
path: tests/output
if: success() || steps.unit.conclusion == 'failure' || steps.integration.conclusion == 'failure'
run: pdm run pytest tests/robustness tests/validation tests/demo -m "not slow" --cov-report=xml --cov-append

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
file: ./coverage.xml
fail_ci_if_error: false

plot:
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
python-version: [ "3.11" ]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Cache PDM dependencies
uses: actions/cache@v4
with:
path: .venv
key: ${{ runner.os }}-pdm-${{ matrix.python-version }}-${{ hashFiles('**/pdm.lock') }}
- name: Install PDM
run: |
python -m pip install --upgrade pip
pip install pdm
- name: Install dependencies
run: |
pdm install --check --no-lock -G test
- name: Generate Plot Artifacts
id: plot
run: pdm run plot_test
- uses: actions/upload-artifact@v4
if: always()
with:
name: test_output_${{ matrix.python-version }}
path: tests/output
3 changes: 3 additions & 0 deletions .gitignore
@@ -177,10 +177,13 @@ cython_debug/
/tests/data
*.npy
*.feat
*.prof
*.png
/tests/output/*
!/tests/output/figs
/tests/output/figs/*
!/tests/output/figs/print
!/tests/output/figs/func
/tests/output/figs/func/*
!/tests/output/figs/func/mlspike_comparison
tests/*/output/
9 changes: 9 additions & 0 deletions README.md
@@ -49,4 +49,13 @@ repository of Interpretable Deconvolution for Calcium imaging.
1. Run `CUDA_PATH=whatever python setup.py install`.
If you followed the steps correctly, `CUDA_PATH` shouldn't matter (but it has to be set).
1. Verify that `cuosqp` is installed under your environment.

## Profiling

For comprehensive profiling documentation, see [`benchmarks/profile/README.md`](benchmarks/profile/README.md).

Quick reference:
- **Line-level profiling**: Use `kernprof -l -v your_script.py` for functions decorated with `@profile`
- **Pipeline-level profiling**: Use `yappi_profile` context manager for function-level attribution
- **Benchmark scripts**: Deterministic benchmarks in `benchmarks/profile/` for regression detection

158 changes: 158 additions & 0 deletions benchmarks/profile/README.md
@@ -0,0 +1,158 @@
# Profiling

InDeCa provides two complementary profiling approaches for performance analysis and optimization.

## 1. Line-Level Profiling (line_profiler)

For deep numerical inspection of hot loops, solvers, and kernel construction.

Functions decorated with `@profile` can be profiled using:

```bash
kernprof -l -v your_script.py
```

This is particularly useful for:
- Hot inner loops
- Solver internals
- Kernel construction
- Deconvolution steps
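
As a minimal sketch (the script and function names here are hypothetical, not from the InDeCa codebase), a `@profile`-decorated hot loop might look like this. The fallback decorator lets the file also run as plain Python, since kernprof injects `profile` into builtins only at profiling time:

```python
# hot_loop_demo.py -- hypothetical example; run with `kernprof -l -v hot_loop_demo.py`
import numpy as np

try:
    profile  # provided as a builtin by kernprof
except NameError:  # allow running the script without kernprof
    def profile(func):
        return func

@profile
def rolling_dot(trace, kernel):
    """Toy stand-in for a hot inner loop (e.g. kernel construction)."""
    out = np.zeros_like(trace)
    k = len(kernel)
    for i in range(k, len(trace)):  # per-line timings show up here
        out[i] = trace[i - k : i] @ kernel
    return out

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    rolling_dot(rng.standard_normal(10_000), np.exp(-np.arange(50) / 10.0))
```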

## 2. Pipeline-Level Profiling (yappi + snakeviz)

For function-level attribution and call graph analysis.

### Quick Start

```python
from indeca.utils.profiling import yappi_profile
from indeca.pipeline import pipeline_bin_new, DeconvPipelineConfig

Y = load_your_data()
config = DeconvPipelineConfig(...)

with yappi_profile("pipeline.prof"):
C, S, metrics = pipeline_bin_new(Y, config=config, spawn_dashboard=False)
```

View results:
```bash
snakeviz pipeline.prof
```

### Clock Types

- **wall** (default): Real elapsed time, includes I/O and waiting
- **cpu**: Actual computation time, excludes I/O

```python
with yappi_profile("cpu_profile.prof", clock="cpu"):
...
```

### Usage

The `yappi_profile` context manager wraps code execution and saves profiling data in pstat format:

```python
from indeca.utils.profiling import yappi_profile

with yappi_profile("output.prof", clock="wall"):
# Your code here
result = expensive_function()
```
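
For reference, a context manager with this behavior can be sketched from yappi's public API in a few lines. This is an illustration of the pattern, not the actual `indeca.utils.profiling` source:

```python
import contextlib

import yappi

@contextlib.contextmanager
def yappi_profile_sketch(path, clock="wall"):
    """Illustrative re-implementation; see indeca.utils.profiling for the real one."""
    yappi.set_clock_type(clock)  # "wall" or "cpu"
    yappi.start()
    try:
        yield
    finally:
        yappi.stop()
        # pstat format is what snakeviz and the stdlib pstats module understand
        yappi.get_func_stats().save(path, type="pstat")
        yappi.clear_stats()
```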

## Benchmark Scripts

Deterministic benchmarks for performance regression detection. All scripts use fixed seeds and configurations for reproducible results.
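
A typical script follows a skeleton like the one below (hypothetical; `make_data` and `run_pipeline` are placeholders, and the real scripts in `benchmarks/profile/` may differ in detail):

```python
# Hypothetical skeleton of a benchmarks/profile/ script.
import argparse
import time

import numpy as np

def make_data(n_cells=10, n_frames=1_000, seed=42):
    """Fixed seed => identical input on every run, so timings stay comparable."""
    rng = np.random.default_rng(seed)
    return np.abs(rng.standard_normal((n_cells, n_frames)))

def run_pipeline(Y):
    # Placeholder workload; a real script would call the InDeCa pipeline here.
    return np.linalg.svd(Y, full_matrices=False)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile", action="store_true", help="enable yappi")
    parser.add_argument("--clock", choices=["wall", "cpu"], default="wall")
    args = parser.parse_args()

    Y = make_data()
    t0 = time.perf_counter()
    if args.profile:
        from indeca.utils.profiling import yappi_profile
        with yappi_profile("benchmarks/profile/output/demo.prof", clock=args.clock):
            run_pipeline(Y)
    else:
        run_pipeline(Y)
    print(f"runtime: {time.perf_counter() - t0:.2f} s")

if __name__ == "__main__":
    main()
```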

### Small Benchmark (10 cells × 1K frames)

Quick iterations and fast profiling:

```bash
# Quick runtime check
python benchmarks/profile/profile_pipeline_small.py

# With yappi profiling (wall-clock time)
python benchmarks/profile/profile_pipeline_small.py --profile

# With yappi profiling (CPU time)
python benchmarks/profile/profile_pipeline_small.py --profile --clock cpu

# View results
snakeviz benchmarks/profile/output/profile_pipeline_small.prof
```

### Medium Benchmark (50 cells × 5K frames)

Realistic workload testing:

```bash
# Quick runtime check
python benchmarks/profile/profile_pipeline_medium.py

# With profiling
python benchmarks/profile/profile_pipeline_medium.py --profile

# View results
snakeviz benchmarks/profile/output/profile_pipeline_medium.prof
```

### Large Benchmark (100 cells × 10K frames)

Comprehensive profiling - **warning: may take several minutes**:

```bash
# Quick runtime check
python benchmarks/profile/profile_pipeline_large.py

# With profiling
python benchmarks/profile/profile_pipeline_large.py --profile

# View results
snakeviz benchmarks/profile/output/profile_pipeline_large.prof
```

## When to Use Each Tool

| Tool | Use Case |
|------|----------|
| line_profiler | Hot inner loops, solver internals, kernel construction |
| yappi | Pipeline flow, function call attribution, call graphs |
| Benchmark scripts | Performance regression detection, optimization validation |

## Performance Regression Workflow

1. **Baseline**: Run benchmark without profiling to establish baseline runtime
```bash
python benchmarks/profile/profile_pipeline_small.py
```

2. **Profile**: Run with profiling to identify bottlenecks
```bash
python benchmarks/profile/profile_pipeline_small.py --profile
```

3. **Analyze**: View call graph and function timings in snakeviz
```bash
snakeviz benchmarks/profile/output/profile_pipeline_small.prof
```

4. **Optimize**: Focus on the functions with the highest cumulative time

5. **Validate**: Re-run benchmark to measure improvement

## Output Location

All profiling output files are saved in `benchmarks/profile/output/`:

- `profile_pipeline_small.prof`
- `profile_pipeline_medium.prof`
- `profile_pipeline_large.prof`

These files are in pstat format and can be viewed with:
- **snakeviz** (recommended): Interactive web-based visualization
- **gprof2dot**: Generate call graph diagrams
- **pyprof2calltree**: Convert for use with kcachegrind
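
For example, a static call graph can be rendered with gprof2dot (assuming Graphviz's `dot` is on your PATH):

```bash
gprof2dot -f pstats benchmarks/profile/output/profile_pipeline_small.prof \
    | dot -Tpng -o callgraph.png
```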
