Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
7ecf1f1
allow chunk sizes in {1,2,3}
Jun 27, 2025
1ba4dda
RGB sensor model partly from Noise-Optimal HDR paper
Jun 30, 2025
76636ec
Bayer pattern and demosaicing, skimage dependency addition
Sep 26, 2025
6bdbaa5
allow chunk sizes in {1,2,3}
Jun 27, 2025
3378fea
RGB sensor model partly from Noise-Optimal HDR paper
Jun 30, 2025
007d9a2
Bayer pattern and demosaicing, skimage dependency addition
Sep 26, 2025
fd6d71a
return to full words in CLI interface
Jan 15, 2026
9208923
add editor swap files to gitignore
Jan 15, 2026
fb737b8
typo
Jan 15, 2026
f28f4f6
allow grayscale SPAD simulation and binomial samples with bit-depth > 1
Jan 15, 2026
1896d46
lint fix
Jan 15, 2026
2460c75
Merge branch 'main' into rgb_configurable_exposure
jungerm2 Jan 15, 2026
0417c04
re-format
jungerm2 Jan 15, 2026
7992767
resolve conflicts
jungerm2 Jan 15, 2026
f5a1853
Test docs build on PR
jungerm2 Jan 15, 2026
d199131
remove skimage dependency
Jan 27, 2026
bbff6c7
rgb simulation interface cleanup; handling potential alpha channel in…
Jan 27, 2026
6bb38c3
remove skimage dependency
Jan 28, 2026
9edb88e
spad sim refactoring, removed skimage dependency
Jan 29, 2026
f25fee0
forgot linting
Jan 29, 2026
1a570bb
merge w/ main
jungerm2 Feb 24, 2026
ce8cc91
fix typing issues
jungerm2 Feb 24, 2026
3991229
Merge branch 'main' into rgb_configurable_exposure
jungerm2 Mar 3, 2026
45373bd
Merge branch 'main' into rgb_configurable_exposure
jungerm2 Mar 3, 2026
5e15ca7
add bitplanes arg for spc emulation and in schemas, use it in ffmpeg.…
jungerm2 Mar 4, 2026
bfd5aef
fix interpolate dataset
jungerm2 Mar 4, 2026
00650d2
remove unused strip_alpha helper
jungerm2 Mar 4, 2026
618cb3c
rename raw_to_rgb_bayer to raw_bayer_to_rgb
jungerm2 Mar 4, 2026
151f0b0
make emulate rgb from seq independent from seq length
jungerm2 Mar 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ ENV/
.vscode/
.idea/

# Editor swap files
*.swp
*.asv

# Custom
paper/
out/
Expand Down
8 changes: 8 additions & 0 deletions docs/source/apidocs/visionsim.utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ visionsim.utils.color module
:show-inheritance:
:undoc-members:

visionsim.utils.imgproc module
------------------------------

.. automodule:: visionsim.utils.imgproc
:members:
:show-inheritance:
:undoc-members:

visionsim.utils.progress module
-------------------------------

Expand Down
2 changes: 1 addition & 1 deletion examples/quickstart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

visionsim blender.render-animation lego.blend quickstart/lego-gt/ --render-config.keyframe-multiplier=5.0
visionsim ffmpeg.animate --input-dir=quickstart/lego-gt/ --outfile=quickstart/preview.mp4 --step=5 --fps=25 --force
visionsim interpolate.frames --input-dir=quickstart/lego-gt/ --output-dir=quickstart/lego-interp/ --n=32
visionsim interpolate.dataset --input-dir=quickstart/lego-gt/ --output-dir=quickstart/lego-interp/ --n=32
visionsim emulate.rgb --input-dir=quickstart/lego-interp/ --output-dir=quickstart/lego-rgb25fps/ --chunk-size=160 --readout-std=0
visionsim emulate.spad --input-dir=quickstart/lego-interp/ --output-dir=quickstart/lego-spc4kHz/
visionsim emulate.events --input-dir=quickstart/lego-gt/ --output-dir=quickstart/lego-dvs125fps/ --fps=125
2 changes: 1 addition & 1 deletion tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def build_docs(c, preview=False, full=False):
for i, n in enumerate((25, 50, 100, 200)):
for cmd in (
f"visionsim blender.render-animation lego.blend interpolation/lego-{n:04}/ --keyframe-multiplier={n / 100} --width=320 --height=320",
f"visionsim interpolate.frames interpolation/lego-{n:04}/ -o interpolation/lego{n:04}-interp/ -n={int(64 / 2**i)}",
f"visionsim interpolate.dataset interpolation/lego-{n:04}/ -o interpolation/lego{n:04}-interp/ -n={int(64 / 2**i)}",
f"gifski $(ls -1a interpolation/lego{n:04}-interp/frames/*.png | sed -n '1~8p') --fps 25 -o {DOCS_STATIC}/lego{n:04}-interp.gif",
):
_run(c, cmd, echo=True, warn=True)
Expand Down
10 changes: 9 additions & 1 deletion visionsim/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
import shlex
import subprocess
import sys
from functools import lru_cache
from pathlib import Path
from typing import overload
from typing import Any, Literal, overload

import tyro
from natsort import natsorted
Expand Down Expand Up @@ -81,6 +82,13 @@ def _validate_directories(
return input_path, output_path


@lru_cache
def _log_once(value: Any, msg: str, level: Literal["debug", "info", "warning", "error", "critical"] = "warning") -> Any:
"""Log a message once per unique value, returns the value."""
getattr(_log, level)(msg)
return value


def _run(
command: list[str] | str,
shell: bool = False,
Expand Down
144 changes: 99 additions & 45 deletions visionsim/cli/emulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,50 @@
import math
import shutil
from pathlib import Path
from typing import Any
from typing import Any, Literal

import numpy as np


def spad(
input_dir: Path,
output_dir: Path,
pattern: str | None = None,
factor: float = 1.0,
flux_gain: float = 1.0,
bitplanes: int = 1,
bitdepth: int | None = None,
force_gray: bool = False,
seed: int = 2147483647,
pattern: str | None = None,
max_size: int = 1000,
force: bool = False,
) -> None:
"""Perform bernoulli sampling on linearized RGB frames to yield binary frames
"""Perform binomial sampling on linearized RGB frames to yield (summed) single photon frames

This will save numpy files which may be bitpacked (when bitplanes == 1) and may have different dtypes
depending on the number of summed bitplanes. The shape of the output arrays will be (max_size, h, w, c) or (remainder, h, w, c)
where remainder = len(dataset) % max_size, where the width dimension is ceil(width / 8) when bitpacked.

If the input contains alpha channel (determined by the last dimension of the input images), it will be stripped.

Args:
input_dir: directory in which to look for frames
output_dir: directory in which to save binary frames
pattern: used to find source image files to convert to binary frames,
output_dir: directory in which to save single photon frames
pattern: used to find source image files to convert to single photon frames,
not needed when ``input_dir`` points to a valid dataset.
factor: multiplicative factor controlling dynamic range of output
flux_gain: multiplicative factor controlling dynamic range of output
bitplanes: number of summed binary measurements
bitdepth: if set, ``bitplanes`` will be overridden to ``2**bitdepth - 1``
force_gray: to disable RGB sensing even if the input images are color
seed: random seed to use while sampling, ensures reproducibility
max_size: maximum number of frames per output array before rolling over to new file
force: if true, overwrite output file(s) if present, else throw error
"""
from numpy.lib.format import open_memmap

from visionsim.cli import _log, _log_once
from visionsim.dataset import Dataset, Metadata
from visionsim.emulate.spc import emulate_spc
from visionsim.utils.color import srgb_to_linearrgb
from visionsim.utils.color import rgb_to_grayscale, srgb_to_linearrgb
from visionsim.utils.progress import ElapsedProgress

if input_dir.resolve() == output_dir.resolve():
Expand All @@ -48,6 +61,17 @@ def spad(
else:
dataset = Dataset.from_path(input_dir)

if bitdepth is not None:
_log.info(f"Overriding bitplanes to {2**bitdepth - 1} since bitdepth is set to {bitdepth}.")
bitplanes = 2**bitdepth - 1

# Map bitplanes to the smallest uint type that can hold it (minimum 8 bits)
out_dtype = next(
dtype
for limit, dtype in [(8, np.uint8), (16, np.uint16), (32, np.uint32), (64, np.uint64)]
if bitplanes <= 2**limit - 1
)

rng = np.random.default_rng(int(seed))
output_dir.mkdir(exist_ok=True, parents=True)
transforms: list[dict[str, Any]] = []
Expand All @@ -64,33 +88,41 @@ def spad(
else:
data = data.astype(float) / 255.0

# Default to bitpacking width
binary_img = emulate_spc(data, factor=factor, rng=rng) * 255
binary_img = binary_img.astype(np.uint8) >= 128
binary_img = np.packbits(binary_img, axis=1)
if len(data.shape) == 3 and data.shape[-1] in (2, 4): # LA/RGBA
_log_once(data.shape, "Alpha channel detected, ignoring it.", "info")
data = data[..., :-1]

if force_gray:
data = rgb_to_grayscale(data)

imgs = emulate_spc(data, flux_gain=flux_gain, bitplanes=bitplanes, rng=rng)

offset = i % max_size
file_path = output_dir / f"{i // max_size:04}.npy"
transform["file_path"] = file_path.name
transform["bitpack_dim"] = 2
transform["bitplanes"] = bitplanes
transform["offset"] = offset
h, w, c = data.shape

if bitplanes == 1:
# Default to bitpacking width
imgs = imgs >= 0.5
imgs = np.packbits(imgs, axis=1)
transform["bitpack_dim"] = 2
w = math.ceil(transform.get("w", w) / 8)
else:
w = transform.get("w", w)

if not file_path.exists():
data = open_memmap(
file_path,
mode="w+",
dtype=np.uint8,
shape=(
min(max_size, remainder),
transform.get("h", h),
math.ceil(transform.get("w", w) / 8),
transform.get("c", c),
),
dtype=out_dtype,
shape=(min(max_size, remainder), transform.get("h", h), w, c),
)
data[offset] = binary_img
data[offset] = imgs
else:
open_memmap(file_path)[offset] = binary_img
open_memmap(file_path)[offset] = imgs

transforms.append(transform)
progress.update(task, advance=1)
Expand Down Expand Up @@ -136,6 +168,7 @@ def events(

from visionsim.dataset import Dataset
from visionsim.emulate.dvs import EventEmulator
from visionsim.utils.color import rgb_to_grayscale
from visionsim.utils.progress import ElapsedProgress

if input_dir.resolve() == output_dir.resolve():
Expand Down Expand Up @@ -171,10 +204,7 @@ def events(
task = progress.add_task("Writing DVS data...", total=len(dataset))

for idx, (frame, _) in enumerate(dataset): # type: ignore
# Manually grayscale as we've already converted to floating point pixel values
# Values from http://en.wikipedia.org/wiki/Grayscale
r, g, b, *_ = np.transpose(frame, (2, 0, 1))
luma = 0.0722 * b + 0.7152 * g + 0.2126 * r
luma = rgb_to_grayscale(frame)
events = emulator.generate_events(luma, idx / int(fps))

if events is not None:
Expand All @@ -198,10 +228,16 @@ def rgb(
input_dir: Path,
output_dir: Path,
chunk_size: int = 10,
factor: float = 1.0,
readout_std: float = 20.0,
fwc: int | None = None,
duplicate: float = 1.0,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This duplicate arg was a hack, so I'm glad it's gone, but it was meant to address emulation from a short sequence. How do you deal with this now?

Copy link
Contributor Author

@shantanu-gupta shantanu-gupta Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's nothing here to specifically help with that; this just sums up the frames based on the chunk_size parameter, so there can be juddering-type artifacts for chunk_sizes like {2, 3, 4, ...}. I think chunk_size = 1 should work fine as that just uses the original frames as-is -- at least the frames I got from the pre-release dataset looked alright.

I would prefer to recommend to the user to use the interpolation modules or render originally at higher frame rates to avoid artifacts, rather than resorting to any hacks here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree — maybe it's worth adding this to the docs. In fact, a new troubleshooting page might be a good idea.

shutter_frac: float = 1.0,
readout_std: float = 16.0,
fwc: float | None = None,
flux_gain: float = 2.0**12,
iso_gain: float = 1.0,
adc_bitdepth: int = 12,
mosaic: bool = False,
demosaic: Literal["off", "bilinear", "MHC04"] = "MHC04",
denoise_sigma: float = 0.0,
sharpen_weight: float = 0.0,
pattern: str | None = None,
force: bool = False,
) -> None:
Expand All @@ -211,23 +247,29 @@ def rgb(
input_dir: directory in which to look for frames
output_dir: directory in which to save binary frames
chunk_size: number of consecutive frames to average together
factor: multiply image's linear intensity by this weight
readout_std: standard deviation of gaussian read noise
fwc: full well capacity of sensor in arbitrary units (relative to factor & chunk_size)
duplicate: when chunk size is too small, this model is ill-suited and creates unrealistic noise.
This parameter artificially increases the chunk size by using each input image ``duplicate`` number of times
shutter_frac: fraction of inter-frame duration shutter is active (0 to 1)
readout_std: standard deviation of gaussian read noise in photoelectrons
fwc: full well capacity of sensor in photoelectrons
flux_gain: factor to scale the input images before Poisson simulation
iso_gain: gain for photo-electron reading after Poisson rng
adc_bitdepth: ADC bitdepth
mosaic: implement mosaiced R-/G-/B- pixels or an innately 3-channel sensor
demosaic: demosaicing method (default Malvar et al.'s method)
denoise_sigma: Gaussian blur with this sigma will be used (default 0.0 disables this)
sharpen_weight: weight used in sharpening (default 0.0 disables this)
pattern: used to find source image files to convert to rgb frames,
not needed when ``input_dir`` points to a valid dataset.
force: if true, overwrite output file(s) if present
"""
import imageio.v3 as iio
import more_itertools as mitertools

from visionsim.cli import _log_once
from visionsim.dataset import Dataset, Metadata
from visionsim.emulate.rgb import emulate_rgb_from_sequence
from visionsim.interpolate.pose import pose_interp
from visionsim.simulate.blender import INDEX_PADDING, ITEMS_PER_SUBFOLDER
from visionsim.utils.color import srgb_to_linearrgb
from visionsim.utils.color import linearrgb_to_srgb, srgb_to_linearrgb
from visionsim.utils.progress import ElapsedProgress

if input_dir.resolve() == output_dir.resolve():
Expand Down Expand Up @@ -260,27 +302,39 @@ def rgb(
# Assume images have been tonemapped and undo mapping
imgs = srgb_to_linearrgb(imgs)

if len(imgs.shape) == 4 and imgs.shape[-1] in (2, 4): # LA/RGBA
_log_once(imgs.shape, "Alpha channel detected, ignoring it.", "info")
imgs = imgs[..., :-1]

rgb_img = emulate_rgb_from_sequence(
imgs * duplicate,
imgs,
readout_std=readout_std,
fwc=fwc or (chunk_size * duplicate),
factor=factor,
fwc=fwc or np.inf,
shutter_frac=shutter_frac,
flux_gain=flux_gain,
iso_gain=iso_gain,
adc_bitdepth=adc_bitdepth,
mosaic=mosaic,
demosaic=demosaic,
denoise_sigma=denoise_sigma,
sharpen_weight=sharpen_weight,
)

if not pattern:
# We checked that there's only a single camera, just re-use any transforms dict
(transform, *_), transforms_iter = mitertools.spy(transforms_iter)
poses = np.array([t["transform_matrix"] for t in transforms_iter])
transform["transform_matrix"] = pose_interp(poses, k=np.clip(len(poses) - 1, 2, 3))(0.5)

if len(poses) > 1:
transform["transform_matrix"] = pose_interp(poses, k=min(len(poses) - 1, 3))(0.5)
else:
transform["transform_matrix"] = poses[0]

transform["file_path"] = outpath.relative_to(output_dir)
transforms.append(transform)

# TODO: Alpha and grayscale?
# if rgb_img.shape[-1] == 1:
# rgb_img = np.repeat(rgb_img, 3, axis=-1)

outpath.parent.mkdir(exist_ok=True, parents=True)
iio.imwrite(outpath, (rgb_img * 255).astype(np.uint8))
iio.imwrite(outpath, (linearrgb_to_srgb(rgb_img) * 255).astype(np.uint8))
progress.update(task, advance=chunk_size)

if not pattern:
Expand Down
Loading
Loading