From 7ecf1f19b1271c88964cce9ee011f622a612f815 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Fri, 27 Jun 2025 14:45:41 -0500 Subject: [PATCH 01/24] allow chunk sizes in {1,2,3} --- visionsim/cli/emulate.py | 5 ++++- visionsim/interpolate/pose.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index f3976b0..fd70ed3 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -269,7 +269,10 @@ def rgb( fwc=fwc or (chunk_size * duplicate), factor=factor, ) - pose = pose_interp(poses)(0.5) if transforms_new else None + if transforms_new: + pose = pose_interp(poses)(0.5) if chunk_size != 1 else poses[0] + else: + pose = None if rgb_img.shape[-1] == 1: rgb_img = np.repeat(rgb_img, 3, axis=-1) diff --git a/visionsim/interpolate/pose.py b/visionsim/interpolate/pose.py index 8b3f7d2..8064527 100644 --- a/visionsim/interpolate/pose.py +++ b/visionsim/interpolate/pose.py @@ -26,6 +26,8 @@ def __init__(self, transforms: npt.ArrayLike, ts: npt.ArrayLike | None = None, k self.transforms = np.array(transforms) self.ts = np.linspace(0, 1, len(self.transforms)) if ts is None else np.array(ts) self.determinants = np.linalg.det(self.transforms[:, :3, :3]) + + k = min(len(self.transforms)-1, k) # for small chunk_sizes self.k = k if normalize: From 1ba4dda3c0aa833f043e8bfdf5bc4442e9d652ef Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Sun, 29 Jun 2025 22:19:56 -0500 Subject: [PATCH 02/24] RGB sensor model partly from Noise-Optimal HDR paper --- visionsim/cli/emulate.py | 16 +++++++++++----- visionsim/emulate/rgb.py | 39 +++++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index fd70ed3..47781e0 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -196,10 +196,12 @@ def rgb( input_dir: str | os.PathLike, output_dir: str | os.PathLike, chunk_size: int = 10, - factor: float = 
1.0, + frac_shutter_angle: float = 1.0, readout_std: float = 20.0, - fwc: int | None = None, + fwc: float | None = None, duplicate: float = 1.0, + scale_flux: float = 1.0, + gain_ISO: float = 1.0, pattern: str = "frame_{:06}.png", mode: Literal["npy", "img"] = "npy", force: bool = False, @@ -210,10 +212,12 @@ def rgb( input_dir: directory in which to look for frames output_dir: directory in which to save binary frames chunk_size: number of consecutive frames to average together - factor: multiply image's linear intensity by this weight + frac_shutter_angle: fraction of inter-frame duration shutter is active readout_std: standard deviation of gaussian read noise fwc: full well capacity of sensor in arbitrary units (relative to factor & chunk_size) duplicate: when chunk size is too small, this model is ill-suited and creates unrealistic noise. This parameter artificially increases the chunk size by using each input image `duplicate` number of times + scale_flux: factor to scale the input images before Poisson simulation + gain_ISO: gain for photo-electron reading after Poisson rng pattern: filenames of frames should match this mode: how to save binary frames force: if true, overwrite output file(s) if present @@ -266,8 +270,10 @@ def rgb( rgb_img = emulate_rgb_from_sequence( imgs * duplicate, readout_std=readout_std, - fwc=fwc or (chunk_size * duplicate), - factor=factor, + fwc=fwc or np.inf, + frac_shutter_angle=frac_shutter_angle, + scale_flux=scale_flux, + gain_ISO=gain_ISO, ) if transforms_new: pose = pose_interp(poses)(0.5) if chunk_size != 1 else poses[0] diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index 945ded0..155acf7 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -8,10 +8,12 @@ def emulate_rgb_from_sequence( sequence: npt.ArrayLike, + frac_shutter_angle: float = 1.0, readout_std: float = 20.0, - fwc: float = 500.0, + fwc: float = 10000.0, bitdepth: int = 12, - factor: float = 1.0, + scale_flux: float = 1.0, + 
gain_ISO: float = 1.0, rng: np.random.Generator | None = None, ) -> npt.NDArray: """Emulates a conventional RGB camera from a sequence of intensity frames. @@ -24,31 +26,40 @@ def emulate_rgb_from_sequence( Args: sequence (npt.ArrayLike): Input sequence of linear-intensity frames, can be a collection of frames, or np/torch array with time as the first dimension. + frac_shutter_angle (float, optional): fraction of inter-frame duration the shutter is active. readout_std (float, optional): Standard deviation of zero mean Gaussian read noise. Defaults to 20.0. fwc (float, optional): Full well capacity, used for normalization. Defaults to 500.0. bitdepth (int, optional): Resolution of ADC in bits. Defaults to 12. - factor (float, optional): Scaling factor to control intensity of output RGB image. Defaults to 1.0. + scale_flux (float, optional): factor to scale the input [0, 1] image _before_ Poisson rng + gain_ISO (float, optional): factor to scale the photo-electron reading _after_ Poisson rng rng (np.random.Generator, optional): Optional random number generator. Defaults to none. Returns: Quantized sRGB patch is returned """ # Get sum of linear-intensity frames. - sequence = np.array(sequence) - burst_size = len(sequence) - patch = np.sum(sequence, axis=0) * factor + burst_size = int(max(1, np.ceil(len(sequence) * frac_shutter_angle))) + sequence = np.array(sequence[:burst_size]) - # Perform poisson sampling and add zero-mean gaussian read noise + patch = np.sum(sequence, axis=0) * scale_flux + + # Roughly translating the model in Eqs. (1,2) and Fig. 1 of Hasinoff et al.: + # S. W. Hasinoff, F. Durand, and W. T. Freeman, + # “Noise-optimal capture for high dynamic range photography,” CVPR 2010. 
+ + # Perform poisson sampling rng = np.random.default_rng() if rng is None else rng patch = rng.poisson(patch).astype(float) - patch += rng.normal(0, readout_std * burst_size / 255.0, size=patch.shape) - - # Normalize by full well capacity, clip highlights, and quantize to 12-bits - patch = np.clip(patch / fwc, 0, 1.0) - patch = np.round(patch * (2**bitdepth - 1)) / (2**bitdepth - 1) + # full-well capacity + patch = np.clip(patch, 0, fwc) + # readout noise + patch += rng.normal(0, readout_std, size=patch.shape) + # apply ISO gain + patch *= gain_ISO + # assume a "perfect" ADC with no additional noise except quantization + patch = np.round(np.clip(patch, 0, (2**bitdepth-1))) / (2**bitdepth - 1) - # Multiply by gain to keep constant(-ish) brightness - patch *= fwc / (burst_size * factor) + # do not attempt to keep the same levels as the input # Convert to sRGB color space for viewing and quantize to 8-bits patch = linearrgb_to_srgb(patch) From 76636ecd47ca09a83deb68f0c3c971b2593ef250 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Fri, 26 Sep 2025 16:00:02 -0500 Subject: [PATCH 03/24] Bayer pattern and demosaicing, skimage dependency addition --- pyproject.toml | 1 + visionsim/cli/emulate.py | 49 ++++++++----- visionsim/emulate/rgb.py | 45 +++++++++--- visionsim/utils/color.py | 149 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 213 insertions(+), 31 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e1081a6..de9ad61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ classifiers = [ dependencies = [ "opencv-python", "imageio", + "scikit-image", "more_itertools", "numpy", "torch", diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index 47781e0..95d86ff 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -196,12 +196,15 @@ def rgb( input_dir: str | os.PathLike, output_dir: str | os.PathLike, chunk_size: int = 10, - frac_shutter_angle: float = 1.0, - readout_std: float = 20.0, + shutr_angl: 
float = 360.0, + rdout_std: float = 0.3, fwc: float | None = None, - duplicate: float = 1.0, scale_flux: float = 1.0, - gain_ISO: float = 1.0, + g_ISO: float = 1.0, + bitdepth: int = 12, + demosaic: Literal["off", "bilinear", "MHC04"] = "bilinear", + dnois_sigma: float = 0.0, + sharpn_wt: float = 0.0, pattern: str = "frame_{:06}.png", mode: Literal["npy", "img"] = "npy", force: bool = False, @@ -212,14 +215,17 @@ def rgb( input_dir: directory in which to look for frames output_dir: directory in which to save binary frames chunk_size: number of consecutive frames to average together - frac_shutter_angle: fraction of inter-frame duration shutter is active - readout_std: standard deviation of gaussian read noise - fwc: full well capacity of sensor in arbitrary units (relative to factor & chunk_size) - duplicate: when chunk size is too small, this model is ill-suited and creates unrealistic noise. This parameter artificially increases the chunk size by using each input image `duplicate` number of times + shutr_angl: fraction of inter-frame duration shutter is active (0-360 deg) + rdout_std: standard deviation of gaussian read noise in photoelectrons + fwc: full well capacity of sensor in photoelectrons scale_flux: factor to scale the input images before Poisson simulation - gain_ISO: gain for photo-electron reading after Poisson rng + g_ISO: gain for photo-electron reading after Poisson rng + bitdepth: ADC bitdepth + demosaic: demosaicing method (default bilinear) + dnois_sigma: Gaussian blur with this sigma will be used (default 0.0 disables this) + sharpn_wt: weight used in sharpening (default 0.0 disables this) pattern: filenames of frames should match this - mode: how to save binary frames + mode: how to save generated frames force: if true, overwrite output file(s) if present """ import copy @@ -267,23 +273,28 @@ def rgb( # Assume images have been tonemapped and undo mapping imgs = srgb_to_linearrgb(imgs) + # batch_size is not relevant here + imgs = imgs[:,0,...] 
+ rgb_img = emulate_rgb_from_sequence( - imgs * duplicate, - readout_std=readout_std, + imgs, + readout_std=rdout_std, fwc=fwc or np.inf, - frac_shutter_angle=frac_shutter_angle, + shutter_angle_degrees=shutr_angl, scale_flux=scale_flux, - gain_ISO=gain_ISO, + gain_ISO=g_ISO, + bitdepth=bitdepth, + demosaic_method=demosaic, + denoise_sigma=dnois_sigma, + sharpen_weight=sharpn_wt, ) + if transforms_new: - pose = pose_interp(poses)(0.5) if chunk_size != 1 else poses[0] + pose = pose_interp(poses)(0.5) if len(poses) != 1 else poses[0] else: pose = None - if rgb_img.shape[-1] == 1: - rgb_img = np.repeat(rgb_img, 3, axis=-1) - - writer[i] = ((rgb_img * 255).astype(np.uint8), pose) + writer[i] = (rgb_img, pose) progress.update(task, advance=len(idxs)) diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index 155acf7..c31f5f5 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -2,18 +2,23 @@ import numpy as np import numpy.typing as npt +from skimage.filters import gaussian, unsharp_mask +from typing_extensions import Literal -from visionsim.utils.color import linearrgb_to_srgb +from visionsim.utils.color import linearrgb_to_srgb, raw2rgb_bayer, rgb2raw_bayer def emulate_rgb_from_sequence( sequence: npt.ArrayLike, - frac_shutter_angle: float = 1.0, - readout_std: float = 20.0, + shutter_angle_degrees: float = 360.0, + readout_std: float = 0.3, fwc: float = 10000.0, bitdepth: int = 12, scale_flux: float = 1.0, gain_ISO: float = 1.0, + demosaic_method: Literal["off", "bilinear", "MHC04"] = "bilinear", + denoise_sigma: float = 0.0, + sharpen_weight: float = 0.0, rng: np.random.Generator | None = None, ) -> npt.NDArray: """Emulates a conventional RGB camera from a sequence of intensity frames. @@ -26,23 +31,29 @@ def emulate_rgb_from_sequence( Args: sequence (npt.ArrayLike): Input sequence of linear-intensity frames, can be a collection of frames, or np/torch array with time as the first dimension. 
- frac_shutter_angle (float, optional): fraction of inter-frame duration the shutter is active. + shutter_angle_degrees (float, optional): fraction of inter-frame duration the shutter is active. readout_std (float, optional): Standard deviation of zero mean Gaussian read noise. Defaults to 20.0. fwc (float, optional): Full well capacity, used for normalization. Defaults to 500.0. bitdepth (int, optional): Resolution of ADC in bits. Defaults to 12. scale_flux (float, optional): factor to scale the input [0, 1] image _before_ Poisson rng gain_ISO (float, optional): factor to scale the photo-electron reading _after_ Poisson rng + demosaic_method (string, optional): demosaicing method to use + denoise_sigma (float, optional): Gaussian blur kernel sigma (disabled if 0.0) + sharpen_weight (float, optional): sharpening weight (disabled if 0.0) rng (np.random.Generator, optional): Optional random number generator. Defaults to none. Returns: Quantized sRGB patch is returned """ # Get sum of linear-intensity frames. - burst_size = int(max(1, np.ceil(len(sequence) * frac_shutter_angle))) + burst_size = int(max(1, np.ceil(len(sequence) * (shutter_angle_degrees/360.0)))) sequence = np.array(sequence[:burst_size]) - patch = np.sum(sequence, axis=0) * scale_flux + color = (len(patch.shape) > 2) and (patch.shape[2] > 1) + if color: + patch = rgb2raw_bayer(patch) + # Roughly translating the model in Eqs. (1,2) and Fig. 1 of Hasinoff et al.: # S. W. Hasinoff, F. Durand, and W. T. Freeman, # “Noise-optimal capture for high dynamic range photography,” CVPR 2010. 
@@ -56,14 +67,26 @@ def emulate_rgb_from_sequence( patch += rng.normal(0, readout_std, size=patch.shape) # apply ISO gain patch *= gain_ISO - # assume a "perfect" ADC with no additional noise except quantization - patch = np.round(np.clip(patch, 0, (2**bitdepth-1))) / (2**bitdepth - 1) + # assume perfect quantization in ADC + patch = np.round(np.clip(patch, 0, (2**bitdepth-1))) + patch = patch * (1.0 / (2**bitdepth - 1)) + + # de-mosaicing + if color: + patch = raw2rgb_bayer(patch, method=demosaic_method) - # do not attempt to keep the same levels as the input + # de-noising and sharpening + if denoise_sigma != 0.0: + patch = gaussian(patch, denoise_sigma) + if sharpen_weight != 0.0: + patch = unsharp_mask(patch, amount=sharpen_weight, + channel_axis=2 if color else None) # Convert to sRGB color space for viewing and quantize to 8-bits - patch = linearrgb_to_srgb(patch) - patch = np.round(patch * 2**8) / 2**8 + patch = linearrgb_to_srgb(patch.astype(np.double)) + patch = np.round(patch * 255).astype(np.uint8) + if not color: # fake it anyway + patch = np.repeat(patch, 3, axis=-1) return patch diff --git a/visionsim/utils/color.py b/visionsim/utils/color.py index 1fb6645..2223b1f 100644 --- a/visionsim/utils/color.py +++ b/visionsim/utils/color.py @@ -3,7 +3,9 @@ import numpy as np import numpy.typing as npt import torch -from typing_extensions import overload +from scipy.ndimage import correlate +from skimage.util import img_as_float +from typing_extensions import Literal, overload @overload @@ -56,3 +58,148 @@ def linearrgb_to_srgb(img: torch.Tensor | npt.NDArray) -> torch.Tensor | npt.NDA img[mask] = img[mask] * 12.92 # type: ignore img[~mask] = module.clip(1.055 * img[~mask] ** (1.0 / 2.4) - 0.055, 0.0, 1.0) return img + + +def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, + cfa_pattern: Literal["rggb"] = "rggb" +) -> torch.Tensor | npt.NDArray: + """Mosaicing + + Realizes a mosaiced CFA image as would be sampled by a real Bayer-patterned sensor + + Args: + 
rgb: hypothetical true RGB signal + cfa_pattern: Bayer pattern (only RGGB implemented for now) + + Returns: + raw: CFA mosaiced data + """ + if cfa_pattern == "rggb": + raw = np.copy(rgb[:,:,0]) + raw[::2, 1::2] = rgb[::2, 1::2, 1] + raw[1::2, ::2] = rgb[1::2, ::2, 1] + raw[1::2, 1::2] = rgb[1::2, 1::2, 2] + else: + raise NotImplementedError + return raw + + +def raw2rgb_bayer(raw: torch.Tensor | npt.NDArray, + cfa_pattern: Literal["rggb"] = "rggb", + method: Literal["off", "bilinear", "MHC04"] = "bilinear" +) -> torch.Tensor | npt.NDArray: + """Demosaicing + + Convolution-based implementation as suggested by Malvar et al., + "High-quality linear interpolation for demosaicing of Bayer-patterned color images", ICASSP 2004. + https://home.cis.rit.edu/~cnspci/references/dip/demosaicking/malvar2004.pdf + + Alternative implementations are also available from OpenCV: + rgb = cv2.cvtColor(, cv2.COLOR_BAYER_BG2BGR)[:,:,::-1], + and the colour-demosaicing library (https://pypi.org/project/colour-demosaicing): + rgb = demosaicing_CFA_Bayer_bilinear(raw, pattern="RGGB") + rgb = demosaicing_CFA_Bayer_Malvar2004(raw, pattern="RGGB"), + which appear to give similar results but could run faster (not benchmarked). 
+ + Args: + raw: input array (has to be exactly 2D) + cfa_pattern: Bayer pattern shape (only RGGB implemented for now) + + Returns: + rgb: demosaiced RGB image + """ + + if cfa_pattern == "rggb": + if any(((L % 2) != 0) for L in raw.shape): + raise RuntimeError(f"Expected even-length raw array, got {(raw.shape)}") + + if not isinstance(raw, np.floating): + raw = img_as_float(raw) + + R, G1, G2, B = raw[::2, ::2], raw[::2, 1::2], raw[1::2, ::2], raw[1::2, 1::2] + + rgb = np.zeros(raw.shape + (3,), dtype=raw.dtype) + rgb[::2, ::2, 0] = R + rgb[::2, 1::2, 1] = G1 + rgb[1::2, ::2, 1] = G2 + rgb[1::2, 1::2, 2] = B + + if method == "off": + return rgb + + if method in ["bilinear", "MHC04"]: + h_avg_fw_x = np.array([[0., 0.5, 0.5]]) + h_avg_bw_x = np.fliplr(h_avg_fw_x) + + rgb[::2, ::2, 1] = 0.5 * (correlate(G1, h_avg_bw_x, mode="nearest") + + correlate(G2, h_avg_bw_x.T, mode="nearest")) + rgb[::2, ::2, 2] = correlate(correlate(B, h_avg_bw_x, mode="nearest"), + h_avg_bw_x.T, mode="nearest") + + rgb[::2, 1::2, 0] = correlate(R, h_avg_fw_x, mode="nearest") + rgb[::2, 1::2, 2] = correlate(B, h_avg_bw_x.T, mode="nearest") + rgb[1::2, ::2, 0] = correlate(R, h_avg_fw_x.T, mode="nearest") + rgb[1::2, ::2, 2] = correlate(B, h_avg_bw_x, mode="nearest") + + rgb[1::2, 1::2, 0] = correlate(correlate(R, h_avg_fw_x, mode="nearest"), + h_avg_fw_x.T, mode="nearest") + rgb[1::2, 1::2, 1] = 0.5 * (correlate(G1, h_avg_fw_x.T, mode="nearest") + + correlate(G2, h_avg_fw_x, mode="nearest")) + + if method == "bilinear": + return rgb + + if method == "MHC04": + # Kernels in Fig. 2 of Malvar et al., along with weight factors for combination. + # This implementation is a little idiosyncratic because it realizes the 2D convolution + # via a sequence of 1D convolutions, probably not worth it in hindsight... 
+ h_avg_nhood_x = np.array([[0.5, 0., 0.5]]) + + av_R = correlate(correlate(R, h_avg_nhood_x, mode="nearest"), + h_avg_nhood_x.T, mode="nearest") + diff_R = R - av_R + + av_B = correlate(correlate(B, h_avg_nhood_x, mode="nearest"), + h_avg_nhood_x.T, mode="nearest") + diff_B = B - av_B + + rgb[::2, ::2, 1] += (1/2) * diff_R + rgb[::2, ::2, 2] += (3/4) * diff_B + rgb[1::2, 1::2, 1] += (1/2) * diff_B + rgb[1::2, 1::2, 0] += (3/4) * diff_R + + av_G2_at_G1 = correlate(correlate(G2, h_avg_fw_x, mode="nearest"), + h_avg_bw_x.T, mode="nearest") + + av_G1_at_G1_R = (correlate(G1, h_avg_nhood_x, mode="nearest") + - correlate(G1, 0.5*h_avg_nhood_x.T, mode="nearest")) + av_G1_R = (1/5) * (2*av_G1_at_G1_R + 4*av_G2_at_G1) + diff_G1_R = G1 - av_G1_R + rgb[::2, 1::2, 0] += (5/8) * diff_G1_R + + av_G1_at_G1_B = (correlate(G1, h_avg_nhood_x.T, mode="nearest") + - correlate(G1, 0.5*h_avg_nhood_x, mode="nearest")) + av_G1_B = (1/5) * (2*av_G1_at_G1_B + 4*av_G2_at_G1) + diff_G1_B = G1 - av_G1_B + rgb[::2, 1::2, 2] += (5/8) * diff_G1_B + + av_G1_at_G2 = correlate(correlate(G1, h_avg_bw_x, mode="nearest"), + h_avg_fw_x.T, mode="nearest") + + av_G2_at_G2_R = (correlate(G2, h_avg_nhood_x.T, mode="nearest") + - correlate(G2, 0.5*h_avg_nhood_x, mode="nearest")) + av_G2_R = (1/5) * (4*av_G1_at_G2 + 2*av_G2_at_G2_R) + diff_G2_R = G2 - av_G2_R + rgb[1::2, ::2, 0] += (5/8) * diff_G2_R + + av_G2_at_G2_B = (correlate(G2, h_avg_nhood_x, mode="nearest") + - correlate(G2, 0.5*h_avg_nhood_x.T, mode="nearest")) + av_G2_B = (1/5) * (4*av_G1_at_G2 + 2*av_G2_at_G2_B) + diff_G2_B = G2 - av_G2_B + rgb[1::2, ::2, 2] += (5/8) * diff_G2_B + + return rgb + else: + raise NotImplementedError + return rgb + From 6bdbaa596cb12bb419d023d3fbc8795df690b505 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Fri, 27 Jun 2025 14:45:41 -0500 Subject: [PATCH 04/24] allow chunk sizes in {1,2,3} --- visionsim/cli/emulate.py | 5 ++++- visionsim/interpolate/pose.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff 
--git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index f3976b0..fd70ed3 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -269,7 +269,10 @@ def rgb( fwc=fwc or (chunk_size * duplicate), factor=factor, ) - pose = pose_interp(poses)(0.5) if transforms_new else None + if transforms_new: + pose = pose_interp(poses)(0.5) if chunk_size != 1 else poses[0] + else: + pose = None if rgb_img.shape[-1] == 1: rgb_img = np.repeat(rgb_img, 3, axis=-1) diff --git a/visionsim/interpolate/pose.py b/visionsim/interpolate/pose.py index 8b3f7d2..8064527 100644 --- a/visionsim/interpolate/pose.py +++ b/visionsim/interpolate/pose.py @@ -26,6 +26,8 @@ def __init__(self, transforms: npt.ArrayLike, ts: npt.ArrayLike | None = None, k self.transforms = np.array(transforms) self.ts = np.linspace(0, 1, len(self.transforms)) if ts is None else np.array(ts) self.determinants = np.linalg.det(self.transforms[:, :3, :3]) + + k = min(len(self.transforms)-1, k) # for small chunk_sizes self.k = k if normalize: From 3378feaab2c515dff2ca0b28c1a73782633cb4d7 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Sun, 29 Jun 2025 22:19:56 -0500 Subject: [PATCH 05/24] RGB sensor model partly from Noise-Optimal HDR paper --- visionsim/cli/emulate.py | 16 +++++++++++----- visionsim/emulate/rgb.py | 39 +++++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index fd70ed3..47781e0 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -196,10 +196,12 @@ def rgb( input_dir: str | os.PathLike, output_dir: str | os.PathLike, chunk_size: int = 10, - factor: float = 1.0, + frac_shutter_angle: float = 1.0, readout_std: float = 20.0, - fwc: int | None = None, + fwc: float | None = None, duplicate: float = 1.0, + scale_flux: float = 1.0, + gain_ISO: float = 1.0, pattern: str = "frame_{:06}.png", mode: Literal["npy", "img"] = "npy", force: bool = False, @@ -210,10 +212,12 @@ 
def rgb( input_dir: directory in which to look for frames output_dir: directory in which to save binary frames chunk_size: number of consecutive frames to average together - factor: multiply image's linear intensity by this weight + frac_shutter_angle: fraction of inter-frame duration shutter is active readout_std: standard deviation of gaussian read noise fwc: full well capacity of sensor in arbitrary units (relative to factor & chunk_size) duplicate: when chunk size is too small, this model is ill-suited and creates unrealistic noise. This parameter artificially increases the chunk size by using each input image `duplicate` number of times + scale_flux: factor to scale the input images before Poisson simulation + gain_ISO: gain for photo-electron reading after Poisson rng pattern: filenames of frames should match this mode: how to save binary frames force: if true, overwrite output file(s) if present @@ -266,8 +270,10 @@ def rgb( rgb_img = emulate_rgb_from_sequence( imgs * duplicate, readout_std=readout_std, - fwc=fwc or (chunk_size * duplicate), - factor=factor, + fwc=fwc or np.inf, + frac_shutter_angle=frac_shutter_angle, + scale_flux=scale_flux, + gain_ISO=gain_ISO, ) if transforms_new: pose = pose_interp(poses)(0.5) if chunk_size != 1 else poses[0] diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index 945ded0..155acf7 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -8,10 +8,12 @@ def emulate_rgb_from_sequence( sequence: npt.ArrayLike, + frac_shutter_angle: float = 1.0, readout_std: float = 20.0, - fwc: float = 500.0, + fwc: float = 10000.0, bitdepth: int = 12, - factor: float = 1.0, + scale_flux: float = 1.0, + gain_ISO: float = 1.0, rng: np.random.Generator | None = None, ) -> npt.NDArray: """Emulates a conventional RGB camera from a sequence of intensity frames. 
@@ -24,31 +26,40 @@ def emulate_rgb_from_sequence( Args: sequence (npt.ArrayLike): Input sequence of linear-intensity frames, can be a collection of frames, or np/torch array with time as the first dimension. + frac_shutter_angle (float, optional): fraction of inter-frame duration the shutter is active. readout_std (float, optional): Standard deviation of zero mean Gaussian read noise. Defaults to 20.0. fwc (float, optional): Full well capacity, used for normalization. Defaults to 500.0. bitdepth (int, optional): Resolution of ADC in bits. Defaults to 12. - factor (float, optional): Scaling factor to control intensity of output RGB image. Defaults to 1.0. + scale_flux (float, optional): factor to scale the input [0, 1] image _before_ Poisson rng + gain_ISO (float, optional): factor to scale the photo-electron reading _after_ Poisson rng rng (np.random.Generator, optional): Optional random number generator. Defaults to none. Returns: Quantized sRGB patch is returned """ # Get sum of linear-intensity frames. - sequence = np.array(sequence) - burst_size = len(sequence) - patch = np.sum(sequence, axis=0) * factor + burst_size = int(max(1, np.ceil(len(sequence) * frac_shutter_angle))) + sequence = np.array(sequence[:burst_size]) - # Perform poisson sampling and add zero-mean gaussian read noise + patch = np.sum(sequence, axis=0) * scale_flux + + # Roughly translating the model in Eqs. (1,2) and Fig. 1 of Hasinoff et al.: + # S. W. Hasinoff, F. Durand, and W. T. Freeman, + # “Noise-optimal capture for high dynamic range photography,” CVPR 2010. 
+ + # Perform poisson sampling rng = np.random.default_rng() if rng is None else rng patch = rng.poisson(patch).astype(float) - patch += rng.normal(0, readout_std * burst_size / 255.0, size=patch.shape) - - # Normalize by full well capacity, clip highlights, and quantize to 12-bits - patch = np.clip(patch / fwc, 0, 1.0) - patch = np.round(patch * (2**bitdepth - 1)) / (2**bitdepth - 1) + # full-well capacity + patch = np.clip(patch, 0, fwc) + # readout noise + patch += rng.normal(0, readout_std, size=patch.shape) + # apply ISO gain + patch *= gain_ISO + # assume a "perfect" ADC with no additional noise except quantization + patch = np.round(np.clip(patch, 0, (2**bitdepth-1))) / (2**bitdepth - 1) - # Multiply by gain to keep constant(-ish) brightness - patch *= fwc / (burst_size * factor) + # do not attempt to keep the same levels as the input # Convert to sRGB color space for viewing and quantize to 8-bits patch = linearrgb_to_srgb(patch) From 007d9a22040f972d6058d07d494650acfe770546 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Fri, 26 Sep 2025 16:00:02 -0500 Subject: [PATCH 06/24] Bayer pattern and demosaicing, skimage dependency addition --- pyproject.toml | 1 + visionsim/cli/emulate.py | 49 ++++++++----- visionsim/emulate/rgb.py | 45 +++++++++--- visionsim/utils/color.py | 149 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 213 insertions(+), 31 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5c8ef1f..9b09f5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ classifiers = [ dependencies = [ "opencv-python", "imageio", + "scikit-image", "more_itertools", "numpy", "torch", diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index 47781e0..95d86ff 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -196,12 +196,15 @@ def rgb( input_dir: str | os.PathLike, output_dir: str | os.PathLike, chunk_size: int = 10, - frac_shutter_angle: float = 1.0, - readout_std: float = 20.0, + shutr_angl: 
float = 360.0, + rdout_std: float = 0.3, fwc: float | None = None, - duplicate: float = 1.0, scale_flux: float = 1.0, - gain_ISO: float = 1.0, + g_ISO: float = 1.0, + bitdepth: int = 12, + demosaic: Literal["off", "bilinear", "MHC04"] = "bilinear", + dnois_sigma: float = 0.0, + sharpn_wt: float = 0.0, pattern: str = "frame_{:06}.png", mode: Literal["npy", "img"] = "npy", force: bool = False, @@ -212,14 +215,17 @@ def rgb( input_dir: directory in which to look for frames output_dir: directory in which to save binary frames chunk_size: number of consecutive frames to average together - frac_shutter_angle: fraction of inter-frame duration shutter is active - readout_std: standard deviation of gaussian read noise - fwc: full well capacity of sensor in arbitrary units (relative to factor & chunk_size) - duplicate: when chunk size is too small, this model is ill-suited and creates unrealistic noise. This parameter artificially increases the chunk size by using each input image `duplicate` number of times + shutr_angl: fraction of inter-frame duration shutter is active (0-360 deg) + rdout_std: standard deviation of gaussian read noise in photoelectrons + fwc: full well capacity of sensor in photoelectrons scale_flux: factor to scale the input images before Poisson simulation - gain_ISO: gain for photo-electron reading after Poisson rng + g_ISO: gain for photo-electron reading after Poisson rng + bitdepth: ADC bitdepth + demosaic: demosaicing method (default bilinear) + dnois_sigma: Gaussian blur with this sigma will be used (default 0.0 disables this) + sharpn_wt: weight used in sharpening (default 0.0 disables this) pattern: filenames of frames should match this - mode: how to save binary frames + mode: how to save generated frames force: if true, overwrite output file(s) if present """ import copy @@ -267,23 +273,28 @@ def rgb( # Assume images have been tonemapped and undo mapping imgs = srgb_to_linearrgb(imgs) + # batch_size is not relevant here + imgs = imgs[:,0,...] 
def emulate_rgb_from_sequence(
    sequence: npt.ArrayLike,
    shutter_angle_degrees: float = 360.0,
    readout_std: float = 0.3,
    fwc: float = 10000.0,
    bitdepth: int = 12,
    scale_flux: float = 1.0,
    gain_ISO: float = 1.0,
    demosaic_method: Literal["off", "bilinear", "MHC04"] = "bilinear",
    denoise_sigma: float = 0.0,
    sharpen_weight: float = 0.0,
    rng: np.random.Generator | None = None,
) -> npt.NDArray:
    """Emulate a conventional RGB camera from a sequence of linear-intensity frames.

    Roughly translates the model in Eqs. (1, 2) and Fig. 1 of Hasinoff et al.:
    S. W. Hasinoff, F. Durand, and W. T. Freeman,
    "Noise-optimal capture for high dynamic range photography," CVPR 2010.

    Args:
        sequence (npt.ArrayLike): Input sequence of linear-intensity frames; a collection
            of frames or an np/torch array with time as the first dimension.
        shutter_angle_degrees (float, optional): Shutter angle in degrees; only the first
            ``shutter_angle_degrees / 360`` fraction of the frames is integrated.
            Defaults to 360.0 (shutter open for the whole inter-frame duration).
        readout_std (float, optional): Standard deviation of the zero-mean Gaussian read
            noise, in photoelectrons. Defaults to 0.3.
        fwc (float, optional): Full well capacity in photoelectrons; photo-electron counts
            are clipped to this value. Defaults to 10000.0.
        bitdepth (int, optional): Resolution of the ADC in bits. Defaults to 12.
        scale_flux (float, optional): Factor to scale the summed [0, 1] frames _before_
            the Poisson rng. Defaults to 1.0.
        gain_ISO (float, optional): Factor to scale the photo-electron reading _after_
            the Poisson rng. Defaults to 1.0.
        demosaic_method (string, optional): Demosaicing method used for color inputs.
        denoise_sigma (float, optional): Gaussian blur kernel sigma (disabled if 0.0).
        sharpen_weight (float, optional): Sharpening weight (disabled if 0.0).
        rng (np.random.Generator, optional): Optional random number generator. Defaults to None.

    Returns:
        Quantized 8-bit sRGB patch; grayscale inputs are replicated to three channels.
    """
    # Integrate only the frames that fall within the shutter angle (at least one frame).
    burst_size = int(max(1, np.ceil(len(sequence) * (shutter_angle_degrees / 360.0))))
    sequence = np.array(sequence[:burst_size])

    # FIX: the summed exposure must be formed before its shape is inspected below — the
    # previous revision dropped this line, leaving `patch` referenced before assignment.
    patch = np.sum(sequence, axis=0) * scale_flux

    color = patch.ndim > 2 and patch.shape[2] > 1
    if color:
        # Sample the ideal RGB signal through the Bayer CFA before adding sensor noise.
        patch = rgb2raw_bayer(patch)

    # Perform Poisson sampling of the photo-electron counts.
    rng = np.random.default_rng() if rng is None else rng
    patch = rng.poisson(patch).astype(float)
    # Clip to the full-well capacity.
    patch = np.clip(patch, 0, fwc)
    # Add readout noise.
    patch += rng.normal(0, readout_std, size=patch.shape)
    # Apply ISO gain.
    patch *= gain_ISO
    # Assume perfect quantization in the ADC (no additional noise).
    patch = np.round(np.clip(patch, 0, 2**bitdepth - 1))
    patch = patch * (1.0 / (2**bitdepth - 1))

    # De-mosaicing back to a full RGB image.
    if color:
        patch = raw2rgb_bayer(patch, method=demosaic_method)

    # Optional de-noising and sharpening.
    if denoise_sigma != 0.0:
        patch = gaussian(patch, denoise_sigma)
    if sharpen_weight != 0.0:
        patch = unsharp_mask(patch, amount=sharpen_weight,
                             channel_axis=2 if color else None)

    # Convert to sRGB color space for viewing and quantize to 8-bits.
    # NOTE: no attempt is made to keep the same brightness levels as the input.
    patch = linearrgb_to_srgb(patch.astype(np.double))
    patch = np.round(patch * 255).astype(np.uint8)
    if not color:  # fake a 3-channel image anyway
        # FIX: a purely 2D grayscale result needs a channel axis first —
        # np.repeat on axis=-1 of a 2D array would tile columns, not channels.
        if patch.ndim == 2:
            patch = patch[..., None]
        patch = np.repeat(patch, 3, axis=-1)
    return patch
rgb: hypothetical true RGB signal + cfa_pattern: Bayer pattern (only RGGB implemented for now) + + Returns: + raw: CFA mosaiced data + """ + if cfa_pattern == "rggb": + raw = np.copy(rgb[:,:,0]) + raw[::2, 1::2] = rgb[::2, 1::2, 1] + raw[1::2, ::2] = rgb[1::2, ::2, 1] + raw[1::2, 1::2] = rgb[1::2, 1::2, 2] + else: + raise NotImplementedError + return raw + + +def raw2rgb_bayer(raw: torch.Tensor | npt.NDArray, + cfa_pattern: Literal["rggb"] = "rggb", + method: Literal["off", "bilinear", "MHC04"] = "bilinear" +) -> torch.Tensor | npt.NDArray: + """Demosaicing + + Convolution-based implementation as suggested by Malvar et al., + "High-quality linear interpolation for demosaicing of Bayer-patterned color images", ICASSP 2004. + https://home.cis.rit.edu/~cnspci/references/dip/demosaicking/malvar2004.pdf + + Alternative implementations are also available from OpenCV: + rgb = cv2.cvtColor(, cv2.COLOR_BAYER_BG2BGR)[:,:,::-1], + and the colour-demosaicing library (https://pypi.org/project/colour-demosaicing): + rgb = demosaicing_CFA_Bayer_bilinear(raw, pattern="RGGB") + rgb = demosaicing_CFA_Bayer_Malvar2004(raw, pattern="RGGB"), + which appear to give similar results but could run faster (not benchmarked). 
+ + Args: + raw: input array (has to be exactly 2D) + cfa_pattern: Bayer pattern shape (only RGGB implemented for now) + + Returns: + rgb: demosaiced RGB image + """ + + if cfa_pattern == "rggb": + if any(((L % 2) != 0) for L in raw.shape): + raise RuntimeError(f"Expected even-length raw array, got {(raw.shape)}") + + if not isinstance(raw, np.floating): + raw = img_as_float(raw) + + R, G1, G2, B = raw[::2, ::2], raw[::2, 1::2], raw[1::2, ::2], raw[1::2, 1::2] + + rgb = np.zeros(raw.shape + (3,), dtype=raw.dtype) + rgb[::2, ::2, 0] = R + rgb[::2, 1::2, 1] = G1 + rgb[1::2, ::2, 1] = G2 + rgb[1::2, 1::2, 2] = B + + if method == "off": + return rgb + + if method in ["bilinear", "MHC04"]: + h_avg_fw_x = np.array([[0., 0.5, 0.5]]) + h_avg_bw_x = np.fliplr(h_avg_fw_x) + + rgb[::2, ::2, 1] = 0.5 * (correlate(G1, h_avg_bw_x, mode="nearest") + + correlate(G2, h_avg_bw_x.T, mode="nearest")) + rgb[::2, ::2, 2] = correlate(correlate(B, h_avg_bw_x, mode="nearest"), + h_avg_bw_x.T, mode="nearest") + + rgb[::2, 1::2, 0] = correlate(R, h_avg_fw_x, mode="nearest") + rgb[::2, 1::2, 2] = correlate(B, h_avg_bw_x.T, mode="nearest") + rgb[1::2, ::2, 0] = correlate(R, h_avg_fw_x.T, mode="nearest") + rgb[1::2, ::2, 2] = correlate(B, h_avg_bw_x, mode="nearest") + + rgb[1::2, 1::2, 0] = correlate(correlate(R, h_avg_fw_x, mode="nearest"), + h_avg_fw_x.T, mode="nearest") + rgb[1::2, 1::2, 1] = 0.5 * (correlate(G1, h_avg_fw_x.T, mode="nearest") + + correlate(G2, h_avg_fw_x, mode="nearest")) + + if method == "bilinear": + return rgb + + if method == "MHC04": + # Kernels in Fig. 2 of Malvar et al., along with weight factors for combination. + # This implementation is a little idiosyncratic because it realizes the 2D convolution + # via a sequence of 1D convolutions, probably not worth it in hindsight... 
+ h_avg_nhood_x = np.array([[0.5, 0., 0.5]]) + + av_R = correlate(correlate(R, h_avg_nhood_x, mode="nearest"), + h_avg_nhood_x.T, mode="nearest") + diff_R = R - av_R + + av_B = correlate(correlate(B, h_avg_nhood_x, mode="nearest"), + h_avg_nhood_x.T, mode="nearest") + diff_B = B - av_B + + rgb[::2, ::2, 1] += (1/2) * diff_R + rgb[::2, ::2, 2] += (3/4) * diff_B + rgb[1::2, 1::2, 1] += (1/2) * diff_B + rgb[1::2, 1::2, 0] += (3/4) * diff_R + + av_G2_at_G1 = correlate(correlate(G2, h_avg_fw_x, mode="nearest"), + h_avg_bw_x.T, mode="nearest") + + av_G1_at_G1_R = (correlate(G1, h_avg_nhood_x, mode="nearest") + - correlate(G1, 0.5*h_avg_nhood_x.T, mode="nearest")) + av_G1_R = (1/5) * (2*av_G1_at_G1_R + 4*av_G2_at_G1) + diff_G1_R = G1 - av_G1_R + rgb[::2, 1::2, 0] += (5/8) * diff_G1_R + + av_G1_at_G1_B = (correlate(G1, h_avg_nhood_x.T, mode="nearest") + - correlate(G1, 0.5*h_avg_nhood_x, mode="nearest")) + av_G1_B = (1/5) * (2*av_G1_at_G1_B + 4*av_G2_at_G1) + diff_G1_B = G1 - av_G1_B + rgb[::2, 1::2, 2] += (5/8) * diff_G1_B + + av_G1_at_G2 = correlate(correlate(G1, h_avg_bw_x, mode="nearest"), + h_avg_fw_x.T, mode="nearest") + + av_G2_at_G2_R = (correlate(G2, h_avg_nhood_x.T, mode="nearest") + - correlate(G2, 0.5*h_avg_nhood_x, mode="nearest")) + av_G2_R = (1/5) * (4*av_G1_at_G2 + 2*av_G2_at_G2_R) + diff_G2_R = G2 - av_G2_R + rgb[1::2, ::2, 0] += (5/8) * diff_G2_R + + av_G2_at_G2_B = (correlate(G2, h_avg_nhood_x, mode="nearest") + - correlate(G2, 0.5*h_avg_nhood_x.T, mode="nearest")) + av_G2_B = (1/5) * (4*av_G1_at_G2 + 2*av_G2_at_G2_B) + diff_G2_B = G2 - av_G2_B + rgb[1::2, ::2, 2] += (5/8) * diff_G2_B + + return rgb + else: + raise NotImplementedError + return rgb + From fd6d71abf713fcda678d2aba321903e4c8db35cb Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Wed, 14 Jan 2026 22:54:36 -0600 Subject: [PATCH 07/24] return to full words in CLI interface --- visionsim/cli/emulate.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git 
a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index 95d86ff..d55fa30 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -196,15 +196,15 @@ def rgb( input_dir: str | os.PathLike, output_dir: str | os.PathLike, chunk_size: int = 10, - shutr_angl: float = 360.0, - rdout_std: float = 0.3, + shutter_angle: float = 360.0, + readout_std: float = 0.3, fwc: float | None = None, scale_flux: float = 1.0, g_ISO: float = 1.0, bitdepth: int = 12, demosaic: Literal["off", "bilinear", "MHC04"] = "bilinear", - dnois_sigma: float = 0.0, - sharpn_wt: float = 0.0, + denoise_sigma: float = 0.0, + sharpen_weight: float = 0.0, pattern: str = "frame_{:06}.png", mode: Literal["npy", "img"] = "npy", force: bool = False, @@ -215,15 +215,15 @@ def rgb( input_dir: directory in which to look for frames output_dir: directory in which to save binary frames chunk_size: number of consecutive frames to average together - shutr_angl: fraction of inter-frame duration shutter is active (0-360 deg) - rdout_std: standard deviation of gaussian read noise in photoelectrons + shutter_angle: fraction of inter-frame duration shutter is active (0-360 deg) + readout_std: standard deviation of gaussian read noise in photoelectrons fwc: full well capacity of sensor in photoelectrons scale_flux: factor to scale the input images before Poisson simulation g_ISO: gain for photo-electron reading after Poisson rng bitdepth: ADC bitdepth demosaic: demosaicing method (default bilinear) - dnois_sigma: Gaussian blur with this sigma will be used (default 0.0 disables this) - sharpn_wt: weight used in sharpening (default 0.0 disables this) + denoise_sigma: Gaussian blur with this sigma will be used (default 0.0 disables this) + sharpen_weight: weight used in sharpening (default 0.0 disables this) pattern: filenames of frames should match this mode: how to save generated frames force: if true, overwrite output file(s) if present @@ -278,15 +278,15 @@ def rgb( rgb_img = emulate_rgb_from_sequence( 
imgs, - readout_std=rdout_std, + readout_std=readout_std, fwc=fwc or np.inf, - shutter_angle_degrees=shutr_angl, + shutter_angle_degrees=shutter_angle, scale_flux=scale_flux, gain_ISO=g_ISO, bitdepth=bitdepth, demosaic_method=demosaic, - denoise_sigma=dnois_sigma, - sharpen_weight=sharpn_wt, + denoise_sigma=denoise_sigma, + sharpen_weight=sharpen_weight, ) if transforms_new: From 9208923fed145144b92f6b7fc47adc3484fe8834 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Wed, 14 Jan 2026 22:56:43 -0600 Subject: [PATCH 08/24] add editor swap files to gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 6db82cf..308dc74 100644 --- a/.gitignore +++ b/.gitignore @@ -105,6 +105,10 @@ ENV/ .vscode/ .idea/ +# Editor swap files +*.swp +*.asv + # Custom paper/ out/ From fb737b82c1c8393c761263ef79306c7dfa0138e9 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Wed, 14 Jan 2026 22:57:38 -0600 Subject: [PATCH 09/24] typo --- visionsim/dataset/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visionsim/dataset/dataset.py b/visionsim/dataset/dataset.py index a1c0baa..44658a4 100644 --- a/visionsim/dataset/dataset.py +++ b/visionsim/dataset/dataset.py @@ -206,7 +206,7 @@ def full_shape(self) -> tuple[int, int, int, int]: if self.transforms: h, w, c = self.transforms["h"], self.transforms["w"], self.transforms["c"] else: - # Peak into dataset to get H/W + # Peek into dataset to get H/W _, im, _ = self[0] h, w, c = np.array(im).shape self._full_shape = (len(self), h, w, c) From f28f4f61355921415d05a42f651a8498a70cd39c Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Wed, 14 Jan 2026 22:58:18 -0600 Subject: [PATCH 10/24] allow grayscale SPAD simulation and binomial samples with bit-depth > 1 --- visionsim/cli/emulate.py | 22 +++++++++++++++++----- visionsim/emulate/spc.py | 6 ++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py 
index d55fa30..f240e2d 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -10,12 +10,14 @@ from visionsim.emulate.rgb import emulate_rgb_from_sequence -def _spad_collate(batch, *, mode, rng, factor, is_tonemapped=True): +def _spad_collate(batch, *, mode, rng, factor, bitdepth=1, gray=False, is_tonemapped=True): """Use default collate function on batch and then simulate SPAD, enabling compute to be done in threads""" from visionsim.dataset import default_collate from visionsim.emulate.spc import emulate_spc from visionsim.utils.color import srgb_to_linearrgb + from skimage.color import rgb2gray + idxs, imgs, poses = default_collate(batch) if is_tonemapped: @@ -23,8 +25,11 @@ def _spad_collate(batch, *, mode, rng, factor, is_tonemapped=True): imgs = srgb_to_linearrgb((imgs / 255.0).astype(float)) else: imgs = imgs.astype(float) / 255.0 + + if gray and (imgs.shape[-1] == 3): + imgs = rgb2gray(imgs) - binary_img = emulate_spc(imgs, factor=factor, rng=rng) * 255 + binary_img = emulate_spc(imgs, factor=factor, bitdepth=bitdepth, rng=rng) * 255 binary_img = binary_img.astype(np.uint8) if mode.lower() == "npy": @@ -38,6 +43,8 @@ def spad( output_dir: str | os.PathLike, pattern: str = "frame_{:06}.png", factor: float = 1.0, + gray: bool = False, + bitdepth: int = 1, seed: int = 2147483647, mode: Literal["npy", "img"] = "npy", batch_size: int = 4, @@ -50,6 +57,8 @@ def spad( output_dir: directory in which to save binary frames pattern: filenames of frames should match this factor: multiplicative factor controlling dynamic range of output + gray: to set grayscale instead of 3-channel sensing + bitdepth: representing number of underlying binary measurements aggregated (bitdepth = k => 2^k - 1 binary measurements averaged) seed: random seed to use while sampling, ensures reproducibility mode: how to save binary frames batch_size: number of frames to write at once @@ -68,8 +77,11 @@ def spad( dataset = Dataset.from_path(input_path) transforms_new = 
copy.deepcopy(dataset.transforms or {}) shape = np.array(dataset.full_shape) - shape[-1] = transforms_new["c"] = 3 - + if gray: + shape[-1] = transforms_new["c"] = 1 + else: + shape[-1] = transforms_new["c"] = 3 + if mode.lower() == "img": ... elif mode.lower() == "npy": @@ -87,7 +99,7 @@ def spad( dataset, batch_size=batch_size, num_workers=os.cpu_count() or 1, - collate_fn=functools.partial(_spad_collate, mode=mode, rng=rng, factor=factor, is_tonemapped=is_tonemapped), + collate_fn=functools.partial(_spad_collate, mode=mode, rng=rng, factor=factor, bitdepth=bitdepth, gray=gray, is_tonemapped=is_tonemapped), ) with ( diff --git a/visionsim/emulate/spc.py b/visionsim/emulate/spc.py index 9cb9b10..ccee7b4 100644 --- a/visionsim/emulate/spc.py +++ b/visionsim/emulate/spc.py @@ -7,13 +7,14 @@ def emulate_spc( - img: npt.NDArray[np.floating], factor: float = 1.0, rng: np.random.Generator | None = None + img: npt.NDArray[np.floating], factor: float = 1.0, bitdepth: int = 1, rng: np.random.Generator | None = None ) -> npt.NDArray[np.integer]: """Perform bernoulli sampling on linearized RGB frames to yield binary frames. Args: img (npt.ArrayLike): Linear intensity image to sample binary frame from. factor (float, optional): Arbitrary corrective brightness factor. Defaults to 1.0. + bitdepth(int, optional): number of underlying binary measurements to average (default 1) rng (np.random.Generator, optional): Optional random number generator. Defaults to none. 
Returns: @@ -21,7 +22,8 @@ def emulate_spc( """ # Perform bernoulli sampling (equivalent to binomial w/ n=1) rng = np.random.default_rng() if rng is None else rng - return rng.binomial(cast(npt.NDArray[np.integer], 1), 1.0 - np.exp(-img * factor)) + N = (2**bitdepth) - 1 + return (1.0/N) * rng.binomial(cast(npt.NDArray[np.integer], N), 1.0 - np.exp(-img * factor)) def spc_avg_to_rgb( From 1896d4658375616237812995c9fe61d52d598881 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Wed, 14 Jan 2026 23:15:43 -0600 Subject: [PATCH 11/24] lint fix --- visionsim/cli/emulate.py | 14 ++--- visionsim/emulate/rgb.py | 9 ++-- visionsim/emulate/spc.py | 4 +- visionsim/interpolate/pose.py | 2 +- visionsim/utils/color.py | 98 +++++++++++++++++------------------ 5 files changed, 63 insertions(+), 64 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index f240e2d..3b82646 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -12,12 +12,12 @@ def _spad_collate(batch, *, mode, rng, factor, bitdepth=1, gray=False, is_tonemapped=True): """Use default collate function on batch and then simulate SPAD, enabling compute to be done in threads""" + from skimage.color import rgb2gray + from visionsim.dataset import default_collate from visionsim.emulate.spc import emulate_spc from visionsim.utils.color import srgb_to_linearrgb - from skimage.color import rgb2gray - idxs, imgs, poses = default_collate(batch) if is_tonemapped: @@ -25,7 +25,7 @@ def _spad_collate(batch, *, mode, rng, factor, bitdepth=1, gray=False, is_tonema imgs = srgb_to_linearrgb((imgs / 255.0).astype(float)) else: imgs = imgs.astype(float) / 255.0 - + if gray and (imgs.shape[-1] == 3): imgs = rgb2gray(imgs) @@ -81,7 +81,7 @@ def spad( shape[-1] = transforms_new["c"] = 1 else: shape[-1] = transforms_new["c"] = 3 - + if mode.lower() == "img": ... 
elif mode.lower() == "npy": @@ -99,7 +99,9 @@ def spad( dataset, batch_size=batch_size, num_workers=os.cpu_count() or 1, - collate_fn=functools.partial(_spad_collate, mode=mode, rng=rng, factor=factor, bitdepth=bitdepth, gray=gray, is_tonemapped=is_tonemapped), + collate_fn=functools.partial( + _spad_collate, mode=mode, rng=rng, factor=factor, bitdepth=bitdepth, gray=gray, is_tonemapped=is_tonemapped + ), ) with ( @@ -286,7 +288,7 @@ def rgb( imgs = srgb_to_linearrgb(imgs) # batch_size is not relevant here - imgs = imgs[:,0,...] + imgs = imgs[:, 0, ...] rgb_img = emulate_rgb_from_sequence( imgs, diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index c31f5f5..86df5b6 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -46,7 +46,7 @@ def emulate_rgb_from_sequence( Quantized sRGB patch is returned """ # Get sum of linear-intensity frames. - burst_size = int(max(1, np.ceil(len(sequence) * (shutter_angle_degrees/360.0)))) + burst_size = int(max(1, np.ceil(len(sequence) * (shutter_angle_degrees / 360.0)))) sequence = np.array(sequence[:burst_size]) patch = np.sum(sequence, axis=0) * scale_flux @@ -68,7 +68,7 @@ def emulate_rgb_from_sequence( # apply ISO gain patch *= gain_ISO # assume perfect quantization in ADC - patch = np.round(np.clip(patch, 0, (2**bitdepth-1))) + patch = np.round(np.clip(patch, 0, (2**bitdepth - 1))) patch = patch * (1.0 / (2**bitdepth - 1)) # de-mosaicing @@ -79,13 +79,12 @@ def emulate_rgb_from_sequence( if denoise_sigma != 0.0: patch = gaussian(patch, denoise_sigma) if sharpen_weight != 0.0: - patch = unsharp_mask(patch, amount=sharpen_weight, - channel_axis=2 if color else None) + patch = unsharp_mask(patch, amount=sharpen_weight, channel_axis=2 if color else None) # Convert to sRGB color space for viewing and quantize to 8-bits patch = linearrgb_to_srgb(patch.astype(np.double)) patch = np.round(patch * 255).astype(np.uint8) - if not color: # fake it anyway + if not color: # fake it anyway patch = 
np.repeat(patch, 3, axis=-1) return patch diff --git a/visionsim/emulate/spc.py b/visionsim/emulate/spc.py index ccee7b4..1dbd1a3 100644 --- a/visionsim/emulate/spc.py +++ b/visionsim/emulate/spc.py @@ -7,7 +7,7 @@ def emulate_spc( - img: npt.NDArray[np.floating], factor: float = 1.0, bitdepth: int = 1, rng: np.random.Generator | None = None + img: npt.NDArray[np.floating], factor: float = 1.0, bitdepth: int = 1, rng: np.random.Generator | None = None ) -> npt.NDArray[np.integer]: """Perform bernoulli sampling on linearized RGB frames to yield binary frames. @@ -23,7 +23,7 @@ def emulate_spc( # Perform bernoulli sampling (equivalent to binomial w/ n=1) rng = np.random.default_rng() if rng is None else rng N = (2**bitdepth) - 1 - return (1.0/N) * rng.binomial(cast(npt.NDArray[np.integer], N), 1.0 - np.exp(-img * factor)) + return (1.0 / N) * rng.binomial(cast(npt.NDArray[np.integer], N), 1.0 - np.exp(-img * factor)) def spc_avg_to_rgb( diff --git a/visionsim/interpolate/pose.py b/visionsim/interpolate/pose.py index 8064527..cd386cd 100644 --- a/visionsim/interpolate/pose.py +++ b/visionsim/interpolate/pose.py @@ -27,7 +27,7 @@ def __init__(self, transforms: npt.ArrayLike, ts: npt.ArrayLike | None = None, k self.ts = np.linspace(0, 1, len(self.transforms)) if ts is None else np.array(ts) self.determinants = np.linalg.det(self.transforms[:, :3, :3]) - k = min(len(self.transforms)-1, k) # for small chunk_sizes + k = min(len(self.transforms) - 1, k) # for small chunk_sizes self.k = k if normalize: diff --git a/visionsim/utils/color.py b/visionsim/utils/color.py index 2223b1f..86589b5 100644 --- a/visionsim/utils/color.py +++ b/visionsim/utils/color.py @@ -60,9 +60,7 @@ def linearrgb_to_srgb(img: torch.Tensor | npt.NDArray) -> torch.Tensor | npt.NDA return img -def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, - cfa_pattern: Literal["rggb"] = "rggb" -) -> torch.Tensor | npt.NDArray: +def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, cfa_pattern: Literal["rggb"] = 
"rggb") -> torch.Tensor | npt.NDArray: """Mosaicing Realizes a mosaiced CFA image as would be sampled by a real Bayer-patterned sensor @@ -75,7 +73,7 @@ def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, raw: CFA mosaiced data """ if cfa_pattern == "rggb": - raw = np.copy(rgb[:,:,0]) + raw = np.copy(rgb[:, :, 0]) raw[::2, 1::2] = rgb[::2, 1::2, 1] raw[1::2, ::2] = rgb[1::2, ::2, 1] raw[1::2, 1::2] = rgb[1::2, 1::2, 2] @@ -84,13 +82,14 @@ def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, return raw -def raw2rgb_bayer(raw: torch.Tensor | npt.NDArray, - cfa_pattern: Literal["rggb"] = "rggb", - method: Literal["off", "bilinear", "MHC04"] = "bilinear" +def raw2rgb_bayer( + raw: torch.Tensor | npt.NDArray, + cfa_pattern: Literal["rggb"] = "rggb", + method: Literal["off", "bilinear", "MHC04"] = "bilinear", ) -> torch.Tensor | npt.NDArray: """Demosaicing - Convolution-based implementation as suggested by Malvar et al., + Convolution-based implementation as suggested by Malvar et al., "High-quality linear interpolation for demosaicing of Bayer-patterned color images", ICASSP 2004. 
https://home.cis.rit.edu/~cnspci/references/dip/demosaicking/malvar2004.pdf @@ -128,23 +127,23 @@ def raw2rgb_bayer(raw: torch.Tensor | npt.NDArray, return rgb if method in ["bilinear", "MHC04"]: - h_avg_fw_x = np.array([[0., 0.5, 0.5]]) + h_avg_fw_x = np.array([[0.0, 0.5, 0.5]]) h_avg_bw_x = np.fliplr(h_avg_fw_x) - - rgb[::2, ::2, 1] = 0.5 * (correlate(G1, h_avg_bw_x, mode="nearest") - + correlate(G2, h_avg_bw_x.T, mode="nearest")) - rgb[::2, ::2, 2] = correlate(correlate(B, h_avg_bw_x, mode="nearest"), - h_avg_bw_x.T, mode="nearest") + + rgb[::2, ::2, 1] = 0.5 * ( + correlate(G1, h_avg_bw_x, mode="nearest") + correlate(G2, h_avg_bw_x.T, mode="nearest") + ) + rgb[::2, ::2, 2] = correlate(correlate(B, h_avg_bw_x, mode="nearest"), h_avg_bw_x.T, mode="nearest") rgb[::2, 1::2, 0] = correlate(R, h_avg_fw_x, mode="nearest") rgb[::2, 1::2, 2] = correlate(B, h_avg_bw_x.T, mode="nearest") rgb[1::2, ::2, 0] = correlate(R, h_avg_fw_x.T, mode="nearest") rgb[1::2, ::2, 2] = correlate(B, h_avg_bw_x, mode="nearest") - rgb[1::2, 1::2, 0] = correlate(correlate(R, h_avg_fw_x, mode="nearest"), - h_avg_fw_x.T, mode="nearest") - rgb[1::2, 1::2, 1] = 0.5 * (correlate(G1, h_avg_fw_x.T, mode="nearest") - + correlate(G2, h_avg_fw_x, mode="nearest")) + rgb[1::2, 1::2, 0] = correlate(correlate(R, h_avg_fw_x, mode="nearest"), h_avg_fw_x.T, mode="nearest") + rgb[1::2, 1::2, 1] = 0.5 * ( + correlate(G1, h_avg_fw_x.T, mode="nearest") + correlate(G2, h_avg_fw_x, mode="nearest") + ) if method == "bilinear": return rgb @@ -153,53 +152,52 @@ def raw2rgb_bayer(raw: torch.Tensor | npt.NDArray, # Kernels in Fig. 2 of Malvar et al., along with weight factors for combination. # This implementation is a little idiosyncratic because it realizes the 2D convolution # via a sequence of 1D convolutions, probably not worth it in hindsight... 
- h_avg_nhood_x = np.array([[0.5, 0., 0.5]]) + h_avg_nhood_x = np.array([[0.5, 0.0, 0.5]]) - av_R = correlate(correlate(R, h_avg_nhood_x, mode="nearest"), - h_avg_nhood_x.T, mode="nearest") + av_R = correlate(correlate(R, h_avg_nhood_x, mode="nearest"), h_avg_nhood_x.T, mode="nearest") diff_R = R - av_R - av_B = correlate(correlate(B, h_avg_nhood_x, mode="nearest"), - h_avg_nhood_x.T, mode="nearest") + av_B = correlate(correlate(B, h_avg_nhood_x, mode="nearest"), h_avg_nhood_x.T, mode="nearest") diff_B = B - av_B - rgb[::2, ::2, 1] += (1/2) * diff_R - rgb[::2, ::2, 2] += (3/4) * diff_B - rgb[1::2, 1::2, 1] += (1/2) * diff_B - rgb[1::2, 1::2, 0] += (3/4) * diff_R - - av_G2_at_G1 = correlate(correlate(G2, h_avg_fw_x, mode="nearest"), - h_avg_bw_x.T, mode="nearest") - - av_G1_at_G1_R = (correlate(G1, h_avg_nhood_x, mode="nearest") - - correlate(G1, 0.5*h_avg_nhood_x.T, mode="nearest")) - av_G1_R = (1/5) * (2*av_G1_at_G1_R + 4*av_G2_at_G1) + rgb[::2, ::2, 1] += (1 / 2) * diff_R + rgb[::2, ::2, 2] += (3 / 4) * diff_B + rgb[1::2, 1::2, 1] += (1 / 2) * diff_B + rgb[1::2, 1::2, 0] += (3 / 4) * diff_R + + av_G2_at_G1 = correlate(correlate(G2, h_avg_fw_x, mode="nearest"), h_avg_bw_x.T, mode="nearest") + + av_G1_at_G1_R = correlate(G1, h_avg_nhood_x, mode="nearest") - correlate( + G1, 0.5 * h_avg_nhood_x.T, mode="nearest" + ) + av_G1_R = (1 / 5) * (2 * av_G1_at_G1_R + 4 * av_G2_at_G1) diff_G1_R = G1 - av_G1_R - rgb[::2, 1::2, 0] += (5/8) * diff_G1_R + rgb[::2, 1::2, 0] += (5 / 8) * diff_G1_R - av_G1_at_G1_B = (correlate(G1, h_avg_nhood_x.T, mode="nearest") - - correlate(G1, 0.5*h_avg_nhood_x, mode="nearest")) - av_G1_B = (1/5) * (2*av_G1_at_G1_B + 4*av_G2_at_G1) + av_G1_at_G1_B = correlate(G1, h_avg_nhood_x.T, mode="nearest") - correlate( + G1, 0.5 * h_avg_nhood_x, mode="nearest" + ) + av_G1_B = (1 / 5) * (2 * av_G1_at_G1_B + 4 * av_G2_at_G1) diff_G1_B = G1 - av_G1_B - rgb[::2, 1::2, 2] += (5/8) * diff_G1_B + rgb[::2, 1::2, 2] += (5 / 8) * diff_G1_B - av_G1_at_G2 = 
correlate(correlate(G1, h_avg_bw_x, mode="nearest"), - h_avg_fw_x.T, mode="nearest") + av_G1_at_G2 = correlate(correlate(G1, h_avg_bw_x, mode="nearest"), h_avg_fw_x.T, mode="nearest") - av_G2_at_G2_R = (correlate(G2, h_avg_nhood_x.T, mode="nearest") - - correlate(G2, 0.5*h_avg_nhood_x, mode="nearest")) - av_G2_R = (1/5) * (4*av_G1_at_G2 + 2*av_G2_at_G2_R) + av_G2_at_G2_R = correlate(G2, h_avg_nhood_x.T, mode="nearest") - correlate( + G2, 0.5 * h_avg_nhood_x, mode="nearest" + ) + av_G2_R = (1 / 5) * (4 * av_G1_at_G2 + 2 * av_G2_at_G2_R) diff_G2_R = G2 - av_G2_R - rgb[1::2, ::2, 0] += (5/8) * diff_G2_R + rgb[1::2, ::2, 0] += (5 / 8) * diff_G2_R - av_G2_at_G2_B = (correlate(G2, h_avg_nhood_x, mode="nearest") - - correlate(G2, 0.5*h_avg_nhood_x.T, mode="nearest")) - av_G2_B = (1/5) * (4*av_G1_at_G2 + 2*av_G2_at_G2_B) + av_G2_at_G2_B = correlate(G2, h_avg_nhood_x, mode="nearest") - correlate( + G2, 0.5 * h_avg_nhood_x.T, mode="nearest" + ) + av_G2_B = (1 / 5) * (4 * av_G1_at_G2 + 2 * av_G2_at_G2_B) diff_G2_B = G2 - av_G2_B - rgb[1::2, ::2, 2] += (5/8) * diff_G2_B + rgb[1::2, ::2, 2] += (5 / 8) * diff_G2_B return rgb else: raise NotImplementedError return rgb - From 0417c04f7c27d1b7846cc1a469c7482588bdc047 Mon Sep 17 00:00:00 2001 From: Sacha Jungerman Date: Thu, 15 Jan 2026 11:07:16 -0600 Subject: [PATCH 12/24] re-format --- visionsim/cli/emulate.py | 2 +- visionsim/emulate/rgb.py | 9 ++-- visionsim/interpolate/pose.py | 2 +- visionsim/utils/color.py | 98 +++++++++++++++++------------------ 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index 95d86ff..c0c8e63 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -274,7 +274,7 @@ def rgb( imgs = srgb_to_linearrgb(imgs) # batch_size is not relevant here - imgs = imgs[:,0,...] + imgs = imgs[:, 0, ...] 
rgb_img = emulate_rgb_from_sequence( imgs, diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index c31f5f5..86df5b6 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -46,7 +46,7 @@ def emulate_rgb_from_sequence( Quantized sRGB patch is returned """ # Get sum of linear-intensity frames. - burst_size = int(max(1, np.ceil(len(sequence) * (shutter_angle_degrees/360.0)))) + burst_size = int(max(1, np.ceil(len(sequence) * (shutter_angle_degrees / 360.0)))) sequence = np.array(sequence[:burst_size]) patch = np.sum(sequence, axis=0) * scale_flux @@ -68,7 +68,7 @@ def emulate_rgb_from_sequence( # apply ISO gain patch *= gain_ISO # assume perfect quantization in ADC - patch = np.round(np.clip(patch, 0, (2**bitdepth-1))) + patch = np.round(np.clip(patch, 0, (2**bitdepth - 1))) patch = patch * (1.0 / (2**bitdepth - 1)) # de-mosaicing @@ -79,13 +79,12 @@ def emulate_rgb_from_sequence( if denoise_sigma != 0.0: patch = gaussian(patch, denoise_sigma) if sharpen_weight != 0.0: - patch = unsharp_mask(patch, amount=sharpen_weight, - channel_axis=2 if color else None) + patch = unsharp_mask(patch, amount=sharpen_weight, channel_axis=2 if color else None) # Convert to sRGB color space for viewing and quantize to 8-bits patch = linearrgb_to_srgb(patch.astype(np.double)) patch = np.round(patch * 255).astype(np.uint8) - if not color: # fake it anyway + if not color: # fake it anyway patch = np.repeat(patch, 3, axis=-1) return patch diff --git a/visionsim/interpolate/pose.py b/visionsim/interpolate/pose.py index 8064527..cd386cd 100644 --- a/visionsim/interpolate/pose.py +++ b/visionsim/interpolate/pose.py @@ -27,7 +27,7 @@ def __init__(self, transforms: npt.ArrayLike, ts: npt.ArrayLike | None = None, k self.ts = np.linspace(0, 1, len(self.transforms)) if ts is None else np.array(ts) self.determinants = np.linalg.det(self.transforms[:, :3, :3]) - k = min(len(self.transforms)-1, k) # for small chunk_sizes + k = min(len(self.transforms) - 1, k) # for 
small chunk_sizes self.k = k if normalize: diff --git a/visionsim/utils/color.py b/visionsim/utils/color.py index eca6fca..da10d6a 100644 --- a/visionsim/utils/color.py +++ b/visionsim/utils/color.py @@ -44,9 +44,7 @@ def linearrgb_to_srgb(img: torch.Tensor | npt.NDArray) -> torch.Tensor | npt.NDA return img -def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, - cfa_pattern: Literal["rggb"] = "rggb" -) -> torch.Tensor | npt.NDArray: +def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, cfa_pattern: Literal["rggb"] = "rggb") -> torch.Tensor | npt.NDArray: """Mosaicing Realizes a mosaiced CFA image as would be sampled by a real Bayer-patterned sensor @@ -59,7 +57,7 @@ def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, raw: CFA mosaiced data """ if cfa_pattern == "rggb": - raw = np.copy(rgb[:,:,0]) + raw = np.copy(rgb[:, :, 0]) raw[::2, 1::2] = rgb[::2, 1::2, 1] raw[1::2, ::2] = rgb[1::2, ::2, 1] raw[1::2, 1::2] = rgb[1::2, 1::2, 2] @@ -68,13 +66,14 @@ def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, return raw -def raw2rgb_bayer(raw: torch.Tensor | npt.NDArray, - cfa_pattern: Literal["rggb"] = "rggb", - method: Literal["off", "bilinear", "MHC04"] = "bilinear" +def raw2rgb_bayer( + raw: torch.Tensor | npt.NDArray, + cfa_pattern: Literal["rggb"] = "rggb", + method: Literal["off", "bilinear", "MHC04"] = "bilinear", ) -> torch.Tensor | npt.NDArray: """Demosaicing - Convolution-based implementation as suggested by Malvar et al., + Convolution-based implementation as suggested by Malvar et al., "High-quality linear interpolation for demosaicing of Bayer-patterned color images", ICASSP 2004. 
https://home.cis.rit.edu/~cnspci/references/dip/demosaicking/malvar2004.pdf @@ -112,23 +111,23 @@ def raw2rgb_bayer(raw: torch.Tensor | npt.NDArray, return rgb if method in ["bilinear", "MHC04"]: - h_avg_fw_x = np.array([[0., 0.5, 0.5]]) + h_avg_fw_x = np.array([[0.0, 0.5, 0.5]]) h_avg_bw_x = np.fliplr(h_avg_fw_x) - - rgb[::2, ::2, 1] = 0.5 * (correlate(G1, h_avg_bw_x, mode="nearest") - + correlate(G2, h_avg_bw_x.T, mode="nearest")) - rgb[::2, ::2, 2] = correlate(correlate(B, h_avg_bw_x, mode="nearest"), - h_avg_bw_x.T, mode="nearest") + + rgb[::2, ::2, 1] = 0.5 * ( + correlate(G1, h_avg_bw_x, mode="nearest") + correlate(G2, h_avg_bw_x.T, mode="nearest") + ) + rgb[::2, ::2, 2] = correlate(correlate(B, h_avg_bw_x, mode="nearest"), h_avg_bw_x.T, mode="nearest") rgb[::2, 1::2, 0] = correlate(R, h_avg_fw_x, mode="nearest") rgb[::2, 1::2, 2] = correlate(B, h_avg_bw_x.T, mode="nearest") rgb[1::2, ::2, 0] = correlate(R, h_avg_fw_x.T, mode="nearest") rgb[1::2, ::2, 2] = correlate(B, h_avg_bw_x, mode="nearest") - rgb[1::2, 1::2, 0] = correlate(correlate(R, h_avg_fw_x, mode="nearest"), - h_avg_fw_x.T, mode="nearest") - rgb[1::2, 1::2, 1] = 0.5 * (correlate(G1, h_avg_fw_x.T, mode="nearest") - + correlate(G2, h_avg_fw_x, mode="nearest")) + rgb[1::2, 1::2, 0] = correlate(correlate(R, h_avg_fw_x, mode="nearest"), h_avg_fw_x.T, mode="nearest") + rgb[1::2, 1::2, 1] = 0.5 * ( + correlate(G1, h_avg_fw_x.T, mode="nearest") + correlate(G2, h_avg_fw_x, mode="nearest") + ) if method == "bilinear": return rgb @@ -137,53 +136,52 @@ def raw2rgb_bayer(raw: torch.Tensor | npt.NDArray, # Kernels in Fig. 2 of Malvar et al., along with weight factors for combination. # This implementation is a little idiosyncratic because it realizes the 2D convolution # via a sequence of 1D convolutions, probably not worth it in hindsight... 
- h_avg_nhood_x = np.array([[0.5, 0., 0.5]]) + h_avg_nhood_x = np.array([[0.5, 0.0, 0.5]]) - av_R = correlate(correlate(R, h_avg_nhood_x, mode="nearest"), - h_avg_nhood_x.T, mode="nearest") + av_R = correlate(correlate(R, h_avg_nhood_x, mode="nearest"), h_avg_nhood_x.T, mode="nearest") diff_R = R - av_R - av_B = correlate(correlate(B, h_avg_nhood_x, mode="nearest"), - h_avg_nhood_x.T, mode="nearest") + av_B = correlate(correlate(B, h_avg_nhood_x, mode="nearest"), h_avg_nhood_x.T, mode="nearest") diff_B = B - av_B - rgb[::2, ::2, 1] += (1/2) * diff_R - rgb[::2, ::2, 2] += (3/4) * diff_B - rgb[1::2, 1::2, 1] += (1/2) * diff_B - rgb[1::2, 1::2, 0] += (3/4) * diff_R - - av_G2_at_G1 = correlate(correlate(G2, h_avg_fw_x, mode="nearest"), - h_avg_bw_x.T, mode="nearest") - - av_G1_at_G1_R = (correlate(G1, h_avg_nhood_x, mode="nearest") - - correlate(G1, 0.5*h_avg_nhood_x.T, mode="nearest")) - av_G1_R = (1/5) * (2*av_G1_at_G1_R + 4*av_G2_at_G1) + rgb[::2, ::2, 1] += (1 / 2) * diff_R + rgb[::2, ::2, 2] += (3 / 4) * diff_B + rgb[1::2, 1::2, 1] += (1 / 2) * diff_B + rgb[1::2, 1::2, 0] += (3 / 4) * diff_R + + av_G2_at_G1 = correlate(correlate(G2, h_avg_fw_x, mode="nearest"), h_avg_bw_x.T, mode="nearest") + + av_G1_at_G1_R = correlate(G1, h_avg_nhood_x, mode="nearest") - correlate( + G1, 0.5 * h_avg_nhood_x.T, mode="nearest" + ) + av_G1_R = (1 / 5) * (2 * av_G1_at_G1_R + 4 * av_G2_at_G1) diff_G1_R = G1 - av_G1_R - rgb[::2, 1::2, 0] += (5/8) * diff_G1_R + rgb[::2, 1::2, 0] += (5 / 8) * diff_G1_R - av_G1_at_G1_B = (correlate(G1, h_avg_nhood_x.T, mode="nearest") - - correlate(G1, 0.5*h_avg_nhood_x, mode="nearest")) - av_G1_B = (1/5) * (2*av_G1_at_G1_B + 4*av_G2_at_G1) + av_G1_at_G1_B = correlate(G1, h_avg_nhood_x.T, mode="nearest") - correlate( + G1, 0.5 * h_avg_nhood_x, mode="nearest" + ) + av_G1_B = (1 / 5) * (2 * av_G1_at_G1_B + 4 * av_G2_at_G1) diff_G1_B = G1 - av_G1_B - rgb[::2, 1::2, 2] += (5/8) * diff_G1_B + rgb[::2, 1::2, 2] += (5 / 8) * diff_G1_B - av_G1_at_G2 = 
correlate(correlate(G1, h_avg_bw_x, mode="nearest"), - h_avg_fw_x.T, mode="nearest") + av_G1_at_G2 = correlate(correlate(G1, h_avg_bw_x, mode="nearest"), h_avg_fw_x.T, mode="nearest") - av_G2_at_G2_R = (correlate(G2, h_avg_nhood_x.T, mode="nearest") - - correlate(G2, 0.5*h_avg_nhood_x, mode="nearest")) - av_G2_R = (1/5) * (4*av_G1_at_G2 + 2*av_G2_at_G2_R) + av_G2_at_G2_R = correlate(G2, h_avg_nhood_x.T, mode="nearest") - correlate( + G2, 0.5 * h_avg_nhood_x, mode="nearest" + ) + av_G2_R = (1 / 5) * (4 * av_G1_at_G2 + 2 * av_G2_at_G2_R) diff_G2_R = G2 - av_G2_R - rgb[1::2, ::2, 0] += (5/8) * diff_G2_R + rgb[1::2, ::2, 0] += (5 / 8) * diff_G2_R - av_G2_at_G2_B = (correlate(G2, h_avg_nhood_x, mode="nearest") - - correlate(G2, 0.5*h_avg_nhood_x.T, mode="nearest")) - av_G2_B = (1/5) * (4*av_G1_at_G2 + 2*av_G2_at_G2_B) + av_G2_at_G2_B = correlate(G2, h_avg_nhood_x, mode="nearest") - correlate( + G2, 0.5 * h_avg_nhood_x.T, mode="nearest" + ) + av_G2_B = (1 / 5) * (4 * av_G1_at_G2 + 2 * av_G2_at_G2_B) diff_G2_B = G2 - av_G2_B - rgb[1::2, ::2, 2] += (5/8) * diff_G2_B + rgb[1::2, ::2, 2] += (5 / 8) * diff_G2_B return rgb else: raise NotImplementedError return rgb - From f5a185334cec40bac47da8811c8de9f5e509b31f Mon Sep 17 00:00:00 2001 From: Sacha Jungerman Date: Thu, 15 Jan 2026 15:49:12 -0600 Subject: [PATCH 13/24] Test docs build on PR From d199131878b80fcfb4cc7372d776a7634e63d773 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Mon, 26 Jan 2026 19:07:19 -0600 Subject: [PATCH 14/24] remove skimage dependency --- visionsim/interpolate/pose.py | 4 ++++ visionsim/utils/color.py | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/visionsim/interpolate/pose.py b/visionsim/interpolate/pose.py index cd386cd..4348d11 100644 --- a/visionsim/interpolate/pose.py +++ b/visionsim/interpolate/pose.py @@ -6,6 +6,8 @@ from scipy.spatial.transform import Rotation, RotationSpline from typing_extensions import Literal, cast +import logging +logger = 
logging.getLogger(__name__) class pose_interp: """Linearly interpolate between 4x4 (or 3x4) transformation matrices by interpolating it's components""" @@ -27,6 +29,8 @@ def __init__(self, transforms: npt.ArrayLike, ts: npt.ArrayLike | None = None, k self.ts = np.linspace(0, 1, len(self.transforms)) if ts is None else np.array(ts) self.determinants = np.linalg.det(self.transforms[:, :3, :3]) + if k >= len(self.transforms): + logger.warning(f"spline degree {k} >= #poses ({len(self.transforms)})") k = min(len(self.transforms) - 1, k) # for small chunk_sizes self.k = k diff --git a/visionsim/utils/color.py b/visionsim/utils/color.py index da10d6a..6d2d6d4 100644 --- a/visionsim/utils/color.py +++ b/visionsim/utils/color.py @@ -4,7 +4,6 @@ import numpy.typing as npt import torch from scipy.ndimage import correlate -from skimage.util import img_as_float from typing_extensions import Literal @@ -97,7 +96,7 @@ def raw2rgb_bayer( raise RuntimeError(f"Expected even-length raw array, got {(raw.shape)}") if not isinstance(raw, np.floating): - raw = img_as_float(raw) + raise RuntimeError(f"Expected floating-point array, got {raw.dtype}") R, G1, G2, B = raw[::2, ::2], raw[::2, 1::2], raw[1::2, ::2], raw[1::2, 1::2] From bbff6c74966d7c1ec4dac0691c162b876441227d Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Mon, 26 Jan 2026 21:47:12 -0600 Subject: [PATCH 15/24] rgb simulation interface cleanup; handling potential alpha channel in emulate_rgb_from_sequence input; more inoffensive defaults --- visionsim/cli/emulate.py | 37 ++++++++-------- visionsim/emulate/rgb.py | 79 +++++++++++++++++++++-------------- visionsim/interpolate/pose.py | 6 ++- visionsim/utils/color.py | 20 ++++++--- 4 files changed, 85 insertions(+), 57 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index 3b82646..eb2acc4 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -209,14 +209,15 @@ def events( def rgb( input_dir: str | os.PathLike, output_dir: str | 
os.PathLike, - chunk_size: int = 10, - shutter_angle: float = 360.0, - readout_std: float = 0.3, + chunk_size: int = 1, + shutter_frac: float = 1.0, + readout_std: float = 0.0, fwc: float | None = None, - scale_flux: float = 1.0, - g_ISO: float = 1.0, - bitdepth: int = 12, - demosaic: Literal["off", "bilinear", "MHC04"] = "bilinear", + flux_gain: float = 1.0, + iso_gain: float = 1.0, + adc_bitdepth: int = 12, + mosaic: bool = False, + demosaic: Literal["off", "bilinear", "MHC04"] = "MHC04", denoise_sigma: float = 0.0, sharpen_weight: float = 0.0, pattern: str = "frame_{:06}.png", @@ -229,13 +230,14 @@ def rgb( input_dir: directory in which to look for frames output_dir: directory in which to save binary frames chunk_size: number of consecutive frames to average together - shutter_angle: fraction of inter-frame duration shutter is active (0-360 deg) + shutter_frac: fraction of inter-frame duration shutter is active (0 to 1) readout_std: standard deviation of gaussian read noise in photoelectrons fwc: full well capacity of sensor in photoelectrons - scale_flux: factor to scale the input images before Poisson simulation - g_ISO: gain for photo-electron reading after Poisson rng - bitdepth: ADC bitdepth - demosaic: demosaicing method (default bilinear) + flux_gain: factor to scale the input images before Poisson simulation + iso_gain: gain for photo-electron reading after Poisson rng + adc_bitdepth: ADC bitdepth + mosaic: implement mosaiced R-/G-/B- pixels or an innately 3-channel sensor + demosaic: demosaicing method (default Malvar et al.'s method) denoise_sigma: Gaussian blur with this sigma will be used (default 0.0 disables this) sharpen_weight: weight used in sharpening (default 0.0 disables this) pattern: filenames of frames should match this @@ -294,11 +296,12 @@ def rgb( imgs, readout_std=readout_std, fwc=fwc or np.inf, - shutter_angle_degrees=shutter_angle, - scale_flux=scale_flux, - gain_ISO=g_ISO, - bitdepth=bitdepth, - demosaic_method=demosaic, + 
shutter_frac=shutter_frac, + flux_gain=flux_gain, + iso_gain=iso_gain, + adc_bitdepth=adc_bitdepth, + mosaic=mosaic, + demosaic=demosaic, denoise_sigma=denoise_sigma, sharpen_weight=sharpen_weight, ) diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index 86df5b6..a89619e 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -5,24 +5,27 @@ from skimage.filters import gaussian, unsharp_mask from typing_extensions import Literal -from visionsim.utils.color import linearrgb_to_srgb, raw2rgb_bayer, rgb2raw_bayer +from visionsim.utils.color import linearrgb_to_srgb, raw_to_rgb_bayer, rgb_to_raw_bayer def emulate_rgb_from_sequence( sequence: npt.ArrayLike, - shutter_angle_degrees: float = 360.0, - readout_std: float = 0.3, + shutter_frac: float = 1.0, + readout_std: float = 0.0, fwc: float = 10000.0, - bitdepth: int = 12, - scale_flux: float = 1.0, - gain_ISO: float = 1.0, - demosaic_method: Literal["off", "bilinear", "MHC04"] = "bilinear", + adc_bitdepth: int = 12, + flux_gain: float = 1.0, + iso_gain: float = 1.0, + mosaic: bool = False, + demosaic: Literal["off", "bilinear", "MHC04"] = "MHC04", denoise_sigma: float = 0.0, sharpen_weight: float = 0.0, rng: np.random.Generator | None = None, ) -> npt.NDArray: """Emulates a conventional RGB camera from a sequence of intensity frames. + For camera model see [1]. For demosaicing details see docs for `utils/color/raw_to_rgb_bayer`. + Note: Motion-blur is approximated by averaging consecutive ground truth frames, this can be done more efficiently if optical flow is available. @@ -31,33 +34,39 @@ def emulate_rgb_from_sequence( Args: sequence (npt.ArrayLike): Input sequence of linear-intensity frames, can be a collection of frames, or np/torch array with time as the first dimension. - shutter_angle_degrees (float, optional): fraction of inter-frame duration the shutter is active. - readout_std (float, optional): Standard deviation of zero mean Gaussian read noise. Defaults to 20.0. 
- fwc (float, optional): Full well capacity, used for normalization. Defaults to 500.0. - bitdepth (int, optional): Resolution of ADC in bits. Defaults to 12. - scale_flux (float, optional): factor to scale the input [0, 1] image _before_ Poisson rng - gain_ISO (float, optional): factor to scale the photo-electron reading _after_ Poisson rng - demosaic_method (string, optional): demosaicing method to use + shutter_frac (float, optional): fraction of inter-frame duration the shutter is active. Range [0, 1] + readout_std (float, optional): Standard deviation of zero mean Gaussian read noise. Defaults to 0.0. + fwc (float, optional): Full well capacity, used for normalization. Defaults to 10000.0. + adc_bitdepth (int, optional): Resolution of ADC in bits. Defaults to 12. + flux_gain (float, optional): factor to scale the input [0, 1] image _before_ Poisson rng + iso_gain (float, optional): factor to scale the photo-electron reading _after_ Poisson rng + mosaic (bool, optional): implement one array with mosaiced R-/G-/B-sensitive pixels or an innately 3-channel sensor + demosaic (string, optional): demosaicing method to use denoise_sigma (float, optional): Gaussian blur kernel sigma (disabled if 0.0) sharpen_weight (float, optional): sharpening weight (disabled if 0.0) rng (np.random.Generator, optional): Optional random number generator. Defaults to none. Returns: - Quantized sRGB patch is returned + Quantized sRGB patch is returned as _uint8_ array (range [0, 255]) + + References: + .. [1] S. W. Hasinoff, F. Durand, and W. T. Freeman, + “Noise-optimal capture for high dynamic range photography,” + CVPR 2010. """ # Get sum of linear-intensity frames. 
- burst_size = int(max(1, np.ceil(len(sequence) * (shutter_angle_degrees / 360.0)))) + burst_size = int(max(1, np.ceil(len(sequence) * shutter_frac))) sequence = np.array(sequence[:burst_size]) - patch = np.sum(sequence, axis=0) * scale_flux - - color = (len(patch.shape) > 2) and (patch.shape[2] > 1) - if color: - patch = rgb2raw_bayer(patch) + patch = np.sum(sequence, axis=0) * flux_gain - # Roughly translating the model in Eqs. (1,2) and Fig. 1 of Hasinoff et al.: - # S. W. Hasinoff, F. Durand, and W. T. Freeman, - # “Noise-optimal capture for high dynamic range photography,” CVPR 2010. + has_alpha = len(patch.shape) > 2 and (patch.shape[2] in [2, 4]) # LA/RGBA + patch_alpha = patch[..., -1:] if has_alpha else None + has_color = len(patch.shape) > 2 and (patch.shape[2] in [3, 4]) # RGB/RGBA + patch = patch[..., :-1] if has_alpha else patch + if has_color and mosaic: + patch = rgb_to_raw_bayer(patch) + # Roughly translating the model in Eqs. (1,2) and Fig. 1 of Hasinoff et al. # Perform poisson sampling rng = np.random.default_rng() if rng is None else rng patch = rng.poisson(patch).astype(float) @@ -66,26 +75,32 @@ def emulate_rgb_from_sequence( # readout noise patch += rng.normal(0, readout_std, size=patch.shape) # apply ISO gain - patch *= gain_ISO + patch *= iso_gain # assume perfect quantization in ADC - patch = np.round(np.clip(patch, 0, (2**bitdepth - 1))) - patch = patch * (1.0 / (2**bitdepth - 1)) + patch = np.round(np.clip(patch, 0, (2**adc_bitdepth - 1))) + patch = patch * (1.0 / (2**adc_bitdepth - 1)) - # de-mosaicing - if color: - patch = raw2rgb_bayer(patch, method=demosaic_method) + # de-mosaicing: necessary if data is mosaiced, so can't be `None`. 
+ # ("off" is not a no-op, it still creates a full 3-channel image from 1, + # albeit a bad one) + if has_color and mosaic: + patch = raw_to_rgb_bayer(patch, method=demosaic) # de-noising and sharpening if denoise_sigma != 0.0: patch = gaussian(patch, denoise_sigma) if sharpen_weight != 0.0: - patch = unsharp_mask(patch, amount=sharpen_weight, channel_axis=2 if color else None) + patch = unsharp_mask(patch, amount=sharpen_weight, channel_axis=2 if has_color else None) # Convert to sRGB color space for viewing and quantize to 8-bits patch = linearrgb_to_srgb(patch.astype(np.double)) patch = np.round(patch * 255).astype(np.uint8) - if not color: # fake it anyway + if not has_color: # fake it anyway, because this is emulate____rgb____ + if len(patch.shape) < 3: + patch = np.atleast_3d(patch) patch = np.repeat(patch, 3, axis=-1) + if has_alpha: + patch = np.concatenate((patch, patch_alpha), axis=-1) return patch diff --git a/visionsim/interpolate/pose.py b/visionsim/interpolate/pose.py index 4348d11..6ef98d6 100644 --- a/visionsim/interpolate/pose.py +++ b/visionsim/interpolate/pose.py @@ -1,14 +1,16 @@ from __future__ import annotations +import logging + import numpy as np import numpy.typing as npt from scipy.interpolate import make_interp_spline from scipy.spatial.transform import Rotation, RotationSpline from typing_extensions import Literal, cast -import logging logger = logging.getLogger(__name__) + class pose_interp: """Linearly interpolate between 4x4 (or 3x4) transformation matrices by interpolating it's components""" @@ -30,7 +32,7 @@ def __init__(self, transforms: npt.ArrayLike, ts: npt.ArrayLike | None = None, k self.determinants = np.linalg.det(self.transforms[:, :3, :3]) if k >= len(self.transforms): - logger.warning(f"spline degree {k} >= #poses ({len(self.transforms)})") + logger.warning(f"spline degree {k} >= #poses ({len(self.transforms)}), therefore downgrading it") k = min(len(self.transforms) - 1, k) # for small chunk_sizes self.k = k diff --git 
a/visionsim/utils/color.py b/visionsim/utils/color.py index 6d2d6d4..d00d09a 100644 --- a/visionsim/utils/color.py +++ b/visionsim/utils/color.py @@ -43,7 +43,9 @@ def linearrgb_to_srgb(img: torch.Tensor | npt.NDArray) -> torch.Tensor | npt.NDA return img -def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, cfa_pattern: Literal["rggb"] = "rggb") -> torch.Tensor | npt.NDArray: +def rgb_to_raw_bayer( + rgb: torch.Tensor | npt.NDArray, cfa_pattern: Literal["rggb"] = "rggb" +) -> torch.Tensor | npt.NDArray: """Mosaicing Realizes a mosaiced CFA image as would be sampled by a real Bayer-patterned sensor @@ -65,16 +67,16 @@ def rgb2raw_bayer(rgb: torch.Tensor | npt.NDArray, cfa_pattern: Literal["rggb"] return raw -def raw2rgb_bayer( +def raw_to_rgb_bayer( raw: torch.Tensor | npt.NDArray, cfa_pattern: Literal["rggb"] = "rggb", method: Literal["off", "bilinear", "MHC04"] = "bilinear", ) -> torch.Tensor | npt.NDArray: """Demosaicing - Convolution-based implementation as suggested by Malvar et al., - "High-quality linear interpolation for demosaicing of Bayer-patterned color images", ICASSP 2004. - https://home.cis.rit.edu/~cnspci/references/dip/demosaicking/malvar2004.pdf + Convolution-based implementation as suggested by Malvar et al. [1]. + + Bilinear is simpler but visually not as nice as Malvar et al.'s method. Alternative implementations are also available from OpenCV: rgb = cv2.cvtColor(, cv2.COLOR_BAYER_BG2BGR)[:,:,::-1], @@ -89,13 +91,19 @@ def raw2rgb_bayer( Returns: rgb: demosaiced RGB image + + References: + .. [1] Malvar et al. (2004), "High-quality linear interpolation for demosaicing of Bayer-patterned color images", ICASSP 2004. 
""" if cfa_pattern == "rggb": if any(((L % 2) != 0) for L in raw.shape): raise RuntimeError(f"Expected even-length raw array, got {(raw.shape)}") - if not isinstance(raw, np.floating): + if torch.is_tensor(raw): + raw = raw.numpy() + + if not np.issubdtype(raw.dtype, np.floating): raise RuntimeError(f"Expected floating-point array, got {raw.dtype}") R, G1, G2, B = raw[::2, ::2], raw[::2, 1::2], raw[1::2, ::2], raw[1::2, 1::2] From 6bb38c3411aaa4a8028cb09b73de3afdd35439a6 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Wed, 28 Jan 2026 13:39:05 -0600 Subject: [PATCH 16/24] remove skimage dependency --- pyproject.toml | 1 - visionsim/cli/emulate.py | 4 +-- visionsim/emulate/rgb.py | 15 +++++----- visionsim/utils/imgproc.py | 57 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 11 deletions(-) create mode 100644 visionsim/utils/imgproc.py diff --git a/pyproject.toml b/pyproject.toml index 165a6d6..33a81c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,6 @@ classifiers = [ dependencies = [ "opencv-python", "imageio", - "scikit-image", "more_itertools", "numpy", "torch", diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index eb2acc4..41bed06 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -12,8 +12,6 @@ def _spad_collate(batch, *, mode, rng, factor, bitdepth=1, gray=False, is_tonemapped=True): """Use default collate function on batch and then simulate SPAD, enabling compute to be done in threads""" - from skimage.color import rgb2gray - from visionsim.dataset import default_collate from visionsim.emulate.spc import emulate_spc from visionsim.utils.color import srgb_to_linearrgb @@ -27,7 +25,7 @@ def _spad_collate(batch, *, mode, rng, factor, bitdepth=1, gray=False, is_tonema imgs = imgs.astype(float) / 255.0 if gray and (imgs.shape[-1] == 3): - imgs = rgb2gray(imgs) + imgs = imgs.mean(axis=-1) binary_img = emulate_spc(imgs, factor=factor, bitdepth=bitdepth, rng=rng) * 255 binary_img = 
binary_img.astype(np.uint8) diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index a89619e..2f3988c 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -2,10 +2,11 @@ import numpy as np import numpy.typing as npt -from skimage.filters import gaussian, unsharp_mask +from scipy.ndimage import gaussian_filter from typing_extensions import Literal from visionsim.utils.color import linearrgb_to_srgb, raw_to_rgb_bayer, rgb_to_raw_bayer +from visionsim.utils.imgproc import unsharp_mask def emulate_rgb_from_sequence( @@ -17,7 +18,7 @@ def emulate_rgb_from_sequence( flux_gain: float = 1.0, iso_gain: float = 1.0, mosaic: bool = False, - demosaic: Literal["off", "bilinear", "MHC04"] = "MHC04", + demosaic: Literal["off", "bilinear", "MHC04"] = "off", denoise_sigma: float = 0.0, sharpen_weight: float = 0.0, rng: np.random.Generator | None = None, @@ -41,7 +42,7 @@ def emulate_rgb_from_sequence( flux_gain (float, optional): factor to scale the input [0, 1] image _before_ Poisson rng iso_gain (float, optional): factor to scale the photo-electron reading _after_ Poisson rng mosaic (bool, optional): implement one array with mosaiced R-/G-/B-sensitive pixels or an innately 3-channel sensor - demosaic (string, optional): demosaicing method to use + demosaic (string, optional): demosaicing method to use if "mosaic" is set (default "off") denoise_sigma (float, optional): Gaussian blur kernel sigma (disabled if 0.0) sharpen_weight (float, optional): sharpening weight (disabled if 0.0) rng (np.random.Generator, optional): Optional random number generator. Defaults to none. 
@@ -59,9 +60,9 @@ def emulate_rgb_from_sequence( sequence = np.array(sequence[:burst_size]) patch = np.sum(sequence, axis=0) * flux_gain - has_alpha = len(patch.shape) > 2 and (patch.shape[2] in [2, 4]) # LA/RGBA + has_alpha = (patch.ndim > 2) and (patch.shape[2] in [2, 4]) # LA/RGBA patch_alpha = patch[..., -1:] if has_alpha else None - has_color = len(patch.shape) > 2 and (patch.shape[2] in [3, 4]) # RGB/RGBA + has_color = (patch.ndim > 2) and (patch.shape[2] in [3, 4]) # RGB/RGBA patch = patch[..., :-1] if has_alpha else patch if has_color and mosaic: patch = rgb_to_raw_bayer(patch) @@ -88,9 +89,9 @@ def emulate_rgb_from_sequence( # de-noising and sharpening if denoise_sigma != 0.0: - patch = gaussian(patch, denoise_sigma) + patch = gaussian_filter(patch, denoise_sigma) if sharpen_weight != 0.0: - patch = unsharp_mask(patch, amount=sharpen_weight, channel_axis=2 if has_color else None) + patch = unsharp_mask(patch, sigma=max(1,denoise_sigma), amount=sharpen_weight) # Convert to sRGB color space for viewing and quantize to 8-bits patch = linearrgb_to_srgb(patch.astype(np.double)) diff --git a/visionsim/utils/imgproc.py b/visionsim/utils/imgproc.py new file mode 100644 index 0000000..06856c1 --- /dev/null +++ b/visionsim/utils/imgproc.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +import numpy as np +import numpy.typing as npt +from scipy.ndimage import gaussian_filter + +def unsharp_mask( + img: npt.ArrayLike, + sigma: float = 1.0, + amount: float = 1.0, +) -> npt.NDArray: + """Unsharp-masking to sharpen an image + + Borrows interface from scikit-image's version: + + + Args: + img (npt.ArrayLike, required): input image, can be 2-, 3-, or 4-channel + sigma (float, default = 1.0): controls extent of blurring + amount (float, default = 1.0): controls how much details are amplified by + + Returns: + output (npt.NDArray): img with unsharp mask applied, + same shape and dtype as img + + References: + .. [1] Wikipedia. Unsharp masking. 
+ """ + def _unsharp_mask1(img): + img_smooth = gaussian_filter(img, sigma) + if np.issubdtype(img.dtype, np.floating): + np.clip(img + (amount * (img - img_smooth)), 0, 1, out=img) + else: + # work with copy and convert back + if ~isinstance(img, np.uint8): + raise NotImplementedError("_unsharp_mask1 expects (float | uint8)") + img_float = (1.0/255) * img.astype(np.float) + img_smooth_float = (1.0/255) * img_smooth.astype(np.float) + np.clip(img_float + (amount * (img_float - img_smooth_float)), 0, 1, out=img_float) + img[...] = (255*img_float).astype(np.uint8) + return + + has_alpha = (img.ndim == 3) and (img.shape[2] in [2, 4]) + if has_alpha: + alpha_channel = img[:,:,-1:] + img = img[:,:,:-1] + has_color = (img.ndim == 3) and (img.shape[2] == 3) + if has_color: + # do each channel individually + for c in range(3): + _unsharp_mask1(img[:,:,c]) + else: + _unsharp_mask1(img) + if has_alpha: + img = np.concatenate((img, alpha_channel), axis=-1) + return img + From 9edb88ef39d4f4692bf7d1ef21a3e4b9758c6d69 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Wed, 28 Jan 2026 21:53:21 -0600 Subject: [PATCH 17/24] spad sim refactoring, removed skimage dependency --- visionsim/cli/emulate.py | 88 ++++++++++++++++++++++++++------------ visionsim/emulate/spc.py | 16 ++++--- visionsim/utils/imgproc.py | 2 +- 3 files changed, 70 insertions(+), 36 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index 41bed06..cba0267 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -4,59 +4,81 @@ import os from pathlib import Path +import logging +logger = logging.getLogger(__name__) + import numpy as np from typing_extensions import Literal from visionsim.emulate.rgb import emulate_rgb_from_sequence -def _spad_collate(batch, *, mode, rng, factor, bitdepth=1, gray=False, is_tonemapped=True): +def _spad_collate(batch, *, mode, rng, is_tonemapped=True, has_alpha=False, + flux_gain=1, bitdepth=1, force_gray=False): """Use default collate 
function on batch and then simulate SPAD, enabling compute to be done in threads""" from visionsim.dataset import default_collate from visionsim.emulate.spc import emulate_spc from visionsim.utils.color import srgb_to_linearrgb idxs, imgs, poses = default_collate(batch) + + if has_alpha: + imgs_alpha = imgs[...,-1:] + if np.issubdtype(imgs.dtype, np.floating): + # returned imgs are np.uint8 + imgs_alpha = (255 * imgs_alpha).astype(np.uint8) + imgs = imgs[...,:-1] if is_tonemapped: # Image has been tonemapped so undo mapping imgs = srgb_to_linearrgb((imgs / 255.0).astype(float)) else: imgs = imgs.astype(float) / 255.0 - - if gray and (imgs.shape[-1] == 3): - imgs = imgs.mean(axis=-1) - - binary_img = emulate_spc(imgs, factor=factor, bitdepth=bitdepth, rng=rng) * 255 - binary_img = binary_img.astype(np.uint8) - - if mode.lower() == "npy": - binary_img = binary_img >= 128 - binary_img = np.packbits(binary_img, axis=2) - return idxs, binary_img, poses + + # The warning about missing mosaicing should really be here... 
+ # but it will swamp the logs if we do that, so it got moved to the calling + # code + if imgs.shape[-1] == 3: + if force_gray: + w = (0.2125, 0.7154, 0.0721) + imgs = (imgs[...,0] * w[0]) + (imgs[...,1] * w[1]) + (imgs[...,2] * w[2]) + # else: + # logger.warning("emulate.spad: mosaicing/demosaicing not implemented") + + imgs = emulate_spc(imgs, flux_gain=flux_gain, bitdepth=bitdepth, rng=rng) + imgs = (255*imgs).astype(np.uint8) + if has_alpha: + if imgs.ndim < 4: + imgs = imgs[...,np.newaxis] + imgs = np.concatenate((imgs, imgs_alpha), axis=3) + + if (mode.lower() == "npy") and (bitdepth == 1) and not has_alpha: + imgs = imgs >= 128 + imgs = np.packbits(imgs, axis=2) + return idxs, imgs, poses def spad( input_dir: str | os.PathLike, output_dir: str | os.PathLike, - pattern: str = "frame_{:06}.png", - factor: float = 1.0, - gray: bool = False, + flux_gain: float = 1.0, bitdepth: int = 1, + force_gray: bool = False, seed: int = 2147483647, + pattern: str = "frame_{:06}.png", mode: Literal["npy", "img"] = "npy", batch_size: int = 4, force: bool = False, ): - """Perform bernoulli sampling on linearized RGB frames to yield binary frames + """Perform binomial sampling on linearized RGB frames to yield binary frames Args: input_dir: directory in which to look for frames output_dir: directory in which to save binary frames pattern: filenames of frames should match this - factor: multiplicative factor controlling dynamic range of output - gray: to set grayscale instead of 3-channel sensing - bitdepth: representing number of underlying binary measurements aggregated (bitdepth = k => 2^k - 1 binary measurements averaged) + flux_gain: multiplicative factor controlling dynamic range of output + bitdepth: representing number of underlying binary measurements aggregated (2^bitdepth - 1) + force_gray: to disable RGB sensing even if the input images are color seed: random seed to use while sampling, ensures reproducibility mode: how to save binary frames batch_size: number of 
frames to write at once @@ -75,18 +97,27 @@ def spad( dataset = Dataset.from_path(input_path) transforms_new = copy.deepcopy(dataset.transforms or {}) shape = np.array(dataset.full_shape) - if gray: - shape[-1] = transforms_new["c"] = 1 - else: - shape[-1] = transforms_new["c"] = 3 + + has_alpha = (len(shape) == 4) and (shape[-1] in [2, 4]) # LA/RGBA + + if force_gray: + shape[-1] = 1 + (1 if has_alpha else 0) + elif shape[-1] in [3, 4]: # RGB/RGBA + # this warning could go to _spad_collate instead, + # but it swamps the logs if we do that + logger.warning("emulate.spad: mosaicing/demosaicing not implemented") + transforms_new["c"] = shape[-1] if mode.lower() == "img": ... elif mode.lower() == "npy": - # Default to bitpacking width - transforms_new["bitpack"] = True - transforms_new["bitpack_dim"] = 2 - shape[2] /= 8 + if bitdepth == 1: + # Default to bitpacking width + transforms_new["bitpack"] = True + transforms_new["bitpack_dim"] = 2 + shape[2] /= 8 + else: + transforms_new["bitpack"] = False else: raise ValueError(f"Mode should be one of 'img' or 'npy', got {mode}.") @@ -98,7 +129,8 @@ def spad( batch_size=batch_size, num_workers=os.cpu_count() or 1, collate_fn=functools.partial( - _spad_collate, mode=mode, rng=rng, factor=factor, bitdepth=bitdepth, gray=gray, is_tonemapped=is_tonemapped + _spad_collate, mode=mode, rng=rng, flux_gain=flux_gain, bitdepth=bitdepth, + force_gray=force_gray, is_tonemapped=is_tonemapped, has_alpha=has_alpha, ), ) diff --git a/visionsim/emulate/spc.py b/visionsim/emulate/spc.py index 1dbd1a3..5fae285 100644 --- a/visionsim/emulate/spc.py +++ b/visionsim/emulate/spc.py @@ -7,23 +7,25 @@ def emulate_spc( - img: npt.NDArray[np.floating], factor: float = 1.0, bitdepth: int = 1, rng: np.random.Generator | None = None + img: npt.NDArray[np.floating], + flux_gain: float = 1.0, + bitdepth: int = 1, + rng: np.random.Generator | None = None, ) -> npt.NDArray[np.integer]: """Perform bernoulli sampling on linearized RGB frames to yield binary 
frames. Args: img (npt.ArrayLike): Linear intensity image to sample binary frame from. - factor (float, optional): Arbitrary corrective brightness factor. Defaults to 1.0. - bitdepth(int, optional): number of underlying binary measurements to average (default 1) + flux_gain(float, default=1): scale factor to convert img in [0, 1] range to other flux levels + bitdepth (int, default=1): representing a SPAD sensor that averages `2^bitdepth - 1` binary samples internally for each measurement, operating at framerate `(1 / (2^bitdepth - 1))` of a binary SPAD sensor rng (np.random.Generator, optional): Optional random number generator. Defaults to none. Returns: - Binary single photon frame + binomial-distributed single-photon frame """ - # Perform bernoulli sampling (equivalent to binomial w/ n=1) rng = np.random.default_rng() if rng is None else rng - N = (2**bitdepth) - 1 - return (1.0 / N) * rng.binomial(cast(npt.NDArray[np.integer], N), 1.0 - np.exp(-img * factor)) + N = int(2**bitdepth) - 1 + return (1.0 / N) * rng.binomial(cast(npt.NDArray[np.integer], N), 1.0 - np.exp(-img * flux_gain)) def spc_avg_to_rgb( diff --git a/visionsim/utils/imgproc.py b/visionsim/utils/imgproc.py index 06856c1..95eae1d 100644 --- a/visionsim/utils/imgproc.py +++ b/visionsim/utils/imgproc.py @@ -32,7 +32,7 @@ def _unsharp_mask1(img): np.clip(img + (amount * (img - img_smooth)), 0, 1, out=img) else: # work with copy and convert back - if ~isinstance(img, np.uint8): + if not isinstance(img, np.uint8): raise NotImplementedError("_unsharp_mask1 expects (float | uint8)") img_float = (1.0/255) * img.astype(np.float) img_smooth_float = (1.0/255) * img_smooth.astype(np.float) From f25fee0bb1b390afb8d00ff4b2633f342dc22abe Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Wed, 28 Jan 2026 21:58:18 -0600 Subject: [PATCH 18/24] forgot linting --- visionsim/cli/emulate.py | 39 +++++++++++++++++++++----------------- visionsim/emulate/rgb.py | 2 +- visionsim/emulate/spc.py | 2 +- 
visionsim/utils/imgproc.py | 15 ++++++++------- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index cba0267..b18d9a9 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -1,55 +1,54 @@ from __future__ import annotations import functools +import logging import os from pathlib import Path -import logging -logger = logging.getLogger(__name__) - import numpy as np from typing_extensions import Literal from visionsim.emulate.rgb import emulate_rgb_from_sequence +logger = logging.getLogger(__name__) -def _spad_collate(batch, *, mode, rng, is_tonemapped=True, has_alpha=False, - flux_gain=1, bitdepth=1, force_gray=False): + +def _spad_collate(batch, *, mode, rng, is_tonemapped=True, has_alpha=False, flux_gain=1, bitdepth=1, force_gray=False): """Use default collate function on batch and then simulate SPAD, enabling compute to be done in threads""" from visionsim.dataset import default_collate from visionsim.emulate.spc import emulate_spc from visionsim.utils.color import srgb_to_linearrgb idxs, imgs, poses = default_collate(batch) - + if has_alpha: - imgs_alpha = imgs[...,-1:] + imgs_alpha = imgs[..., -1:] if np.issubdtype(imgs.dtype, np.floating): # returned imgs are np.uint8 imgs_alpha = (255 * imgs_alpha).astype(np.uint8) - imgs = imgs[...,:-1] + imgs = imgs[..., :-1] if is_tonemapped: # Image has been tonemapped so undo mapping imgs = srgb_to_linearrgb((imgs / 255.0).astype(float)) else: imgs = imgs.astype(float) / 255.0 - + # The warning about missing mosaicing should really be here... 
# but it will swamp the logs if we do that, so it got moved to the calling # code if imgs.shape[-1] == 3: if force_gray: w = (0.2125, 0.7154, 0.0721) - imgs = (imgs[...,0] * w[0]) + (imgs[...,1] * w[1]) + (imgs[...,2] * w[2]) + imgs = (imgs[..., 0] * w[0]) + (imgs[..., 1] * w[1]) + (imgs[..., 2] * w[2]) # else: # logger.warning("emulate.spad: mosaicing/demosaicing not implemented") imgs = emulate_spc(imgs, flux_gain=flux_gain, bitdepth=bitdepth, rng=rng) - imgs = (255*imgs).astype(np.uint8) + imgs = (255 * imgs).astype(np.uint8) if has_alpha: if imgs.ndim < 4: - imgs = imgs[...,np.newaxis] + imgs = imgs[..., np.newaxis] imgs = np.concatenate((imgs, imgs_alpha), axis=3) if (mode.lower() == "npy") and (bitdepth == 1) and not has_alpha: @@ -97,12 +96,12 @@ def spad( dataset = Dataset.from_path(input_path) transforms_new = copy.deepcopy(dataset.transforms or {}) shape = np.array(dataset.full_shape) - - has_alpha = (len(shape) == 4) and (shape[-1] in [2, 4]) # LA/RGBA + + has_alpha = (len(shape) == 4) and (shape[-1] in [2, 4]) # LA/RGBA if force_gray: shape[-1] = 1 + (1 if has_alpha else 0) - elif shape[-1] in [3, 4]: # RGB/RGBA + elif shape[-1] in [3, 4]: # RGB/RGBA # this warning could go to _spad_collate instead, # but it swamps the logs if we do that logger.warning("emulate.spad: mosaicing/demosaicing not implemented") @@ -129,8 +128,14 @@ def spad( batch_size=batch_size, num_workers=os.cpu_count() or 1, collate_fn=functools.partial( - _spad_collate, mode=mode, rng=rng, flux_gain=flux_gain, bitdepth=bitdepth, - force_gray=force_gray, is_tonemapped=is_tonemapped, has_alpha=has_alpha, + _spad_collate, + mode=mode, + rng=rng, + flux_gain=flux_gain, + bitdepth=bitdepth, + force_gray=force_gray, + is_tonemapped=is_tonemapped, + has_alpha=has_alpha, ), ) diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index 2f3988c..2397862 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -91,7 +91,7 @@ def emulate_rgb_from_sequence( if 
denoise_sigma != 0.0: patch = gaussian_filter(patch, denoise_sigma) if sharpen_weight != 0.0: - patch = unsharp_mask(patch, sigma=max(1,denoise_sigma), amount=sharpen_weight) + patch = unsharp_mask(patch, sigma=max(1, denoise_sigma), amount=sharpen_weight) # Convert to sRGB color space for viewing and quantize to 8-bits patch = linearrgb_to_srgb(patch.astype(np.double)) diff --git a/visionsim/emulate/spc.py b/visionsim/emulate/spc.py index 5fae285..52c3d50 100644 --- a/visionsim/emulate/spc.py +++ b/visionsim/emulate/spc.py @@ -9,7 +9,7 @@ def emulate_spc( img: npt.NDArray[np.floating], flux_gain: float = 1.0, - bitdepth: int = 1, + bitdepth: int = 1, rng: np.random.Generator | None = None, ) -> npt.NDArray[np.integer]: """Perform bernoulli sampling on linearized RGB frames to yield binary frames. diff --git a/visionsim/utils/imgproc.py b/visionsim/utils/imgproc.py index 95eae1d..ff4aa32 100644 --- a/visionsim/utils/imgproc.py +++ b/visionsim/utils/imgproc.py @@ -4,6 +4,7 @@ import numpy.typing as npt from scipy.ndimage import gaussian_filter + def unsharp_mask( img: npt.ArrayLike, sigma: float = 1.0, @@ -26,6 +27,7 @@ def unsharp_mask( References: .. [1] Wikipedia. Unsharp masking. """ + def _unsharp_mask1(img): img_smooth = gaussian_filter(img, sigma) if np.issubdtype(img.dtype, np.floating): @@ -34,24 +36,23 @@ def _unsharp_mask1(img): # work with copy and convert back if not isinstance(img, np.uint8): raise NotImplementedError("_unsharp_mask1 expects (float | uint8)") - img_float = (1.0/255) * img.astype(np.float) - img_smooth_float = (1.0/255) * img_smooth.astype(np.float) + img_float = (1.0 / 255) * img.astype(np.float) + img_smooth_float = (1.0 / 255) * img_smooth.astype(np.float) np.clip(img_float + (amount * (img_float - img_smooth_float)), 0, 1, out=img_float) - img[...] = (255*img_float).astype(np.uint8) + img[...] 
= (255 * img_float).astype(np.uint8) return has_alpha = (img.ndim == 3) and (img.shape[2] in [2, 4]) if has_alpha: - alpha_channel = img[:,:,-1:] - img = img[:,:,:-1] + alpha_channel = img[:, :, -1:] + img = img[:, :, :-1] has_color = (img.ndim == 3) and (img.shape[2] == 3) if has_color: # do each channel individually for c in range(3): - _unsharp_mask1(img[:,:,c]) + _unsharp_mask1(img[:, :, c]) else: _unsharp_mask1(img) if has_alpha: img = np.concatenate((img, alpha_channel), axis=-1) return img - From ce8cc9169ed0adfb986d2ddd231b0c7d132737bd Mon Sep 17 00:00:00 2001 From: Sacha Jungerman Date: Tue, 24 Feb 2026 12:13:17 -0600 Subject: [PATCH 19/24] fix typing issues --- visionsim/emulate/rgb.py | 4 +++- visionsim/utils/color.py | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index 3418467..47b4450 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import cast + import numpy as np import numpy.typing as npt from scipy.ndimage import gaussian_filter @@ -70,7 +72,7 @@ def emulate_rgb_from_sequence( # Roughly translating the model in Eqs. (1,2) and Fig. 1 of Hasinoff et al. # Perform poisson sampling rng = np.random.default_rng() if rng is None else rng - patch = rng.poisson(patch).astype(float) + patch = cast(np.ndarray, rng.poisson(patch)).astype(float) # full-well capacity patch = np.clip(patch, 0, fwc) # readout noise diff --git a/visionsim/utils/color.py b/visionsim/utils/color.py index 2fb2326..ffed37b 100644 --- a/visionsim/utils/color.py +++ b/visionsim/utils/color.py @@ -169,10 +169,14 @@ def raw_to_rgb_bayer( # via a sequence of 1D convolutions, probably not worth it in hindsight... 
h_avg_nhood_x = np.array([[0.5, 0.0, 0.5]]) - av_R = correlate(correlate(R, h_avg_nhood_x, mode="nearest"), h_avg_nhood_x.T, mode="nearest") + av_R: npt.NDArray = correlate( + correlate(R, h_avg_nhood_x, mode="nearest"), h_avg_nhood_x.T, mode="nearest" + ) diff_R = R - av_R - av_B = correlate(correlate(B, h_avg_nhood_x, mode="nearest"), h_avg_nhood_x.T, mode="nearest") + av_B: npt.NDArray = correlate( + correlate(B, h_avg_nhood_x, mode="nearest"), h_avg_nhood_x.T, mode="nearest" + ) diff_B = B - av_B rgb[::2, ::2, 1] += (1 / 2) * diff_R @@ -180,12 +184,14 @@ def raw_to_rgb_bayer( rgb[1::2, 1::2, 1] += (1 / 2) * diff_B rgb[1::2, 1::2, 0] += (3 / 4) * diff_R - av_G2_at_G1 = correlate(correlate(G2, h_avg_fw_x, mode="nearest"), h_avg_bw_x.T, mode="nearest") + av_G2_at_G1: npt.NDArray = correlate( + correlate(G2, h_avg_fw_x, mode="nearest"), h_avg_bw_x.T, mode="nearest" + ) av_G1_at_G1_R = correlate(G1, h_avg_nhood_x, mode="nearest") - correlate( G1, 0.5 * h_avg_nhood_x.T, mode="nearest" ) - av_G1_R = (1 / 5) * (2 * av_G1_at_G1_R + 4 * av_G2_at_G1) + av_G1_R: npt.NDArray = (1 / 5) * (2 * av_G1_at_G1_R + 4 * av_G2_at_G1) diff_G1_R = G1 - av_G1_R rgb[::2, 1::2, 0] += (5 / 8) * diff_G1_R @@ -196,7 +202,9 @@ def raw_to_rgb_bayer( diff_G1_B = G1 - av_G1_B rgb[::2, 1::2, 2] += (5 / 8) * diff_G1_B - av_G1_at_G2 = correlate(correlate(G1, h_avg_bw_x, mode="nearest"), h_avg_fw_x.T, mode="nearest") + av_G1_at_G2: npt.NDArray = correlate( + correlate(G1, h_avg_bw_x, mode="nearest"), h_avg_fw_x.T, mode="nearest" + ) av_G2_at_G2_R = correlate(G2, h_avg_nhood_x.T, mode="nearest") - correlate( G2, 0.5 * h_avg_nhood_x, mode="nearest" From 5e15ca7df4f95b744679df1c46f4466f3120cf44 Mon Sep 17 00:00:00 2001 From: Sacha Jungerman Date: Wed, 4 Mar 2026 10:14:09 -0600 Subject: [PATCH 20/24] add bitplanes arg for spc emulation and in schemas, use it in ffmpeg.animate too, add migration --- visionsim/cli/__init__.py | 10 +++++++- visionsim/cli/emulate.py | 30 +++++++++++++++--------- 
visionsim/cli/ffmpeg.py | 44 ++++++++++++++++++++++++++++-------- visionsim/cli/interpolate.py | 7 +++++- visionsim/dataset/models.py | 6 +++-- visionsim/emulate/rgb.py | 4 ++-- visionsim/emulate/spc.py | 8 +++---- visionsim/simulate/schema.py | 29 ++++++++++++------------ 8 files changed, 93 insertions(+), 45 deletions(-) diff --git a/visionsim/cli/__init__.py b/visionsim/cli/__init__.py index 409c510..31cbda6 100644 --- a/visionsim/cli/__init__.py +++ b/visionsim/cli/__init__.py @@ -8,8 +8,9 @@ import shlex import subprocess import sys +from functools import lru_cache from pathlib import Path -from typing import overload +from typing import Any, Literal, overload import tyro from natsort import natsorted @@ -81,6 +82,13 @@ def _validate_directories( return input_path, output_path +@lru_cache +def _log_once(value: Any, msg: str, level: Literal["debug", "info", "warning", "error", "critical"] = "warning") -> Any: + """Log a message once per unique value, returns the value.""" + getattr(_log, level)(msg) + return value + + def _run( command: list[str] | str, shell: bool = False, diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index 89033a4..1ac51cd 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -23,7 +23,8 @@ def spad( input_dir: Path, output_dir: Path, flux_gain: float = 1.0, - bitdepth: int = 1, + bitplanes: int = 1, + bitdepth: int | None = None, force_gray: bool = False, seed: int = 2147483647, pattern: str | None = None, @@ -32,8 +33,8 @@ def spad( ) -> None: """Perform binomial sampling on linearized RGB frames to yield (summed) single photon frames - This will save numpy files which may be bitpacked (when bitdepth == 1) and may have different dtypes - depending on the bitdepth. The shape of the output arrays will be (max_size, h, w, c) or (remainder, h, w, c) + This will save numpy files which may be bitpacked (when bitplanes == 1) and may have different dtypes + depending on the number of summed bitplanes. 
The shape of the output arrays will be (max_size, h, w, c) or (remainder, h, w, c) where remainder = len(dataset) % max_size, where the width dimension is ceil(width / 8) when bitpacked. If the input contains alpha channel (determined by the last dimension of the input images), it will be stripped. @@ -44,7 +45,8 @@ def spad( pattern: used to find source image files to convert to single photon frames, not needed when ``input_dir`` points to a valid dataset. flux_gain: multiplicative factor controlling dynamic range of output - bitdepth: representing number of underlying binary measurements aggregated (2**bitdepth - 1) + bitplanes: number of summed binary measurements + bitdepth: if set, ``bitplanes`` will be overridden to ``2**bitdepth - 1`` force_gray: to disable RGB sensing even if the input images are color seed: random seed to use while sampling, ensures reproducibility max_size: maximum number of frames per output array before rolling over to new file @@ -52,6 +54,7 @@ def spad( """ from numpy.lib.format import open_memmap + from visionsim.cli import _log, _log_once from visionsim.dataset import Dataset, Metadata from visionsim.emulate.spc import emulate_spc from visionsim.utils.color import rgb_to_grayscale, srgb_to_linearrgb @@ -69,12 +72,15 @@ def spad( else: dataset = Dataset.from_path(input_dir) - if bitdepth > 64: - raise ValueError("Bitdepth must be <= 64") + if bitdepth is not None: + _log.info(f"Overriding bitplanes to {2**bitdepth - 1} since bitdepth is set to {bitdepth}.") + bitplanes = 2**bitdepth - 1 - # Map bitdepth to the smallest uint type that can hold it (minimum 8 bits) + # Map bitplanes to the smallest uint type that can hold it (minimum 8 bits) out_dtype = next( - dtype for limit, dtype in [(8, np.uint8), (16, np.uint16), (32, np.uint32), (64, np.uint64)] if bitdepth <= limit + dtype + for limit, dtype in [(8, np.uint8), (16, np.uint16), (32, np.uint32), (64, np.uint64)] + if bitplanes <= 2**limit - 1 ) rng = 
np.random.default_rng(int(seed)) @@ -93,21 +99,23 @@ def spad( else: data = data.astype(float) / 255.0 - if _strip_alpha(data.shape): + if len(data.shape) == 3 and data.shape[-1] in (2, 4): # LA/RGBA + _log_once(data.shape, "Alpha channel detected, ignoring it.", "info") data = data[..., :-1] if force_gray: data = rgb_to_grayscale(data) - imgs = emulate_spc(data, flux_gain=flux_gain, bitdepth=bitdepth, rng=rng) + imgs = emulate_spc(data, flux_gain=flux_gain, bitplanes=bitplanes, rng=rng) offset = i % max_size file_path = output_dir / f"{i // max_size:04}.npy" transform["file_path"] = file_path.name + transform["bitplanes"] = bitplanes transform["offset"] = offset h, w, c = data.shape - if bitdepth == 1: + if bitplanes == 1: # Default to bitpacking width imgs = imgs >= 0.5 imgs = np.packbits(imgs, axis=1) diff --git a/visionsim/cli/ffmpeg.py b/visionsim/cli/ffmpeg.py index f9bf5bf..0b12441 100644 --- a/visionsim/cli/ffmpeg.py +++ b/visionsim/cli/ffmpeg.py @@ -1,14 +1,14 @@ from __future__ import annotations from pathlib import Path -from typing import cast +from typing import Any, cast def animate( input_dir: Path, pattern: str | None = None, outfile: Path = Path("out.mp4"), - fps: int = 25, + fps: float | None = None, crf: int = 22, vcodec: str = "libx264", step: int = 1, @@ -20,13 +20,15 @@ def animate( """Combine generated frames into an MP4 using ffmpeg wizardry. This is roughly equivalent to running the "image2" demuxer in ffmpeg, with the added benefit of being able to - skip frames using a step size, strip alpha channels from PNGs, and automatically handling the case where the input frames are numpy arrays. + skip frames using a step size, strip alpha channels from PNGs, and automatically handling the case where the + input frames are numpy arrays. If using a dataset as input, the ``bitplanes`` attribute will be used to normalize + the data to uint8 for visualization. 
Args: input_dir: directory in which to look for frames, pattern: If provided search for files matching this pattern. Otherwise, look for a valid dataset in the input directory. outfile: where to save generated mp4 - fps: frames per second in video + fps: frames per second in video, if None, will be inferred from dataset crf: constant rate factor for video encoding (0-51), lower is better quality but more memory vcodec: video codec to use (either libx264 or libx265) step: drop some frames when making video, use frames 0+step*n @@ -41,7 +43,7 @@ def animate( import numpy as np from rich.progress import track - from visionsim.cli import _run + from visionsim.cli import _log, _log_once, _run from visionsim.dataset import Dataset if _run("ffmpeg -version", hide=True).returncode != 0: @@ -57,6 +59,28 @@ def animate( else: dataset = Dataset.from_path(input_dir) + # Infer fps from dataset if not provided + if fps is None: + if dataset.cameras: + framerates = set(c.fps for c in dataset.cameras) + if len(framerates) != 1: + raise ValueError(f"All cameras must have the same fps. Found: {framerates}") + fps = next(iter(framerates)) + + if fps: + if fps > 240.0: + _log.warning( + f"Inferred frame rate of {fps} fps may cause issues with some codecs. " + "Consider manually setting a lower fps and using a step size to reduce the number of frames." 
+ ) + else: + _log.warning("No fps found in dataset, using default frame rate of 25 fps.") + fps = 25.0 + else: + _log.info("No cameras found in dataset, using default frame rate of 25 fps.") + fps = 25.0 + + # Ensure all files are of the same type exts = {p.suffix for p in dataset.paths} if len(exts) != 1: @@ -86,15 +110,17 @@ def animate( # Iterate over dataset with step and extract frame from npy if neccesary if ext.lower() == ".npy": for i, idx in enumerate(track(range(0, len(dataset), step), description="Extracting frames")): - data, transform = dataset[idx] + data, transform = cast(tuple[np.ndarray, dict[str, Any]], dataset[idx]) - if cast(dict, transform).get("bitpack_dim"): - data = np.array(data * 255).astype(np.uint8) + if transform.get("bitplanes"): + _log_once(transform.get("bitplanes"), "Bitplanes detected, normalizing to uint8.", "info") + data = np.array(data / transform["bitplanes"] * 255).astype(np.uint8) + # Handle grayscale images by repeating the channel if data.ndim == 3 and data.shape[-1] != 3: # type: ignore data = np.repeat(data, 3, axis=-1) - # TODO: Normalize by sum (if bitdepth != 1) and maybe tonemap/invert_response + # TODO: Maybe tonemap/invert_response iio.imwrite(tmpdirname / f"{i:09}.png", data) else: for i, p in enumerate(dataset.paths[::step]): diff --git a/visionsim/cli/interpolate.py b/visionsim/cli/interpolate.py index eb6316c..815cc82 100644 --- a/visionsim/cli/interpolate.py +++ b/visionsim/cli/interpolate.py @@ -102,9 +102,14 @@ def dataset( _log.info("Interpolating poses...") interp_poses = interpolate_poses(dataset.poses, n=n) interp_paths = natsorted(output_dir.glob("**/*.png")) + camera = next(iter(dataset.cameras)) + + if camera.fps: + camera.fps *= n + Metadata.from_frames( frames=[ dict(file_path=p.relative_to(output_dir), transform_matrix=m) for p, m in zip(interp_paths, interp_poses) ], - camera=next(iter(dataset.cameras)), + camera=camera, ).save(output_dir / "transforms.json") diff --git 
a/visionsim/dataset/models.py b/visionsim/dataset/models.py index ce20f8e..a288831 100644 --- a/visionsim/dataset/models.py +++ b/visionsim/dataset/models.py @@ -68,6 +68,8 @@ class Data(BaseModel): """path to data, usually an image or ndarray file""" bitpack_dim: int | None = None """dimension that has been bitpacked""" + bitplanes: int | None = None + """number of summed bitplanes in image""" class Frame(Camera, Data): @@ -83,8 +85,8 @@ class Frame(Camera, Data): class Metadata(Camera): """A superset of the `Nerfstudio `_ - ``transforms.json`` format with a few additional fields such as additional data paths (eg: flow/segmentation) - and a channels dimension.""" + ``transforms.json`` format which enables use of numpy arrays for single photon data, and allows for additional + data paths (eg: flow/segmentation) and metadata attributes such as a channels dimension.""" _REQUIRED_FIELDS: ClassVar[tuple[str, ...]] = ("fl_x", "fl_y", "cx", "cy", "h", "w") _data_types: set[str] diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index 47b4450..85ee95f 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -25,9 +25,9 @@ def emulate_rgb_from_sequence( sharpen_weight: float = 0.0, rng: np.random.Generator | None = None, ) -> npt.NDArray: - """Emulates a conventional RGB camera from a sequence of intensity frames. + """Emulates a conventional RGB camera [1]_ from a sequence of intensity frames. - For camera model see [1]. For demosaicing details see :func:`raw_to_rgb_bayer `. + For demosaicing details see :func:`raw_to_rgb_bayer `. 
Note: Motion-blur is approximated by averaging consecutive ground truth frames, diff --git a/visionsim/emulate/spc.py b/visionsim/emulate/spc.py index ba1f9e9..429620a 100644 --- a/visionsim/emulate/spc.py +++ b/visionsim/emulate/spc.py @@ -10,7 +10,7 @@ def emulate_spc( img: npt.NDArray[np.floating], flux_gain: float = 1.0, - bitdepth: int = 1, + bitplanes: int = 1, rng: np.random.Generator | None = None, ) -> npt.NDArray[np.integer]: """Perform bernoulli sampling on linearized RGB frames to yield binary frames. @@ -18,15 +18,15 @@ def emulate_spc( Args: img (npt.NDArray[np.floating]): Linear intensity image to sample binary frame from. flux_gain(float, optional): scale factor to convert img in [0, 1] range to other flux levels. Defaults to 1.0. - bitdepth (int, optional): representing a SPAD sensor that sums ``2**bitdepth - 1`` binary samples internally - for each measurement, operating at framerate ``1 / (2**bitdepth - 1)`` of a binary SPAD sensor. Defaults to 1. + bitplanes (int, optional): when bitplanes > 1, this represents a binomial SPAD sensor that sums binary samples internally + for each measurement, operating at framerate ``1 / bitplanes`` of a binary SPAD sensor. Defaults to 1. rng (np.random.Generator, optional): Optional random number generator. Defaults to none. 
Returns: npt.NDArray[np.integer]: binomial-distributed single-photon frame """ rng = np.random.default_rng() if rng is None else rng - return rng.binomial(cast(npt.NDArray[np.integer], 2**bitdepth - 1), 1.0 - np.exp(-img * flux_gain)) + return rng.binomial(cast(npt.NDArray[np.integer], bitplanes), 1.0 - np.exp(-img * flux_gain)) def spc_avg_to_rgb( diff --git a/visionsim/simulate/schema.py b/visionsim/simulate/schema.py index 451ab98..5c0dbbb 100644 --- a/visionsim/simulate/schema.py +++ b/visionsim/simulate/schema.py @@ -14,6 +14,7 @@ SqliteDatabase, TextField, ) +from playhouse.migrate import SqliteMigrator, migrate from playhouse.shortcuts import ThreadSafeDatabaseMetadata from playhouse.sqlite_ext import JSONField from typing_extensions import Self @@ -73,6 +74,7 @@ class _Data(_BaseModel): path = TextField() bitpack_dim = IntegerField(null=True) + bitplanes = IntegerField(null=True) class _Frame(_BaseModel): @@ -91,7 +93,7 @@ class _Metadata: """The ``.db`` equivalent of :class:`models.Metadata `""" def __init__(self, path: str | os.PathLike) -> None: - """Initialize a metadata instance. + """Initialize a metadata instance. Unlike :meth:`load`, this method does not automatically apply any migrations. Args: path (str | os.PathLike): Metadata dataset path @@ -100,26 +102,23 @@ def __init__(self, path: str | os.PathLike) -> None: @classmethod def load(cls, path: str | os.PathLike) -> Self: - """Same as :meth:`__init__`, added to better mirror :class:`models.Metadata `""" + """Initialize a metadata instance and apply any necessary migrations.""" instance = cls(path) instance._migrate() return instance def _migrate(self) -> None: """Apply any necessary database migrations.""" - # TODO: Implement database migrations if needed. Below is an example of how to do it for a new "date" field in _Camera model. 
- - # from playhouse.migrate import SqliteMigrator, migrate - - # db = SqliteDatabase(self.path, pragmas=_DEFAULT_PRAGMAS) - # with db.connection_context(): - # columns = db.get_columns(_Camera._meta.table_name) - # column_names = [c.name for c in columns] - - # if "date" not in column_names: - # migrator = SqliteMigrator(db) - # with db.atomic(): - # migrate(migrator.add_column(_Camera._meta.table_name, "date", _Camera.date)) + db = SqliteDatabase(self.path, pragmas=_DEFAULT_PRAGMAS) + with db.connection_context(): + table_name = _Data._meta.table_name # type: ignore + columns = db.get_columns(table_name) + column_names = [c.name for c in columns] + + if "bitplanes" not in column_names: + migrator = SqliteMigrator(db) + with db.atomic(): + migrate(migrator.add_column(table_name, "bitplanes", _Data.bitplanes)) @classmethod def from_dense_transforms(cls, path: str | os.PathLike, transforms: Iterator[dict[str, Any]]) -> Self: From bfd5aefd1bf035082b8acff86eb80b0dbe3ecffa Mon Sep 17 00:00:00 2001 From: Sacha Jungerman Date: Wed, 4 Mar 2026 10:43:49 -0600 Subject: [PATCH 21/24] fix interpolate dataset --- examples/quickstart.sh | 2 +- tasks.py | 2 +- visionsim/cli/interpolate.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/quickstart.sh b/examples/quickstart.sh index 601fb89..6f80fbc 100644 --- a/examples/quickstart.sh +++ b/examples/quickstart.sh @@ -2,7 +2,7 @@ visionsim blender.render-animation lego.blend quickstart/lego-gt/ --render-config.keyframe-multiplier=5.0 visionsim ffmpeg.animate --input-dir=quickstart/lego-gt/ --outfile=quickstart/preview.mp4 --step=5 --fps=25 --force -visionsim interpolate.frames --input-dir=quickstart/lego-gt/ --output-dir=quickstart/lego-interp/ --n=32 +visionsim interpolate.dataset --input-dir=quickstart/lego-gt/ --output-dir=quickstart/lego-interp/ --n=32 visionsim emulate.rgb --input-dir=quickstart/lego-interp/ --output-dir=quickstart/lego-rgb25fps/ --chunk-size=160 --readout-std=0 visionsim 
emulate.spad --input-dir=quickstart/lego-interp/ --output-dir=quickstart/lego-spc4kHz/ visionsim emulate.events --input-dir=quickstart/lego-gt/ --output-dir=quickstart/lego-dvs125fps/ --fps=125 diff --git a/tasks.py b/tasks.py index 20f0f54..0a7394f 100644 --- a/tasks.py +++ b/tasks.py @@ -146,7 +146,7 @@ def build_docs(c, preview=False, full=False): for i, n in enumerate((25, 50, 100, 200)): for cmd in ( f"visionsim blender.render-animation lego.blend interpolation/lego-{n:04}/ --keyframe-multiplier={n / 100} --width=320 --height=320", - f"visionsim interpolate.frames interpolation/lego-{n:04}/ -o interpolation/lego{n:04}-interp/ -n={int(64 / 2**i)}", + f"visionsim interpolate.dataset interpolation/lego-{n:04}/ -o interpolation/lego{n:04}-interp/ -n={int(64 / 2**i)}", f"gifski $(ls -1a interpolation/lego{n:04}-interp/frames/*.png | sed -n '1~8p') --fps 25 -o {DOCS_STATIC}/lego{n:04}-interp.gif", ): _run(c, cmd, echo=True, warn=True) diff --git a/visionsim/cli/interpolate.py b/visionsim/cli/interpolate.py index 815cc82..79794ef 100644 --- a/visionsim/cli/interpolate.py +++ b/visionsim/cli/interpolate.py @@ -87,7 +87,7 @@ def dataset( if method.lower() == "rife": rife( - input_dir, + dataset.root, output_dir, input_files=dataset.paths, exp=np.log2(n).astype(int), From 00650d21d7ec44a6bb11671eed3198769776603b Mon Sep 17 00:00:00 2001 From: Sacha Jungerman Date: Wed, 4 Mar 2026 10:44:17 -0600 Subject: [PATCH 22/24] remove unused strip_alpha helper --- visionsim/cli/emulate.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index 1ac51cd..780697e 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -1,6 +1,5 @@ from __future__ import annotations -import functools import math import shutil from pathlib import Path @@ -9,16 +8,6 @@ import numpy as np -@functools.lru_cache -def _strip_alpha(shape: tuple[int, ...]) -> bool: - from visionsim.cli import _log - - if len(shape) == 3 and 
shape[-1] in (2, 4): # LA/RGBA - _log.info("Alpha channel detected, ignoring it.") - return True - return False - - def spad( input_dir: Path, output_dir: Path, From 618cb3c072e1b417dc3c3f0b415ecc6e140d50b5 Mon Sep 17 00:00:00 2001 From: Sacha Jungerman Date: Wed, 4 Mar 2026 13:36:21 -0600 Subject: [PATCH 23/24] rename raw_to_rgb_bayer to raw_bayer_to_rgb --- visionsim/emulate/rgb.py | 6 +++--- visionsim/utils/color.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index 85ee95f..3b822fc 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -7,7 +7,7 @@ from scipy.ndimage import gaussian_filter from typing_extensions import Literal -from visionsim.utils.color import raw_to_rgb_bayer, rgb_to_raw_bayer +from visionsim.utils.color import raw_bayer_to_rgb, rgb_to_raw_bayer from visionsim.utils.imgproc import unsharp_mask @@ -27,7 +27,7 @@ def emulate_rgb_from_sequence( ) -> npt.NDArray: """Emulates a conventional RGB camera [1]_ from a sequence of intensity frames. - For demosaicing details see :func:`raw_to_rgb_bayer `. + For demosaicing details see :func:`raw_bayer_to_rgb `. 
Note: Motion-blur is approximated by averaging consecutive ground truth frames, @@ -87,7 +87,7 @@ def emulate_rgb_from_sequence( # ("off" is not a no-op, it still creates a full 3-channel image from 1, # albeit a bad one) if has_color and mosaic: - patch = raw_to_rgb_bayer(patch, method=demosaic) + patch = raw_bayer_to_rgb(patch, method=demosaic) # de-noising and sharpening if denoise_sigma != 0.0: diff --git a/visionsim/utils/color.py b/visionsim/utils/color.py index ffed37b..6bc2e9a 100644 --- a/visionsim/utils/color.py +++ b/visionsim/utils/color.py @@ -69,9 +69,10 @@ def rgb_to_grayscale(img: npt.NDArray[np.floating]) -> npt.NDArray[np.floating]: def rgb_to_raw_bayer(rgb: npt.NDArray, cfa_pattern: Literal["rggb"] = "rggb") -> npt.NDArray: - """Mosaicing + """Bayer Mosaicing Realizes a mosaiced CFA image as would be sampled by a real Bayer-patterned sensor + by simply subsampling the RGB image. Args: rgb (npt.NDArray): hypothetical true RGB signal @@ -90,7 +91,7 @@ def rgb_to_raw_bayer(rgb: npt.NDArray, cfa_pattern: Literal["rggb"] = "rggb") -> return raw -def raw_to_rgb_bayer( +def raw_bayer_to_rgb( raw: npt.NDArray, cfa_pattern: Literal["rggb"] = "rggb", method: Literal["off", "bilinear", "MHC04"] = "bilinear", From 151f0b09f09d0f101a1ef8ee3808894bad166718 Mon Sep 17 00:00:00 2001 From: Sacha Jungerman Date: Wed, 4 Mar 2026 19:17:49 -0600 Subject: [PATCH 24/24] make emulate rgb from seq independent from seq length --- visionsim/cli/emulate.py | 24 +++++++++++++++--------- visionsim/emulate/rgb.py | 37 +++++++++++++++---------------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/visionsim/cli/emulate.py b/visionsim/cli/emulate.py index 780697e..c6493d9 100644 --- a/visionsim/cli/emulate.py +++ b/visionsim/cli/emulate.py @@ -229,9 +229,9 @@ def rgb( output_dir: Path, chunk_size: int = 10, shutter_frac: float = 1.0, - readout_std: float = 0.0, + readout_std: float = 16.0, fwc: float | None = None, - flux_gain: float = 1.0, + flux_gain: 
float = 2.0**12, iso_gain: float = 1.0, adc_bitdepth: int = 12, mosaic: bool = False, @@ -264,11 +264,12 @@ def rgb( import imageio.v3 as iio import more_itertools as mitertools + from visionsim.cli import _log_once from visionsim.dataset import Dataset, Metadata from visionsim.emulate.rgb import emulate_rgb_from_sequence from visionsim.interpolate.pose import pose_interp from visionsim.simulate.blender import INDEX_PADDING, ITEMS_PER_SUBFOLDER - from visionsim.utils.color import srgb_to_linearrgb + from visionsim.utils.color import linearrgb_to_srgb, srgb_to_linearrgb from visionsim.utils.progress import ElapsedProgress if input_dir.resolve() == output_dir.resolve(): @@ -301,6 +302,10 @@ def rgb( # Assume images have been tonemapped and undo mapping imgs = srgb_to_linearrgb(imgs) + if len(imgs.shape) == 4 and imgs.shape[-1] in (2, 4): # LA/RGBA + _log_once(imgs.shape, "Alpha channel detected, ignoring it.", "info") + imgs = imgs[..., :-1] + rgb_img = emulate_rgb_from_sequence( imgs, readout_std=readout_std, @@ -319,16 +324,17 @@ def rgb( # We checked that there's only a single camera, just re-use any transforms dict (transform, *_), transforms_iter = mitertools.spy(transforms_iter) poses = np.array([t["transform_matrix"] for t in transforms_iter]) - transform["transform_matrix"] = pose_interp(poses, k=np.clip(len(poses) - 1, 2, 3))(0.5) + + if len(poses) > 1: + transform["transform_matrix"] = pose_interp(poses, k=min(len(poses) - 1, 3))(0.5) + else: + transform["transform_matrix"] = poses[0] + transform["file_path"] = outpath.relative_to(output_dir) transforms.append(transform) - # TODO: Alpha and grayscale? 
- # if rgb_img.shape[-1] == 1: - # rgb_img = np.repeat(rgb_img, 3, axis=-1) - outpath.parent.mkdir(exist_ok=True, parents=True) - iio.imwrite(outpath, (rgb_img * 255).astype(np.uint8)) + iio.imwrite(outpath, (linearrgb_to_srgb(rgb_img) * 255).astype(np.uint8)) progress.update(task, advance=chunk_size) if not pattern: diff --git a/visionsim/emulate/rgb.py b/visionsim/emulate/rgb.py index 3b822fc..703a0f0 100644 --- a/visionsim/emulate/rgb.py +++ b/visionsim/emulate/rgb.py @@ -17,7 +17,7 @@ def emulate_rgb_from_sequence( readout_std: float = 0.0, fwc: float = 10000.0, adc_bitdepth: int = 12, - flux_gain: float = 1.0, + flux_gain: float = 2**12, iso_gain: float = 1.0, mosaic: bool = False, demosaic: Literal["off", "bilinear", "MHC04"] = "off", @@ -57,50 +57,43 @@ def emulate_rgb_from_sequence( “Noise-optimal capture for high dynamic range photography,” CVPR 2010. """ - # Get sum of linear-intensity frames. + # Get mean of linear-intensity frames. burst_size = int(max(1, np.ceil(len(sequence) * shutter_frac))) sequence = np.array(sequence[:burst_size]) - patch = np.sum(sequence, axis=0) * flux_gain + patch = np.mean(sequence, axis=0) - has_alpha = (patch.ndim > 2) and (patch.shape[2] in [2, 4]) # LA/RGBA - patch_alpha = patch[..., -1:] if has_alpha else None - has_color = (patch.ndim > 2) and (patch.shape[2] in [3, 4]) # RGB/RGBA - patch = patch[..., :-1] if has_alpha else patch - if has_color and mosaic: + # Convert to raw bayer if mosaic, this just samples the color channels + if mosaic: patch = rgb_to_raw_bayer(patch) + # So far patch is GT motion blurred frame, without any noise or gain applied + # we need to scale it by flux gain and shutter fraction to get the correct amount of light + patch *= flux_gain * shutter_frac + # Roughly translating the model in Eqs. (1,2) and Fig. 1 of Hasinoff et al. 
# Perform poisson sampling rng = np.random.default_rng() if rng is None else rng patch = cast(np.ndarray, rng.poisson(patch)).astype(float) - # full-well capacity + + # Clip to full-well capacity, add readout noise, apply ISO gain patch = np.clip(patch, 0, fwc) - # readout noise patch += rng.normal(0, readout_std, size=patch.shape) - # apply ISO gain patch *= iso_gain - # assume perfect quantization in ADC + # Assume perfect quantization in ADC patch = np.round(np.clip(patch, 0, (2**adc_bitdepth - 1))) patch = patch / (2**adc_bitdepth - 1) - # de-mosaicing: necessary if data is mosaiced, so can't be `None`. + # De-mosaicing: necessary if data is mosaiced, so can't be `None`. # ("off" is not a no-op, it still creates a full 3-channel image from 1, # albeit a bad one) - if has_color and mosaic: + if mosaic: patch = raw_bayer_to_rgb(patch, method=demosaic) - # de-noising and sharpening + # De-noising and sharpening if denoise_sigma != 0.0: patch = gaussian_filter(patch, denoise_sigma) if sharpen_weight != 0.0: patch = unsharp_mask(patch, sigma=max(1, denoise_sigma), amount=sharpen_weight) - - if not has_color: # fake it anyway, because this is emulate____rgb____ - if len(patch.shape) < 3: - patch = np.atleast_3d(patch) - patch = np.repeat(patch, 3, axis=-1) - if has_alpha: - patch = np.concatenate((patch, patch_alpha), axis=-1) return patch