From bd5e078d41dbb234f34309752a8f51184b7c55e1 Mon Sep 17 00:00:00 2001 From: kleinicke Date: Sat, 18 Oct 2025 14:34:05 +0200 Subject: [PATCH] Fix depth_uint8_decoding calculation For output, uint8 images were incorrectly converted by multiplying by 255 instead of correctly shifting by 8 bits, i.e. multiplying by 256. This might cause some serious issues and might even have harmed the training of the network. When the dataset for Foundation Stereo was computed, was this formula used to save the images as 24-bit? Or does this issue only occur in the training process? As long as it is consistent between dataset creation and training, this issue is fine for this network. But it should be noted for everyone else trying to train with the dataset that this formula was used. The previous formula basically interprets both 00000000 00000001 00000000 (1*255) and 00000000 00000000 11111111 (255) as 255. --- Utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils.py b/Utils.py index a14f719..2e2d59a 100644 --- a/Utils.py +++ b/Utils.py @@ -136,6 +136,6 @@ def vis_disparity(disp, min_val=None, max_val=None, invalid_thres=np.inf, color_ def depth_uint8_decoding(depth_uint8, scale=1000): depth_uint8 = depth_uint8.astype(float) - out = depth_uint8[...,0]*255*255 + depth_uint8[...,1]*255 + depth_uint8[...,2] + out = depth_uint8[...,0]*256*256 + depth_uint8[...,1]*256 + depth_uint8[...,2] return out/float(scale)