LiDUT-Depth/evaluate_make3d.py at master · haoo999/LiDUT-Depth · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# make3d 数据集评估， 以Monodepth2为例
from layers import disp_to_depth
import networks
import cv2
import os
import torch
import scipy.misc
from scipy import io
import numpy as np

load_weights_folder = './models/lidut-depth_640x192'
main_path = '../../datasets/make3d'
encoder_path = os.path.join(load_weights_folder, "encoder.pth")
decoder_path = os.path.join(load_weights_folder, "depth.pth")
encoder_dict = torch.load(encoder_path)
encoder = networks.LiteMono(model="lite-mono",
                            drop_path_rate=0.2,
                            width=640, height=192).to(torch.device('cuda'))
depth_decoder = networks.DepthDecoder(encoder.num_ch_enc,
                                      [0, 1, 2]).to(torch.device('cuda'))

model_dict = encoder.state_dict()
encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
depth_decoder.load_state_dict(torch.load(decoder_path))

# encoder.cuda()
encoder.eval()
# depth_decoder.cuda()
depth_decoder.eval()


def compute_errors(gt, pred):
    rmse = (gt - pred) ** 2
    rmse = np.sqrt(rmse.mean())

    rmse_log = (np.log10(gt) - np.log10(pred)) ** 2
    rmse_log = np.sqrt(rmse_log.mean())

    abs_rel = np.mean(np.abs(gt - pred) / gt)

    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    return abs_rel, sq_rel, rmse, rmse_log


with open(os.path.join(main_path, "make3d_test_files.txt")) as f:
    test_filenames = f.read().splitlines()
test_filenames = map(lambda x: x[4:], test_filenames)

depths_gt = []
images = []
ratio = 2
h_ratio = 1 / (1.33333 * ratio)
color_new_height = int(1704 / 2)
depth_new_height = 21
for filename in test_filenames:
    mat = io.loadmat(os.path.join(main_path, "Gridlaserdata", "depth_sph_corr-{}.mat".format(filename)))
    depths_gt.append(mat["Position3DGrid"][:, :, 3])

    image = cv2.imread(os.path.join(main_path, "Test134", "img-{}.jpg".format(filename)))
    image = image[int((2272 - color_new_height) / 2):int((2272 + color_new_height) / 2), :, :]
    images.append(image[:, :, ::-1])
depths_gt_resized = map(lambda x: cv2.resize(x, (305, 407), interpolation=cv2.INTER_NEAREST), depths_gt)
depths_gt_cropped = map(lambda x: x[int((55 - 21) / 2):int((55 + 21) / 2), :], depths_gt)

depths_gt_cropped = list(depths_gt_cropped)
errors = []
with torch.no_grad():
    for i in range(len(images)):
        input_color = images[i]
        input_color = cv2.resize(input_color / 255.0, (640, 192), interpolation=cv2.INTER_NEAREST)  # <----1
        input_color = torch.tensor(input_color, dtype=torch.float).permute(2, 0, 1)[None, :, :, :].cuda()
        output = depth_decoder(encoder(input_color))
        pred_disp, _ = disp_to_depth(output[("disp", 0)], 0.1, 100)  # <---2
        pred_disp = pred_disp.squeeze().cpu().numpy()
        depth_gt = depths_gt_cropped[i]
        depth_pred = 1 / pred_disp
        depth_pred = cv2.resize(depth_pred, depth_gt.shape[::-1], interpolation=cv2.INTER_NEAREST)
        mask = np.logical_and(depth_gt > 0, depth_gt < 70)
        depth_gt = depth_gt[mask]
        depth_pred = depth_pred[mask]
        depth_pred *= np.median(depth_gt) / np.median(depth_pred)
        depth_pred[depth_pred > 70] = 70
        errors.append(compute_errors(depth_gt, depth_pred))
    mean_errors = np.mean(errors, 0)

print(("{:>8} | " * 4).format("abs_rel", "sq_rel", "rmse", "rmse_log"))
print(("{: 8.3f} , " * 4).format(*mean_errors.tolist()))