From 9d707420f714004f931b8018dcd97ebfda600d62 Mon Sep 17 00:00:00 2001 From: Bartek Olechno Date: Tue, 22 Jun 2021 15:47:47 +0200 Subject: [PATCH 01/13] gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..732ac30 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +examples\ +results\ From a1d220f95f6e8c331fbdccf200c9d14a9ff81ad5 Mon Sep 17 00:00:00 2001 From: Bartek Olechno Date: Tue, 22 Jun 2021 15:48:47 +0200 Subject: [PATCH 02/13] move to python 3 --- InGAN.py | 2 +- SceneScripts.py | 8 +- networks.py | 12 +- supp_video.py | 6 +- test.py | 454 +++++++++++++++++++++++----------------------- train.py | 6 +- train_supp_mat.py | 8 +- util.py | 28 +-- 8 files changed, 264 insertions(+), 260 deletions(-) diff --git a/InGAN.py b/InGAN.py index 05acdc6..a7226c7 100644 --- a/InGAN.py +++ b/InGAN.py @@ -134,7 +134,7 @@ def resume(self, resume_path, test_flag=False): if len(missing): warnings.warn('Missing the following state dicts from checkpoint: {}'.format(', '.join(missing))) - print('resuming checkpoint {}'.format(self.conf.resume)) + print(('resuming checkpoint {}'.format(self.conf.resume))) def test(self, input_tensor, output_size, rand_affine, input_size, run_d_pred=True, run_reconstruct=True): with torch.no_grad(): diff --git a/SceneScripts.py b/SceneScripts.py index f8dc142..5659e95 100644 --- a/SceneScripts.py +++ b/SceneScripts.py @@ -188,7 +188,7 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh elif script_name == 'random': stops = np.random.rand(10, 4) * np.array([max_v-min_v, max_h-min_h, 2*max_t, 2*max_t])[None, :] + np.array([min_v, min_h, -max_t, -max_t])[None, :] stops = np.vstack([stops, [1, 1, 0, 0]]) - print stops + print(stops) size_v = np.concatenate([l(stop_0[0], stop_1[0], frames_per_resize) for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops)]) @@ -206,8 +206,8 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh stops_l = np.random.rand(11) * 2 * max_t - max_t stops_l[-1] = 0 stops_r = np.random.rand(11) * max_t * (stops_l / np.abs(stops_l)) - stops = zip(stops_l, stops_r) - print stops + stops = list(zip(stops_l, stops_r)) + print(stops) size_h = np.concatenate([ l(1, 1, 20 * frames_per_resize)]) @@ -220,7 +220,7 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops)]) - return [[-1, -1, -1, -1]] * 20 + zip(size_v, size_h, shift_l, shift_r) * repeat if show_input else zip(size_v, size_h, shift_l, shift_r) * repeat + return [[-1, -1, -1, -1]] * 20 + list(zip(size_v, size_h, shift_l, shift_r)) * repeat if show_input else list(zip(size_v, size_h, shift_l, shift_r)) * repeat INPUT_DICT = { diff --git a/networks.py b/networks.py index 449639f..e3f341f 100644 --- a/networks.py +++ b/networks.py @@ -34,13 +34,13 @@ def __init__(self, num_features): def forward(self, input_tensor): local_mean = self.get_local_mean(input_tensor) - print local_mean + print(local_mean) centered_input_tensor = input_tensor - local_mean - print centered_input_tensor + print(centered_input_tensor) squared_diff = centered_input_tensor ** 2 - print squared_diff + print(squared_diff) local_std = self.get_var(squared_diff) ** 0.5 - print local_std + print(local_std) normalized_tensor = centered_input_tensor / (local_std + 1e-8) return normalized_tensor # * self.weight[None, :, None, None] + self.bias[None, :, None, None] @@ -258,7 +258,7 @@ def forward(self, input_tensor, scale_weights): map_size = aggregated_result_maps_from_all_scales.shape[2:] # Run all nets over all scales and aggregate the interpolated results - for net, scale_weight, i in zip(self.nets[1:], scale_weights[1:], range(1, len(scale_weights))): + for net, scale_weight, i in zip(self.nets[1:], scale_weights[1:], list(range(1, len(scale_weights)))): downscaled_image = f.interpolate(input_tensor, scale_factor=self.scale_factor**(-i), mode='bilinear') result_map_for_current_scale = net(downscaled_image) upscaled_result_map_for_current_scale = f.interpolate(result_map_for_current_scale, @@ -279,7 +279,7 @@ def __init__(self, n_layers, scale=0.5, base_channels=64, use_bias=True): in_channel_power = scale > 1 out_channel_power = scale < 1 - i_range = range(n_layers) if scale < 1 else range(n_layers-1, -1, -1) + i_range = list(range(n_layers)) if scale < 1 else list(range(n_layers-1, -1, -1)) for i in i_range: self.conv_layers[i] = nn.Sequential(nn.ReflectionPad2d(1), diff --git a/supp_video.py b/supp_video.py index 535b194..ec0e5c2 100644 --- a/supp_video.py +++ b/supp_video.py @@ -93,10 +93,10 @@ def generate_full_video(video_script, frame_shape): cur_scene_script_param = scene_script_param[:] if scene_script_param[1] is None: cur_scene_script_param[1] = cur_frame_shape[0] * 1.0 / input_tensor.shape[2] - print 'max scale vertical:', cur_scene_script_param[1] + print('max scale vertical:', cur_scene_script_param[1]) if cur_scene_script_param[3] is None: cur_scene_script_param[3] = cur_frame_shape[1] * 1.0 / input_tensor.shape[3] - print 'max scale horizontal:', cur_scene_script_param[3] + print('max scale horizontal:', cur_scene_script_param[3]) scene_script = make_scene_script(scene_script_name, *cur_scene_script_param) @@ -106,7 +106,7 @@ def generate_full_video(video_script, frame_shape): scene = generate_one_scene(gan, input_tensor, scene_script, np.array([cur_frame_shape[0], cur_frame_shape[1]]), center) partial_screen_scenes.append(scene) - print 'Done with %s, (scene %d/%d)' % (name, i + 1, n_scenes) + print('Done with %s, (scene %d/%d)' % (name, i + 1, n_scenes)) scene = np.concatenate(partial_screen_scenes, axis=concat_axis) if len(partial_screen_scenes) > 1 else partial_screen_scenes[0] diff --git a/test.py b/test.py index 0cc38b6..629e16b 100644 --- a/test.py +++ b/test.py @@ -1,226 +1,228 @@ -from networks import GeoTransform -from PIL import Image -import util -from InGAN import InGAN -from configs import Config -from traceback import print_exc -from skvideo.io import FFmpegWriter -import os -from non_rect import * - - -def test_one_scale(gan, input_tensor, scale, must_divide, affine=None, return_tensor=False, size_instead_scale=False): - with torch.no_grad(): - in_size = input_tensor.shape[2:] - if size_instead_scale: - out_size = scale - else: - out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), - np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide)) - - output_tensor, _, _ = gan.test(input_tensor=input_tensor, - input_size=in_size, - output_size=out_size, - rand_affine=affine, - run_d_pred=False, - run_reconstruct=False) - if return_tensor: - return output_tensor[1] - else: - return util.tensor2im(output_tensor[1]) - - -def concat_images(images, margin, input_spot): - h_sizes = [im.shape[0] for im in zip(*images)[0]] - w_sizes = [im.shape[1] for im in images[0]] - h_total_size = np.sum(h_sizes) + margin * (len(images) - 1) - w_total_size = np.sum(w_sizes) + margin * (len(images) - 1) - - collage = np.ones([h_total_size, w_total_size, 3]) * 255 - for i in range(len(images)): - for j in range(len(images)): - top_left_corner_h = int(np.sum(h_sizes[:j]) + j * margin) - top_left_corner_w = int(np.sum(w_sizes[:i]) + i * margin) - bottom_right_corner_h = int(top_left_corner_h + h_sizes[j]) - bottom_right_corner_w = int(top_left_corner_w + w_sizes[i]) - - if [i, j] == input_spot: - collage[top_left_corner_h - margin/2: bottom_right_corner_h + margin/2, - top_left_corner_w - margin/2: bottom_right_corner_w + margin/2, - :] = [255, 0, 0] - collage[top_left_corner_h:bottom_right_corner_h, top_left_corner_w:bottom_right_corner_w] = images[j][i] - - return collage - - -def generate_images_for_collage(gan, input_tensor, scales, must_divide): - # NOTE: scales here is different from in the other funcs: here we only need 1d scales. - # Prepare output images list - output_images = [[[None] for _ in range(len(scales))] for _ in range(len(scales))] - - # Run over all scales and test the network for each one - for i, scale_h in enumerate(scales): - for j, scale_w in enumerate(scales): - output_images[i][j] = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) - return output_images - - -def retarget_video(gan, input_tensor, scales, must_divide, output_dir_path): - max_scale = np.max(np.array(scales)) - frame_shape = np.uint32(np.array(input_tensor.shape[2:]) * max_scale) - frame_shape[0] += (frame_shape[0] % 2) - frame_shape[1] += (frame_shape[1] % 2) - frames = np.zeros([len(scales), frame_shape[0], frame_shape[1], 3]) - for i, (scale_h, scale_w) in enumerate(scales): - output_image = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) - frames[i, 0:output_image.shape[0], 0:output_image.shape[1], :] = output_image - writer = FFmpegWriter(output_dir_path + '/vid.mp4', verbosity=1, outputdict={'-b': '30000000', '-r': '100.0'}) - - for i, _ in enumerate(scales): - for j in range(3): - writer.writeFrame(frames[i, :, :, :]) - writer.close() - - -def define_video_scales(scales): - max_v, min_v, max_h, min_h = scales - frames_per_resize = 10 - - x = np.concatenate([ - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize), - np.linspace(min_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, 1, frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize), - np.linspace(min_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, 1, frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize)]) - y = np.concatenate([ - np.linspace(1, 1, frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, max_h, 2 * frames_per_resize), - np.linspace(max_h, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, max_h, 2 * frames_per_resize), - np.linspace(max_h, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, min_h, 2 * frames_per_resize)]) - - return zip(x, y) - - -def generate_collage_and_outputs(conf, gan, input_tensor): - output_images = generate_images_for_collage(gan, input_tensor, conf.collage_scales, conf.must_divide) - - for i in range(len(output_images)): - for j in range(len(output_images)): - Image.fromarray(output_images[i][j], 'RGB').save(conf.output_dir_path + '/test_%d_%d.png' % (i, j)) - - input_spot = conf.collage_input_spot - output_images[input_spot[0]][input_spot[1]] = util.tensor2im(input_tensor) - - collage = concat_images(output_images, margin=10, input_spot=input_spot) - - Image.fromarray(np.uint8(collage), 'RGB').save(conf.output_dir_path + '/test_collage.png') - - -def _make_homography_mask(in_mask, target_size, rand_h): - theta = homography_based_on_top_corners_x_shift(rand_h) - target_size4d = torch.Size([in_mask.shape[0], in_mask.shape[1], target_size[0], target_size[1]]) - grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), target_size4d) - out = f.grid_sample(in_mask, grid, mode='bilinear', padding_mode='border') - return out - - -def test_homo(conf, gan, input_tensor, must_divide=8): - shift_range = np.arange(conf.non_rect_shift_range[0], conf.non_rect_shift_range[1], conf.non_rect_shift_range[2]) - total = (len(conf.non_rect_scales)*len(shift_range))**2 - ind = 0 - for scale1 in conf.non_rect_scales: - for scale2 in conf.non_rect_scales: - scale = [scale1, scale2] - for shift1 in shift_range: - for shift2 in shift_range: - ind += 1 - shifts = (shift1, shift2) - sz = input_tensor.shape - out_pad = np.uint8(255*np.ones([np.uint32(np.floor(sz[2]*scale[0])), np.uint32(np.floor(3*sz[3]*scale[1])), 3])) - - pad_l = np.abs(np.int(np.ceil(sz[3] * shifts[0]))) - pad_r = np.abs(np.int(np.ceil(sz[3] * shifts[1]))) - - in_mask = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() - input_for_regular = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() - - in_size = in_mask.shape[2:] - - out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), - np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide)) - - if pad_r > 0: - in_mask[:,:, :, pad_l:-pad_r] = torch.ones_like(input_tensor) - input_for_regular[:, :, :, pad_l:-pad_r] = input_tensor - else: - in_mask[:, :, :, pad_l:] = torch.ones_like(input_tensor) - input_for_regular[:, :, :, pad_l:] = input_tensor - - out = test_one_scale(gan, input_tensor, out_size, conf.must_divide, affine=shifts, return_tensor=True, size_instead_scale=True) - # regular = transform(input_tensor, out_size, shifts) - out_mask = _make_homography_mask(in_mask, out_size, shifts) - - out = util.tensor2im(out_mask * out + 1 - out_mask) - # regular_out = util.tensor2im(out_mask * regular + 1 - out_mask) - # out_pad[:, sz[3] - pad_l: sz[3] - pad_l + out_size[1], :] = out - shift_str = "{1:0{0}d}_{3:0{2}d}".format(2 if shift1>=0 else 3, int(10*shift1), 2 if shift2>=0 else 3, int(10*shift2)) - - # out = np.rot90(out, 3) - # regular_out = np.rot90(regular_out, 3) - - Image.fromarray(out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ingan.png' % (int(10*scale1), int(10*scale2), shift_str)) - # Image.fromarray(regular_out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ref.png' % (scale1, scale2, shift_str)) - print ind, '/', total, 'scale:', scale, 'shift:', shifts - - -def main(): - conf = Config().parse(create_dir_flag=False) - conf.name = 'TEST_' + conf.name - conf.output_dir_path = util.prepare_result_dir(conf) - gan = InGAN(conf) - - try: - gan.resume(conf.test_params_path, test_flag=True) - [input_tensor] = util.read_data(conf) - - if conf.test_video: - retarget_video(gan, input_tensor, define_video_scales(conf.test_vid_scales), 8, conf.output_dir_path) - if conf.test_collage: - generate_collage_and_outputs(conf, gan, input_tensor) - if conf.test_non_rect: - test_homo(conf, gan, input_tensor) - - print 'Done with %s' % conf.input_image_path - - except KeyboardInterrupt: - raise - except Exception as e: - # print 'Something went wrong with %s (%d/%d), iter %dk' % (input_image_path, i, n_files, snapshot_iter) - print_exc() - - -if __name__ == '__main__': - main() +from networks import GeoTransform +from PIL import Image +import util +from InGAN import InGAN +from configs import Config +from traceback import print_exc +from skvideo.io import FFmpegWriter +import os +from non_rect import * + + +def test_one_scale(gan, input_tensor, scale, must_divide, affine=None, return_tensor=False, size_instead_scale=False): + with torch.no_grad(): + in_size = input_tensor.shape[2:] + if size_instead_scale: + out_size = scale + else: + out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), + np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide)) + + output_tensor, _, _ = gan.test(input_tensor=input_tensor, + input_size=in_size, + output_size=out_size, + rand_affine=affine, + run_d_pred=False, + run_reconstruct=False) + if return_tensor: + return output_tensor[1] + else: + return util.tensor2im(output_tensor[1]) + + +def concat_images(images, margin, input_spot): + h_sizes = [im.shape[0] for im in zip(*images)[0]] + w_sizes = [im.shape[1] for im in images[0]] + h_total_size = np.sum(h_sizes) + margin * (len(images) - 1) + w_total_size = np.sum(w_sizes) + margin * (len(images) - 1) + + collage = np.ones([h_total_size, w_total_size, 3]) * 255 + for i in range(len(images)): + for j in range(len(images)): + top_left_corner_h = int(np.sum(h_sizes[:j]) + j * margin) + top_left_corner_w = int(np.sum(w_sizes[:i]) + i * margin) + bottom_right_corner_h = int(top_left_corner_h + h_sizes[j]) + bottom_right_corner_w = int(top_left_corner_w + w_sizes[i]) + + if [i, j] == input_spot: + collage[top_left_corner_h - margin/2: bottom_right_corner_h + margin/2, + top_left_corner_w - margin/2: bottom_right_corner_w + margin/2, + :] = [255, 0, 0] + collage[top_left_corner_h:bottom_right_corner_h, top_left_corner_w:bottom_right_corner_w] = images[j][i] + + return collage + + +def generate_images_for_collage(gan, input_tensor, scales, must_divide): + # NOTE: scales here is different from in the other funcs: here we only need 1d scales. + # Prepare output images list + output_images = [[[None] for _ in range(len(scales))] for _ in range(len(scales))] + + # Run over all scales and test the network for each one + for i, scale_h in enumerate(scales): + for j, scale_w in enumerate(scales): + output_images[i][j] = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) + return output_images + + +def retarget_video(gan, input_tensor, scales, must_divide, output_dir_path): + max_scale = np.max(np.array(scales)) + frame_shape = np.uint32(np.array(input_tensor.shape[2:]) * max_scale) + frame_shape[0] += (frame_shape[0] % 2) + frame_shape[1] += (frame_shape[1] % 2) + frames = np.zeros([len(scales), frame_shape[0], frame_shape[1], 3]) + for i, (scale_h, scale_w) in enumerate(scales): + output_image = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) + frames[i, 0:output_image.shape[0], 0:output_image.shape[1], :] = output_image + from IPython.core.debugger import set_trace + set_trace() + writer = FFmpegWriter(output_dir_path + '/vid.mp4', verbosity=1, outputdict={'-b': '30000000', '-r': '100.0'}) + + for i, _ in enumerate(scales): + for j in range(3): + writer.writeFrame(frames[i, :, :, :]) + writer.close() + + +def define_video_scales(scales): + max_v, min_v, max_h, min_h = scales + frames_per_resize = 10 + + x = np.concatenate([ + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize), + np.linspace(min_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, 1, frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize), + np.linspace(min_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, 1, frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize)]) + y = np.concatenate([ + np.linspace(1, 1, frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, max_h, 2 * frames_per_resize), + np.linspace(max_h, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, max_h, 2 * frames_per_resize), + np.linspace(max_h, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, min_h, 2 * frames_per_resize)]) + + return list(zip(x, y)) + + +def generate_collage_and_outputs(conf, gan, input_tensor): + output_images = generate_images_for_collage(gan, input_tensor, conf.collage_scales, conf.must_divide) + + for i in range(len(output_images)): + for j in range(len(output_images)): + Image.fromarray(output_images[i][j], 'RGB').save(conf.output_dir_path + '/test_%d_%d.png' % (i, j)) + + input_spot = conf.collage_input_spot + output_images[input_spot[0]][input_spot[1]] = util.tensor2im(input_tensor) + + collage = concat_images(output_images, margin=10, input_spot=input_spot) + + Image.fromarray(np.uint8(collage), 'RGB').save(conf.output_dir_path + '/test_collage.png') + + +def _make_homography_mask(in_mask, target_size, rand_h): + theta = homography_based_on_top_corners_x_shift(rand_h) + target_size4d = torch.Size([in_mask.shape[0], in_mask.shape[1], target_size[0], target_size[1]]) + grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), target_size4d) + out = f.grid_sample(in_mask, grid, mode='bilinear', padding_mode='border') + return out + + +def test_homo(conf, gan, input_tensor, must_divide=8): + shift_range = np.arange(conf.non_rect_shift_range[0], conf.non_rect_shift_range[1], conf.non_rect_shift_range[2]) + total = (len(conf.non_rect_scales)*len(shift_range))**2 + ind = 0 + for scale1 in conf.non_rect_scales: + for scale2 in conf.non_rect_scales: + scale = [scale1, scale2] + for shift1 in shift_range: + for shift2 in shift_range: + ind += 1 + shifts = (shift1, shift2) + sz = input_tensor.shape + out_pad = np.uint8(255*np.ones([np.uint32(np.floor(sz[2]*scale[0])), np.uint32(np.floor(3*sz[3]*scale[1])), 3])) + + pad_l = np.abs(np.int(np.ceil(sz[3] * shifts[0]))) + pad_r = np.abs(np.int(np.ceil(sz[3] * shifts[1]))) + + in_mask = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() + input_for_regular = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() + + in_size = in_mask.shape[2:] + + out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), + np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide)) + + if pad_r > 0: + in_mask[:,:, :, pad_l:-pad_r] = torch.ones_like(input_tensor) + input_for_regular[:, :, :, pad_l:-pad_r] = input_tensor + else: + in_mask[:, :, :, pad_l:] = torch.ones_like(input_tensor) + input_for_regular[:, :, :, pad_l:] = input_tensor + + out = test_one_scale(gan, input_tensor, out_size, conf.must_divide, affine=shifts, return_tensor=True, size_instead_scale=True) + # regular = transform(input_tensor, out_size, shifts) + out_mask = _make_homography_mask(in_mask, out_size, shifts) + + out = util.tensor2im(out_mask * out + 1 - out_mask) + # regular_out = util.tensor2im(out_mask * regular + 1 - out_mask) + # out_pad[:, sz[3] - pad_l: sz[3] - pad_l + out_size[1], :] = out + shift_str = "{1:0{0}d}_{3:0{2}d}".format(2 if shift1>=0 else 3, int(10*shift1), 2 if shift2>=0 else 3, int(10*shift2)) + + # out = np.rot90(out, 3) + # regular_out = np.rot90(regular_out, 3) + + Image.fromarray(out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ingan.png' % (int(10*scale1), int(10*scale2), shift_str)) + # Image.fromarray(regular_out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ref.png' % (scale1, scale2, shift_str)) + print((ind, '/', total, 'scale:', scale, 'shift:', shifts)) + + +def main(): + conf = Config().parse(create_dir_flag=False) + conf.name = 'TEST_' + conf.name + conf.output_dir_path = util.prepare_result_dir(conf) + gan = InGAN(conf) + + try: + gan.resume(conf.test_params_path, test_flag=True) + [input_tensor] = util.read_data(conf) + + if conf.test_video: + retarget_video(gan, input_tensor, define_video_scales(conf.test_vid_scales), 8, conf.output_dir_path) + if conf.test_collage: + generate_collage_and_outputs(conf, gan, input_tensor) + if conf.test_non_rect: + test_homo(conf, gan, input_tensor) + + print(('Done with %s' % conf.input_image_path)) + + except KeyboardInterrupt: + raise + except Exception as e: + # print 'Something went wrong with %s (%d/%d), iter %dk' % (input_image_path, i, n_files, snapshot_iter) + print_exc() + + +if __name__ == '__main__': + main() diff --git a/train.py b/train.py index 7759b36..7f29d8f 100644 --- a/train.py +++ b/train.py @@ -30,7 +30,7 @@ except KeyboardInterrupt: raise except Exception as e: - print 'Something went wrong in iteration %d, While training.' % i + print('Something went wrong in iteration %d, While training.' % i) print_exc() # Take care of all testing, saving and presenting of current results and status @@ -39,7 +39,7 @@ except KeyboardInterrupt: raise except Exception as e: - print 'Something went wrong in iteration %d, While testing or visualizing.' % i + print('Something went wrong in iteration %d, While testing or visualizing.' % i) print_exc() # Save snapshot when needed @@ -53,5 +53,5 @@ except KeyboardInterrupt: raise except Exception as e: - print 'Something went wrong in iteration %d, While saving snapshot.' % i + print('Something went wrong in iteration %d, While saving snapshot.' % i) print_exc() diff --git a/train_supp_mat.py b/train_supp_mat.py index 2b7a25d..68480da 100644 --- a/train_supp_mat.py +++ b/train_supp_mat.py @@ -1,6 +1,6 @@ import os import threading -import Queue +import queue import subprocess base_dir = './side/' @@ -27,7 +27,7 @@ def run(self): while True: try: exp_name, item = self.inQ.get() - except Queue.Empty: + except queue.Empty: break # verify that this experiment was not executed already if experiment_was_not_already_exec(exp_name): @@ -36,13 +36,13 @@ def run(self): def main(): - q = Queue.Queue() + q = queue.Queue() workers = [Worker(q, gpu_id) for gpu_id in [0, 1]] for imgname in os.listdir(base_dir): full_img_name = os.path.join(base_dir, imgname) short_name = os.path.splitext(imgname)[0] cmd = ['python', 'train.py', '--input_image_path', full_img_name, '--gpu_id', '0'] - for aname, aa in abl_args.items(): + for aname, aa in list(abl_args.items()): exp_name = '{}_{}'.format(short_name, aname) full_cmd = cmd + aa + ['--name', exp_name] q.put((exp_name, full_cmd)) diff --git a/util.py b/util.py index af1b09d..a6204b2 100644 --- a/util.py +++ b/util.py @@ -5,7 +5,7 @@ import glob from time import strftime, localtime from shutil import copy -from scipy.misc import imresize +# from scipy.misc import imresize import torch @@ -83,9 +83,12 @@ def image_concat(g_preds, d_preds=None, size=None): dsize = g_pred.shape[1] if size is None or size[1] is None else size[1] result = np.ones([(1 + (d_pred is not None)) * hsize, dsize, 3]) * 255 if d_pred is not None: - d_pred_new = imresize((np.concatenate([d_pred] * 3, 2) - 128) * 2, g_pred.shape[0:2], interp='nearest') - result[hsize-g_pred.shape[0]:hsize+g_pred.shape[0], :g_pred.shape[1], :] = np.concatenate([g_pred, - d_pred_new], 0) + img = (np.concatenate([d_pred] * 3, 2) - 128) * 2 + import cv2 + # d_pred_new = imresize(img, g_pred.shape[0:2], interp='nearest') + d_pred_new = cv2.resize(img, dsize=g_pred.shape[0:2][::-1], interpolation=cv2.INTER_NEAREST) + con = np.concatenate([g_pred, d_pred_new], 0) + result[hsize-g_pred.shape[0]:hsize+g_pred.shape[0], :g_pred.shape[1], :] = con else: result[hsize - g_pred.shape[0]:, :, :] = g_pred results.append(np.uint8(np.round(result))) @@ -177,13 +180,13 @@ def test_and_display(self, i): self.Rec_loss[i-self.conf.print_freq:i] = self.gan.losses_G_reconstruct.detach().cpu().float().numpy().tolist() if self.conf.reconstruct_loss_stop_iter < i: - print('iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, LR: %f' % + print(('iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, LR: %f' % (i, self.G_loss[i-1], self.D_loss_real[i-1], self.D_loss_fake[i-1], - self.gan.lr_scheduler_G.get_lr()[0])) + self.gan.lr_scheduler_G.get_lr()[0]))) else: - print('iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, Rec_loss: %f, LR: %f' % + print(('iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, Rec_loss: %f, LR: %f' % (i, self.G_loss[i-1], self.D_loss_real[i-1], self.D_loss_fake[i-1], self.Rec_loss[i-1], - self.gan.lr_scheduler_G.get_lr()[0])) + self.gan.lr_scheduler_G.get_lr()[0]))) if not i % self.conf.display_freq and i > 0: plt.gcf().clear() @@ -210,13 +213,13 @@ def test_and_display(self, i): input_size = self.gan.input_tensor_noised.shape[2:] result = image_concat(tensor2im(g_preds), tensor2im(d_preds), (input_size[0]*2, input_size[1]*2)) - self.plot_gan_loss[0].set_data(range(i), self.G_loss[:i]) - self.plot_gan_loss[1].set_data(range(i), self.D_loss_real[:i]) - self.plot_gan_loss[2].set_data(range(i), self.D_loss_fake[:i]) + self.plot_gan_loss[0].set_data(list(range(i)), self.G_loss[:i]) + self.plot_gan_loss[1].set_data(list(range(i)), self.D_loss_real[:i]) + self.plot_gan_loss[2].set_data(list(range(i)), self.D_loss_fake[:i]) self.gan_loss.set_xlim(0, i) if self.conf.reconstruct_loss_stop_iter > i: - self.plot_reconstruct_loss[0].set_data(range(i), self.Rec_loss[:i]) + self.plot_reconstruct_loss[0].set_data(list(range(i)), self.Rec_loss[:i]) self.reconstruct_loss.set_ylim(np.min(self.Rec_loss[:i]), np.max(self.Rec_loss[:i])) self.reconstruct_loss.set_xlim(0, i) @@ -247,7 +250,6 @@ def prepare_result_dir(conf): return conf.output_dir_path - def homography_based_on_top_corners_x_shift(rand_h): p = np.array([[1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), -1. + rand_h[0]], [0, 0, 0, 1., 1., -1., 1., 1., -1.], From 77ab99a2f1305b419d37d9c0117519946ff7a38b Mon Sep 17 00:00:00 2001 From: Bartek Olechno Date: Tue, 22 Jun 2021 15:55:07 +0200 Subject: [PATCH 03/13] environment --- environment.yml | Bin 0 -> 486 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 environment.yml diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..b23011ef32488cdad0f39abdfd1d8540b50dd2a1 GIT binary patch literal 486 zcmZuu?Fzy$47}fh@30q8lqsU1@3Ied#Vr Date: Mon, 5 Jul 2021 17:57:16 +0200 Subject: [PATCH 04/13] test.py works with cv2 --- test.py | 201 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 128 insertions(+), 73 deletions(-) diff --git a/test.py b/test.py index 629e16b..08e84c7 100644 --- a/test.py +++ b/test.py @@ -1,22 +1,33 @@ -from networks import GeoTransform +import os +import cv2 from PIL import Image import util from InGAN import InGAN from configs import Config from traceback import print_exc -from skvideo.io import FFmpegWriter -import os -from non_rect import * +from networks import GeoTransform +from non_rect import (apply_resize_and_radial, homography_based_on_top_corners_x_shift, apply_resize_and_homograhpy, homography_grid, apply_resize_and_affine) -def test_one_scale(gan, input_tensor, scale, must_divide, affine=None, return_tensor=False, size_instead_scale=False): +def test_one_scale(gan, + input_tensor, + scale, + must_divide, + affine=None, + return_tensor=False, + size_instead_scale=False): with torch.no_grad(): in_size = input_tensor.shape[2:] if size_instead_scale: out_size = scale else: - out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), - np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide)) + out_size = ( + np.uint32( + np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * + must_divide), + np.uint32( + np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * + must_divide)) output_tensor, _, _ = gan.test(input_tensor=input_tensor, input_size=in_size, @@ -31,7 +42,7 @@ def test_one_scale(gan, input_tensor, scale, must_divide, affine=None, return_te def concat_images(images, margin, input_spot): - h_sizes = [im.shape[0] for im in zip(*images)[0]] + h_sizes = [im.shape[0] for im in list(zip(*images))[0]] w_sizes = [im.shape[1] for im in images[0]] h_total_size = np.sum(h_sizes) + margin * (len(images) - 1) w_total_size = np.sum(w_sizes) + margin * (len(images) - 1) @@ -45,10 +56,12 @@ def concat_images(images, margin, input_spot): bottom_right_corner_w = int(top_left_corner_w + w_sizes[i]) if [i, j] == input_spot: - collage[top_left_corner_h - margin/2: bottom_right_corner_h + margin/2, - top_left_corner_w - margin/2: bottom_right_corner_w + margin/2, - :] = [255, 0, 0] - collage[top_left_corner_h:bottom_right_corner_h, top_left_corner_w:bottom_right_corner_w] = images[j][i] + collage[top_left_corner_h - margin // 2:bottom_right_corner_h + + margin // 2, + top_left_corner_w - margin // 2:bottom_right_corner_w + + margin // 2, :] = [255, 0, 0] + collage[top_left_corner_h:bottom_right_corner_h, + top_left_corner_w:bottom_right_corner_w] = images[j][i] return collage @@ -56,12 +69,15 @@ def concat_images(images, margin, input_spot): def generate_images_for_collage(gan, input_tensor, scales, must_divide): # NOTE: scales here is different from in the other funcs: here we only need 1d scales. # Prepare output images list - output_images = [[[None] for _ in range(len(scales))] for _ in range(len(scales))] + output_images = [[[None] for _ in range(len(scales))] + for _ in range(len(scales))] # Run over all scales and test the network for each one for i, scale_h in enumerate(scales): for j, scale_w in enumerate(scales): - output_images[i][j] = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) + output_images[i][j] = test_one_scale(gan, input_tensor, + [scale_h, scale_w], + must_divide) return output_images @@ -72,16 +88,20 @@ def retarget_video(gan, input_tensor, scales, must_divide, output_dir_path): frame_shape[1] += (frame_shape[1] % 2) frames = np.zeros([len(scales), frame_shape[0], frame_shape[1], 3]) for i, (scale_h, scale_w) in enumerate(scales): - output_image = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) - frames[i, 0:output_image.shape[0], 0:output_image.shape[1], :] = output_image - from IPython.core.debugger import set_trace - set_trace() - writer = FFmpegWriter(output_dir_path + '/vid.mp4', verbosity=1, outputdict={'-b': '30000000', '-r': '100.0'}) - + output_image = test_one_scale(gan, input_tensor, [scale_h, scale_w], + must_divide) + frames[i, 0:output_image.shape[0], + 0:output_image.shape[1], :] = output_image + frame_size = frame_shape[:2] + fourcc = cv2.VideoWriter_fourcc(*'MP4V') + writer = cv2.VideoWriter(output_dir_path + '/vid.mp4', fourcc, 20.0, + frame_size) + + frames = frames.astype(np.uint8)[::-1] for i, _ in enumerate(scales): - for j in range(3): - writer.writeFrame(frames[i, :, :, :]) - writer.close() + for _ in range(3): + writer.write(frames[i, :, :, :]) + writer.release() def define_video_scales(scales): @@ -89,69 +109,80 @@ def define_video_scales(scales): frames_per_resize = 10 x = np.concatenate([ - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize), - np.linspace(min_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, 1, frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize), - np.linspace(min_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, 1, frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize)]) + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize), + np.linspace(min_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, 1, frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize), + np.linspace(min_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, 1, frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize) + ]) y = np.concatenate([ - np.linspace(1, 1, frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, max_h, 2 * frames_per_resize), - np.linspace(max_h, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, max_h, 2 * frames_per_resize), - np.linspace(max_h, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, min_h, 2 * frames_per_resize)]) + np.linspace(1, 1, frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, max_h, 2 * frames_per_resize), + np.linspace(max_h, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, max_h, 2 * frames_per_resize), + np.linspace(max_h, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, min_h, 2 * frames_per_resize) + ]) return list(zip(x, y)) def generate_collage_and_outputs(conf, gan, input_tensor): - output_images = generate_images_for_collage(gan, input_tensor, conf.collage_scales, conf.must_divide) + output_images = generate_images_for_collage(gan, input_tensor, + conf.collage_scales, + conf.must_divide) for i in range(len(output_images)): for j in range(len(output_images)): - Image.fromarray(output_images[i][j], 'RGB').save(conf.output_dir_path + '/test_%d_%d.png' % (i, j)) + Image.fromarray(output_images[i][j], + 'RGB').save(conf.output_dir_path + + '/test_%d_%d.png' % (i, j)) input_spot = conf.collage_input_spot output_images[input_spot[0]][input_spot[1]] = util.tensor2im(input_tensor) collage = concat_images(output_images, margin=10, input_spot=input_spot) - Image.fromarray(np.uint8(collage), 'RGB').save(conf.output_dir_path + '/test_collage.png') + Image.fromarray(np.uint8(collage), + 'RGB').save(conf.output_dir_path + '/test_collage.png') def _make_homography_mask(in_mask, target_size, rand_h): theta = homography_based_on_top_corners_x_shift(rand_h) - target_size4d = torch.Size([in_mask.shape[0], in_mask.shape[1], target_size[0], target_size[1]]) - grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), target_size4d) + target_size4d = torch.Size( + [in_mask.shape[0], in_mask.shape[1], target_size[0], target_size[1]]) + grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), + target_size4d) out = f.grid_sample(in_mask, grid, mode='bilinear', padding_mode='border') return out def test_homo(conf, gan, input_tensor, must_divide=8): - shift_range = np.arange(conf.non_rect_shift_range[0], conf.non_rect_shift_range[1], conf.non_rect_shift_range[2]) - total = (len(conf.non_rect_scales)*len(shift_range))**2 + shift_range = np.arange(conf.non_rect_shift_range[0], + conf.non_rect_shift_range[1], + conf.non_rect_shift_range[2]) + total = (len(conf.non_rect_scales) * len(shift_range))**2 ind = 0 for scale1 in conf.non_rect_scales: for scale2 in conf.non_rect_scales: @@ -161,39 +192,61 @@ def test_homo(conf, gan, input_tensor, must_divide=8): ind += 1 shifts = (shift1, shift2) sz = input_tensor.shape - out_pad = np.uint8(255*np.ones([np.uint32(np.floor(sz[2]*scale[0])), np.uint32(np.floor(3*sz[3]*scale[1])), 3])) + out_pad = np.uint8(255 * np.ones([ + np.uint32(np.floor(sz[2] * scale[0])), + np.uint32(np.floor(3 * sz[3] * scale[1])), 3 + ])) pad_l = np.abs(np.int(np.ceil(sz[3] * shifts[0]))) pad_r = np.abs(np.int(np.ceil(sz[3] * shifts[1]))) - in_mask = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() - input_for_regular = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() + in_mask = torch.zeros(sz[0], sz[1], sz[2], + pad_l + sz[3] + pad_r).cuda() + input_for_regular = torch.zeros( + sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() in_size = in_mask.shape[2:] - out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), - np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide)) + out_size = (np.uint32( + np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * + must_divide), + np.uint32( + np.floor(scale[1] * in_size[1] * 1.0 / + must_divide) * must_divide)) if pad_r > 0: - in_mask[:,:, :, pad_l:-pad_r] = torch.ones_like(input_tensor) + in_mask[:, :, :, + pad_l:-pad_r] = torch.ones_like(input_tensor) input_for_regular[:, :, :, pad_l:-pad_r] = input_tensor else: - in_mask[:, :, :, pad_l:] = torch.ones_like(input_tensor) + in_mask[:, :, :, + pad_l:] = torch.ones_like(input_tensor) input_for_regular[:, :, :, pad_l:] = input_tensor - out = test_one_scale(gan, input_tensor, out_size, conf.must_divide, affine=shifts, return_tensor=True, size_instead_scale=True) + out = test_one_scale(gan, + input_tensor, + out_size, + conf.must_divide, + affine=shifts, + return_tensor=True, + size_instead_scale=True) # regular = transform(input_tensor, out_size, shifts) out_mask = _make_homography_mask(in_mask, out_size, shifts) out = util.tensor2im(out_mask * out + 1 - out_mask) # regular_out = util.tensor2im(out_mask * regular + 1 - out_mask) # out_pad[:, sz[3] - pad_l: sz[3] - pad_l + out_size[1], :] = out - shift_str = "{1:0{0}d}_{3:0{2}d}".format(2 if shift1>=0 else 3, int(10*shift1), 2 if shift2>=0 else 3, int(10*shift2)) + shift_str = "{1:0{0}d}_{3:0{2}d}".format( + 2 if shift1 >= 0 else 3, int(10 * shift1), + 2 if shift2 >= 0 else 3, int(10 * shift2)) # out = np.rot90(out, 3) # regular_out = np.rot90(regular_out, 3) - Image.fromarray(out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ingan.png' % (int(10*scale1), int(10*scale2), shift_str)) + Image.fromarray(out, 'RGB').save( + conf.output_dir_path + + '/scale_%02d_%02d_transform %s_ingan.png' % + (int(10 * scale1), int(10 * scale2), shift_str)) # Image.fromarray(regular_out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ref.png' % (scale1, scale2, shift_str)) print((ind, '/', total, 'scale:', scale, 'shift:', shifts)) @@ -209,7 +262,9 @@ def main(): [input_tensor] = util.read_data(conf) if conf.test_video: - retarget_video(gan, input_tensor, define_video_scales(conf.test_vid_scales), 8, conf.output_dir_path) + retarget_video(gan, input_tensor, + define_video_scales(conf.test_vid_scales), 8, + conf.output_dir_path) if conf.test_collage: generate_collage_and_outputs(conf, gan, input_tensor) if conf.test_non_rect: From 56488793bd6cffb5ea1510febc80b4eae9f2bd6b Mon Sep 17 00:00:00 2001 From: bartolo1024 Date: Mon, 5 Jul 2021 17:57:42 +0200 Subject: [PATCH 05/13] environment works --- environment.yml | Bin 486 -> 252 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/environment.yml b/environment.yml index b23011ef32488cdad0f39abdfd1d8540b50dd2a1..9e0696b1b442b760ee1ed1f51319136afae7d6bf 100644 GIT binary patch literal 252 zcmXv}(GG(k41D)1_yLJVlO-nlFCBEV8MZ{w#lIgQd+PPt-t`zZoD}gJxG^g*qYG1` zRIB_IWjAZV7GH3<7#fQn*&-Xlx@HawQN(>HE_2`Mz8vte`RkS;=j2aI}5{G>p6v(sZCgP|&$P;zV?!p`>y;VBh+@;O6JA?aUpwkb@qtX$G4 F-4DGUN{;{l literal 486 zcmZuu?Fzy$47}fh@30q8lqsU1@3Ied#Vr Date: Mon, 5 Jul 2021 17:58:06 +0200 Subject: [PATCH 06/13] pycharm files in gitignore --- .gitignore | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 732ac30..8de7ad4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ -examples\ -results\ +examples/ +results/ +.idea/ From c87e0b352587e91fdf441ef68cba161c8fd945e0 Mon Sep 17 00:00:00 2001 From: bartolo1024 Date: Tue, 6 Jul 2021 13:53:39 +0200 Subject: [PATCH 07/13] add simple utility test --- README.md | 1 + environment.yml | 7 ++++--- test_util.py | 18 ++++++++++++++++++ util.py | 4 ++-- 4 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 test_util.py diff --git a/README.md b/README.md index 09ccc23..4a6f71f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +**ported to Python 3.8 and PyTorch 1.9** # InGAN ### Official code for the paper "InGAN: Capturing and Retargeting the DNA of a Natural Image" diff --git a/environment.yml b/environment.yml index 9e0696b..099fb21 100644 --- a/environment.yml +++ b/environment.yml @@ -3,12 +3,12 @@ channels: - pytorch - defaults dependencies: - - cudatoolkit>=10.1 + - cudatoolkit>=10.2 - numpy - pillow - pip - - python>=3.7.3 - - pytorch>=1.4.0 + - python>=3.8.10 + - pytorch>=1.9.0 - torchvision>=0.4 - scipy - scikit-learn @@ -16,3 +16,4 @@ dependencies: - pip: - opencv-python - ipython + - pytest \ No newline at end of file diff --git a/test_util.py b/test_util.py new file mode 100644 index 0000000..a6a2a91 --- /dev/null +++ b/test_util.py @@ -0,0 +1,18 @@ +from pytest import fixture +import torch +import numpy as np +from PIL import Image +from util import tensor2im, im2tensor + + +@fixture +def test_image(): + img = Image.open('examples/fruit/fruit.png') + img = np.array(img) + return img + + +def test_tensor2im(test_image): + tensor = torch.tensor(test_image).permute(2, 0, 1).unsqueeze(0) / 255. * 2 - 1 + img = tensor2im(tensor) + assert np.allclose(img, test_image) diff --git a/util.py b/util.py index a6204b2..d2223c8 100644 --- a/util.py +++ b/util.py @@ -48,13 +48,13 @@ def tensor2im(image_tensors, imtype=np.uint8): return image_numpys -def im2tensor(image_numpy, int_flag=False): +def im2tensor(image_numpy, int_flag=False, device=torch.device('cuda')): # the int flag indicates whether the input image is integer (and [0,255]) or float ([0,1]) if int_flag: image_numpy /= 255.0 # Undo the tensor shifting (see tensor2im function) transformed_image = np.transpose(image_numpy, (2, 0, 1)) * 2.0 - 1.0 - return torch.FloatTensor(transformed_image).unsqueeze(0).cuda() + return torch.FloatTensor(transformed_image).unsqueeze(0).to(device) def random_size(orig_size, curriculum=True, i=None, iter_for_max_range=None, must_divide=8.0, From f7913d73337691b119662df1fd370246c38c2682 Mon Sep 17 00:00:00 2001 From: bartolo1024 Date: Tue, 6 Jul 2021 13:56:31 +0200 Subject: [PATCH 08/13] yapf --- InGAN.py | 217 ++++++++++++++++++++----------- SceneScripts.py | 298 +++++++++++++++++++++++++++++-------------- configs.py | 318 ++++++++++++++++++++++++++++++++++++++-------- networks.py | 292 +++++++++++++++++++++++++++--------------- non_rect.py | 42 +++--- supp_video.py | 151 +++++++++++++++------- test.py | 5 +- test_util.py | 3 +- train.py | 16 ++- train_supp_mat.py | 5 +- util.py | 181 +++++++++++++++++--------- 11 files changed, 1078 insertions(+), 450 deletions(-) diff --git a/InGAN.py b/InGAN.py index a7226c7..934c750 100644 --- a/InGAN.py +++ b/InGAN.py @@ -13,7 +13,9 @@ def __init__(self, start, end): self.end = end def __call__(self, citer): - return 1. - max(0., float(citer - self.start) / float(self.end - self.start)) + return 1. - max( + 0., + float(citer - self.start) / float(self.end - self.start)) # noinspection PyAttributeOutsideInit @@ -25,18 +27,26 @@ def __init__(self, conf): self.max_iters = conf.max_iters # Define input tensor - self.input_tensor = torch.FloatTensor(1, 3, conf.input_crop_size, conf.input_crop_size).cuda() - self.real_example = torch.FloatTensor(1, 3, conf.output_crop_size, conf.output_crop_size).cuda() + self.input_tensor = torch.FloatTensor(1, 3, conf.input_crop_size, + conf.input_crop_size).cuda() + self.real_example = torch.FloatTensor(1, 3, conf.output_crop_size, + conf.output_crop_size).cuda() # Define networks - self.G = networks.Generator(conf.G_base_channels, conf.G_num_resblocks, conf.G_num_downscales, conf.G_use_bias, + self.G = networks.Generator(conf.G_base_channels, conf.G_num_resblocks, + conf.G_num_downscales, conf.G_use_bias, conf.G_skip) - self.D = networks.MultiScaleDiscriminator(conf.output_crop_size, self.conf.D_max_num_scales, - self.conf.D_scale_factor, self.conf.D_base_channels) + self.D = networks.MultiScaleDiscriminator(conf.output_crop_size, + self.conf.D_max_num_scales, + self.conf.D_scale_factor, + self.conf.D_base_channels) self.GAN_loss_layer = networks.GANLoss() self.Reconstruct_loss = networks.WeightedMSELoss(use_L1=conf.use_L1) - self.RandCrop = networks.RandomCrop([conf.input_crop_size, conf.input_crop_size], must_divide=conf.must_divide) - self.SwapCrops = networks.SwapCrops(conf.crop_swap_min_size, conf.crop_swap_max_size) + self.RandCrop = networks.RandomCrop( + [conf.input_crop_size, conf.input_crop_size], + must_divide=conf.must_divide) + self.SwapCrops = networks.SwapCrops(conf.crop_swap_min_size, + conf.crop_swap_max_size) # Make all networks run on GPU self.G.cuda() @@ -56,15 +66,20 @@ def __init__(self, conf): self.losses_D_fake = torch.FloatTensor(conf.print_freq).cuda() self.losses_G_reconstruct = torch.FloatTensor(conf.print_freq).cuda() if self.conf.reconstruct_loss_stop_iter > 0: - self.losses_D_reconstruct = torch.FloatTensor(conf.print_freq).cuda() + self.losses_D_reconstruct = torch.FloatTensor( + conf.print_freq).cuda() # Initialize networks self.G.apply(networks.weights_init) self.D.apply(networks.weights_init) # Initialize optimizers - self.optimizer_G = torch.optim.Adam(self.G.parameters(), lr=conf.g_lr, betas=(conf.beta1, 0.999)) - self.optimizer_D = torch.optim.Adam(self.D.parameters(), lr=conf.d_lr, betas=(conf.beta1, 0.999)) + self.optimizer_G = torch.optim.Adam(self.G.parameters(), + lr=conf.g_lr, + betas=(conf.beta1, 0.999)) + self.optimizer_D = torch.optim.Adam(self.D.parameters(), + lr=conf.d_lr, + betas=(conf.beta1, 0.999)) # Learning rate scheduler # First define linearly decaying functions (decay starts at a special iter) @@ -74,8 +89,10 @@ def __init__(self, conf): # return 1 - max(0, 1.0 * (n_iter - start_decay) / (conf.max_iters - start_decay)) lr_function = LRPolicy(start_decay, end_decay) # Define learning rate schedulers - self.lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(self.optimizer_G, lr_function) - self.lr_scheduler_D = torch.optim.lr_scheduler.LambdaLR(self.optimizer_D, lr_function) + self.lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR( + self.optimizer_G, lr_function) + self.lr_scheduler_D = torch.optim.lr_scheduler.LambdaLR( + self.optimizer_D, lr_function) # # do we resume from checkpoint? # if self.conf.resume: @@ -89,15 +106,17 @@ def save(self, citer=None): filename = citer else: filename = 'snapshot-{:05d}.pth.tar'.format(citer) - torch.save({'G': self.G.state_dict(), - 'D': self.D.state_dict(), - 'optim_G': self.optimizer_G.state_dict(), - 'optim_D': self.optimizer_D.state_dict(), - 'sched_G': self.lr_scheduler_G.state_dict(), - 'sched_D': self.lr_scheduler_D.state_dict(), - 'loss': self.GAN_loss_layer.state_dict(), - 'iter': citer if citer else self.cur_iter}, - os.path.join(self.conf.output_dir_path, filename)) + torch.save( + { + 'G': self.G.state_dict(), + 'D': self.D.state_dict(), + 'optim_G': self.optimizer_G.state_dict(), + 'optim_D': self.optimizer_D.state_dict(), + 'sched_G': self.lr_scheduler_G.state_dict(), + 'sched_D': self.lr_scheduler_D.state_dict(), + 'loss': self.GAN_loss_layer.state_dict(), + 'iter': citer if citer else self.cur_iter + }, os.path.join(self.conf.output_dir_path, filename)) def resume(self, resume_path, test_flag=False): resume = torch.load(resume_path, map_location={'cuda:5': 'cuda:0'}) @@ -132,36 +151,55 @@ def resume(self, resume_path, test_flag=False): else: missing.append('GAN loss') if len(missing): - warnings.warn('Missing the following state dicts from checkpoint: {}'.format(', '.join(missing))) + warnings.warn( + 'Missing the following state dicts from checkpoint: {}'.format( + ', '.join(missing))) print(('resuming checkpoint {}'.format(self.conf.resume))) - def test(self, input_tensor, output_size, rand_affine, input_size, run_d_pred=True, run_reconstruct=True): + def test(self, + input_tensor, + output_size, + rand_affine, + input_size, + run_d_pred=True, + run_reconstruct=True): with torch.no_grad(): - self.G_pred = self.G.forward(Variable(input_tensor.detach()), output_size=output_size, random_affine=rand_affine) + self.G_pred = self.G.forward(Variable(input_tensor.detach()), + output_size=output_size, + random_affine=rand_affine) if run_d_pred: - scale_weights_for_output = get_scale_weights(i=self.cur_iter, - max_i=self.conf.D_scale_weights_iter_for_even_scales, - start_factor=self.conf.D_scale_weights_sigma, - input_shape=self.G_pred.shape[2:], - min_size=self.conf.D_min_input_size, - num_scales_limit=self.conf.D_max_num_scales, - scale_factor=self.conf.D_scale_factor) - scale_weights_for_input = get_scale_weights(i=self.cur_iter, - max_i=self.conf.D_scale_weights_iter_for_even_scales, - start_factor=self.conf.D_scale_weights_sigma, - input_shape=input_tensor.shape[2:], - min_size=self.conf.D_min_input_size, - num_scales_limit=self.conf.D_max_num_scales, - scale_factor=self.conf.D_scale_factor) - self.D_preds = [self.D.forward(Variable(input_tensor.detach()), scale_weights_for_input), - self.D.forward(Variable(self.G_pred.detach()), scale_weights_for_output)] + scale_weights_for_output = get_scale_weights( + i=self.cur_iter, + max_i=self.conf.D_scale_weights_iter_for_even_scales, + start_factor=self.conf.D_scale_weights_sigma, + input_shape=self.G_pred.shape[2:], + min_size=self.conf.D_min_input_size, + num_scales_limit=self.conf.D_max_num_scales, + scale_factor=self.conf.D_scale_factor) + scale_weights_for_input = get_scale_weights( + i=self.cur_iter, + max_i=self.conf.D_scale_weights_iter_for_even_scales, + start_factor=self.conf.D_scale_weights_sigma, + input_shape=input_tensor.shape[2:], + min_size=self.conf.D_min_input_size, + num_scales_limit=self.conf.D_max_num_scales, + scale_factor=self.conf.D_scale_factor) + self.D_preds = [ + self.D.forward(Variable(input_tensor.detach()), + scale_weights_for_input), + self.D.forward(Variable(self.G_pred.detach()), + scale_weights_for_output) + ] else: self.D_preds = None self.G_preds = [input_tensor, self.G_pred] - self.reconstruct = self.G.forward(self.G_pred, output_size=input_size, random_affine=-rand_affine) if run_reconstruct else None + self.reconstruct = self.G.forward( + self.G_pred, + output_size=input_size, + random_affine=-rand_affine) if run_reconstruct else None return self.G_preds, self.D_preds, self.reconstruct @@ -171,35 +209,44 @@ def train_g(self): self.optimizer_D.zero_grad() # Determine output size of G (dynamic change) - output_size, random_affine = random_size(orig_size=self.input_tensor.shape[2:], - curriculum=self.conf.curriculum, - i=self.cur_iter, - iter_for_max_range=self.conf.iter_for_max_range, - must_divide=self.conf.must_divide, - min_scale=self.conf.min_scale, - max_scale=self.conf.max_scale, - max_transform_magniutude=self.conf.max_transform_magnitude) + output_size, random_affine = random_size( + orig_size=self.input_tensor.shape[2:], + curriculum=self.conf.curriculum, + i=self.cur_iter, + iter_for_max_range=self.conf.iter_for_max_range, + must_divide=self.conf.must_divide, + min_scale=self.conf.min_scale, + max_scale=self.conf.max_scale, + max_transform_magniutude=self.conf.max_transform_magnitude) # Add noise to G input for better generalization (make it ignore the 1/255 binning) - self.input_tensor_noised = self.input_tensor + (torch.rand_like(self.input_tensor) - 0.5) * 2.0 / 255 + self.input_tensor_noised = self.input_tensor + ( + torch.rand_like(self.input_tensor) - 0.5) * 2.0 / 255 # Generator forward pass - self.G_pred = self.G.forward(self.input_tensor_noised, output_size=output_size, random_affine=random_affine) + self.G_pred = self.G.forward(self.input_tensor_noised, + output_size=output_size, + random_affine=random_affine) # Run generator result through discriminator forward pass - self.scale_weights = get_scale_weights(i=self.cur_iter, - max_i=self.conf.D_scale_weights_iter_for_even_scales, - start_factor=self.conf.D_scale_weights_sigma, - input_shape=self.G_pred.shape[2:], - min_size=self.conf.D_min_input_size, - num_scales_limit=self.conf.D_max_num_scales, - scale_factor=self.conf.D_scale_factor) + self.scale_weights = get_scale_weights( + i=self.cur_iter, + max_i=self.conf.D_scale_weights_iter_for_even_scales, + start_factor=self.conf.D_scale_weights_sigma, + input_shape=self.G_pred.shape[2:], + min_size=self.conf.D_min_input_size, + num_scales_limit=self.conf.D_max_num_scales, + scale_factor=self.conf.D_scale_factor) d_pred_fake = self.D.forward(self.G_pred, self.scale_weights) # If reconstruction-loss is used, run through decoder to reconstruct, then calculate reconstruction loss if self.conf.reconstruct_loss_stop_iter > self.cur_iter: - self.reconstruct = self.G.forward(self.G_pred, output_size=self.input_tensor.shape[2:], random_affine=-random_affine) - self.loss_G_reconstruct = self.criterionReconstruction(self.reconstruct, self.input_tensor, self.loss_mask) + self.reconstruct = self.G.forward( + self.G_pred, + output_size=self.input_tensor.shape[2:], + random_affine=-random_affine) + self.loss_G_reconstruct = self.criterionReconstruction( + self.reconstruct, self.input_tensor, self.loss_mask) # Calculate generator loss, based on discriminator prediction on generator result self.loss_G_GAN = self.criterionGAN(d_pred_fake, is_d_input_real=True) @@ -209,7 +256,8 @@ def train_g(self): if self.conf.reconstruct_loss_stop_iter < self.cur_iter: self.loss_G = self.loss_G_GAN else: - self.loss_G = (self.conf.reconstruct_loss_proportion * self.loss_G_reconstruct + self.loss_G_GAN) + self.loss_G = (self.conf.reconstruct_loss_proportion * + self.loss_G_reconstruct + self.loss_G_GAN) # Calculate gradients # Note that the gradients are propagated from the loss through discriminator and then through generator @@ -224,8 +272,12 @@ def train_g(self): if self.cur_iter > self.conf.G_extra_inverse_train_start_iter: for _ in range(self.conf.G_extra_inverse_train): self.optimizer_G.zero_grad() - self.inverse = self.G.forward(self.G_pred.detach(), output_size=self.input_tensor.shape[2:], random_affine=-random_affine) - self.loss_G_inverse = (self.criterionReconstruction(self.inverse, self.input_tensor, self.loss_mask) * + self.inverse = self.G.forward( + self.G_pred.detach(), + output_size=self.input_tensor.shape[2:], + random_affine=-random_affine) + self.loss_G_inverse = (self.criterionReconstruction( + self.inverse, self.input_tensor, self.loss_mask) * self.conf.G_extra_inverse_train_ratio) self.loss_G_inverse.backward() self.optimizer_G.step() @@ -238,21 +290,27 @@ def train_d(self): self.optimizer_D.zero_grad() # Adding noise to D input to prevent overfitting to 1/255 bins - real_example_with_noise = self.real_example + (torch.rand_like(self.real_example[-1]) - 0.5) * 2.0 / 255.0 + real_example_with_noise = self.real_example + ( + torch.rand_like(self.real_example[-1]) - 0.5) * 2.0 / 255.0 # Discriminator forward pass over real example - self.d_pred_real = self.D.forward(real_example_with_noise, self.scale_weights) + self.d_pred_real = self.D.forward(real_example_with_noise, + self.scale_weights) # Adding noise to D input to prevent overfitting to 1/255 bins # Note that generator result is detached so that gradients are not propagating back through generator - g_pred_with_noise = self.G_pred.detach() + (torch.rand_like(self.G_pred) - 0.5) * 2.0 / 255 + g_pred_with_noise = self.G_pred.detach() + ( + torch.rand_like(self.G_pred) - 0.5) * 2.0 / 255 # Discriminator forward pass over generated example example - self.d_pred_fake = self.D.forward(g_pred_with_noise, self.scale_weights) + self.d_pred_fake = self.D.forward(g_pred_with_noise, + self.scale_weights) # Calculate discriminator loss - self.loss_D_fake = self.criterionGAN(self.d_pred_fake, is_d_input_real=False) - self.loss_D_real = self.criterionGAN(self.d_pred_real, is_d_input_real=True) + self.loss_D_fake = self.criterionGAN(self.d_pred_fake, + is_d_input_real=False) + self.loss_D_real = self.criterionGAN(self.d_pred_real, + is_d_input_real=True) self.loss_D = (self.loss_D_real + self.loss_D_fake) * 0.5 # Calculate gradients @@ -277,8 +335,10 @@ def train_one_iter(self, cur_iter, input_tensors): real_example_crops += self.RandCrop.forward([input_tensor]) if np.random.rand() < self.conf.crop_swap_probability: - swapped_input_tensor, loss_mask = self.SwapCrops.forward(input_tensor) - [input_crop, mask_crop] = self.RandCrop.forward([swapped_input_tensor, loss_mask]) + swapped_input_tensor, loss_mask = self.SwapCrops.forward( + input_tensor) + [input_crop, mask_crop + ] = self.RandCrop.forward([swapped_input_tensor, loss_mask]) input_crops.append(input_crop) mask_crops.append(mask_crop) mask_flag = True @@ -305,8 +365,13 @@ def train_one_iter(self, cur_iter, input_tensors): # Accumulate stats # Accumulating as cuda tensors is much more efficient than passing info from GPU to CPU at every iteration - self.losses_G_gan[cur_iter % self.conf.print_freq] = self.loss_G_GAN.item() - self.losses_D_fake[cur_iter % self.conf.print_freq] = self.loss_D_fake.item() - self.losses_D_real[cur_iter % self.conf.print_freq] = self.loss_D_real.item() + self.losses_G_gan[cur_iter % + self.conf.print_freq] = self.loss_G_GAN.item() + self.losses_D_fake[cur_iter % + self.conf.print_freq] = self.loss_D_fake.item() + self.losses_D_real[cur_iter % + self.conf.print_freq] = self.loss_D_real.item() if self.conf.reconstruct_loss_stop_iter > self.cur_iter: - self.losses_G_reconstruct[cur_iter % self.conf.print_freq] = self.loss_G_reconstruct.item() + self.losses_G_reconstruct[ + cur_iter % + self.conf.print_freq] = self.loss_G_reconstruct.item() diff --git a/SceneScripts.py b/SceneScripts.py index 5659e95..a4f8475 100644 --- a/SceneScripts.py +++ b/SceneScripts.py @@ -1,18 +1,28 @@ import numpy as np -def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, show_input=True, frames_per_resize=10): +def make_scene_script(script_name, + min_v, + max_v, + min_h, + max_h, + max_t, + repeat, + show_input=True, + frames_per_resize=10): l = np.linspace if script_name == 'vertical_grow_shrink': size_v = np.concatenate([ l(1, max_v, frames_per_resize), l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize)]) + l(min_v, 1, frames_per_resize) + ]) size_h = np.concatenate([ l(1, 1, frames_per_resize), l(1, 1, 2 * frames_per_resize), - l(1, 1, frames_per_resize)]) + l(1, 1, frames_per_resize) + ]) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] @@ -20,23 +30,27 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh size_v = np.concatenate([ l(1, 1, frames_per_resize), l(1, 1, 2 * frames_per_resize), - l(1, 1, frames_per_resize)]) + l(1, 1, frames_per_resize) + ]) size_h = np.concatenate([ l(1, max_h, frames_per_resize), l(max_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + l(min_h, 1, frames_per_resize) + ]) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'horizontal_grow_shrink_slow': size_v = np.concatenate([ - l(1, 1, 2 *frames_per_resize), l(1, 1, 2 * frames_per_resize), - l(1, 1, frames_per_resize)]) + l(1, 1, 2 * frames_per_resize), + l(1, 1, frames_per_resize) + ]) size_h = np.concatenate([ l(1, max_h, 2 * frames_per_resize), l(max_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + l(min_h, 1, frames_per_resize) + ]) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] @@ -44,11 +58,13 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh size_v = np.concatenate([ l(1, max_v, frames_per_resize), l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize)]) + l(min_v, 1, frames_per_resize) + ]) size_h = np.concatenate([ l(1, max_h, frames_per_resize), l(max_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + l(min_h, 1, frames_per_resize) + ]) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] @@ -58,32 +74,36 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh l(1, max_v, frames_per_resize), l(max_v, max_v, 2 * frames_per_resize), l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize)]) + l(min_v, 1, frames_per_resize) + ]) size_h = np.concatenate([ l(1, max_h, frames_per_resize), l(max_h, max_h, frames_per_resize), l(max_h, min_h, 2 * frames_per_resize), l(min_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + l(min_h, 1, frames_per_resize) + ]) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'special_resize_round': size_v = np.concatenate([ - l(1, 1, frames_per_resize/2), + l(1, 1, frames_per_resize / 2), l(1, max_v, frames_per_resize), l(max_v, max_v, frames_per_resize), l(max_v, max_v, 2 * frames_per_resize), l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize)]) + l(min_v, 1, frames_per_resize) + ]) size_h = np.concatenate([ - l(1, max_h/2, frames_per_resize/2), - l(max_h/2, max_h/2, frames_per_resize), - l(max_h/2, max_h, frames_per_resize), + l(1, max_h / 2, frames_per_resize / 2), + l(max_h / 2, max_h / 2, frames_per_resize), + l(max_h / 2, max_h, frames_per_resize), l(max_h, min_h, 2 * frames_per_resize), l(min_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + l(min_h, 1, frames_per_resize) + ]) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] @@ -91,51 +111,59 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh size_v = np.concatenate([ l(1, max_v, frames_per_resize), l(max_v, min_v, frames_per_resize), - l(min_v, 1, frames_per_resize)]) + l(min_v, 1, frames_per_resize) + ]) size_h = np.concatenate([ l(1, max_v, frames_per_resize), l(max_v, min_v, frames_per_resize), - l(min_v, 1, frames_per_resize)]) + l(min_v, 1, frames_per_resize) + ]) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'affine_dance': shift_l = np.concatenate([ l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, 0, frames_per_resize)]) + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, 0, frames_per_resize) + ]) shift_r = np.concatenate([ - l(0, - max_t, frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize)]) - size_v = [1for _ in shift_l] + l(0, -max_t, frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ]) + size_v = [1 for _ in shift_l] size_h = [1 for _ in shift_l] elif script_name == 'trapezoids': shift_l = np.concatenate([ l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize)]) + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ]) shift_r = np.concatenate([ l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize)]) - size_v = [1for _ in shift_l] + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ]) + size_v = [1 for _ in shift_l] size_h = [1 for _ in shift_l] elif script_name == 'trapezoids_vresize': shift_l = np.concatenate([ l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize)]) + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ]) shift_r = np.concatenate([ l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize)]) + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ]) size_v = np.concatenate([ l(1, max_v, frames_per_resize), l(max_v, 1, frames_per_resize), @@ -144,12 +172,10 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh l(1, max_v, frames_per_resize), l(max_v, 1, frames_per_resize), ]) - size_h = np.concatenate([ - l(1, 1, 6*frames_per_resize)]) + size_h = np.concatenate([l(1, 1, 6 * frames_per_resize)]) elif script_name == 'flicker': - size_h = np.concatenate([ - l(1, 1, 6 * frames_per_resize)]) + size_h = np.concatenate([l(1, 1, 6 * frames_per_resize)]) size_v = size_h shift_l = np.concatenate([ l(max_t, max_t, frames_per_resize), @@ -157,50 +183,61 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh l(max_t, max_t, frames_per_resize), l(-max_t, -max_t, frames_per_resize), l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize),]) + l(-max_t, -max_t, frames_per_resize), + ]) shift_r = np.concatenate([ l(-max_t, -max_t, frames_per_resize), l(max_t, max_t, frames_per_resize), l(-max_t, -max_t, frames_per_resize), l(max_t, max_t, frames_per_resize), l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize)]) + l(max_t, max_t, frames_per_resize) + ]) elif script_name == 'homography': - size_h = np.concatenate([ - l(1, 1, 6 * frames_per_resize)]) + size_h = np.concatenate([l(1, 1, 6 * frames_per_resize)]) size_v = size_h shift_l = np.concatenate([ l(0, max_t, frames_per_resize), l(max_t, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, - max_t, 2 * frames_per_resize), - l(- max_t, 0, frames_per_resize)]) + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, -max_t, 2 * frames_per_resize), + l(-max_t, 0, frames_per_resize) + ]) shift_r = np.concatenate([ l(0, 0, frames_per_resize), l(0, max_t, frames_per_resize), l(max_t, max_t, 2 * frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, 0, frames_per_resize)]) - - + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, 0, frames_per_resize) + ]) elif script_name == 'random': - stops = np.random.rand(10, 4) * np.array([max_v-min_v, max_h-min_h, 2*max_t, 2*max_t])[None, :] + np.array([min_v, min_h, -max_t, -max_t])[None, :] + stops = np.random.rand(10, 4) * np.array([ + max_v - min_v, max_h - min_h, 2 * max_t, 2 * max_t + ])[None, :] + np.array([min_v, min_h, -max_t, -max_t])[None, :] stops = np.vstack([stops, [1, 1, 0, 0]]) print(stops) - size_v = np.concatenate([l(stop_0[0], stop_1[0], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops)]) + size_v = np.concatenate([ + l(stop_0[0], stop_1[0], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ]) - size_h = np.concatenate([l(stop_0[1], stop_1[1], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops)]) + size_h = np.concatenate([ + l(stop_0[1], stop_1[1], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ]) - shift_l = np.concatenate([l(stop_0[2], stop_1[2], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops)]) + shift_l = np.concatenate([ + l(stop_0[2], stop_1[2], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ]) - shift_r = np.concatenate([l(stop_0[3], stop_1[3], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops)]) + shift_r = np.concatenate([ + l(stop_0[3], stop_1[3], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ]) elif script_name == 'random_trapezoids': stops_l = np.random.rand(11) * 2 * max_t - max_t @@ -209,45 +246,112 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh stops = list(zip(stops_l, stops_r)) print(stops) - size_h = np.concatenate([ - l(1, 1, 20 * frames_per_resize)]) + size_h = np.concatenate([l(1, 1, 20 * frames_per_resize)]) size_v = size_h - shift_l = np.concatenate([l(stop_0[0], stop_1[0], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops)]) - - shift_r = np.concatenate([l(stop_0[1], stop_1[1], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops)]) + shift_l = np.concatenate([ + l(stop_0[0], stop_1[0], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops) + ]) + shift_r = np.concatenate([ + l(stop_0[1], stop_1[1], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops) + ]) - return [[-1, -1, -1, -1]] * 20 + list(zip(size_v, size_h, shift_l, shift_r)) * repeat if show_input else list(zip(size_v, size_h, shift_l, shift_r)) * repeat + return [[-1, -1, -1, -1]] * 20 + list(zip( + size_v, size_h, shift_l, shift_r)) * repeat if show_input else list( + zip(size_v, size_h, shift_l, shift_r)) * repeat INPUT_DICT = { - 'fruits': ['fruits_ss.png', '/experiment_old_code_with_homo_2/results/fruits_ss_geo_new_pad_Mar_16_18_00_17/checkpoint_0075000.pth.tar'], - 'farm_house': ['farm_house_s.png', '/results/farm_house_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_16_07_59/checkpoint_0050000.pth.tar'], - 'cab_building': ['cab_building_s.png', '/results/cab_building_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_10_25/checkpoint_0065000.pth.tar'], - 'capitol': ['capitol.png', '/results/capitol_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_13_22/checkpoint_0055000.pth.tar'], - 'rome': ['rome_s.png', '/results/rome_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_09_19/checkpoint_0045000.pth.tar'], - 'soldiers': ['china_soldiers.png', '/results/china_soldiers_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_46_09/checkpoint_0075000.pth.tar'], - 'corn': ['corn.png', '/results/corn_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_10_29_00/checkpoint_0075000.pth.tar'], - 'sushi': ['sushi.png', '/results/sushi_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_47_39/checkpoint_0075000.pth.tar'], - 'penguins': ['penguins.png', '/results/penguins_Nov_13_16_26_14/checkpoint_0075000.pth.tar'], - 'emojis': ['emojis3.png', '/results/emojis3_Nov_23_09_59_59/checkpoint_0075000.pth.tar'], - 'fish': ['input/fish.png', '/results/fish_plethora_75_Mar_18_03_36_25/checkpoint_0075000.pth.tar'], - 'ny': ['textures/ny.png', '/results/ny_texture_synth_Mar_19_04_51_14/checkpoint_0075000.pth.tar'], - 'metal_circles': ['metal_circles.jpg', '/results/metal_circles_Mar_26_20_04_11/checkpoint_0075000.pth.tar'], - 'quilt': ['quilt.png', '/results/quilt/checkpoint_0075000.pth.tar'], - 'sapa': ['sapa.png', '/results/sapa_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_44_59/checkpoint_0075000.pth.tar'], - 'nkorea': ['nkorea.png', '/results/nkorea_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_48_00/checkpoint_0075000.pth.tar'], - 'wood': ['wood.png', '/results/wood/checkpoint_0075000.pth.tar'], - 'starry': ['starry.png', '/results/starry/checkpoint_0075000.pth.tar'], - 'umbrella': ['umbrella.png', '/results/umbrella/checkpoint_0075000.pth.tar'], - 'fruits_old': ['fruits_ss.png', '/results/fruits_ss_256_COARSE2FINE_extraInv_2_30_until60_killReconstruct_20_Oct_24_12_35_33/checkpoint_0040000.pth.tar'], - 'peacock': ['scaled_nird/ours_1_scaled.jpg', '/results/ours_1/checkpoint_0050000.pth.tar'], - 'windows': ['scaled_nird/ours_2_scaled.jpg', '/results/ours_2/checkpoint_0050000.pth.tar'], - 'light_house': ['scaled_nird/ours_23_scaled.jpg', '/results/ours_23/checkpoint_0050000.pth.tar'], - 'hats': ['scaled_nird/ours_26_scaled.jpg', '/results/ours_26/checkpoint_0050000.pth.tar'], - 'nature': ['scaled_nird/ours_32_scaled.jpg', '/results/ours_32/checkpoint_0050000.pth.tar'], - + 'fruits': [ + 'fruits_ss.png', + '/experiment_old_code_with_homo_2/results/fruits_ss_geo_new_pad_Mar_16_18_00_17/checkpoint_0075000.pth.tar' + ], + 'farm_house': [ + 'farm_house_s.png', + '/results/farm_house_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_16_07_59/checkpoint_0050000.pth.tar' + ], + 'cab_building': [ + 'cab_building_s.png', + '/results/cab_building_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_10_25/checkpoint_0065000.pth.tar' + ], + 'capitol': [ + 'capitol.png', + '/results/capitol_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_13_22/checkpoint_0055000.pth.tar' + ], + 'rome': [ + 'rome_s.png', + '/results/rome_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_09_19/checkpoint_0045000.pth.tar' + ], + 'soldiers': [ + 'china_soldiers.png', + '/results/china_soldiers_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_46_09/checkpoint_0075000.pth.tar' + ], + 'corn': [ + 'corn.png', + '/results/corn_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_10_29_00/checkpoint_0075000.pth.tar' + ], + 'sushi': [ + 'sushi.png', + '/results/sushi_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_47_39/checkpoint_0075000.pth.tar' + ], + 'penguins': [ + 'penguins.png', + '/results/penguins_Nov_13_16_26_14/checkpoint_0075000.pth.tar' + ], + 'emojis': [ + 'emojis3.png', + '/results/emojis3_Nov_23_09_59_59/checkpoint_0075000.pth.tar' + ], + 'fish': [ + 'input/fish.png', + '/results/fish_plethora_75_Mar_18_03_36_25/checkpoint_0075000.pth.tar' + ], + 'ny': [ + 'textures/ny.png', + '/results/ny_texture_synth_Mar_19_04_51_14/checkpoint_0075000.pth.tar' + ], + 'metal_circles': [ + 'metal_circles.jpg', + '/results/metal_circles_Mar_26_20_04_11/checkpoint_0075000.pth.tar' + ], + 'quilt': ['quilt.png', '/results/quilt/checkpoint_0075000.pth.tar'], + 'sapa': [ + 'sapa.png', + '/results/sapa_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_44_59/checkpoint_0075000.pth.tar' + ], + 'nkorea': [ + 'nkorea.png', + '/results/nkorea_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_48_00/checkpoint_0075000.pth.tar' + ], + 'wood': ['wood.png', '/results/wood/checkpoint_0075000.pth.tar'], + 'starry': ['starry.png', '/results/starry/checkpoint_0075000.pth.tar'], + 'umbrella': + ['umbrella.png', '/results/umbrella/checkpoint_0075000.pth.tar'], + 'fruits_old': [ + 'fruits_ss.png', + '/results/fruits_ss_256_COARSE2FINE_extraInv_2_30_until60_killReconstruct_20_Oct_24_12_35_33/checkpoint_0040000.pth.tar' + ], + 'peacock': [ + 'scaled_nird/ours_1_scaled.jpg', + '/results/ours_1/checkpoint_0050000.pth.tar' + ], + 'windows': [ + 'scaled_nird/ours_2_scaled.jpg', + '/results/ours_2/checkpoint_0050000.pth.tar' + ], + 'light_house': [ + 'scaled_nird/ours_23_scaled.jpg', + '/results/ours_23/checkpoint_0050000.pth.tar' + ], + 'hats': [ + 'scaled_nird/ours_26_scaled.jpg', + '/results/ours_26/checkpoint_0050000.pth.tar' + ], + 'nature': [ + 'scaled_nird/ours_32_scaled.jpg', + '/results/ours_32/checkpoint_0050000.pth.tar' + ], } diff --git a/configs.py b/configs.py index 0438ce0..d5bbb26 100644 --- a/configs.py +++ b/configs.py @@ -11,81 +11,293 @@ def __init__(self): self.conf = None # Paths - self.parser.add_argument('--input_image_path', default=[os.path.dirname(os.path.abspath(__file__)) + '/examples/fruit/fruit.png'], nargs='+', help='path to one specific image file') - self.parser.add_argument('--output_dir_path', default=os.path.dirname(os.path.abspath(__file__)) + '/results', help='path to a directory to save results to') - self.parser.add_argument('--name', default='fruit', help='name of current experiment, to be used for saving the results') - self.parser.add_argument('--resume', type=str, default=None, help='checkpoint to resume from') - self.parser.add_argument('--test_params_path', type=str, default=os.path.dirname(os.path.abspath(__file__)) + '/examples/fruit/checkpoint_0075000.pth.tar', help='checkpoint for testing') + self.parser.add_argument( + '--input_image_path', + default=[ + os.path.dirname(os.path.abspath(__file__)) + + '/examples/fruit/fruit.png' + ], + nargs='+', + help='path to one specific image file') + self.parser.add_argument( + '--output_dir_path', + default=os.path.dirname(os.path.abspath(__file__)) + '/results', + help='path to a directory to save results to') + self.parser.add_argument( + '--name', + default='fruit', + help='name of current experiment, to be used for saving the results' + ) + self.parser.add_argument('--resume', + type=str, + default=None, + help='checkpoint to resume from') + self.parser.add_argument( + '--test_params_path', + type=str, + default=os.path.dirname(os.path.abspath(__file__)) + + '/examples/fruit/checkpoint_0075000.pth.tar', + help='checkpoint for testing') # Test - self.parser.add_argument('--test_collage', default=True, action='store_true', help='Create collage in test?') - self.parser.add_argument('--test_video', default=True, action='store_true', help='Create retarget-video in test?') - self.parser.add_argument('--test_non_rect', default=False, action='store_true', help='Produce non-rectangular transformations in test?') - self.parser.add_argument('--test_vid_scales', type=float, default=[2.2, 0.1, 2.2, 0.1], nargs='+', help='boundary scales for output video: [max_v, min_v, max_h, min_h]') - self.parser.add_argument('--collage_scales', type=float, default=[2.0, 1.25, 1.0, 0.66, 0.33], nargs='+', help='scales for collage (h=w, only one number)') - self.parser.add_argument('--collage_input_spot', type=float, default=[2, 2], nargs='+', help='replaces one spot in the collage with original input. must match a spot with scale 1.0') - self.parser.add_argument('--non_rect_shift_range', type=float, default=[-0.8, 1.0, 0.2], nargs='+', help='range for homography shifts for non rect transforms [min, max, step]') - self.parser.add_argument('--non_rect_scales', type=float, default=[0.7, 1.0], nargs='+', help='list of scales for non_rect outputs') + self.parser.add_argument('--test_collage', + default=True, + action='store_true', + help='Create collage in test?') + self.parser.add_argument('--test_video', + default=True, + action='store_true', + help='Create retarget-video in test?') + self.parser.add_argument( + '--test_non_rect', + default=False, + action='store_true', + help='Produce non-rectangular transformations in test?') + self.parser.add_argument( + '--test_vid_scales', + type=float, + default=[2.2, 0.1, 2.2, 0.1], + nargs='+', + help= + 'boundary scales for output video: [max_v, min_v, max_h, min_h]') + self.parser.add_argument( + '--collage_scales', + type=float, + default=[2.0, 1.25, 1.0, 0.66, 0.33], + nargs='+', + help='scales for collage (h=w, only one number)') + self.parser.add_argument( + '--collage_input_spot', + type=float, + default=[2, 2], + nargs='+', + help= + 'replaces one spot in the collage with original input. must match a spot with scale 1.0' + ) + self.parser.add_argument( + '--non_rect_shift_range', + type=float, + default=[-0.8, 1.0, 0.2], + nargs='+', + help= + 'range for homography shifts for non rect transforms [min, max, step]' + ) + self.parser.add_argument('--non_rect_scales', + type=float, + default=[0.7, 1.0], + nargs='+', + help='list of scales for non_rect outputs') # Architecture (Generator) - self.parser.add_argument('--G_base_channels', type=int, default=64, help='# of base channels in G') - self.parser.add_argument('--G_num_resblocks', type=int, default=6, help='# of resblocks in G\'s bottleneck') - self.parser.add_argument('--G_num_downscales', type=int, default=3, help='# of downscaling layers in G') - self.parser.add_argument('--G_use_bias', type=bool, default=True, help='Determinhes whether bias is used in G\'s conv layers') - self.parser.add_argument('--G_skip', type=bool, default=True, help='Determines wether G uses skip connections (U-net)') + self.parser.add_argument('--G_base_channels', + type=int, + default=64, + help='# of base channels in G') + self.parser.add_argument('--G_num_resblocks', + type=int, + default=6, + help='# of resblocks in G\'s bottleneck') + self.parser.add_argument('--G_num_downscales', + type=int, + default=3, + help='# of downscaling layers in G') + self.parser.add_argument( + '--G_use_bias', + type=bool, + default=True, + help='Determinhes whether bias is used in G\'s conv layers') + self.parser.add_argument( + '--G_skip', + type=bool, + default=True, + help='Determines wether G uses skip connections (U-net)') # Architecture (Discriminator) - self.parser.add_argument('--D_base_channels', type=int, default=64, help='# of base channels in D') - self.parser.add_argument('--D_max_num_scales', type=int, default=99, help='Limits the # of scales for the multiscale D') - self.parser.add_argument('--D_scale_factor', type=float, default=1.4, help='Determines the downscaling factor for multiscale D') - self.parser.add_argument('--D_scale_weights_sigma', type=float, default=1.4, help='Determines the downscaling factor for multiscale D') - self.parser.add_argument('--D_min_input_size', type=int, default=13, help='Determines the downscaling factor for multiscale D') - self.parser.add_argument('--D_scale_weights_iter_for_even_scales', type=int, default=25000, help='Determines the downscaling factor for multiscale D') + self.parser.add_argument('--D_base_channels', + type=int, + default=64, + help='# of base channels in D') + self.parser.add_argument( + '--D_max_num_scales', + type=int, + default=99, + help='Limits the # of scales for the multiscale D') + self.parser.add_argument( + '--D_scale_factor', + type=float, + default=1.4, + help='Determines the downscaling factor for multiscale D') + self.parser.add_argument( + '--D_scale_weights_sigma', + type=float, + default=1.4, + help='Determines the downscaling factor for multiscale D') + self.parser.add_argument( + '--D_min_input_size', + type=int, + default=13, + help='Determines the downscaling factor for multiscale D') + self.parser.add_argument( + '--D_scale_weights_iter_for_even_scales', + type=int, + default=25000, + help='Determines the downscaling factor for multiscale D') # Optimization hyper-parameters - self.parser.add_argument('--g_lr', type=float, default=0.00005, help='initial learning rate for generator') - self.parser.add_argument('--d_lr', type=float, default=0.00005, help='initial learning rate for discriminator') - self.parser.add_argument('--lr_start_decay_iter', type=float, default=20000, help='iteration from which linear decay of lr starts until max_iter') - self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') - self.parser.add_argument('--curriculum', type=bool, default=True, help='Enable curriculum learning') - self.parser.add_argument('--iter_for_max_range', type=int, default=10000, help='In curriculum learning, when getting to this iteration all range is covered') + self.parser.add_argument('--g_lr', + type=float, + default=0.00005, + help='initial learning rate for generator') + self.parser.add_argument( + '--d_lr', + type=float, + default=0.00005, + help='initial learning rate for discriminator') + self.parser.add_argument( + '--lr_start_decay_iter', + type=float, + default=20000, + help='iteration from which linear decay of lr starts until max_iter' + ) + self.parser.add_argument('--beta1', + type=float, + default=0.5, + help='momentum term of adam') + self.parser.add_argument('--curriculum', + type=bool, + default=True, + help='Enable curriculum learning') + self.parser.add_argument( + '--iter_for_max_range', + type=int, + default=10000, + help= + 'In curriculum learning, when getting to this iteration all range is covered' + ) # Sizes - self.parser.add_argument('--input_crop_size', type=int, default=256, help='input is cropped to this size') - self.parser.add_argument('--output_crop_size', type=int, default=256, help='output is cropped to this size') - self.parser.add_argument('--max_scale', type=float, default=2.25, help='max retargeting scale') - self.parser.add_argument('--min_scale', type=float, default=0.15, help='min retargeting scale') - self.parser.add_argument('--must_divide', type=int, default=8, help='In curriculum learning, when getting to this iteration all range is covered') - self.parser.add_argument('--max_transform_magnitude', type=float, default=0.0, help='max manitude of geometric transformation') + self.parser.add_argument('--input_crop_size', + type=int, + default=256, + help='input is cropped to this size') + self.parser.add_argument('--output_crop_size', + type=int, + default=256, + help='output is cropped to this size') + self.parser.add_argument('--max_scale', + type=float, + default=2.25, + help='max retargeting scale') + self.parser.add_argument('--min_scale', + type=float, + default=0.15, + help='min retargeting scale') + self.parser.add_argument( + '--must_divide', + type=int, + default=8, + help= + 'In curriculum learning, when getting to this iteration all range is covered' + ) + self.parser.add_argument( + '--max_transform_magnitude', + type=float, + default=0.0, + help='max manitude of geometric transformation') # Crop Swap - self.parser.add_argument('--crop_swap_min_size', type=int, default=32, help='swapping crops augmnetation') - self.parser.add_argument('--crop_swap_max_size', type=int, default=256, help='swapping crops augmnetation') - self.parser.add_argument('--crop_swap_probability', type=float, default=0.0, help='probability for crop swapping to occur') + self.parser.add_argument('--crop_swap_min_size', + type=int, + default=32, + help='swapping crops augmnetation') + self.parser.add_argument('--crop_swap_max_size', + type=int, + default=256, + help='swapping crops augmnetation') + self.parser.add_argument('--crop_swap_probability', + type=float, + default=0.0, + help='probability for crop swapping to occur') # GPU - self.parser.add_argument('--gpu_id', type=int, default=0, help='gpu id number') + self.parser.add_argument('--gpu_id', + type=int, + default=0, + help='gpu id number') # Monitoring display frequencies - self.parser.add_argument('--display_freq', type=int, default=200, help='frequency of showing training results on screen') - self.parser.add_argument('--print_freq', type=int, default=20, help='frequency of showing training results on console') - self.parser.add_argument('--save_snapshot_freq', type=int, default=5000, help='frequency of saving the latest results') + self.parser.add_argument( + '--display_freq', + type=int, + default=200, + help='frequency of showing training results on screen') + self.parser.add_argument( + '--print_freq', + type=int, + default=20, + help='frequency of showing training results on console') + self.parser.add_argument('--save_snapshot_freq', + type=int, + default=5000, + help='frequency of saving the latest results') # Iterations - self.parser.add_argument('--max_iters', type=int, default=75000, help='max # of iters') - self.parser.add_argument('--G_iters', type=int, default=1, help='# of sub-iters for the generator per each global iteration') - self.parser.add_argument('--D_iters', type=int, default=1, help='# of sub-iters for the discriminator per each global iteration') + self.parser.add_argument('--max_iters', + type=int, + default=75000, + help='max # of iters') + self.parser.add_argument( + '--G_iters', + type=int, + default=1, + help='# of sub-iters for the generator per each global iteration') + self.parser.add_argument( + '--D_iters', + type=int, + default=1, + help= + '# of sub-iters for the discriminator per each global iteration') # Losses - self.parser.add_argument('--reconstruct_loss_proportion', type=float, default=0.1, help='relative part of reconstruct-loss (out of 1)') - self.parser.add_argument('--reconstruct_loss_stop_iter', type=int, default=200000, help='from this iter and on, reconstruct loss is deactivated') - self.parser.add_argument('--G_extra_inverse_train', type=int, default=1, help='number of extra training iters for G on inverse direction') - self.parser.add_argument('--G_extra_inverse_train_start_iter', type=int, default=10000, help='number of extra training iters for G on inverse direction') - self.parser.add_argument('--G_extra_inverse_train_ratio', type=int, default=1.0, help='number of extra training iters for G on inverse direction') - self.parser.add_argument('--use_L1', type=bool, default=True, help='Determine whether to use L1 or L2 for reconstruction') + self.parser.add_argument( + '--reconstruct_loss_proportion', + type=float, + default=0.1, + help='relative part of reconstruct-loss (out of 1)') + self.parser.add_argument( + '--reconstruct_loss_stop_iter', + type=int, + default=200000, + help='from this iter and on, reconstruct loss is deactivated') + self.parser.add_argument( + '--G_extra_inverse_train', + type=int, + default=1, + help='number of extra training iters for G on inverse direction') + self.parser.add_argument( + '--G_extra_inverse_train_start_iter', + type=int, + default=10000, + help='number of extra training iters for G on inverse direction') + self.parser.add_argument( + '--G_extra_inverse_train_ratio', + type=int, + default=1.0, + help='number of extra training iters for G on inverse direction') + self.parser.add_argument( + '--use_L1', + type=bool, + default=True, + help='Determine whether to use L1 or L2 for reconstruction') # Misc - self.parser.add_argument('--create_code_copy', type=bool, default=True, help='when set to true, all .py files are saved to results directory to keep track') + self.parser.add_argument( + '--create_code_copy', + type=bool, + default=True, + help= + 'when set to true, all .py files are saved to results directory to keep track' + ) def parse(self, create_dir_flag=True): # Parse arguments diff --git a/networks.py b/networks.py index e3f341f..142c9cd 100644 --- a/networks.py +++ b/networks.py @@ -37,13 +37,13 @@ def forward(self, input_tensor): print(local_mean) centered_input_tensor = input_tensor - local_mean print(centered_input_tensor) - squared_diff = centered_input_tensor ** 2 + squared_diff = centered_input_tensor**2 print(squared_diff) - local_std = self.get_var(squared_diff) ** 0.5 + local_std = self.get_var(squared_diff)**0.5 print(local_std) normalized_tensor = centered_input_tensor / (local_std + 1e-8) - return normalized_tensor # * self.weight[None, :, None, None] + self.bias[None, :, None, None] + return normalized_tensor # * self.weight[None, :, None, None] + self.bias[None, :, None, None] normalization_layer = nn.BatchNorm2d # BatchReNorm2d # LocalNorm @@ -52,7 +52,6 @@ def forward(self, input_tensor): class GANLoss(nn.Module): """ Receiving the final layer form the discriminator and a boolean indicating whether the input to the discriminator is real or fake (generated by generator), this returns a patch""" - def __init__(self): super(GANLoss, self).__init__() @@ -66,7 +65,8 @@ def __init__(self): def forward(self, d_last_layer, is_d_input_real): # Determine label map according to whether current input to discriminator is real or fake - self.label_tensor = Variable(torch.ones_like(d_last_layer).cuda(), requires_grad=False) * is_d_input_real + self.label_tensor = Variable(torch.ones_like(d_last_layer).cuda(), + requires_grad=False) * is_d_input_real # Finally return the loss return self.loss(d_last_layer, self.label_tensor) @@ -80,7 +80,7 @@ def __init__(self, use_L1=False): def forward(self, input_tensor, target_tensor, loss_mask): if loss_mask is not None: - e = (target_tensor.detach() - input_tensor) ** 2 + e = (target_tensor.detach() - input_tensor)**2 e *= loss_mask return torch.sum(e) / torch.sum(loss_mask) else: @@ -98,15 +98,21 @@ def forward(self, input_tensor, target_tensor, scale_weights): # Run all nets over all scales and aggregate the interpolated results loss = 0 for i, scale_weight in enumerate(scale_weights): - input_tensor = f.interpolate(input_tensor, scale_factor=self.scale_factor**(-i), mode='bilinear') + input_tensor = f.interpolate(input_tensor, + scale_factor=self.scale_factor**(-i), + mode='bilinear') loss += scale_weight * self.mse(input_tensor, target_tensor) return loss class Generator(nn.Module): """ Architecture of the Generator, uses res-blocks """ - - def __init__(self, base_channels=64, n_blocks=6, n_downsampling=3, use_bias=True, skip_flag=True): + def __init__(self, + base_channels=64, + n_blocks=6, + n_downsampling=3, + use_bias=True, + skip_flag=True): super(Generator, self).__init__() # Determine whether to use skip connections @@ -114,35 +120,41 @@ def __init__(self, base_channels=64, n_blocks=6, n_downsampling=3, use_bias=True # Entry block # First conv-block, no stride so image dims are kept and channels dim is expanded (pad-conv-norm-relu) - self.entry_block = nn.Sequential(nn.ReflectionPad2d(3), - nn.utils.spectral_norm(nn.Conv2d(3, base_channels, kernel_size=7, bias=use_bias)), - normalization_layer(base_channels), - nn.LeakyReLU(0.2, True)) + self.entry_block = nn.Sequential( + nn.ReflectionPad2d(3), + nn.utils.spectral_norm( + nn.Conv2d(3, base_channels, kernel_size=7, bias=use_bias)), + normalization_layer(base_channels), nn.LeakyReLU(0.2, True)) # Geometric transformation self.geo_transform = GeoTransform() # Downscaling # A sequence of strided conv-blocks. Image dims shrink by 2, channels dim expands by 2 at each block - self.downscale_block = RescaleBlock(n_downsampling, 0.5, base_channels, True) + self.downscale_block = RescaleBlock(n_downsampling, 0.5, base_channels, + True) # Bottleneck # A sequence of res-blocks bottleneck_block = [] for _ in range(n_blocks): # noinspection PyUnboundLocalVariable - bottleneck_block += [ResnetBlock(base_channels * 2 ** n_downsampling, use_bias=use_bias)] + bottleneck_block += [ + ResnetBlock(base_channels * 2**n_downsampling, + use_bias=use_bias) + ] self.bottleneck_block = nn.Sequential(*bottleneck_block) # Upscaling # A sequence of transposed-conv-blocks, Image dims expand by 2, channels dim shrinks by 2 at each block\ - self.upscale_block = RescaleBlock(n_downsampling, 2.0, base_channels, True) + self.upscale_block = RescaleBlock(n_downsampling, 2.0, base_channels, + True) # Final block # No stride so image dims are kept and channels dim shrinks to 3 (output image channels) - self.final_block = nn.Sequential(nn.ReflectionPad2d(3), - nn.Conv2d(base_channels, 3, kernel_size=7), - nn.Tanh()) + self.final_block = nn.Sequential( + nn.ReflectionPad2d(3), nn.Conv2d(base_channels, 3, kernel_size=7), + nn.Tanh()) def forward(self, input_tensor, output_size, random_affine): # A condition for having the output at same size as the scaled input is having even output_size @@ -152,18 +164,24 @@ def forward(self, input_tensor, output_size, random_affine): # Change scale to output scale by interpolation if random_affine is None: - feature_map = f.interpolate(feature_map, size=output_size, mode='bilinear') + feature_map = f.interpolate(feature_map, + size=output_size, + mode='bilinear') else: - feature_map = self.geo_transform.forward(feature_map, output_size, random_affine) + feature_map = self.geo_transform.forward(feature_map, output_size, + random_affine) # Downscale block - feature_map, downscales = self.downscale_block.forward(feature_map, return_all_scales=self.skip) + feature_map, downscales = self.downscale_block.forward( + feature_map, return_all_scales=self.skip) # Bottleneck (res-blocks) feature_map = self.bottleneck_block(feature_map) # Upscale block - feature_map, _ = self.upscale_block.forward(feature_map, pyramid=downscales, skip=self.skip) + feature_map, _ = self.upscale_block.forward(feature_map, + pyramid=downscales, + skip=self.skip) # Final block output_tensor = self.final_block(feature_map) @@ -173,20 +191,21 @@ def forward(self, input_tensor, output_size, random_affine): class ResnetBlock(nn.Module): """ A single Res-Block module """ - def __init__(self, dim, use_bias): super(ResnetBlock, self).__init__() # A res-block without the skip-connection, pad-conv-norm-relu-pad-conv-norm - self.conv_block = nn.Sequential(nn.utils.spectral_norm(nn.Conv2d(dim, dim // 4, kernel_size=1, bias=use_bias)), - normalization_layer(dim // 4), - nn.LeakyReLU(0.2, True), - nn.ReflectionPad2d(1), - nn.utils.spectral_norm(nn.Conv2d(dim // 4, dim // 4, kernel_size=3, bias=use_bias)), - normalization_layer(dim // 4), - nn.LeakyReLU(0.2, True), - nn.utils.spectral_norm(nn.Conv2d(dim // 4, dim, kernel_size=1, bias=use_bias)), - normalization_layer(dim)) + self.conv_block = nn.Sequential( + nn.utils.spectral_norm( + nn.Conv2d(dim, dim // 4, kernel_size=1, bias=use_bias)), + normalization_layer(dim // 4), nn.LeakyReLU(0.2, True), + nn.ReflectionPad2d(1), + nn.utils.spectral_norm( + nn.Conv2d(dim // 4, dim // 4, kernel_size=3, bias=use_bias)), + normalization_layer(dim // 4), nn.LeakyReLU(0.2, True), + nn.utils.spectral_norm( + nn.Conv2d(dim // 4, dim, kernel_size=1, bias=use_bias)), + normalization_layer(dim)) def forward(self, input_tensor): # The skip connection is applied here @@ -194,7 +213,12 @@ def forward(self, input_tensor): class MultiScaleDiscriminator(nn.Module): - def __init__(self, real_crop_size, max_n_scales=9, scale_factor=2, base_channels=128, extra_conv_layers=0): + def __init__(self, + real_crop_size, + max_n_scales=9, + scale_factor=2, + base_channels=128, + extra_conv_layers=0): super(MultiScaleDiscriminator, self).__init__() self.base_channels = base_channels self.scale_factor = scale_factor @@ -203,8 +227,12 @@ def __init__(self, real_crop_size, max_n_scales=9, scale_factor=2, base_channels # We want the max num of scales to fit the size of the real examples. further scaling would create networks that # only train on fake examples - self.max_n_scales = np.min([np.int(np.ceil(np.log(np.min(real_crop_size) * 1.0 / self.min_size) - / np.log(self.scale_factor))), max_n_scales]) + self.max_n_scales = np.min([ + np.int( + np.ceil( + np.log(np.min(real_crop_size) * 1.0 / self.min_size) / + np.log(self.scale_factor))), max_n_scales + ]) # Prepare a list of all the networks for all the wanted scales self.nets = nn.ModuleList() @@ -218,52 +246,72 @@ def make_net(self): net = [] # Entry block - net += [nn.utils.spectral_norm(nn.Conv2d(3, base_channels, kernel_size=3, stride=1)), - nn.BatchNorm2d(base_channels), - nn.LeakyReLU(0.2, True)] + net += [ + nn.utils.spectral_norm( + nn.Conv2d(3, base_channels, kernel_size=3, stride=1)), + nn.BatchNorm2d(base_channels), + nn.LeakyReLU(0.2, True) + ] # Downscaling blocks # A sequence of strided conv-blocks. Image dims shrink by 2, channels dim expands by 2 at each block - net += [nn.utils.spectral_norm(nn.Conv2d(base_channels, base_channels * 2, kernel_size=3, stride=2)), - nn.BatchNorm2d(base_channels * 2), - nn.LeakyReLU(0.2, True)] + net += [ + nn.utils.spectral_norm( + nn.Conv2d(base_channels, + base_channels * 2, + kernel_size=3, + stride=2)), + nn.BatchNorm2d(base_channels * 2), + nn.LeakyReLU(0.2, True) + ] # Regular conv-block - net += [nn.utils.spectral_norm(nn.Conv2d(in_channels=base_channels * 2, - out_channels=base_channels * 2, - kernel_size=3, - bias=True)), - nn.BatchNorm2d(base_channels * 2), - nn.LeakyReLU(0.2, True)] + net += [ + nn.utils.spectral_norm( + nn.Conv2d(in_channels=base_channels * 2, + out_channels=base_channels * 2, + kernel_size=3, + bias=True)), + nn.BatchNorm2d(base_channels * 2), + nn.LeakyReLU(0.2, True) + ] # Additional 1x1 conv-blocks for _ in range(self.extra_conv_layers): - net += [nn.utils.spectral_norm(nn.Conv2d(in_channels=base_channels * 2, - out_channels=base_channels * 2, - kernel_size=3, - bias=True)), - nn.BatchNorm2d(base_channels * 2), - nn.LeakyReLU(0.2, True)] + net += [ + nn.utils.spectral_norm( + nn.Conv2d(in_channels=base_channels * 2, + out_channels=base_channels * 2, + kernel_size=3, + bias=True)), + nn.BatchNorm2d(base_channels * 2), + nn.LeakyReLU(0.2, True) + ] # Final conv-block # Ends with a Sigmoid to get a range of 0-1 - net += nn.Sequential(nn.utils.spectral_norm(nn.Conv2d(base_channels * 2, 1, kernel_size=1)), - nn.Sigmoid()) + net += nn.Sequential( + nn.utils.spectral_norm( + nn.Conv2d(base_channels * 2, 1, kernel_size=1)), nn.Sigmoid()) # Make it a valid layers sequence and return return nn.Sequential(*net) def forward(self, input_tensor, scale_weights): - aggregated_result_maps_from_all_scales = self.nets[0](input_tensor) * scale_weights[0] + aggregated_result_maps_from_all_scales = self.nets[0]( + input_tensor) * scale_weights[0] map_size = aggregated_result_maps_from_all_scales.shape[2:] # Run all nets over all scales and aggregate the interpolated results - for net, scale_weight, i in zip(self.nets[1:], scale_weights[1:], list(range(1, len(scale_weights)))): - downscaled_image = f.interpolate(input_tensor, scale_factor=self.scale_factor**(-i), mode='bilinear') + for net, scale_weight, i in zip(self.nets[1:], scale_weights[1:], + list(range(1, len(scale_weights)))): + downscaled_image = f.interpolate( + input_tensor, + scale_factor=self.scale_factor**(-i), + mode='bilinear') result_map_for_current_scale = net(downscaled_image) - upscaled_result_map_for_current_scale = f.interpolate(result_map_for_current_scale, - size=map_size, - mode='bilinear') + upscaled_result_map_for_current_scale = f.interpolate( + result_map_for_current_scale, size=map_size, mode='bilinear') aggregated_result_maps_from_all_scales += upscaled_result_map_for_current_scale * scale_weight return aggregated_result_maps_from_all_scales @@ -279,18 +327,23 @@ def __init__(self, n_layers, scale=0.5, base_channels=64, use_bias=True): in_channel_power = scale > 1 out_channel_power = scale < 1 - i_range = list(range(n_layers)) if scale < 1 else list(range(n_layers-1, -1, -1)) + i_range = list(range(n_layers)) if scale < 1 else list( + range(n_layers - 1, -1, -1)) for i in i_range: - self.conv_layers[i] = nn.Sequential(nn.ReflectionPad2d(1), - nn.utils.spectral_norm(nn.Conv2d( - in_channels=base_channels * 2 ** (i + in_channel_power), - out_channels=base_channels * 2 ** (i + out_channel_power), - kernel_size=3, - stride=1, - bias=use_bias)), - normalization_layer(base_channels * 2 ** (i + out_channel_power)), - nn.LeakyReLU(0.2, True)) + self.conv_layers[i] = nn.Sequential( + nn.ReflectionPad2d(1), + nn.utils.spectral_norm( + nn.Conv2d(in_channels=base_channels * + 2**(i + in_channel_power), + out_channels=base_channels * + 2**(i + out_channel_power), + kernel_size=3, + stride=1, + bias=use_bias)), + normalization_layer(base_channels * + 2**(i + out_channel_power)), + nn.LeakyReLU(0.2, True)) self.add_module("conv_%d" % i, self.conv_layers[i]) if scale > 1: @@ -298,7 +351,11 @@ def __init__(self, n_layers, scale=0.5, base_channels=64, use_bias=True): self.max_pool = nn.MaxPool2d(2, 2) - def forward(self, input_tensor, pyramid=None, return_all_scales=False, skip=False): + def forward(self, + input_tensor, + pyramid=None, + return_all_scales=False, + skip=False): feature_map = input_tensor all_scales = [] @@ -308,12 +365,14 @@ def forward(self, input_tensor, pyramid=None, return_all_scales=False, skip=Fals for i, conv_layer in enumerate(self.conv_layers): if self.scale > 1.0: - feature_map = f.interpolate(feature_map, scale_factor=self.scale, mode='nearest') + feature_map = f.interpolate(feature_map, + scale_factor=self.scale, + mode='nearest') feature_map = conv_layer(feature_map) if skip: - feature_map = feature_map + pyramid[-i-2] + feature_map = feature_map + pyramid[-i - 2] if self.scale < 1.0: feature_map = self.max_pool(feature_map) @@ -321,7 +380,8 @@ def forward(self, input_tensor, pyramid=None, return_all_scales=False, skip=Fals if return_all_scales: all_scales.append(feature_map) - return (feature_map, all_scales) if return_all_scales else (feature_map, None) + return (feature_map, + all_scales) if return_all_scales else (feature_map, None) class RandomCrop(nn.Module): @@ -336,18 +396,29 @@ def __init__(self, crop_size, return_pos=False, must_divide=4.0): def forward(self, input_tensors, crop_size=None): im_v_sz, im_h_sz = input_tensors[0].shape[2:] if crop_size is None: - cr_v_sz, cr_h_sz = np.clip(self.crop_size, [0, 0], [im_v_sz-1, im_h_sz-1]) - cr_v_sz, cr_h_sz = np.uint32(np.floor(np.array([cr_v_sz, cr_h_sz]) - * 1.0 / self.must_divide) * self.must_divide) + cr_v_sz, cr_h_sz = np.clip(self.crop_size, [0, 0], + [im_v_sz - 1, im_h_sz - 1]) + cr_v_sz, cr_h_sz = np.uint32( + np.floor( + np.array([cr_v_sz, cr_h_sz]) * 1.0 / self.must_divide) * + self.must_divide) else: cr_v_sz, cr_h_sz = crop_size - top_left_v, top_left_h = [np.random.randint(0, im_v_sz - cr_v_sz), np.random.randint(0, im_h_sz - cr_h_sz)] + top_left_v, top_left_h = [ + np.random.randint(0, im_v_sz - cr_v_sz), + np.random.randint(0, im_h_sz - cr_h_sz) + ] - out_tensors = [input_tensor[:, :, top_left_v:top_left_v + cr_v_sz, top_left_h:top_left_h + cr_h_sz] - if input_tensor is not None else None for input_tensor in input_tensors] + out_tensors = [ + input_tensor[:, :, top_left_v:top_left_v + cr_v_sz, + top_left_h:top_left_h + + cr_h_sz] if input_tensor is not None else None + for input_tensor in input_tensors + ] - return (out_tensors, (top_left_v, top_left_h)) if self.return_pos else out_tensors + return (out_tensors, (top_left_v, + top_left_h)) if self.return_pos else out_tensors class SwapCrops(nn.Module): @@ -363,28 +434,46 @@ def __init__(self, min_crop_size, max_crop_size, mask_width=5): self.mask_width = mask_width def forward(self, input_tensor): - cr_v_sz, cr_h_sz = np.uint32(np.random.rand(2) * (self.max_crop_size - self.min_crop_size) + self.min_crop_size) + cr_v_sz, cr_h_sz = np.uint32( + np.random.rand(2) * (self.max_crop_size - self.min_crop_size) + + self.min_crop_size) - [crop_1], (top_left_v_1, top_left_h_1) = self.rand_crop_1.forward([input_tensor], (cr_v_sz, cr_h_sz)) - [crop_2], (top_left_v_2, top_left_h_2) = self.rand_crop_1.forward([input_tensor], (cr_v_sz, cr_h_sz)) + [crop_1], (top_left_v_1, + top_left_h_1) = self.rand_crop_1.forward([input_tensor], + (cr_v_sz, cr_h_sz)) + [crop_2], (top_left_v_2, + top_left_h_2) = self.rand_crop_1.forward([input_tensor], + (cr_v_sz, cr_h_sz)) output_tensor = torch.zeros_like(input_tensor) output_tensor[:, :, :, :] = input_tensor - output_tensor[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, top_left_h_1:top_left_h_1 + cr_h_sz] = crop_2 - output_tensor[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, top_left_h_2:top_left_h_2 + cr_h_sz] = crop_1 + output_tensor[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, + top_left_h_1:top_left_h_1 + cr_h_sz] = crop_2 + output_tensor[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, + top_left_h_2:top_left_h_2 + cr_h_sz] = crop_1 # Creating a mask. this is drawing a line in width 2*mask_width over the boundaries of the cropped image loss_mask = torch.ones_like(input_tensor) mw = self.mask_width - loss_mask[:, :, top_left_v_1:top_left_v_1+cr_v_sz, top_left_h_1-mw:top_left_h_1+mw] = 0 - loss_mask[:, :, top_left_v_1-mw:top_left_v_1+mw, top_left_h_1:top_left_h_1+cr_h_sz] = 0 - loss_mask[:, :, top_left_v_1:top_left_v_1+cr_v_sz, top_left_h_1+cr_h_sz-mw:top_left_h_1+cr_h_sz+mw] = 0 - loss_mask[:, :, top_left_v_1+cr_v_sz-mw:top_left_v_1+cr_v_sz+mw, top_left_h_1:top_left_h_1+cr_h_sz] = 0 - loss_mask[:, :, top_left_v_2:top_left_v_2+cr_v_sz, top_left_h_2-mw:top_left_h_2+mw] = 0 - loss_mask[:, :, top_left_v_2-mw:top_left_v_2+mw, top_left_h_2:top_left_h_2+cr_h_sz] = 0 - loss_mask[:, :, top_left_v_2:top_left_v_2+cr_v_sz, top_left_h_2+cr_h_sz-mw:top_left_h_2+cr_h_sz+mw] = 0 - loss_mask[:, :, top_left_v_2+cr_v_sz-mw:top_left_v_2+cr_v_sz+mw, top_left_h_2:top_left_h_2+cr_h_sz] = 0 + loss_mask[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, + top_left_h_1 - mw:top_left_h_1 + mw] = 0 + loss_mask[:, :, top_left_v_1 - mw:top_left_v_1 + mw, + top_left_h_1:top_left_h_1 + cr_h_sz] = 0 + loss_mask[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, + top_left_h_1 + cr_h_sz - mw:top_left_h_1 + cr_h_sz + mw] = 0 + loss_mask[:, :, + top_left_v_1 + cr_v_sz - mw:top_left_v_1 + cr_v_sz + mw, + top_left_h_1:top_left_h_1 + cr_h_sz] = 0 + loss_mask[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, + top_left_h_2 - mw:top_left_h_2 + mw] = 0 + loss_mask[:, :, top_left_v_2 - mw:top_left_v_2 + mw, + top_left_h_2:top_left_h_2 + cr_h_sz] = 0 + loss_mask[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, + top_left_h_2 + cr_h_sz - mw:top_left_h_2 + cr_h_sz + mw] = 0 + loss_mask[:, :, + top_left_v_2 + cr_v_sz - mw:top_left_v_2 + cr_v_sz + mw, + top_left_h_2:top_left_h_2 + cr_h_sz] = 0 return output_tensor, loss_mask @@ -397,9 +486,14 @@ def forward(self, input_tensor, target_size, shifts): sz = input_tensor.shape theta = homography_based_on_top_corners_x_shift(shifts) - pad = f.pad(input_tensor, (np.abs(np.int(np.ceil(sz[3] * shifts[0]))), np.abs(np.int(np.ceil(-sz[3] * shifts[1]))), 0, 0), 'reflect') - target_size4d = torch.Size([pad.shape[0], pad.shape[1], target_size[0], target_size[1]]) + pad = f.pad(input_tensor, + (np.abs(np.int(np.ceil(sz[3] * shifts[0]))), + np.abs(np.int(np.ceil(-sz[3] * shifts[1]))), 0, 0), + 'reflect') + target_size4d = torch.Size( + [pad.shape[0], pad.shape[1], target_size[0], target_size[1]]) - grid = homography_grid(theta.expand(pad.shape[0], -1, -1), target_size4d) + grid = homography_grid(theta.expand(pad.shape[0], -1, -1), + target_size4d) return f.grid_sample(pad, grid, mode='bilinear', padding_mode='border') diff --git a/non_rect.py b/non_rect.py index 4652ac3..b2c02d3 100644 --- a/non_rect.py +++ b/non_rect.py @@ -9,14 +9,16 @@ def affine_based_on_top_left_corner_x_shift(rand_affine): :param sig: amount of random x perturbation :return: forward and backward affine transforms """ - aff = np.array([[1., -0.5 * rand_affine, 0.5 * rand_affine], [0, 1., 0]], dtype=np.float32) + aff = np.array([[1., -0.5 * rand_affine, 0.5 * rand_affine], [0, 1., 0]], + dtype=np.float32) return torch.from_numpy(aff).clone().cuda() def apply_resize_and_affine(x, target_size, rand_affine): aff = affine_based_on_top_left_corner_x_shift(rand_affine) - target_size4d = torch.Size([x.shape[0], x.shape[1], target_size[0], target_size[1]]) + target_size4d = torch.Size( + [x.shape[0], x.shape[1], target_size[0], target_size[1]]) grid = f.affine_grid(aff.expand(x.shape[0], -1, -1), target_size4d) out = f.grid_sample(x, grid, mode='bilinear', padding_mode='border') return out @@ -35,7 +37,8 @@ def homography_grid(theta, size): Returns: output (Tensor): output Tensor of size (:math:`N \times H \times W \times 2`) """ - y, x = torch.meshgrid((torch.linspace(-1., 1., size[-2]), torch.linspace(-1., 1., size[-1]))) + y, x = torch.meshgrid( + (torch.linspace(-1., 1., size[-2]), torch.linspace(-1., 1., size[-1]))) n = size[-2] * size[-1] hxy = torch.ones(n, 3, dtype=torch.float) hxy[:, 0] = x.contiguous().view(-1) @@ -48,7 +51,8 @@ def homography_grid(theta, size): def apply_resize_and_homograhpy(x, target_size, rand_h): theta = homography_based_on_top_corners_x_shift(rand_h) - target_size4d = torch.Size([x.shape[0], x.shape[1], target_size[0], target_size[1]]) + target_size4d = torch.Size( + [x.shape[0], x.shape[1], target_size[0], target_size[1]]) grid = homography_grid(theta.expand(x.shape[0], -1, -1), target_size4d) out = f.grid_sample(x, grid, mode='bilinear', padding_mode='border') return out @@ -66,15 +70,16 @@ def homography_based_on_top_corners_x_shift(rand_h): # [0, 0, 0, -1, 0, -1, 0, 0, 0], # [0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=np.float32) # play with top left and bottom right - p = np.array([[1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), -1. + rand_h[0]], - [0, 0, 0, 1., 1., -1., 1., 1., -1.], - [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], - [0, 0, 0, -1, -1, -1, 1, 1, 1], - [1, 0, -1, 0, 0, 0, 1, 0, -1], - [0, 0, 0, 1, 0, -1, 0, 0, 0], - [-1, 0, -1, 0, 0, 0, 1, 0, 1], - [0, 0, 0, -1, 0, -1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=np.float32) + p = np.array( + [[ + 1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), + -1. + rand_h[0] + ], [0, 0, 0, 1., 1., -1., 1., 1., -1.], + [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], + [0, 0, 0, -1, -1, -1, 1, 1, 1], [1, 0, -1, 0, 0, 0, 1, 0, -1], + [0, 0, 0, 1, 0, -1, 0, 0, 0], [-1, 0, -1, 0, 0, 0, 1, 0, 1], + [0, 0, 0, -1, 0, -1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1]], + dtype=np.float32) b = np.zeros((9, 1), dtype=np.float32) b[8, 0] = 1. h = np.dot(np.linalg.inv(p), b) @@ -82,16 +87,21 @@ def homography_based_on_top_corners_x_shift(rand_h): def apply_resize_and_radial(x, target_size, rand_r): - target_size4d = torch.Size([x.shape[0], x.shape[1], target_size[0], target_size[1]]) + target_size4d = torch.Size( + [x.shape[0], x.shape[1], target_size[0], target_size[1]]) grid = make_radial_scale_grid(rand_r, target_size4d) out = f.grid_sample(x, grid, mode='bilinear', padding_mode='border') return out + def make_radial_scale_grid(rand_r, size4d): - y, x = torch.meshgrid((torch.linspace(-1., 1., size4d[-2]), torch.linspace(-1., 1., size4d[-1]))) + y, x = torch.meshgrid( + (torch.linspace(-1., 1., + size4d[-2]), torch.linspace(-1., 1., size4d[-1]))) theta = torch.atan2(x, y) r = torch.sqrt() + ''' def test_time(): def _make_pink_noise(sz_): @@ -156,4 +166,4 @@ def _make_homography_mask(in_mask, target_size, rand_h): frame = util.tensor2im(out*out_mask - 1 + out_mask) writer.writeFrame(frame) writer.close() -''' \ No newline at end of file +''' diff --git a/supp_video.py b/supp_video.py index ec0e5c2..fffd628 100644 --- a/supp_video.py +++ b/supp_video.py @@ -6,28 +6,54 @@ from non_rect import * from SceneScripts import * - FRAME_SHAPE = [500, 1000] MUST_DIVIDE = 8 VIDEO_SCRIPT = [ # [nameses, script_name, script_params=(min_v, max_v, min_h, max_h, max_t, repeat)] -[[['fruits'], ['fruits_old'], ['fruits_old'], ['fruits'], ['fruits']], ['horizontal_grow_shrink_slow', 'vertical_grow_shrink', 'resize_round', 'affine_dance', 'random'], [[0.55, None, 0.55, None, None, 1], [0.3, 1.8, 0.3, 2.0, None, 1, False], [0.3, 1.8, 0.3, 2.0, None, 1, False], [None, None, None, None, 0.45, 1, False], [0.3, 1.3, 0.3, 1.6, 0.45, 1, False]]], -['farm_house', 'special_resize_round', [0.45, None, 0.45, None, None, 2]], -['cab_building', 'resize_round', [0.5, None, 0.3, 2.5, None, 2]], -['rome', 'horizontal_grow_shrink', [0.3, None, 0.3, None, None, 3]], -[[['peacock', 'windows']], 'resize_round', [0.5, 2, 0.5, 1.75, None, 3]], -[[['soldiers', 'penguins']], 'horizontal_grow_shrink', [0.3, None, 0.3, None, None, 3]], -[[['nkorea', 'sapa']], 'horizontal_grow_shrink', [0.15, None, 0.15, None, None, 3]], -[[['quilt']] * 5, ['horizontal_grow_shrink', 'vertical_grow_shrink', 'resize_round', 'affine_dance', 'random'], [[0.55, None, 0.55, None, None, 1], [0.3, None, 0.3, None, None, 1, False], [0.3, None, 0.3, None, None, 1, False], [None, None, None, None, 0.45, 1, False], [0.6, 1.6, 0.6, 1.75, 0.55, 1, False]]], -[[['umbrella'], ['umbrella'], ['umbrella']], ['horizontal_grow_shrink', 'resize_round', 'trapezoids'], [[0.55, None, 0.55, None, None, 1], [0.55, None, 0.55, None, None, 1, False], [1, 1, 0.8, 1.2, 0.3, 1, False]]], -[[['metal_circles']] * 5, ['vertical_grow_shrink', 'random'], [[0.15, None, 0.55, None, None, 2], [0.15, 1.8, 0.15, 1.45, 0.55, 1, False]]], -[[['fish'], ['fish']], ['affine_dance', 'random'], [[1, 1, 1, 1, 0.4, 1], [1, 1, 1, 1, 0.5, 1, False]]], -['wood', 'special_zoom', [0.3, None, 0.3, None, None, 2]], -['ny', 'affine_dance', [None, None, None, None, 0.3, 2]], -['sushi', 'resize_round', [0.5, None, 0.3, None, None, 1]], + [[['fruits'], ['fruits_old'], ['fruits_old'], ['fruits'], ['fruits']], + [ + 'horizontal_grow_shrink_slow', 'vertical_grow_shrink', 'resize_round', + 'affine_dance', 'random' + ], + [[0.55, None, 0.55, None, None, 1], [0.3, 1.8, 0.3, 2.0, None, 1, False], + [0.3, 1.8, 0.3, 2.0, None, 1, False], + [None, None, None, None, 0.45, 1, False], + [0.3, 1.3, 0.3, 1.6, 0.45, 1, False]]], + ['farm_house', 'special_resize_round', [0.45, None, 0.45, None, None, 2]], + ['cab_building', 'resize_round', [0.5, None, 0.3, 2.5, None, 2]], + ['rome', 'horizontal_grow_shrink', [0.3, None, 0.3, None, None, 3]], + [[['peacock', 'windows']], 'resize_round', [0.5, 2, 0.5, 1.75, None, 3]], + [[['soldiers', 'penguins']], 'horizontal_grow_shrink', + [0.3, None, 0.3, None, None, 3]], + [[['nkorea', 'sapa']], 'horizontal_grow_shrink', + [0.15, None, 0.15, None, None, 3]], + [[['quilt']] * 5, + [ + 'horizontal_grow_shrink', 'vertical_grow_shrink', 'resize_round', + 'affine_dance', 'random' + ], + [[0.55, None, 0.55, None, None, 1], + [0.3, None, 0.3, None, None, 1, False], + [0.3, None, 0.3, None, None, 1, False], + [None, None, None, None, 0.45, 1, False], + [0.6, 1.6, 0.6, 1.75, 0.55, 1, False]]], + [[['umbrella'], ['umbrella'], ['umbrella']], + ['horizontal_grow_shrink', 'resize_round', 'trapezoids'], + [[0.55, None, 0.55, None, None, 1], + [0.55, None, 0.55, None, None, 1, False], + [1, 1, 0.8, 1.2, 0.3, 1, False]]], + [[['metal_circles']] * 5, ['vertical_grow_shrink', 'random'], + [[0.15, None, 0.55, None, None, 2], + [0.15, 1.8, 0.15, 1.45, 0.55, 1, False]]], + [[['fish'], ['fish']], ['affine_dance', 'random'], + [[1, 1, 1, 1, 0.4, 1], [1, 1, 1, 1, 0.5, 1, False]]], + ['wood', 'special_zoom', [0.3, None, 0.3, None, None, 2]], + ['ny', 'affine_dance', [None, None, None, None, 0.3, 2]], + ['sushi', 'resize_round', [0.5, None, 0.3, None, None, 1]], ] -def generate_one_frame(gan, input_tensor, frame_shape, scale, geo_shifts, center): +def generate_one_frame(gan, input_tensor, frame_shape, scale, geo_shifts, + center): with torch.no_grad(): base_sz = input_tensor.shape in_size = base_sz[2:] @@ -49,15 +75,21 @@ def generate_one_frame(gan, input_tensor, frame_shape, scale, geo_shifts, center run_reconstruct=False) out = out_mask * output_tensor[1] - 1 + out_mask - margin = np.uint16((frame_shape - np.array(out_size)) / 2) if center else [0, 0] - out_pad[margin[0]:margin[0] + out_size[0], margin[1]:margin[1] + out_size[1], :] = util.hist_match(util.tensor2im(out), util.tensor2im(input_tensor), util.tensor2im(out_mask)) + margin = np.uint16( + (frame_shape - np.array(out_size)) / 2) if center else [0, 0] + out_pad[margin[0]:margin[0] + out_size[0], + margin[1]:margin[1] + out_size[1], :] = util.hist_match( + util.tensor2im(out), util.tensor2im(input_tensor), + util.tensor2im(out_mask)) return out_pad def generate_one_scene(gan, input_tensor, scene_script, frame_shape, center): frames = [] for i, (scale_v, scale_h, shift_l, shift_r) in enumerate(scene_script): - output_image = generate_one_frame(gan, input_tensor, frame_shape, [scale_v, scale_h], [shift_l, shift_r], center) + output_image = generate_one_frame(gan, input_tensor, frame_shape, + [scale_v, scale_h], + [shift_l, shift_r], center) frames.append(output_image) return np.stack(frames, axis=0) @@ -68,21 +100,29 @@ def generate_full_video(video_script, frame_shape): conf.output_dir_path = util.prepare_result_dir(conf) n_scenes = len(video_script) - for i, (nameses, scene_script_names, scene_script_params) in enumerate(video_script): + for i, (nameses, scene_script_names, + scene_script_params) in enumerate(video_script): if not isinstance(nameses, list): nameses = [[nameses]] if not isinstance(scene_script_names, list): scene_script_names = [scene_script_names] scene_script_params = [scene_script_params] scenes = [] - for names, scene_script_name, scene_script_param in zip(nameses, scene_script_names, scene_script_params): + for names, scene_script_name, scene_script_param in zip( + nameses, scene_script_names, scene_script_params): partial_screen_scenes = [] for name in names: - conf.input_image_path = [os.path.dirname(os.path.abspath(__file__)) + '/' + INPUT_DICT[name][0]] - conf.test_params_path = os.path.dirname(os.path.abspath(__file__)) + INPUT_DICT[name][1] + conf.input_image_path = [ + os.path.dirname(os.path.abspath(__file__)) + '/' + + INPUT_DICT[name][0] + ] + conf.test_params_path = os.path.dirname( + os.path.abspath(__file__)) + INPUT_DICT[name][1] gan = InGAN(conf) - gan.G.load_state_dict(torch.load(conf.test_params_path, map_location='cuda:0')['G']) + gan.G.load_state_dict( + torch.load(conf.test_params_path, + map_location='cuda:0')['G']) [input_tensor] = util.read_data(conf) cur_frame_shape = frame_shape[:] @@ -92,38 +132,54 @@ def generate_full_video(video_script, frame_shape): cur_scene_script_param = scene_script_param[:] if scene_script_param[1] is None: - cur_scene_script_param[1] = cur_frame_shape[0] * 1.0 / input_tensor.shape[2] + cur_scene_script_param[ + 1] = cur_frame_shape[0] * 1.0 / input_tensor.shape[2] print('max scale vertical:', cur_scene_script_param[1]) if cur_scene_script_param[3] is None: - cur_scene_script_param[3] = cur_frame_shape[1] * 1.0 / input_tensor.shape[3] + cur_scene_script_param[ + 3] = cur_frame_shape[1] * 1.0 / input_tensor.shape[3] print('max scale horizontal:', cur_scene_script_param[3]) - scene_script = make_scene_script(scene_script_name, *cur_scene_script_param) + scene_script = make_scene_script(scene_script_name, + *cur_scene_script_param) center = (cur_scene_script_param[4] is not None) - - scene = generate_one_scene(gan, input_tensor, scene_script, np.array([cur_frame_shape[0], cur_frame_shape[1]]), center) + scene = generate_one_scene( + gan, input_tensor, scene_script, + np.array([cur_frame_shape[0], cur_frame_shape[1]]), center) partial_screen_scenes.append(scene) print('Done with %s, (scene %d/%d)' % (name, i + 1, n_scenes)) - - scene = np.concatenate(partial_screen_scenes, axis=concat_axis) if len(partial_screen_scenes) > 1 else partial_screen_scenes[0] + scene = np.concatenate( + partial_screen_scenes, axis=concat_axis + ) if len(partial_screen_scenes) > 1 else partial_screen_scenes[0] scenes.append(scene) scene = np.concatenate(scenes, axis=0) - outputdict = {'-b:v': '30000000', '-r': '100.0', - '-vf': 'drawtext="text=\'Input image\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)*7/8:enable=\'between(t,0,2)\'"', - '-preset': 'slow', '-profile:v': 'high444', '-level:v': '4.0', '-crf': '22'} + outputdict = { + '-b:v': '30000000', + '-r': '100.0', + '-vf': + 'drawtext="text=\'Input image\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)*7/8:enable=\'between(t,0,2)\'"', + '-preset': 'slow', + '-profile:v': 'high444', + '-level:v': '4.0', + '-crf': '22' + } if len(names) > 1: - outputdict['-vf'] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,2)\'"' + outputdict[ + '-vf'] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,2)\'"' if not scene_script_params[-1]: - outputdict['-vf'] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,0)\'"' + outputdict[ + '-vf'] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,0)\'"' - writer = FFmpegWriter(conf.output_dir_path + '/vid%d_%s.mp4' % (i, '_'.join(names)), verbosity=1, + writer = FFmpegWriter(conf.output_dir_path + '/vid%d_%s.mp4' % + (i, '_'.join(names)), + verbosity=1, outputdict=outputdict) for frame in scene: for j in range(3): @@ -134,19 +190,28 @@ def generate_full_video(video_script, frame_shape): def prepare_geometric(base_sz, scale, geo_shifts): pad_l = np.abs(np.int(np.ceil(base_sz[3] * geo_shifts[0]))) pad_r = np.abs(np.int(np.ceil(base_sz[3] * geo_shifts[1]))) - in_mask = torch.zeros(base_sz[0], base_sz[1], base_sz[2], pad_l + base_sz[3] + pad_r).cuda() + in_mask = torch.zeros(base_sz[0], base_sz[1], base_sz[2], + pad_l + base_sz[3] + pad_r).cuda() in_size = in_mask.shape[2:] - out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / MUST_DIVIDE) * MUST_DIVIDE), - np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / MUST_DIVIDE) * MUST_DIVIDE)) + out_size = (np.uint32( + np.floor(scale[0] * in_size[0] * 1.0 / MUST_DIVIDE) * MUST_DIVIDE), + np.uint32( + np.floor(scale[1] * in_size[1] * 1.0 / MUST_DIVIDE) * + MUST_DIVIDE)) if pad_r > 0: in_mask[:, :, :, pad_l:-pad_r] = torch.ones(base_sz) else: in_mask[:, :, :, pad_l:] = torch.ones(base_sz) theta = homography_based_on_top_corners_x_shift(geo_shifts) - target_size4d = torch.Size([in_mask.shape[0], in_mask.shape[1], out_size[0], out_size[1]]) - grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), target_size4d) - out_mask = f.grid_sample(in_mask, grid, mode='bilinear', padding_mode='zeros') + target_size4d = torch.Size( + [in_mask.shape[0], in_mask.shape[1], out_size[0], out_size[1]]) + grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), + target_size4d) + out_mask = f.grid_sample(in_mask, + grid, + mode='bilinear', + padding_mode='zeros') return out_mask, out_size diff --git a/test.py b/test.py index 08e84c7..a110fe6 100644 --- a/test.py +++ b/test.py @@ -6,7 +6,10 @@ from configs import Config from traceback import print_exc from networks import GeoTransform -from non_rect import (apply_resize_and_radial, homography_based_on_top_corners_x_shift, apply_resize_and_homograhpy, homography_grid, apply_resize_and_affine) +from non_rect import (apply_resize_and_radial, + homography_based_on_top_corners_x_shift, + apply_resize_and_homograhpy, homography_grid, + apply_resize_and_affine) def test_one_scale(gan, diff --git a/test_util.py b/test_util.py index a6a2a91..af1ec9d 100644 --- a/test_util.py +++ b/test_util.py @@ -13,6 +13,7 @@ def test_image(): def test_tensor2im(test_image): - tensor = torch.tensor(test_image).permute(2, 0, 1).unsqueeze(0) / 255. * 2 - 1 + tensor = torch.tensor(test_image).permute(2, 0, + 1).unsqueeze(0) / 255. * 2 - 1 img = tensor2im(tensor) assert np.allclose(img, test_image) diff --git a/train.py b/train.py index 7f29d8f..4e8b094 100644 --- a/train.py +++ b/train.py @@ -4,7 +4,6 @@ from util import Visualizer, read_data from traceback import print_exc - # Load configuration conf = Config().parse() @@ -39,19 +38,26 @@ except KeyboardInterrupt: raise except Exception as e: - print('Something went wrong in iteration %d, While testing or visualizing.' % i) + print( + 'Something went wrong in iteration %d, While testing or visualizing.' + % i) print_exc() # Save snapshot when needed try: if i > 0 and not i % conf.save_snapshot_freq: - gan.save(os.path.join(conf.output_dir_path, 'checkpoint_%07d.pth.tar' % i)) + gan.save( + os.path.join(conf.output_dir_path, + 'checkpoint_%07d.pth.tar' % i)) del gan gan = InGAN(conf) - gan.resume(os.path.join(conf.output_dir_path, 'checkpoint_%07d.pth.tar' % i)) + gan.resume( + os.path.join(conf.output_dir_path, + 'checkpoint_%07d.pth.tar' % i)) visualizer.gan = gan except KeyboardInterrupt: raise except Exception as e: - print('Something went wrong in iteration %d, While saving snapshot.' % i) + print('Something went wrong in iteration %d, While saving snapshot.' % + i) print_exc() diff --git a/train_supp_mat.py b/train_supp_mat.py index 68480da..8d9ed49 100644 --- a/train_supp_mat.py +++ b/train_supp_mat.py @@ -41,7 +41,10 @@ def main(): for imgname in os.listdir(base_dir): full_img_name = os.path.join(base_dir, imgname) short_name = os.path.splitext(imgname)[0] - cmd = ['python', 'train.py', '--input_image_path', full_img_name, '--gpu_id', '0'] + cmd = [ + 'python', 'train.py', '--input_image_path', full_img_name, + '--gpu_id', '0' + ] for aname, aa in list(abl_args.items()): exp_name = '{}_{}'.format(short_name, aname) full_cmd = cmd + aa + ['--name', exp_name] diff --git a/util.py b/util.py index d2223c8..d144d54 100644 --- a/util.py +++ b/util.py @@ -10,16 +10,19 @@ def read_data(conf): - input_images = [read_shave_tensorize(path, conf.must_divide) for path in conf.input_image_path] + input_images = [ + read_shave_tensorize(path, conf.must_divide) + for path in conf.input_image_path + ] return input_images def read_shave_tensorize(path, must_divide): input_np = (np.array(Image.open(path).convert('RGB')) / 255.0) - input_np_shaved = input_np[:(input_np.shape[0] // must_divide) * must_divide, - :(input_np.shape[1] // must_divide) * must_divide, - :] + input_np_shaved = input_np[:(input_np.shape[0] // must_divide) * + must_divide, :(input_np.shape[1] // + must_divide) * must_divide, :] input_tensor = im2tensor(input_np_shaved) @@ -57,20 +60,32 @@ def im2tensor(image_numpy, int_flag=False, device=torch.device('cuda')): return torch.FloatTensor(transformed_image).unsqueeze(0).to(device) -def random_size(orig_size, curriculum=True, i=None, iter_for_max_range=None, must_divide=8.0, - min_scale=0.25, max_scale=2.0, max_transform_magniutude=0.3): - cur_max_scale = 1.0 + (max_scale - 1.0) * np.clip(1.0 * i / iter_for_max_range, 0, 1) if curriculum else max_scale - cur_min_scale = 1.0 + (min_scale - 1.0) * np.clip(1.0 * i / iter_for_max_range, 0, 1) if curriculum else min_scale - cur_max_transform_magnitude = (max_transform_magniutude * np.clip(1.0 * i / iter_for_max_range, 0, 1) +def random_size(orig_size, + curriculum=True, + i=None, + iter_for_max_range=None, + must_divide=8.0, + min_scale=0.25, + max_scale=2.0, + max_transform_magniutude=0.3): + cur_max_scale = 1.0 + (max_scale - 1.0) * np.clip( + 1.0 * i / iter_for_max_range, 0, 1) if curriculum else max_scale + cur_min_scale = 1.0 + (min_scale - 1.0) * np.clip( + 1.0 * i / iter_for_max_range, 0, 1) if curriculum else min_scale + cur_max_transform_magnitude = (max_transform_magniutude * + np.clip(1.0 * i / iter_for_max_range, 0, 1) if curriculum else max_transform_magniutude) # set random transformation magnitude. scalar = affine, pair = homography. - random_affine = -cur_max_transform_magnitude + 2 * cur_max_transform_magnitude * np.random.rand(2) + random_affine = -cur_max_transform_magnitude + 2 * cur_max_transform_magnitude * np.random.rand( + 2) # set new size for the output image - new_size = np.array(orig_size) * (cur_min_scale + (cur_max_scale - cur_min_scale) * np.random.rand(2)) + new_size = np.array(orig_size) * ( + cur_min_scale + (cur_max_scale - cur_min_scale) * np.random.rand(2)) - return tuple(np.uint32(np.ceil(new_size * 1.0 / must_divide) * must_divide)), random_affine + return tuple(np.uint32( + np.ceil(new_size * 1.0 / must_divide) * must_divide)), random_affine def image_concat(g_preds, d_preds=None, size=None): @@ -86,9 +101,12 @@ def image_concat(g_preds, d_preds=None, size=None): img = (np.concatenate([d_pred] * 3, 2) - 128) * 2 import cv2 # d_pred_new = imresize(img, g_pred.shape[0:2], interp='nearest') - d_pred_new = cv2.resize(img, dsize=g_pred.shape[0:2][::-1], interpolation=cv2.INTER_NEAREST) + d_pred_new = cv2.resize(img, + dsize=g_pred.shape[0:2][::-1], + interpolation=cv2.INTER_NEAREST) con = np.concatenate([g_pred, d_pred_new], 0) - result[hsize-g_pred.shape[0]:hsize+g_pred.shape[0], :g_pred.shape[1], :] = con + result[hsize - g_pred.shape[0]:hsize + + g_pred.shape[0], :g_pred.shape[1], :] = con else: result[hsize - g_pred.shape[0]:, :, :] = g_pred results.append(np.uint8(np.round(result))) @@ -101,16 +119,21 @@ def save_image(image_tensor, image_path): image_pil.save(image_path) -def get_scale_weights(i, max_i, start_factor, input_shape, min_size, num_scales_limit, scale_factor): - num_scales = np.min([np.int(np.ceil(np.log(np.min(input_shape) * 1.0 / min_size) - / np.log(scale_factor))), num_scales_limit]) +def get_scale_weights(i, max_i, start_factor, input_shape, min_size, + num_scales_limit, scale_factor): + num_scales = np.min([ + np.int( + np.ceil( + np.log(np.min(input_shape) * 1.0 / min_size) / + np.log(scale_factor))), num_scales_limit + ]) # if i > max_i * 2: # i = max_i * 2 - factor = start_factor ** ((max_i - i) * 1.0 / max_i) + factor = start_factor**((max_i - i) * 1.0 / max_i) - un_normed_weights = factor ** np.arange(num_scales) + un_normed_weights = factor**np.arange(num_scales) weights = un_normed_weights / np.sum(un_normed_weights) # # np.clip(i, 0, max_i) @@ -130,7 +153,9 @@ def __init__(self, gan, conf, test_inputs): self.D_loss_fake = [None] * conf.max_iters self.test_inputs = test_inputs - self.test_input_sizes = [test_input.shape[2:] for test_input in test_inputs] + self.test_input_sizes = [ + test_input.shape[2:] for test_input in test_inputs + ] if conf.reconstruct_loss_stop_iter > 0: self.Rec_loss = [None] * conf.max_iters @@ -146,10 +171,11 @@ def recreate_fig(self): self.d_map_real = self.fig.add_subplot(gs[7, 7]) # First plot data - self.plot_gan_loss = self.gan_loss.plot([], [], 'b-', - [], [], 'c--', + self.plot_gan_loss = self.gan_loss.plot([], [], 'b-', [], [], 'c--', [], [], 'r--') - self.gan_loss.legend(('Generator loss', 'Discriminator loss (real image)', 'Discriminator loss (fake image)')) + self.gan_loss.legend( + ('Generator loss', 'Discriminator loss (real image)', + 'Discriminator loss (fake image)')) self.gan_loss.set_ylim(0, 1) if self.conf.reconstruct_loss_stop_iter > 0: @@ -159,7 +185,8 @@ def recreate_fig(self): self.gan_loss.set_title('Gan Losses') self.reconstruct_loss.set_title('Reconstruction Loss') self.reconstruction.set_title('Reconstruction') - self.d_map_real.set_xlabel('Current Discriminator \n map for real example') + self.d_map_real.set_xlabel( + 'Current Discriminator \n map for real example') self.real_example.set_xlabel('Real example') self.result.set_title('Current result') @@ -173,19 +200,31 @@ def recreate_fig(self): def test_and_display(self, i): if not i % self.conf.print_freq and i > 0: - self.G_loss[i-self.conf.print_freq:i] = self.gan.losses_G_gan.detach().cpu().float().numpy().tolist() - self.D_loss_real[i-self.conf.print_freq:i] = self.gan.losses_D_real.detach().cpu().float().numpy().tolist() - self.D_loss_fake[i-self.conf.print_freq:i] = self.gan.losses_D_fake.detach().cpu().float().numpy().tolist() + self.G_loss[i - + self.conf.print_freq:i] = self.gan.losses_G_gan.detach( + ).cpu().float().numpy().tolist() + self.D_loss_real[i - self.conf. + print_freq:i] = self.gan.losses_D_real.detach( + ).cpu().float().numpy().tolist() + self.D_loss_fake[i - self.conf. + print_freq:i] = self.gan.losses_D_fake.detach( + ).cpu().float().numpy().tolist() if self.conf.reconstruct_loss_stop_iter > i: - self.Rec_loss[i-self.conf.print_freq:i] = self.gan.losses_G_reconstruct.detach().cpu().float().numpy().tolist() + self.Rec_loss[i - self.conf.print_freq: + i] = self.gan.losses_G_reconstruct.detach().cpu( + ).float().numpy().tolist() if self.conf.reconstruct_loss_stop_iter < i: - print(('iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, LR: %f' % - (i, self.G_loss[i-1], self.D_loss_real[i-1], self.D_loss_fake[i-1], + print(( + 'iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, LR: %f' + % (i, self.G_loss[i - 1], + self.D_loss_real[i - 1], self.D_loss_fake[i - 1], self.gan.lr_scheduler_G.get_lr()[0]))) else: - print(('iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, Rec_loss: %f, LR: %f' % - (i, self.G_loss[i-1], self.D_loss_real[i-1], self.D_loss_fake[i-1], self.Rec_loss[i-1], + print(( + 'iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, Rec_loss: %f, LR: %f' + % (i, self.G_loss[i - 1], self.D_loss_real[i - 1], + self.D_loss_fake[i - 1], self.Rec_loss[i - 1], self.gan.lr_scheduler_G.get_lr()[0]))) if not i % self.conf.display_freq and i > 0: @@ -207,37 +246,56 @@ def test_and_display(self, i): # g_preds, d_preds, reconstructs = self.gan.test(test_input, output_size, rand_h, test_input_size) g_preds = [self.gan.input_tensor_noised, self.gan.G_pred] - d_preds = [self.gan.D.forward(self.gan.input_tensor_noised.detach(), self.gan.scale_weights), - self.gan.d_pred_fake] + d_preds = [ + self.gan.D.forward(self.gan.input_tensor_noised.detach(), + self.gan.scale_weights), + self.gan.d_pred_fake + ] reconstructs = self.gan.reconstruct input_size = self.gan.input_tensor_noised.shape[2:] - result = image_concat(tensor2im(g_preds), tensor2im(d_preds), (input_size[0]*2, input_size[1]*2)) + result = image_concat(tensor2im(g_preds), tensor2im(d_preds), + (input_size[0] * 2, input_size[1] * 2)) self.plot_gan_loss[0].set_data(list(range(i)), self.G_loss[:i]) - self.plot_gan_loss[1].set_data(list(range(i)), self.D_loss_real[:i]) - self.plot_gan_loss[2].set_data(list(range(i)), self.D_loss_fake[:i]) + self.plot_gan_loss[1].set_data(list(range(i)), + self.D_loss_real[:i]) + self.plot_gan_loss[2].set_data(list(range(i)), + self.D_loss_fake[:i]) self.gan_loss.set_xlim(0, i) if self.conf.reconstruct_loss_stop_iter > i: - self.plot_reconstruct_loss[0].set_data(list(range(i)), self.Rec_loss[:i]) - self.reconstruct_loss.set_ylim(np.min(self.Rec_loss[:i]), np.max(self.Rec_loss[:i])) + self.plot_reconstruct_loss[0].set_data(list(range(i)), + self.Rec_loss[:i]) + self.reconstruct_loss.set_ylim(np.min(self.Rec_loss[:i]), + np.max(self.Rec_loss[:i])) self.reconstruct_loss.set_xlim(0, i) self.result.imshow(np.clip(result, 0, 255), vmin=0, vmax=255) - self.real_example.imshow(np.clip(tensor2im(self.gan.real_example[0:1, :, :, :]), 0, 255), vmin=0, vmax=255) - self.d_map_real.imshow(self.gan.d_pred_real[0:1, :, :, :].detach().cpu().float().numpy().squeeze(), - cmap='gray', vmin=0, vmax=1) + self.real_example.imshow(np.clip( + tensor2im(self.gan.real_example[0:1, :, :, :]), 0, 255), + vmin=0, + vmax=255) + self.d_map_real.imshow(self.gan.d_pred_real[ + 0:1, :, :, :].detach().cpu().float().numpy().squeeze(), + cmap='gray', + vmin=0, + vmax=1) if self.conf.reconstruct_loss_stop_iter > i: - self.reconstruction.imshow(np.clip(image_concat([tensor2im(reconstructs)]), 0, 255), vmin=0, vmax=255) + self.reconstruction.imshow(np.clip( + image_concat([tensor2im(reconstructs)]), 0, 255), + vmin=0, + vmax=255) plt.savefig(self.conf.output_dir_path + '/monitor_%d' % i) - save_image(self.gan.G_pred, self.conf.output_dir_path + '/result_iter_%d.png' % i) + save_image(self.gan.G_pred, + self.conf.output_dir_path + '/result_iter_%d.png' % i) def prepare_result_dir(conf): # Create results directory - conf.output_dir_path += '/' + conf.name + strftime('_%b_%d_%H_%M_%S', localtime()) + conf.output_dir_path += '/' + conf.name + strftime('_%b_%d_%H_%M_%S', + localtime()) os.makedirs(conf.output_dir_path) # Put a copy of all *.py files in results path, to be able to reproduce experimental results @@ -246,20 +304,24 @@ def prepare_result_dir(conf): for py_file in glob.glob(local_dir + '/*.py'): copy(py_file, conf.output_dir_path) if conf.resume: - copy(conf.resume, os.path.join(conf.output_dir_path, 'starting_checkpoint.pth.tar')) + copy( + conf.resume, + os.path.join(conf.output_dir_path, + 'starting_checkpoint.pth.tar')) return conf.output_dir_path def homography_based_on_top_corners_x_shift(rand_h): - p = np.array([[1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), -1. + rand_h[0]], - [0, 0, 0, 1., 1., -1., 1., 1., -1.], - [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], - [0, 0, 0, -1, -1, -1, 1, 1, 1], - [1, 0, -1, 0, 0, 0, 1, 0, -1], - [0, 0, 0, 1, 0, -1, 0, 0, 0], - [-1, 0, -1, 0, 0, 0, 1, 0, 1], - [0, 0, 0, -1, 0, -1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=np.float32) + p = np.array( + [[ + 1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), + -1. + rand_h[0] + ], [0, 0, 0, 1., 1., -1., 1., 1., -1.], + [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], + [0, 0, 0, -1, -1, -1, 1, 1, 1], [1, 0, -1, 0, 0, 0, 1, 0, -1], + [0, 0, 0, 1, 0, -1, 0, 0, 0], [-1, 0, -1, 0, 0, 0, 1, 0, 1], + [0, 0, 0, -1, 0, -1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1]], + dtype=np.float32) b = np.zeros((9, 1), dtype=np.float32) b[8, 0] = 1. h = np.dot(np.linalg.inv(p), b) @@ -281,7 +343,8 @@ def homography_grid(theta, size): """ a = 1 b = 1 - y, x = torch.meshgrid((torch.linspace(-b, b, np.int(size[-2]*a)), torch.linspace(-b, b, np.int(size[-1]*a)))) + y, x = torch.meshgrid((torch.linspace(-b, b, np.int(size[-2] * a)), + torch.linspace(-b, b, np.int(size[-1] * a)))) n = np.int(size[-2] * a) * np.int(size[-1] * a) hxy = torch.ones(n, 3, dtype=torch.float) hxy[:, 0] = x.contiguous().view(-1) @@ -289,7 +352,8 @@ def homography_grid(theta, size): out = hxy[None, ...].cuda().matmul(theta.transpose(1, 2)) # normalize out = out[:, :, :2] / out[:, :, 2:] - return out.view(theta.shape[0], np.int(size[-2]*a), np.int(size[-1]*a), 2) + return out.view(theta.shape[0], np.int(size[-2] * a), np.int(size[-1] * a), + 2) def hist_match(source, template, mask_3ch): @@ -315,7 +379,8 @@ def hist_match(source, template, mask_3ch): template = template.ravel() # get the set of unique pixel values and their corresponding indices and # counts - s_values, bin_idx, s_counts = np.unique(source_masked, return_inverse=True, + s_values, bin_idx, s_counts = np.unique(source_masked, + return_inverse=True, return_counts=True) t_values, t_counts = np.unique(template, return_counts=True) From 90260a8cb7bdbe2349d8ef75ca75ae7f32016757 Mon Sep 17 00:00:00 2001 From: bartolo1024 Date: Tue, 6 Jul 2021 13:56:55 +0200 Subject: [PATCH 09/13] yapf in environment --- environment.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 099fb21..83e6652 100644 --- a/environment.yml +++ b/environment.yml @@ -16,4 +16,5 @@ dependencies: - pip: - opencv-python - ipython - - pytest \ No newline at end of file + - pytest + - yapf From 833bcabd4fca29bafdad192f622fa33e07ac8575 Mon Sep 17 00:00:00 2001 From: bartolo1024 Date: Tue, 6 Jul 2021 13:57:20 +0200 Subject: [PATCH 10/13] Add workflow --- .github/workflow/pythonapp.yml | 34 ++++++++++++++++++++++++++++++++++ .yapfignore | 2 ++ 2 files changed, 36 insertions(+) create mode 100644 .github/workflow/pythonapp.yml create mode 100644 .yapfignore diff --git a/.github/workflow/pythonapp.yml b/.github/workflow/pythonapp.yml new file mode 100644 index 0000000..9ba0949 --- /dev/null +++ b/.github/workflow/pythonapp.yml @@ -0,0 +1,34 @@ +name: Python conda test + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: conda-incubator/setup-miniconda@v2 + with: + miniconda-version: "latest" + activate-environment: ingan + environment-file: environment.yml + python-version: 3.8 + auto-activate-base: false + - shell: bash -l {0} + run: | + conda info + conda list + - name: Test with pytest + shell: bash -l {0} + run: | + pytest . + - name: yapf + id: yapf + uses: diegovalenzuelaiturra/yapf-action@v0.0.1 + with: + args: . --recursive --diff + - name: Fail if yapf made changes + if: steps.yapf.outputs.exit-code == 2 + run: exit 1 diff --git a/.yapfignore b/.yapfignore new file mode 100644 index 0000000..fc95ba6 --- /dev/null +++ b/.yapfignore @@ -0,0 +1,2 @@ +results/ +examples/ From 76e6c6370565f99ba2c41c61b2f981a0b3ec0b36 Mon Sep 17 00:00:00 2001 From: bartolo1024 Date: Tue, 6 Jul 2021 14:01:57 +0200 Subject: [PATCH 11/13] change name workflow -> workflows --- .github/{workflow => workflows}/pythonapp.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{workflow => workflows}/pythonapp.yml (100%) diff --git a/.github/workflow/pythonapp.yml b/.github/workflows/pythonapp.yml similarity index 100% rename from .github/workflow/pythonapp.yml rename to .github/workflows/pythonapp.yml From 6d5bbda68da26a8c737eae8a05c405ef531f4d5b Mon Sep 17 00:00:00 2001 From: bartolo1024 Date: Fri, 16 Jul 2021 15:01:36 +0200 Subject: [PATCH 12/13] Facebook yapf style && fix video writter --- InGAN.py | 158 +++++-------- SceneScripts.py | 559 +++++++++++++++++++++++----------------------- configs.py | 258 ++++++++------------- environment.yml | 1 + networks.py | 226 ++++++------------- non_rect.py | 36 ++- setup.cfg | 8 + supp_video.py | 125 +++++------ test.py | 243 +++++++++----------- test_util.py | 3 +- train.py | 15 +- train_supp_mat.py | 5 +- util.py | 206 +++++++---------- 13 files changed, 779 insertions(+), 1064 deletions(-) create mode 100644 setup.cfg diff --git a/InGAN.py b/InGAN.py index 934c750..ca4dc2c 100644 --- a/InGAN.py +++ b/InGAN.py @@ -13,9 +13,7 @@ def __init__(self, start, end): self.end = end def __call__(self, citer): - return 1. - max( - 0., - float(citer - self.start) / float(self.end - self.start)) + return 1. - max(0., float(citer - self.start) / float(self.end - self.start)) # noinspection PyAttributeOutsideInit @@ -27,26 +25,20 @@ def __init__(self, conf): self.max_iters = conf.max_iters # Define input tensor - self.input_tensor = torch.FloatTensor(1, 3, conf.input_crop_size, - conf.input_crop_size).cuda() - self.real_example = torch.FloatTensor(1, 3, conf.output_crop_size, - conf.output_crop_size).cuda() + self.input_tensor = torch.FloatTensor(1, 3, conf.input_crop_size, conf.input_crop_size).cuda() + self.real_example = torch.FloatTensor(1, 3, conf.output_crop_size, conf.output_crop_size).cuda() # Define networks - self.G = networks.Generator(conf.G_base_channels, conf.G_num_resblocks, - conf.G_num_downscales, conf.G_use_bias, - conf.G_skip) - self.D = networks.MultiScaleDiscriminator(conf.output_crop_size, - self.conf.D_max_num_scales, - self.conf.D_scale_factor, - self.conf.D_base_channels) + self.G = networks.Generator( + conf.G_base_channels, conf.G_num_resblocks, conf.G_num_downscales, conf.G_use_bias, conf.G_skip + ) + self.D = networks.MultiScaleDiscriminator( + conf.output_crop_size, self.conf.D_max_num_scales, self.conf.D_scale_factor, self.conf.D_base_channels + ) self.GAN_loss_layer = networks.GANLoss() self.Reconstruct_loss = networks.WeightedMSELoss(use_L1=conf.use_L1) - self.RandCrop = networks.RandomCrop( - [conf.input_crop_size, conf.input_crop_size], - must_divide=conf.must_divide) - self.SwapCrops = networks.SwapCrops(conf.crop_swap_min_size, - conf.crop_swap_max_size) + self.RandCrop = networks.RandomCrop([conf.input_crop_size, conf.input_crop_size], must_divide=conf.must_divide) + self.SwapCrops = networks.SwapCrops(conf.crop_swap_min_size, conf.crop_swap_max_size) # Make all networks run on GPU self.G.cuda() @@ -66,20 +58,15 @@ def __init__(self, conf): self.losses_D_fake = torch.FloatTensor(conf.print_freq).cuda() self.losses_G_reconstruct = torch.FloatTensor(conf.print_freq).cuda() if self.conf.reconstruct_loss_stop_iter > 0: - self.losses_D_reconstruct = torch.FloatTensor( - conf.print_freq).cuda() + self.losses_D_reconstruct = torch.FloatTensor(conf.print_freq).cuda() # Initialize networks self.G.apply(networks.weights_init) self.D.apply(networks.weights_init) # Initialize optimizers - self.optimizer_G = torch.optim.Adam(self.G.parameters(), - lr=conf.g_lr, - betas=(conf.beta1, 0.999)) - self.optimizer_D = torch.optim.Adam(self.D.parameters(), - lr=conf.d_lr, - betas=(conf.beta1, 0.999)) + self.optimizer_G = torch.optim.Adam(self.G.parameters(), lr=conf.g_lr, betas=(conf.beta1, 0.999)) + self.optimizer_D = torch.optim.Adam(self.D.parameters(), lr=conf.d_lr, betas=(conf.beta1, 0.999)) # Learning rate scheduler # First define linearly decaying functions (decay starts at a special iter) @@ -89,10 +76,8 @@ def __init__(self, conf): # return 1 - max(0, 1.0 * (n_iter - start_decay) / (conf.max_iters - start_decay)) lr_function = LRPolicy(start_decay, end_decay) # Define learning rate schedulers - self.lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR( - self.optimizer_G, lr_function) - self.lr_scheduler_D = torch.optim.lr_scheduler.LambdaLR( - self.optimizer_D, lr_function) + self.lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(self.optimizer_G, lr_function) + self.lr_scheduler_D = torch.optim.lr_scheduler.LambdaLR(self.optimizer_D, lr_function) # # do we resume from checkpoint? # if self.conf.resume: @@ -116,7 +101,8 @@ def save(self, citer=None): 'sched_D': self.lr_scheduler_D.state_dict(), 'loss': self.GAN_loss_layer.state_dict(), 'iter': citer if citer else self.cur_iter - }, os.path.join(self.conf.output_dir_path, filename)) + }, os.path.join(self.conf.output_dir_path, filename) + ) def resume(self, resume_path, test_flag=False): resume = torch.load(resume_path, map_location={'cuda:5': 'cuda:0'}) @@ -151,23 +137,15 @@ def resume(self, resume_path, test_flag=False): else: missing.append('GAN loss') if len(missing): - warnings.warn( - 'Missing the following state dicts from checkpoint: {}'.format( - ', '.join(missing))) + warnings.warn('Missing the following state dicts from checkpoint: {}'.format(', '.join(missing))) print(('resuming checkpoint {}'.format(self.conf.resume))) - def test(self, - input_tensor, - output_size, - rand_affine, - input_size, - run_d_pred=True, - run_reconstruct=True): + def test(self, input_tensor, output_size, rand_affine, input_size, run_d_pred=True, run_reconstruct=True): with torch.no_grad(): - self.G_pred = self.G.forward(Variable(input_tensor.detach()), - output_size=output_size, - random_affine=rand_affine) + self.G_pred = self.G.forward( + Variable(input_tensor.detach()), output_size=output_size, random_affine=rand_affine + ) if run_d_pred: scale_weights_for_output = get_scale_weights( i=self.cur_iter, @@ -176,7 +154,8 @@ def test(self, input_shape=self.G_pred.shape[2:], min_size=self.conf.D_min_input_size, num_scales_limit=self.conf.D_max_num_scales, - scale_factor=self.conf.D_scale_factor) + scale_factor=self.conf.D_scale_factor + ) scale_weights_for_input = get_scale_weights( i=self.cur_iter, max_i=self.conf.D_scale_weights_iter_for_even_scales, @@ -184,12 +163,11 @@ def test(self, input_shape=input_tensor.shape[2:], min_size=self.conf.D_min_input_size, num_scales_limit=self.conf.D_max_num_scales, - scale_factor=self.conf.D_scale_factor) + scale_factor=self.conf.D_scale_factor + ) self.D_preds = [ - self.D.forward(Variable(input_tensor.detach()), - scale_weights_for_input), - self.D.forward(Variable(self.G_pred.detach()), - scale_weights_for_output) + self.D.forward(Variable(input_tensor.detach()), scale_weights_for_input), + self.D.forward(Variable(self.G_pred.detach()), scale_weights_for_output) ] else: self.D_preds = None @@ -197,9 +175,8 @@ def test(self, self.G_preds = [input_tensor, self.G_pred] self.reconstruct = self.G.forward( - self.G_pred, - output_size=input_size, - random_affine=-rand_affine) if run_reconstruct else None + self.G_pred, output_size=input_size, random_affine=-rand_affine + ) if run_reconstruct else None return self.G_preds, self.D_preds, self.reconstruct @@ -217,16 +194,14 @@ def train_g(self): must_divide=self.conf.must_divide, min_scale=self.conf.min_scale, max_scale=self.conf.max_scale, - max_transform_magniutude=self.conf.max_transform_magnitude) + max_transform_magniutude=self.conf.max_transform_magnitude + ) # Add noise to G input for better generalization (make it ignore the 1/255 binning) - self.input_tensor_noised = self.input_tensor + ( - torch.rand_like(self.input_tensor) - 0.5) * 2.0 / 255 + self.input_tensor_noised = self.input_tensor + (torch.rand_like(self.input_tensor) - 0.5) * 2.0 / 255 # Generator forward pass - self.G_pred = self.G.forward(self.input_tensor_noised, - output_size=output_size, - random_affine=random_affine) + self.G_pred = self.G.forward(self.input_tensor_noised, output_size=output_size, random_affine=random_affine) # Run generator result through discriminator forward pass self.scale_weights = get_scale_weights( @@ -236,17 +211,16 @@ def train_g(self): input_shape=self.G_pred.shape[2:], min_size=self.conf.D_min_input_size, num_scales_limit=self.conf.D_max_num_scales, - scale_factor=self.conf.D_scale_factor) + scale_factor=self.conf.D_scale_factor + ) d_pred_fake = self.D.forward(self.G_pred, self.scale_weights) # If reconstruction-loss is used, run through decoder to reconstruct, then calculate reconstruction loss if self.conf.reconstruct_loss_stop_iter > self.cur_iter: self.reconstruct = self.G.forward( - self.G_pred, - output_size=self.input_tensor.shape[2:], - random_affine=-random_affine) - self.loss_G_reconstruct = self.criterionReconstruction( - self.reconstruct, self.input_tensor, self.loss_mask) + self.G_pred, output_size=self.input_tensor.shape[2:], random_affine=-random_affine + ) + self.loss_G_reconstruct = self.criterionReconstruction(self.reconstruct, self.input_tensor, self.loss_mask) # Calculate generator loss, based on discriminator prediction on generator result self.loss_G_GAN = self.criterionGAN(d_pred_fake, is_d_input_real=True) @@ -256,8 +230,7 @@ def train_g(self): if self.conf.reconstruct_loss_stop_iter < self.cur_iter: self.loss_G = self.loss_G_GAN else: - self.loss_G = (self.conf.reconstruct_loss_proportion * - self.loss_G_reconstruct + self.loss_G_GAN) + self.loss_G = (self.conf.reconstruct_loss_proportion * self.loss_G_reconstruct + self.loss_G_GAN) # Calculate gradients # Note that the gradients are propagated from the loss through discriminator and then through generator @@ -273,12 +246,12 @@ def train_g(self): for _ in range(self.conf.G_extra_inverse_train): self.optimizer_G.zero_grad() self.inverse = self.G.forward( - self.G_pred.detach(), - output_size=self.input_tensor.shape[2:], - random_affine=-random_affine) - self.loss_G_inverse = (self.criterionReconstruction( - self.inverse, self.input_tensor, self.loss_mask) * - self.conf.G_extra_inverse_train_ratio) + self.G_pred.detach(), output_size=self.input_tensor.shape[2:], random_affine=-random_affine + ) + self.loss_G_inverse = ( + self.criterionReconstruction(self.inverse, self.input_tensor, self.loss_mask) * + self.conf.G_extra_inverse_train_ratio + ) self.loss_G_inverse.backward() self.optimizer_G.step() @@ -290,27 +263,21 @@ def train_d(self): self.optimizer_D.zero_grad() # Adding noise to D input to prevent overfitting to 1/255 bins - real_example_with_noise = self.real_example + ( - torch.rand_like(self.real_example[-1]) - 0.5) * 2.0 / 255.0 + real_example_with_noise = self.real_example + (torch.rand_like(self.real_example[-1]) - 0.5) * 2.0 / 255.0 # Discriminator forward pass over real example - self.d_pred_real = self.D.forward(real_example_with_noise, - self.scale_weights) + self.d_pred_real = self.D.forward(real_example_with_noise, self.scale_weights) # Adding noise to D input to prevent overfitting to 1/255 bins # Note that generator result is detached so that gradients are not propagating back through generator - g_pred_with_noise = self.G_pred.detach() + ( - torch.rand_like(self.G_pred) - 0.5) * 2.0 / 255 + g_pred_with_noise = self.G_pred.detach() + (torch.rand_like(self.G_pred) - 0.5) * 2.0 / 255 # Discriminator forward pass over generated example example - self.d_pred_fake = self.D.forward(g_pred_with_noise, - self.scale_weights) + self.d_pred_fake = self.D.forward(g_pred_with_noise, self.scale_weights) # Calculate discriminator loss - self.loss_D_fake = self.criterionGAN(self.d_pred_fake, - is_d_input_real=False) - self.loss_D_real = self.criterionGAN(self.d_pred_real, - is_d_input_real=True) + self.loss_D_fake = self.criterionGAN(self.d_pred_fake, is_d_input_real=False) + self.loss_D_real = self.criterionGAN(self.d_pred_real, is_d_input_real=True) self.loss_D = (self.loss_D_real + self.loss_D_fake) * 0.5 # Calculate gradients @@ -335,10 +302,8 @@ def train_one_iter(self, cur_iter, input_tensors): real_example_crops += self.RandCrop.forward([input_tensor]) if np.random.rand() < self.conf.crop_swap_probability: - swapped_input_tensor, loss_mask = self.SwapCrops.forward( - input_tensor) - [input_crop, mask_crop - ] = self.RandCrop.forward([swapped_input_tensor, loss_mask]) + swapped_input_tensor, loss_mask = self.SwapCrops.forward(input_tensor) + [input_crop, mask_crop] = self.RandCrop.forward([swapped_input_tensor, loss_mask]) input_crops.append(input_crop) mask_crops.append(mask_crop) mask_flag = True @@ -365,13 +330,8 @@ def train_one_iter(self, cur_iter, input_tensors): # Accumulate stats # Accumulating as cuda tensors is much more efficient than passing info from GPU to CPU at every iteration - self.losses_G_gan[cur_iter % - self.conf.print_freq] = self.loss_G_GAN.item() - self.losses_D_fake[cur_iter % - self.conf.print_freq] = self.loss_D_fake.item() - self.losses_D_real[cur_iter % - self.conf.print_freq] = self.loss_D_real.item() + self.losses_G_gan[cur_iter % self.conf.print_freq] = self.loss_G_GAN.item() + self.losses_D_fake[cur_iter % self.conf.print_freq] = self.loss_D_fake.item() + self.losses_D_real[cur_iter % self.conf.print_freq] = self.loss_D_real.item() if self.conf.reconstruct_loss_stop_iter > self.cur_iter: - self.losses_G_reconstruct[ - cur_iter % - self.conf.print_freq] = self.loss_G_reconstruct.item() + self.losses_G_reconstruct[cur_iter % self.conf.print_freq] = self.loss_G_reconstruct.item() diff --git a/SceneScripts.py b/SceneScripts.py index a4f8475..a281b2c 100644 --- a/SceneScripts.py +++ b/SceneScripts.py @@ -1,243 +1,270 @@ import numpy as np -def make_scene_script(script_name, - min_v, - max_v, - min_h, - max_h, - max_t, - repeat, - show_input=True, - frames_per_resize=10): +def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, show_input=True, frames_per_resize=10): l = np.linspace if script_name == 'vertical_grow_shrink': - size_v = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize) - ]) - size_h = np.concatenate([ - l(1, 1, frames_per_resize), - l(1, 1, 2 * frames_per_resize), - l(1, 1, frames_per_resize) - ]) + size_v = np.concatenate( + [l(1, max_v, frames_per_resize), + l(max_v, min_v, 2 * frames_per_resize), + l(min_v, 1, frames_per_resize)] + ) + size_h = np.concatenate( + [l(1, 1, frames_per_resize), + l(1, 1, 2 * frames_per_resize), + l(1, 1, frames_per_resize)] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'horizontal_grow_shrink': - size_v = np.concatenate([ - l(1, 1, frames_per_resize), - l(1, 1, 2 * frames_per_resize), - l(1, 1, frames_per_resize) - ]) - size_h = np.concatenate([ - l(1, max_h, frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize) - ]) + size_v = np.concatenate( + [l(1, 1, frames_per_resize), + l(1, 1, 2 * frames_per_resize), + l(1, 1, frames_per_resize)] + ) + size_h = np.concatenate( + [l(1, max_h, frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize)] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'horizontal_grow_shrink_slow': - size_v = np.concatenate([ - l(1, 1, 2 * frames_per_resize), - l(1, 1, 2 * frames_per_resize), - l(1, 1, frames_per_resize) - ]) - size_h = np.concatenate([ - l(1, max_h, 2 * frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize) - ]) + size_v = np.concatenate( + [l(1, 1, 2 * frames_per_resize), + l(1, 1, 2 * frames_per_resize), + l(1, 1, frames_per_resize)] + ) + size_h = np.concatenate( + [ + l(1, max_h, 2 * frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize) + ] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == '2d_grow_shrink': - size_v = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize) - ]) - size_h = np.concatenate([ - l(1, max_h, frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize) - ]) + size_v = np.concatenate( + [l(1, max_v, frames_per_resize), + l(max_v, min_v, 2 * frames_per_resize), + l(min_v, 1, frames_per_resize)] + ) + size_h = np.concatenate( + [l(1, max_h, frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize)] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'resize_round': - size_v = np.concatenate([ - l(1, 1, frames_per_resize), - l(1, max_v, frames_per_resize), - l(max_v, max_v, 2 * frames_per_resize), - l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize) - ]) - size_h = np.concatenate([ - l(1, max_h, frames_per_resize), - l(max_h, max_h, frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize) - ]) + size_v = np.concatenate( + [ + l(1, 1, frames_per_resize), + l(1, max_v, frames_per_resize), + l(max_v, max_v, 2 * frames_per_resize), + l(max_v, min_v, 2 * frames_per_resize), + l(min_v, 1, frames_per_resize) + ] + ) + size_h = np.concatenate( + [ + l(1, max_h, frames_per_resize), + l(max_h, max_h, frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize) + ] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'special_resize_round': - size_v = np.concatenate([ - l(1, 1, frames_per_resize / 2), - l(1, max_v, frames_per_resize), - l(max_v, max_v, frames_per_resize), - l(max_v, max_v, 2 * frames_per_resize), - l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize) - ]) + size_v = np.concatenate( + [ + l(1, 1, frames_per_resize / 2), + l(1, max_v, frames_per_resize), + l(max_v, max_v, frames_per_resize), + l(max_v, max_v, 2 * frames_per_resize), + l(max_v, min_v, 2 * frames_per_resize), + l(min_v, 1, frames_per_resize) + ] + ) - size_h = np.concatenate([ - l(1, max_h / 2, frames_per_resize / 2), - l(max_h / 2, max_h / 2, frames_per_resize), - l(max_h / 2, max_h, frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize) - ]) + size_h = np.concatenate( + [ + l(1, max_h / 2, frames_per_resize / 2), + l(max_h / 2, max_h / 2, frames_per_resize), + l(max_h / 2, max_h, frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize) + ] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'special_zoom': - size_v = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, min_v, frames_per_resize), - l(min_v, 1, frames_per_resize) - ]) - size_h = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, min_v, frames_per_resize), - l(min_v, 1, frames_per_resize) - ]) + size_v = np.concatenate( + [l(1, max_v, frames_per_resize), + l(max_v, min_v, frames_per_resize), + l(min_v, 1, frames_per_resize)] + ) + size_h = np.concatenate( + [l(1, max_v, frames_per_resize), + l(max_v, min_v, frames_per_resize), + l(min_v, 1, frames_per_resize)] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'affine_dance': - shift_l = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, -max_t, 2 * frames_per_resize), - l(-max_t, 0, frames_per_resize) - ]) - shift_r = np.concatenate([ - l(0, -max_t, frames_per_resize), - l(-max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize) - ]) + shift_l = np.concatenate( + [l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, 0, frames_per_resize)] + ) + shift_r = np.concatenate( + [l(0, -max_t, frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize)] + ) size_v = [1 for _ in shift_l] size_h = [1 for _ in shift_l] elif script_name == 'trapezoids': - shift_l = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, -max_t, 2 * frames_per_resize), - l(-max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize) - ]) - shift_r = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, -max_t, 2 * frames_per_resize), - l(-max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize) - ]) + shift_l = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ] + ) + shift_r = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ] + ) size_v = [1 for _ in shift_l] size_h = [1 for _ in shift_l] elif script_name == 'trapezoids_vresize': - shift_l = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, -max_t, 2 * frames_per_resize), - l(-max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize) - ]) - shift_r = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, -max_t, 2 * frames_per_resize), - l(-max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize) - ]) - size_v = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, 1, frames_per_resize), - l(1, max_v, frames_per_resize), - l(max_v, 1, frames_per_resize), - l(1, max_v, frames_per_resize), - l(max_v, 1, frames_per_resize), - ]) + shift_l = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ] + ) + shift_r = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ] + ) + size_v = np.concatenate( + [ + l(1, max_v, frames_per_resize), + l(max_v, 1, frames_per_resize), + l(1, max_v, frames_per_resize), + l(max_v, 1, frames_per_resize), + l(1, max_v, frames_per_resize), + l(max_v, 1, frames_per_resize), + ] + ) size_h = np.concatenate([l(1, 1, 6 * frames_per_resize)]) elif script_name == 'flicker': size_h = np.concatenate([l(1, 1, 6 * frames_per_resize)]) size_v = size_h - shift_l = np.concatenate([ - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize), - ]) - shift_r = np.concatenate([ - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize) - ]) + shift_l = np.concatenate( + [ + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + ] + ) + shift_r = np.concatenate( + [ + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize) + ] + ) elif script_name == 'homography': size_h = np.concatenate([l(1, 1, 6 * frames_per_resize)]) size_v = size_h - shift_l = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(max_t, -max_t, 2 * frames_per_resize), - l(-max_t, -max_t, 2 * frames_per_resize), - l(-max_t, 0, frames_per_resize) - ]) - shift_r = np.concatenate([ - l(0, 0, frames_per_resize), - l(0, max_t, frames_per_resize), - l(max_t, max_t, 2 * frames_per_resize), - l(max_t, -max_t, 2 * frames_per_resize), - l(-max_t, 0, frames_per_resize) - ]) + shift_l = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, -max_t, 2 * frames_per_resize), + l(-max_t, 0, frames_per_resize) + ] + ) + shift_r = np.concatenate( + [ + l(0, 0, frames_per_resize), + l(0, max_t, frames_per_resize), + l(max_t, max_t, 2 * frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, 0, frames_per_resize) + ] + ) elif script_name == 'random': - stops = np.random.rand(10, 4) * np.array([ - max_v - min_v, max_h - min_h, 2 * max_t, 2 * max_t - ])[None, :] + np.array([min_v, min_h, -max_t, -max_t])[None, :] + stops = np.random.rand(10, 4) * np.array([max_v - min_v, max_h - min_h, 2 * max_t, 2 * max_t] + )[None, :] + np.array([min_v, min_h, -max_t, -max_t])[None, :] stops = np.vstack([stops, [1, 1, 0, 0]]) print(stops) - size_v = np.concatenate([ - l(stop_0[0], stop_1[0], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) - ]) + size_v = np.concatenate( + [ + l(stop_0[0], stop_1[0], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ] + ) - size_h = np.concatenate([ - l(stop_0[1], stop_1[1], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) - ]) + size_h = np.concatenate( + [ + l(stop_0[1], stop_1[1], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ] + ) - shift_l = np.concatenate([ - l(stop_0[2], stop_1[2], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) - ]) + shift_l = np.concatenate( + [ + l(stop_0[2], stop_1[2], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ] + ) - shift_r = np.concatenate([ - l(stop_0[3], stop_1[3], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) - ]) + shift_r = np.concatenate( + [ + l(stop_0[3], stop_1[3], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ] + ) elif script_name == 'random_trapezoids': stops_l = np.random.rand(11) * 2 * max_t - max_t @@ -249,109 +276,87 @@ def make_scene_script(script_name, size_h = np.concatenate([l(1, 1, 20 * frames_per_resize)]) size_v = size_h - shift_l = np.concatenate([ - l(stop_0[0], stop_1[0], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops) - ]) + shift_l = np.concatenate( + [l(stop_0[0], stop_1[0], frames_per_resize) for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops)] + ) - shift_r = np.concatenate([ - l(stop_0[1], stop_1[1], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops) - ]) + shift_r = np.concatenate( + [l(stop_0[1], stop_1[1], frames_per_resize) for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops)] + ) - return [[-1, -1, -1, -1]] * 20 + list(zip( - size_v, size_h, shift_l, shift_r)) * repeat if show_input else list( - zip(size_v, size_h, shift_l, shift_r)) * repeat + return [[-1, -1, -1, -1]] * 20 + list(zip(size_v, size_h, shift_l, shift_r)) * repeat if show_input else list( + zip(size_v, size_h, shift_l, shift_r) + ) * repeat INPUT_DICT = { - 'fruits': [ - 'fruits_ss.png', - '/experiment_old_code_with_homo_2/results/fruits_ss_geo_new_pad_Mar_16_18_00_17/checkpoint_0075000.pth.tar' - ], - 'farm_house': [ - 'farm_house_s.png', - '/results/farm_house_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_16_07_59/checkpoint_0050000.pth.tar' - ], - 'cab_building': [ - 'cab_building_s.png', - '/results/cab_building_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_10_25/checkpoint_0065000.pth.tar' - ], - 'capitol': [ - 'capitol.png', - '/results/capitol_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_13_22/checkpoint_0055000.pth.tar' - ], - 'rome': [ - 'rome_s.png', - '/results/rome_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_09_19/checkpoint_0045000.pth.tar' - ], - 'soldiers': [ - 'china_soldiers.png', - '/results/china_soldiers_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_46_09/checkpoint_0075000.pth.tar' - ], - 'corn': [ - 'corn.png', - '/results/corn_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_10_29_00/checkpoint_0075000.pth.tar' - ], - 'sushi': [ - 'sushi.png', - '/results/sushi_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_47_39/checkpoint_0075000.pth.tar' - ], - 'penguins': [ - 'penguins.png', - '/results/penguins_Nov_13_16_26_14/checkpoint_0075000.pth.tar' - ], - 'emojis': [ - 'emojis3.png', - '/results/emojis3_Nov_23_09_59_59/checkpoint_0075000.pth.tar' - ], - 'fish': [ - 'input/fish.png', - '/results/fish_plethora_75_Mar_18_03_36_25/checkpoint_0075000.pth.tar' - ], - 'ny': [ - 'textures/ny.png', - '/results/ny_texture_synth_Mar_19_04_51_14/checkpoint_0075000.pth.tar' - ], - 'metal_circles': [ - 'metal_circles.jpg', - '/results/metal_circles_Mar_26_20_04_11/checkpoint_0075000.pth.tar' - ], + 'fruits': + [ + 'fruits_ss.png', + '/experiment_old_code_with_homo_2/results/fruits_ss_geo_new_pad_Mar_16_18_00_17/checkpoint_0075000.pth.tar' + ], + 'farm_house': + [ + 'farm_house_s.png', + '/results/farm_house_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_16_07_59/checkpoint_0050000.pth.tar' + ], + 'cab_building': + [ + 'cab_building_s.png', + '/results/cab_building_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_10_25/checkpoint_0065000.pth.tar' + ], + 'capitol': + [ + 'capitol.png', + '/results/capitol_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_13_22/checkpoint_0055000.pth.tar' + ], + 'rome': + [ + 'rome_s.png', + '/results/rome_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_09_19/checkpoint_0045000.pth.tar' + ], + 'soldiers': + [ + 'china_soldiers.png', + '/results/china_soldiers_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_46_09/checkpoint_0075000.pth.tar' + ], + 'corn': + [ + 'corn.png', + '/results/corn_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_10_29_00/checkpoint_0075000.pth.tar' + ], + 'sushi': + [ + 'sushi.png', + '/results/sushi_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_47_39/checkpoint_0075000.pth.tar' + ], + 'penguins': ['penguins.png', '/results/penguins_Nov_13_16_26_14/checkpoint_0075000.pth.tar'], + 'emojis': ['emojis3.png', '/results/emojis3_Nov_23_09_59_59/checkpoint_0075000.pth.tar'], + 'fish': ['input/fish.png', '/results/fish_plethora_75_Mar_18_03_36_25/checkpoint_0075000.pth.tar'], + 'ny': ['textures/ny.png', '/results/ny_texture_synth_Mar_19_04_51_14/checkpoint_0075000.pth.tar'], + 'metal_circles': ['metal_circles.jpg', '/results/metal_circles_Mar_26_20_04_11/checkpoint_0075000.pth.tar'], 'quilt': ['quilt.png', '/results/quilt/checkpoint_0075000.pth.tar'], - 'sapa': [ - 'sapa.png', - '/results/sapa_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_44_59/checkpoint_0075000.pth.tar' - ], - 'nkorea': [ - 'nkorea.png', - '/results/nkorea_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_48_00/checkpoint_0075000.pth.tar' - ], + 'sapa': + [ + 'sapa.png', + '/results/sapa_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_44_59/checkpoint_0075000.pth.tar' + ], + 'nkorea': + [ + 'nkorea.png', + '/results/nkorea_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_48_00/checkpoint_0075000.pth.tar' + ], 'wood': ['wood.png', '/results/wood/checkpoint_0075000.pth.tar'], 'starry': ['starry.png', '/results/starry/checkpoint_0075000.pth.tar'], - 'umbrella': - ['umbrella.png', '/results/umbrella/checkpoint_0075000.pth.tar'], - 'fruits_old': [ - 'fruits_ss.png', - '/results/fruits_ss_256_COARSE2FINE_extraInv_2_30_until60_killReconstruct_20_Oct_24_12_35_33/checkpoint_0040000.pth.tar' - ], - 'peacock': [ - 'scaled_nird/ours_1_scaled.jpg', - '/results/ours_1/checkpoint_0050000.pth.tar' - ], - 'windows': [ - 'scaled_nird/ours_2_scaled.jpg', - '/results/ours_2/checkpoint_0050000.pth.tar' - ], - 'light_house': [ - 'scaled_nird/ours_23_scaled.jpg', - '/results/ours_23/checkpoint_0050000.pth.tar' - ], - 'hats': [ - 'scaled_nird/ours_26_scaled.jpg', - '/results/ours_26/checkpoint_0050000.pth.tar' - ], - 'nature': [ - 'scaled_nird/ours_32_scaled.jpg', - '/results/ours_32/checkpoint_0050000.pth.tar' - ], + 'umbrella': ['umbrella.png', '/results/umbrella/checkpoint_0075000.pth.tar'], + 'fruits_old': + [ + 'fruits_ss.png', + '/results/fruits_ss_256_COARSE2FINE_extraInv_2_30_until60_killReconstruct_20_Oct_24_12_35_33/checkpoint_0040000.pth.tar' + ], + 'peacock': ['scaled_nird/ours_1_scaled.jpg', '/results/ours_1/checkpoint_0050000.pth.tar'], + 'windows': ['scaled_nird/ours_2_scaled.jpg', '/results/ours_2/checkpoint_0050000.pth.tar'], + 'light_house': ['scaled_nird/ours_23_scaled.jpg', '/results/ours_23/checkpoint_0050000.pth.tar'], + 'hats': ['scaled_nird/ours_26_scaled.jpg', '/results/ours_26/checkpoint_0050000.pth.tar'], + 'nature': ['scaled_nird/ours_32_scaled.jpg', '/results/ours_32/checkpoint_0050000.pth.tar'], } diff --git a/configs.py b/configs.py index d5bbb26..dec6496 100644 --- a/configs.py +++ b/configs.py @@ -13,290 +13,208 @@ def __init__(self): # Paths self.parser.add_argument( '--input_image_path', - default=[ - os.path.dirname(os.path.abspath(__file__)) + - '/examples/fruit/fruit.png' - ], + default=[os.path.dirname(os.path.abspath(__file__)) + '/examples/fruit/fruit.png'], nargs='+', - help='path to one specific image file') + help='path to one specific image file' + ) self.parser.add_argument( '--output_dir_path', default=os.path.dirname(os.path.abspath(__file__)) + '/results', - help='path to a directory to save results to') + help='path to a directory to save results to' + ) self.parser.add_argument( - '--name', - default='fruit', - help='name of current experiment, to be used for saving the results' + '--name', default='fruit', help='name of current experiment, to be used for saving the results' ) - self.parser.add_argument('--resume', - type=str, - default=None, - help='checkpoint to resume from') + self.parser.add_argument('--resume', type=str, default=None, help='checkpoint to resume from') self.parser.add_argument( '--test_params_path', type=str, - default=os.path.dirname(os.path.abspath(__file__)) + - '/examples/fruit/checkpoint_0075000.pth.tar', - help='checkpoint for testing') + default=os.path.dirname(os.path.abspath(__file__)) + '/examples/fruit/checkpoint_0075000.pth.tar', + help='checkpoint for testing' + ) # Test - self.parser.add_argument('--test_collage', - default=True, - action='store_true', - help='Create collage in test?') - self.parser.add_argument('--test_video', - default=True, - action='store_true', - help='Create retarget-video in test?') + self.parser.add_argument('--test_collage', default=True, action='store_true', help='Create collage in test?') + self.parser.add_argument( + '--test_video', default=True, action='store_true', help='Create retarget-video in test?' + ) self.parser.add_argument( '--test_non_rect', default=False, action='store_true', - help='Produce non-rectangular transformations in test?') + help='Produce non-rectangular transformations in test?' + ) self.parser.add_argument( '--test_vid_scales', type=float, default=[2.2, 0.1, 2.2, 0.1], nargs='+', - help= - 'boundary scales for output video: [max_v, min_v, max_h, min_h]') + help='boundary scales for output video: [max_v, min_v, max_h, min_h]' + ) self.parser.add_argument( '--collage_scales', type=float, default=[2.0, 1.25, 1.0, 0.66, 0.33], nargs='+', - help='scales for collage (h=w, only one number)') + help='scales for collage (h=w, only one number)' + ) self.parser.add_argument( '--collage_input_spot', type=float, default=[2, 2], nargs='+', - help= - 'replaces one spot in the collage with original input. must match a spot with scale 1.0' + help='replaces one spot in the collage with original input. must match a spot with scale 1.0' ) self.parser.add_argument( '--non_rect_shift_range', type=float, default=[-0.8, 1.0, 0.2], nargs='+', - help= - 'range for homography shifts for non rect transforms [min, max, step]' - ) - self.parser.add_argument('--non_rect_scales', - type=float, - default=[0.7, 1.0], - nargs='+', - help='list of scales for non_rect outputs') + help='range for homography shifts for non rect transforms [min, max, step]' + ) + self.parser.add_argument( + '--non_rect_scales', type=float, default=[0.7, 1.0], nargs='+', help='list of scales for non_rect outputs' + ) # Architecture (Generator) - self.parser.add_argument('--G_base_channels', - type=int, - default=64, - help='# of base channels in G') - self.parser.add_argument('--G_num_resblocks', - type=int, - default=6, - help='# of resblocks in G\'s bottleneck') - self.parser.add_argument('--G_num_downscales', - type=int, - default=3, - help='# of downscaling layers in G') - self.parser.add_argument( - '--G_use_bias', - type=bool, - default=True, - help='Determinhes whether bias is used in G\'s conv layers') + self.parser.add_argument('--G_base_channels', type=int, default=64, help='# of base channels in G') + self.parser.add_argument('--G_num_resblocks', type=int, default=6, help='# of resblocks in G\'s bottleneck') + self.parser.add_argument('--G_num_downscales', type=int, default=3, help='# of downscaling layers in G') self.parser.add_argument( - '--G_skip', - type=bool, - default=True, - help='Determines wether G uses skip connections (U-net)') + '--G_use_bias', type=bool, default=True, help='Determinhes whether bias is used in G\'s conv layers' + ) + self.parser.add_argument( + '--G_skip', type=bool, default=True, help='Determines wether G uses skip connections (U-net)' + ) # Architecture (Discriminator) - self.parser.add_argument('--D_base_channels', - type=int, - default=64, - help='# of base channels in D') + self.parser.add_argument('--D_base_channels', type=int, default=64, help='# of base channels in D') self.parser.add_argument( - '--D_max_num_scales', - type=int, - default=99, - help='Limits the # of scales for the multiscale D') + '--D_max_num_scales', type=int, default=99, help='Limits the # of scales for the multiscale D' + ) self.parser.add_argument( - '--D_scale_factor', - type=float, - default=1.4, - help='Determines the downscaling factor for multiscale D') + '--D_scale_factor', type=float, default=1.4, help='Determines the downscaling factor for multiscale D' + ) self.parser.add_argument( '--D_scale_weights_sigma', type=float, default=1.4, - help='Determines the downscaling factor for multiscale D') + help='Determines the downscaling factor for multiscale D' + ) self.parser.add_argument( - '--D_min_input_size', - type=int, - default=13, - help='Determines the downscaling factor for multiscale D') + '--D_min_input_size', type=int, default=13, help='Determines the downscaling factor for multiscale D' + ) self.parser.add_argument( '--D_scale_weights_iter_for_even_scales', type=int, default=25000, - help='Determines the downscaling factor for multiscale D') + help='Determines the downscaling factor for multiscale D' + ) # Optimization hyper-parameters - self.parser.add_argument('--g_lr', - type=float, - default=0.00005, - help='initial learning rate for generator') - self.parser.add_argument( - '--d_lr', - type=float, - default=0.00005, - help='initial learning rate for discriminator') + self.parser.add_argument('--g_lr', type=float, default=0.00005, help='initial learning rate for generator') + self.parser.add_argument('--d_lr', type=float, default=0.00005, help='initial learning rate for discriminator') self.parser.add_argument( '--lr_start_decay_iter', type=float, default=20000, help='iteration from which linear decay of lr starts until max_iter' ) - self.parser.add_argument('--beta1', - type=float, - default=0.5, - help='momentum term of adam') - self.parser.add_argument('--curriculum', - type=bool, - default=True, - help='Enable curriculum learning') + self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') + self.parser.add_argument('--curriculum', type=bool, default=True, help='Enable curriculum learning') self.parser.add_argument( '--iter_for_max_range', type=int, default=10000, - help= - 'In curriculum learning, when getting to this iteration all range is covered' + help='In curriculum learning, when getting to this iteration all range is covered' ) # Sizes - self.parser.add_argument('--input_crop_size', - type=int, - default=256, - help='input is cropped to this size') - self.parser.add_argument('--output_crop_size', - type=int, - default=256, - help='output is cropped to this size') - self.parser.add_argument('--max_scale', - type=float, - default=2.25, - help='max retargeting scale') - self.parser.add_argument('--min_scale', - type=float, - default=0.15, - help='min retargeting scale') + self.parser.add_argument('--input_crop_size', type=int, default=256, help='input is cropped to this size') + self.parser.add_argument('--output_crop_size', type=int, default=256, help='output is cropped to this size') + self.parser.add_argument('--max_scale', type=float, default=2.25, help='max retargeting scale') + self.parser.add_argument('--min_scale', type=float, default=0.15, help='min retargeting scale') self.parser.add_argument( '--must_divide', type=int, default=8, - help= - 'In curriculum learning, when getting to this iteration all range is covered' + help='In curriculum learning, when getting to this iteration all range is covered' ) self.parser.add_argument( - '--max_transform_magnitude', - type=float, - default=0.0, - help='max manitude of geometric transformation') + '--max_transform_magnitude', type=float, default=0.0, help='max manitude of geometric transformation' + ) # Crop Swap - self.parser.add_argument('--crop_swap_min_size', - type=int, - default=32, - help='swapping crops augmnetation') - self.parser.add_argument('--crop_swap_max_size', - type=int, - default=256, - help='swapping crops augmnetation') - self.parser.add_argument('--crop_swap_probability', - type=float, - default=0.0, - help='probability for crop swapping to occur') + self.parser.add_argument('--crop_swap_min_size', type=int, default=32, help='swapping crops augmnetation') + self.parser.add_argument('--crop_swap_max_size', type=int, default=256, help='swapping crops augmnetation') + self.parser.add_argument( + '--crop_swap_probability', type=float, default=0.0, help='probability for crop swapping to occur' + ) # GPU - self.parser.add_argument('--gpu_id', - type=int, - default=0, - help='gpu id number') + self.parser.add_argument('--gpu_id', type=int, default=0, help='gpu id number') # Monitoring display frequencies self.parser.add_argument( - '--display_freq', - type=int, - default=200, - help='frequency of showing training results on screen') + '--display_freq', type=int, default=200, help='frequency of showing training results on screen' + ) self.parser.add_argument( - '--print_freq', - type=int, - default=20, - help='frequency of showing training results on console') - self.parser.add_argument('--save_snapshot_freq', - type=int, - default=5000, - help='frequency of saving the latest results') + '--print_freq', type=int, default=20, help='frequency of showing training results on console' + ) + self.parser.add_argument( + '--save_snapshot_freq', type=int, default=5000, help='frequency of saving the latest results' + ) # Iterations - self.parser.add_argument('--max_iters', - type=int, - default=75000, - help='max # of iters') + self.parser.add_argument('--max_iters', type=int, default=75000, help='max # of iters') self.parser.add_argument( - '--G_iters', - type=int, - default=1, - help='# of sub-iters for the generator per each global iteration') + '--G_iters', type=int, default=1, help='# of sub-iters for the generator per each global iteration' + ) self.parser.add_argument( - '--D_iters', - type=int, - default=1, - help= - '# of sub-iters for the discriminator per each global iteration') + '--D_iters', type=int, default=1, help='# of sub-iters for the discriminator per each global iteration' + ) # Losses self.parser.add_argument( '--reconstruct_loss_proportion', type=float, default=0.1, - help='relative part of reconstruct-loss (out of 1)') + help='relative part of reconstruct-loss (out of 1)' + ) self.parser.add_argument( '--reconstruct_loss_stop_iter', type=int, default=200000, - help='from this iter and on, reconstruct loss is deactivated') + help='from this iter and on, reconstruct loss is deactivated' + ) self.parser.add_argument( '--G_extra_inverse_train', type=int, default=1, - help='number of extra training iters for G on inverse direction') + help='number of extra training iters for G on inverse direction' + ) self.parser.add_argument( '--G_extra_inverse_train_start_iter', type=int, default=10000, - help='number of extra training iters for G on inverse direction') + help='number of extra training iters for G on inverse direction' + ) self.parser.add_argument( '--G_extra_inverse_train_ratio', type=int, default=1.0, - help='number of extra training iters for G on inverse direction') + help='number of extra training iters for G on inverse direction' + ) self.parser.add_argument( - '--use_L1', - type=bool, - default=True, - help='Determine whether to use L1 or L2 for reconstruction') + '--use_L1', type=bool, default=True, help='Determine whether to use L1 or L2 for reconstruction' + ) # Misc self.parser.add_argument( '--create_code_copy', type=bool, default=True, - help= - 'when set to true, all .py files are saved to results directory to keep track' + help='when set to true, all .py files are saved to results directory to keep track' ) def parse(self, create_dir_flag=True): diff --git a/environment.yml b/environment.yml index 83e6652..0e480f2 100644 --- a/environment.yml +++ b/environment.yml @@ -15,6 +15,7 @@ dependencies: - seaborn - pip: - opencv-python + - scikit-video - ipython - pytest - yapf diff --git a/networks.py b/networks.py index 142c9cd..cec1ed5 100644 --- a/networks.py +++ b/networks.py @@ -65,8 +65,7 @@ def __init__(self): def forward(self, d_last_layer, is_d_input_real): # Determine label map according to whether current input to discriminator is real or fake - self.label_tensor = Variable(torch.ones_like(d_last_layer).cuda(), - requires_grad=False) * is_d_input_real + self.label_tensor = Variable(torch.ones_like(d_last_layer).cuda(), requires_grad=False) * is_d_input_real # Finally return the loss return self.loss(d_last_layer, self.label_tensor) @@ -98,21 +97,14 @@ def forward(self, input_tensor, target_tensor, scale_weights): # Run all nets over all scales and aggregate the interpolated results loss = 0 for i, scale_weight in enumerate(scale_weights): - input_tensor = f.interpolate(input_tensor, - scale_factor=self.scale_factor**(-i), - mode='bilinear') + input_tensor = f.interpolate(input_tensor, scale_factor=self.scale_factor**(-i), mode='bilinear') loss += scale_weight * self.mse(input_tensor, target_tensor) return loss class Generator(nn.Module): """ Architecture of the Generator, uses res-blocks """ - def __init__(self, - base_channels=64, - n_blocks=6, - n_downsampling=3, - use_bias=True, - skip_flag=True): + def __init__(self, base_channels=64, n_blocks=6, n_downsampling=3, use_bias=True, skip_flag=True): super(Generator, self).__init__() # Determine whether to use skip connections @@ -121,40 +113,32 @@ def __init__(self, # Entry block # First conv-block, no stride so image dims are kept and channels dim is expanded (pad-conv-norm-relu) self.entry_block = nn.Sequential( - nn.ReflectionPad2d(3), - nn.utils.spectral_norm( - nn.Conv2d(3, base_channels, kernel_size=7, bias=use_bias)), - normalization_layer(base_channels), nn.LeakyReLU(0.2, True)) + nn.ReflectionPad2d(3), nn.utils.spectral_norm(nn.Conv2d(3, base_channels, kernel_size=7, bias=use_bias)), + normalization_layer(base_channels), nn.LeakyReLU(0.2, True) + ) # Geometric transformation self.geo_transform = GeoTransform() # Downscaling # A sequence of strided conv-blocks. Image dims shrink by 2, channels dim expands by 2 at each block - self.downscale_block = RescaleBlock(n_downsampling, 0.5, base_channels, - True) + self.downscale_block = RescaleBlock(n_downsampling, 0.5, base_channels, True) # Bottleneck # A sequence of res-blocks bottleneck_block = [] for _ in range(n_blocks): # noinspection PyUnboundLocalVariable - bottleneck_block += [ - ResnetBlock(base_channels * 2**n_downsampling, - use_bias=use_bias) - ] + bottleneck_block += [ResnetBlock(base_channels * 2**n_downsampling, use_bias=use_bias)] self.bottleneck_block = nn.Sequential(*bottleneck_block) # Upscaling # A sequence of transposed-conv-blocks, Image dims expand by 2, channels dim shrinks by 2 at each block\ - self.upscale_block = RescaleBlock(n_downsampling, 2.0, base_channels, - True) + self.upscale_block = RescaleBlock(n_downsampling, 2.0, base_channels, True) # Final block # No stride so image dims are kept and channels dim shrinks to 3 (output image channels) - self.final_block = nn.Sequential( - nn.ReflectionPad2d(3), nn.Conv2d(base_channels, 3, kernel_size=7), - nn.Tanh()) + self.final_block = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(base_channels, 3, kernel_size=7), nn.Tanh()) def forward(self, input_tensor, output_size, random_affine): # A condition for having the output at same size as the scaled input is having even output_size @@ -164,24 +148,18 @@ def forward(self, input_tensor, output_size, random_affine): # Change scale to output scale by interpolation if random_affine is None: - feature_map = f.interpolate(feature_map, - size=output_size, - mode='bilinear') + feature_map = f.interpolate(feature_map, size=output_size, mode='bilinear') else: - feature_map = self.geo_transform.forward(feature_map, output_size, - random_affine) + feature_map = self.geo_transform.forward(feature_map, output_size, random_affine) # Downscale block - feature_map, downscales = self.downscale_block.forward( - feature_map, return_all_scales=self.skip) + feature_map, downscales = self.downscale_block.forward(feature_map, return_all_scales=self.skip) # Bottleneck (res-blocks) feature_map = self.bottleneck_block(feature_map) # Upscale block - feature_map, _ = self.upscale_block.forward(feature_map, - pyramid=downscales, - skip=self.skip) + feature_map, _ = self.upscale_block.forward(feature_map, pyramid=downscales, skip=self.skip) # Final block output_tensor = self.final_block(feature_map) @@ -196,16 +174,12 @@ def __init__(self, dim, use_bias): # A res-block without the skip-connection, pad-conv-norm-relu-pad-conv-norm self.conv_block = nn.Sequential( - nn.utils.spectral_norm( - nn.Conv2d(dim, dim // 4, kernel_size=1, bias=use_bias)), - normalization_layer(dim // 4), nn.LeakyReLU(0.2, True), - nn.ReflectionPad2d(1), - nn.utils.spectral_norm( - nn.Conv2d(dim // 4, dim // 4, kernel_size=3, bias=use_bias)), + nn.utils.spectral_norm(nn.Conv2d(dim, dim // 4, kernel_size=1, bias=use_bias)), + normalization_layer(dim // 4), nn.LeakyReLU(0.2, True), nn.ReflectionPad2d(1), + nn.utils.spectral_norm(nn.Conv2d(dim // 4, dim // 4, kernel_size=3, bias=use_bias)), normalization_layer(dim // 4), nn.LeakyReLU(0.2, True), - nn.utils.spectral_norm( - nn.Conv2d(dim // 4, dim, kernel_size=1, bias=use_bias)), - normalization_layer(dim)) + nn.utils.spectral_norm(nn.Conv2d(dim // 4, dim, kernel_size=1, bias=use_bias)), normalization_layer(dim) + ) def forward(self, input_tensor): # The skip connection is applied here @@ -213,12 +187,7 @@ def forward(self, input_tensor): class MultiScaleDiscriminator(nn.Module): - def __init__(self, - real_crop_size, - max_n_scales=9, - scale_factor=2, - base_channels=128, - extra_conv_layers=0): + def __init__(self, real_crop_size, max_n_scales=9, scale_factor=2, base_channels=128, extra_conv_layers=0): super(MultiScaleDiscriminator, self).__init__() self.base_channels = base_channels self.scale_factor = scale_factor @@ -227,12 +196,12 @@ def __init__(self, # We want the max num of scales to fit the size of the real examples. further scaling would create networks that # only train on fake examples - self.max_n_scales = np.min([ - np.int( - np.ceil( - np.log(np.min(real_crop_size) * 1.0 / self.min_size) / - np.log(self.scale_factor))), max_n_scales - ]) + self.max_n_scales = np.min( + [ + np.int(np.ceil(np.log(np.min(real_crop_size) * 1.0 / self.min_size) / np.log(self.scale_factor))), + max_n_scales + ] + ) # Prepare a list of all the networks for all the wanted scales self.nets = nn.ModuleList() @@ -247,8 +216,7 @@ def make_net(self): # Entry block net += [ - nn.utils.spectral_norm( - nn.Conv2d(3, base_channels, kernel_size=3, stride=1)), + nn.utils.spectral_norm(nn.Conv2d(3, base_channels, kernel_size=3, stride=1)), nn.BatchNorm2d(base_channels), nn.LeakyReLU(0.2, True) ] @@ -256,11 +224,7 @@ def make_net(self): # Downscaling blocks # A sequence of strided conv-blocks. Image dims shrink by 2, channels dim expands by 2 at each block net += [ - nn.utils.spectral_norm( - nn.Conv2d(base_channels, - base_channels * 2, - kernel_size=3, - stride=2)), + nn.utils.spectral_norm(nn.Conv2d(base_channels, base_channels * 2, kernel_size=3, stride=2)), nn.BatchNorm2d(base_channels * 2), nn.LeakyReLU(0.2, True) ] @@ -268,10 +232,8 @@ def make_net(self): # Regular conv-block net += [ nn.utils.spectral_norm( - nn.Conv2d(in_channels=base_channels * 2, - out_channels=base_channels * 2, - kernel_size=3, - bias=True)), + nn.Conv2d(in_channels=base_channels * 2, out_channels=base_channels * 2, kernel_size=3, bias=True) + ), nn.BatchNorm2d(base_channels * 2), nn.LeakyReLU(0.2, True) ] @@ -280,38 +242,30 @@ def make_net(self): for _ in range(self.extra_conv_layers): net += [ nn.utils.spectral_norm( - nn.Conv2d(in_channels=base_channels * 2, - out_channels=base_channels * 2, - kernel_size=3, - bias=True)), + nn.Conv2d(in_channels=base_channels * 2, out_channels=base_channels * 2, kernel_size=3, bias=True) + ), nn.BatchNorm2d(base_channels * 2), nn.LeakyReLU(0.2, True) ] # Final conv-block # Ends with a Sigmoid to get a range of 0-1 - net += nn.Sequential( - nn.utils.spectral_norm( - nn.Conv2d(base_channels * 2, 1, kernel_size=1)), nn.Sigmoid()) + net += nn.Sequential(nn.utils.spectral_norm(nn.Conv2d(base_channels * 2, 1, kernel_size=1)), nn.Sigmoid()) # Make it a valid layers sequence and return return nn.Sequential(*net) def forward(self, input_tensor, scale_weights): - aggregated_result_maps_from_all_scales = self.nets[0]( - input_tensor) * scale_weights[0] + aggregated_result_maps_from_all_scales = self.nets[0](input_tensor) * scale_weights[0] map_size = aggregated_result_maps_from_all_scales.shape[2:] # Run all nets over all scales and aggregate the interpolated results - for net, scale_weight, i in zip(self.nets[1:], scale_weights[1:], - list(range(1, len(scale_weights)))): - downscaled_image = f.interpolate( - input_tensor, - scale_factor=self.scale_factor**(-i), - mode='bilinear') + for net, scale_weight, i in zip(self.nets[1:], scale_weights[1:], list(range(1, len(scale_weights)))): + downscaled_image = f.interpolate(input_tensor, scale_factor=self.scale_factor**(-i), mode='bilinear') result_map_for_current_scale = net(downscaled_image) upscaled_result_map_for_current_scale = f.interpolate( - result_map_for_current_scale, size=map_size, mode='bilinear') + result_map_for_current_scale, size=map_size, mode='bilinear' + ) aggregated_result_maps_from_all_scales += upscaled_result_map_for_current_scale * scale_weight return aggregated_result_maps_from_all_scales @@ -327,23 +281,21 @@ def __init__(self, n_layers, scale=0.5, base_channels=64, use_bias=True): in_channel_power = scale > 1 out_channel_power = scale < 1 - i_range = list(range(n_layers)) if scale < 1 else list( - range(n_layers - 1, -1, -1)) + i_range = list(range(n_layers)) if scale < 1 else list(range(n_layers - 1, -1, -1)) for i in i_range: self.conv_layers[i] = nn.Sequential( nn.ReflectionPad2d(1), nn.utils.spectral_norm( - nn.Conv2d(in_channels=base_channels * - 2**(i + in_channel_power), - out_channels=base_channels * - 2**(i + out_channel_power), - kernel_size=3, - stride=1, - bias=use_bias)), - normalization_layer(base_channels * - 2**(i + out_channel_power)), - nn.LeakyReLU(0.2, True)) + nn.Conv2d( + in_channels=base_channels * 2**(i + in_channel_power), + out_channels=base_channels * 2**(i + out_channel_power), + kernel_size=3, + stride=1, + bias=use_bias + ) + ), normalization_layer(base_channels * 2**(i + out_channel_power)), nn.LeakyReLU(0.2, True) + ) self.add_module("conv_%d" % i, self.conv_layers[i]) if scale > 1: @@ -351,11 +303,7 @@ def __init__(self, n_layers, scale=0.5, base_channels=64, use_bias=True): self.max_pool = nn.MaxPool2d(2, 2) - def forward(self, - input_tensor, - pyramid=None, - return_all_scales=False, - skip=False): + def forward(self, input_tensor, pyramid=None, return_all_scales=False, skip=False): feature_map = input_tensor all_scales = [] @@ -365,9 +313,7 @@ def forward(self, for i, conv_layer in enumerate(self.conv_layers): if self.scale > 1.0: - feature_map = f.interpolate(feature_map, - scale_factor=self.scale, - mode='nearest') + feature_map = f.interpolate(feature_map, scale_factor=self.scale, mode='nearest') feature_map = conv_layer(feature_map) @@ -380,8 +326,7 @@ def forward(self, if return_all_scales: all_scales.append(feature_map) - return (feature_map, - all_scales) if return_all_scales else (feature_map, None) + return (feature_map, all_scales) if return_all_scales else (feature_map, None) class RandomCrop(nn.Module): @@ -396,29 +341,22 @@ def __init__(self, crop_size, return_pos=False, must_divide=4.0): def forward(self, input_tensors, crop_size=None): im_v_sz, im_h_sz = input_tensors[0].shape[2:] if crop_size is None: - cr_v_sz, cr_h_sz = np.clip(self.crop_size, [0, 0], - [im_v_sz - 1, im_h_sz - 1]) + cr_v_sz, cr_h_sz = np.clip(self.crop_size, [0, 0], [im_v_sz - 1, im_h_sz - 1]) cr_v_sz, cr_h_sz = np.uint32( - np.floor( - np.array([cr_v_sz, cr_h_sz]) * 1.0 / self.must_divide) * - self.must_divide) + np.floor(np.array([cr_v_sz, cr_h_sz]) * 1.0 / self.must_divide) * self.must_divide + ) else: cr_v_sz, cr_h_sz = crop_size - top_left_v, top_left_h = [ - np.random.randint(0, im_v_sz - cr_v_sz), - np.random.randint(0, im_h_sz - cr_h_sz) - ] + top_left_v, top_left_h = [np.random.randint(0, im_v_sz - cr_v_sz), np.random.randint(0, im_h_sz - cr_h_sz)] out_tensors = [ input_tensor[:, :, top_left_v:top_left_v + cr_v_sz, - top_left_h:top_left_h + - cr_h_sz] if input_tensor is not None else None + top_left_h:top_left_h + cr_h_sz] if input_tensor is not None else None for input_tensor in input_tensors ] - return (out_tensors, (top_left_v, - top_left_h)) if self.return_pos else out_tensors + return (out_tensors, (top_left_v, top_left_h)) if self.return_pos else out_tensors class SwapCrops(nn.Module): @@ -434,45 +372,31 @@ def __init__(self, min_crop_size, max_crop_size, mask_width=5): self.mask_width = mask_width def forward(self, input_tensor): - cr_v_sz, cr_h_sz = np.uint32( - np.random.rand(2) * (self.max_crop_size - self.min_crop_size) + - self.min_crop_size) + cr_v_sz, cr_h_sz = np.uint32(np.random.rand(2) * (self.max_crop_size - self.min_crop_size) + self.min_crop_size) - [crop_1], (top_left_v_1, - top_left_h_1) = self.rand_crop_1.forward([input_tensor], - (cr_v_sz, cr_h_sz)) - [crop_2], (top_left_v_2, - top_left_h_2) = self.rand_crop_1.forward([input_tensor], - (cr_v_sz, cr_h_sz)) + [crop_1], (top_left_v_1, top_left_h_1) = self.rand_crop_1.forward([input_tensor], (cr_v_sz, cr_h_sz)) + [crop_2], (top_left_v_2, top_left_h_2) = self.rand_crop_1.forward([input_tensor], (cr_v_sz, cr_h_sz)) output_tensor = torch.zeros_like(input_tensor) output_tensor[:, :, :, :] = input_tensor - output_tensor[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, - top_left_h_1:top_left_h_1 + cr_h_sz] = crop_2 - output_tensor[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, - top_left_h_2:top_left_h_2 + cr_h_sz] = crop_1 + output_tensor[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, top_left_h_1:top_left_h_1 + cr_h_sz] = crop_2 + output_tensor[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, top_left_h_2:top_left_h_2 + cr_h_sz] = crop_1 # Creating a mask. this is drawing a line in width 2*mask_width over the boundaries of the cropped image loss_mask = torch.ones_like(input_tensor) mw = self.mask_width - loss_mask[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, - top_left_h_1 - mw:top_left_h_1 + mw] = 0 - loss_mask[:, :, top_left_v_1 - mw:top_left_v_1 + mw, - top_left_h_1:top_left_h_1 + cr_h_sz] = 0 + loss_mask[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, top_left_h_1 - mw:top_left_h_1 + mw] = 0 + loss_mask[:, :, top_left_v_1 - mw:top_left_v_1 + mw, top_left_h_1:top_left_h_1 + cr_h_sz] = 0 loss_mask[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, top_left_h_1 + cr_h_sz - mw:top_left_h_1 + cr_h_sz + mw] = 0 - loss_mask[:, :, - top_left_v_1 + cr_v_sz - mw:top_left_v_1 + cr_v_sz + mw, + loss_mask[:, :, top_left_v_1 + cr_v_sz - mw:top_left_v_1 + cr_v_sz + mw, top_left_h_1:top_left_h_1 + cr_h_sz] = 0 - loss_mask[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, - top_left_h_2 - mw:top_left_h_2 + mw] = 0 - loss_mask[:, :, top_left_v_2 - mw:top_left_v_2 + mw, - top_left_h_2:top_left_h_2 + cr_h_sz] = 0 + loss_mask[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, top_left_h_2 - mw:top_left_h_2 + mw] = 0 + loss_mask[:, :, top_left_v_2 - mw:top_left_v_2 + mw, top_left_h_2:top_left_h_2 + cr_h_sz] = 0 loss_mask[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, top_left_h_2 + cr_h_sz - mw:top_left_h_2 + cr_h_sz + mw] = 0 - loss_mask[:, :, - top_left_v_2 + cr_v_sz - mw:top_left_v_2 + cr_v_sz + mw, + loss_mask[:, :, top_left_v_2 + cr_v_sz - mw:top_left_v_2 + cr_v_sz + mw, top_left_h_2:top_left_h_2 + cr_h_sz] = 0 return output_tensor, loss_mask @@ -486,14 +410,12 @@ def forward(self, input_tensor, target_size, shifts): sz = input_tensor.shape theta = homography_based_on_top_corners_x_shift(shifts) - pad = f.pad(input_tensor, - (np.abs(np.int(np.ceil(sz[3] * shifts[0]))), - np.abs(np.int(np.ceil(-sz[3] * shifts[1]))), 0, 0), - 'reflect') - target_size4d = torch.Size( - [pad.shape[0], pad.shape[1], target_size[0], target_size[1]]) + pad = f.pad( + input_tensor, + (np.abs(np.int(np.ceil(sz[3] * shifts[0]))), np.abs(np.int(np.ceil(-sz[3] * shifts[1]))), 0, 0), 'reflect' + ) + target_size4d = torch.Size([pad.shape[0], pad.shape[1], target_size[0], target_size[1]]) - grid = homography_grid(theta.expand(pad.shape[0], -1, -1), - target_size4d) + grid = homography_grid(theta.expand(pad.shape[0], -1, -1), target_size4d) return f.grid_sample(pad, grid, mode='bilinear', padding_mode='border') diff --git a/non_rect.py b/non_rect.py index b2c02d3..47a770d 100644 --- a/non_rect.py +++ b/non_rect.py @@ -9,16 +9,14 @@ def affine_based_on_top_left_corner_x_shift(rand_affine): :param sig: amount of random x perturbation :return: forward and backward affine transforms """ - aff = np.array([[1., -0.5 * rand_affine, 0.5 * rand_affine], [0, 1., 0]], - dtype=np.float32) + aff = np.array([[1., -0.5 * rand_affine, 0.5 * rand_affine], [0, 1., 0]], dtype=np.float32) return torch.from_numpy(aff).clone().cuda() def apply_resize_and_affine(x, target_size, rand_affine): aff = affine_based_on_top_left_corner_x_shift(rand_affine) - target_size4d = torch.Size( - [x.shape[0], x.shape[1], target_size[0], target_size[1]]) + target_size4d = torch.Size([x.shape[0], x.shape[1], target_size[0], target_size[1]]) grid = f.affine_grid(aff.expand(x.shape[0], -1, -1), target_size4d) out = f.grid_sample(x, grid, mode='bilinear', padding_mode='border') return out @@ -37,8 +35,7 @@ def homography_grid(theta, size): Returns: output (Tensor): output Tensor of size (:math:`N \times H \times W \times 2`) """ - y, x = torch.meshgrid( - (torch.linspace(-1., 1., size[-2]), torch.linspace(-1., 1., size[-1]))) + y, x = torch.meshgrid((torch.linspace(-1., 1., size[-2]), torch.linspace(-1., 1., size[-1]))) n = size[-2] * size[-1] hxy = torch.ones(n, 3, dtype=torch.float) hxy[:, 0] = x.contiguous().view(-1) @@ -51,8 +48,7 @@ def homography_grid(theta, size): def apply_resize_and_homograhpy(x, target_size, rand_h): theta = homography_based_on_top_corners_x_shift(rand_h) - target_size4d = torch.Size( - [x.shape[0], x.shape[1], target_size[0], target_size[1]]) + target_size4d = torch.Size([x.shape[0], x.shape[1], target_size[0], target_size[1]]) grid = homography_grid(theta.expand(x.shape[0], -1, -1), target_size4d) out = f.grid_sample(x, grid, mode='bilinear', padding_mode='border') return out @@ -71,15 +67,14 @@ def homography_based_on_top_corners_x_shift(rand_h): # [0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=np.float32) # play with top left and bottom right p = np.array( - [[ - 1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), - -1. + rand_h[0] - ], [0, 0, 0, 1., 1., -1., 1., 1., -1.], - [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], - [0, 0, 0, -1, -1, -1, 1, 1, 1], [1, 0, -1, 0, 0, 0, 1, 0, -1], - [0, 0, 0, 1, 0, -1, 0, 0, 0], [-1, 0, -1, 0, 0, 0, 1, 0, 1], - [0, 0, 0, -1, 0, -1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1]], - dtype=np.float32) + [ + [1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), -1. + rand_h[0]], + [0, 0, 0, 1., 1., -1., 1., 1., -1.], [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], + [0, 0, 0, -1, -1, -1, 1, 1, 1], [1, 0, -1, 0, 0, 0, 1, 0, -1], [0, 0, 0, 1, 0, -1, 0, 0, 0], + [-1, 0, -1, 0, 0, 0, 1, 0, 1], [0, 0, 0, -1, 0, -1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1] + ], + dtype=np.float32 + ) b = np.zeros((9, 1), dtype=np.float32) b[8, 0] = 1. h = np.dot(np.linalg.inv(p), b) @@ -87,17 +82,14 @@ def homography_based_on_top_corners_x_shift(rand_h): def apply_resize_and_radial(x, target_size, rand_r): - target_size4d = torch.Size( - [x.shape[0], x.shape[1], target_size[0], target_size[1]]) + target_size4d = torch.Size([x.shape[0], x.shape[1], target_size[0], target_size[1]]) grid = make_radial_scale_grid(rand_r, target_size4d) out = f.grid_sample(x, grid, mode='bilinear', padding_mode='border') return out def make_radial_scale_grid(rand_r, size4d): - y, x = torch.meshgrid( - (torch.linspace(-1., 1., - size4d[-2]), torch.linspace(-1., 1., size4d[-1]))) + y, x = torch.meshgrid((torch.linspace(-1., 1., size4d[-2]), torch.linspace(-1., 1., size4d[-1]))) theta = torch.atan2(x, y) r = torch.sqrt() diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..adc2431 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,8 @@ +[flake8] +exclude = .git,*migrations*,build*,old* +max-line-length = 120 +ignore=W391 + +[yapf] +based_on_style = facebook +column_limit = 120 diff --git a/supp_video.py b/supp_video.py index fffd628..82febb4 100644 --- a/supp_video.py +++ b/supp_video.py @@ -52,8 +52,7 @@ ] -def generate_one_frame(gan, input_tensor, frame_shape, scale, geo_shifts, - center): +def generate_one_frame(gan, input_tensor, frame_shape, scale, geo_shifts, center): with torch.no_grad(): base_sz = input_tensor.shape in_size = base_sz[2:] @@ -67,29 +66,29 @@ def generate_one_frame(gan, input_tensor, frame_shape, scale, geo_shifts, else: out_mask, out_size = prepare_geometric(base_sz, scale, geo_shifts) - output_tensor, _, _ = gan.test(input_tensor=input_tensor, - input_size=in_size, - output_size=out_size, - rand_affine=geo_shifts, - run_d_pred=False, - run_reconstruct=False) + output_tensor, _, _ = gan.test( + input_tensor=input_tensor, + input_size=in_size, + output_size=out_size, + rand_affine=geo_shifts, + run_d_pred=False, + run_reconstruct=False + ) out = out_mask * output_tensor[1] - 1 + out_mask - margin = np.uint16( - (frame_shape - np.array(out_size)) / 2) if center else [0, 0] - out_pad[margin[0]:margin[0] + out_size[0], - margin[1]:margin[1] + out_size[1], :] = util.hist_match( - util.tensor2im(out), util.tensor2im(input_tensor), - util.tensor2im(out_mask)) + margin = np.uint16((frame_shape - np.array(out_size)) / 2) if center else [0, 0] + out_pad[margin[0]:margin[0] + out_size[0], margin[1]:margin[1] + out_size[1], :] = util.hist_match( + util.tensor2im(out), util.tensor2im(input_tensor), util.tensor2im(out_mask) + ) return out_pad def generate_one_scene(gan, input_tensor, scene_script, frame_shape, center): frames = [] for i, (scale_v, scale_h, shift_l, shift_r) in enumerate(scene_script): - output_image = generate_one_frame(gan, input_tensor, frame_shape, - [scale_v, scale_h], - [shift_l, shift_r], center) + output_image = generate_one_frame( + gan, input_tensor, frame_shape, [scale_v, scale_h], [shift_l, shift_r], center + ) frames.append(output_image) return np.stack(frames, axis=0) @@ -100,29 +99,21 @@ def generate_full_video(video_script, frame_shape): conf.output_dir_path = util.prepare_result_dir(conf) n_scenes = len(video_script) - for i, (nameses, scene_script_names, - scene_script_params) in enumerate(video_script): + for i, (nameses, scene_script_names, scene_script_params) in enumerate(video_script): if not isinstance(nameses, list): nameses = [[nameses]] if not isinstance(scene_script_names, list): scene_script_names = [scene_script_names] scene_script_params = [scene_script_params] scenes = [] - for names, scene_script_name, scene_script_param in zip( - nameses, scene_script_names, scene_script_params): + for names, scene_script_name, scene_script_param in zip(nameses, scene_script_names, scene_script_params): partial_screen_scenes = [] for name in names: - conf.input_image_path = [ - os.path.dirname(os.path.abspath(__file__)) + '/' + - INPUT_DICT[name][0] - ] - conf.test_params_path = os.path.dirname( - os.path.abspath(__file__)) + INPUT_DICT[name][1] + conf.input_image_path = [os.path.dirname(os.path.abspath(__file__)) + '/' + INPUT_DICT[name][0]] + conf.test_params_path = os.path.dirname(os.path.abspath(__file__)) + INPUT_DICT[name][1] gan = InGAN(conf) - gan.G.load_state_dict( - torch.load(conf.test_params_path, - map_location='cuda:0')['G']) + gan.G.load_state_dict(torch.load(conf.test_params_path, map_location='cuda:0')['G']) [input_tensor] = util.read_data(conf) cur_frame_shape = frame_shape[:] @@ -132,55 +123,58 @@ def generate_full_video(video_script, frame_shape): cur_scene_script_param = scene_script_param[:] if scene_script_param[1] is None: - cur_scene_script_param[ - 1] = cur_frame_shape[0] * 1.0 / input_tensor.shape[2] + cur_scene_script_param[1] = cur_frame_shape[0] * 1.0 / input_tensor.shape[2] print('max scale vertical:', cur_scene_script_param[1]) if cur_scene_script_param[3] is None: - cur_scene_script_param[ - 3] = cur_frame_shape[1] * 1.0 / input_tensor.shape[3] + cur_scene_script_param[3] = cur_frame_shape[1] * 1.0 / input_tensor.shape[3] print('max scale horizontal:', cur_scene_script_param[3]) - scene_script = make_scene_script(scene_script_name, - *cur_scene_script_param) + scene_script = make_scene_script(scene_script_name, *cur_scene_script_param) center = (cur_scene_script_param[4] is not None) scene = generate_one_scene( - gan, input_tensor, scene_script, - np.array([cur_frame_shape[0], cur_frame_shape[1]]), center) + gan, input_tensor, scene_script, np.array([cur_frame_shape[0], cur_frame_shape[1]]), center + ) partial_screen_scenes.append(scene) print('Done with %s, (scene %d/%d)' % (name, i + 1, n_scenes)) - scene = np.concatenate( - partial_screen_scenes, axis=concat_axis - ) if len(partial_screen_scenes) > 1 else partial_screen_scenes[0] + scene = np.concatenate(partial_screen_scenes, + axis=concat_axis) if len(partial_screen_scenes) > 1 else partial_screen_scenes[0] scenes.append(scene) scene = np.concatenate(scenes, axis=0) outputdict = { - '-b:v': '30000000', - '-r': '100.0', + '-b:v': + '30000000', + '-r': + '100.0', '-vf': - 'drawtext="text=\'Input image\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)*7/8:enable=\'between(t,0,2)\'"', - '-preset': 'slow', - '-profile:v': 'high444', - '-level:v': '4.0', - '-crf': '22' + 'drawtext="text=\'Input image\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)*7/8:enable=\'between(t,0,2)\'"', + '-preset': + 'slow', + '-profile:v': + 'high444', + '-level:v': + '4.0', + '-crf': + '22' } if len(names) > 1: outputdict[ - '-vf'] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,2)\'"' + '-vf' + ] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,2)\'"' if not scene_script_params[-1]: outputdict[ - '-vf'] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,0)\'"' + '-vf' + ] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,0)\'"' - writer = FFmpegWriter(conf.output_dir_path + '/vid%d_%s.mp4' % - (i, '_'.join(names)), - verbosity=1, - outputdict=outputdict) + writer = FFmpegWriter( + conf.output_dir_path + '/vid%d_%s.mp4' % (i, '_'.join(names)), verbosity=1, outputdict=outputdict + ) for frame in scene: for j in range(3): writer.writeFrame(frame) @@ -190,28 +184,21 @@ def generate_full_video(video_script, frame_shape): def prepare_geometric(base_sz, scale, geo_shifts): pad_l = np.abs(np.int(np.ceil(base_sz[3] * geo_shifts[0]))) pad_r = np.abs(np.int(np.ceil(base_sz[3] * geo_shifts[1]))) - in_mask = torch.zeros(base_sz[0], base_sz[1], base_sz[2], - pad_l + base_sz[3] + pad_r).cuda() + in_mask = torch.zeros(base_sz[0], base_sz[1], base_sz[2], pad_l + base_sz[3] + pad_r).cuda() in_size = in_mask.shape[2:] - out_size = (np.uint32( - np.floor(scale[0] * in_size[0] * 1.0 / MUST_DIVIDE) * MUST_DIVIDE), - np.uint32( - np.floor(scale[1] * in_size[1] * 1.0 / MUST_DIVIDE) * - MUST_DIVIDE)) + out_size = ( + np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / MUST_DIVIDE) * MUST_DIVIDE), + np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / MUST_DIVIDE) * MUST_DIVIDE) + ) if pad_r > 0: in_mask[:, :, :, pad_l:-pad_r] = torch.ones(base_sz) else: in_mask[:, :, :, pad_l:] = torch.ones(base_sz) theta = homography_based_on_top_corners_x_shift(geo_shifts) - target_size4d = torch.Size( - [in_mask.shape[0], in_mask.shape[1], out_size[0], out_size[1]]) - grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), - target_size4d) - out_mask = f.grid_sample(in_mask, - grid, - mode='bilinear', - padding_mode='zeros') + target_size4d = torch.Size([in_mask.shape[0], in_mask.shape[1], out_size[0], out_size[1]]) + grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), target_size4d) + out_mask = f.grid_sample(in_mask, grid, mode='bilinear', padding_mode='zeros') return out_mask, out_size diff --git a/test.py b/test.py index a110fe6..569cdf8 100644 --- a/test.py +++ b/test.py @@ -1,43 +1,38 @@ import os -import cv2 +import torch +import numpy as np from PIL import Image +from skvideo.io import FFmpegWriter import util from InGAN import InGAN from configs import Config from traceback import print_exc from networks import GeoTransform -from non_rect import (apply_resize_and_radial, - homography_based_on_top_corners_x_shift, - apply_resize_and_homograhpy, homography_grid, - apply_resize_and_affine) - - -def test_one_scale(gan, - input_tensor, - scale, - must_divide, - affine=None, - return_tensor=False, - size_instead_scale=False): +from non_rect import ( + apply_resize_and_radial, homography_based_on_top_corners_x_shift, apply_resize_and_homograhpy, homography_grid, + apply_resize_and_affine +) + + +def test_one_scale(gan, input_tensor, scale, must_divide, affine=None, return_tensor=False, size_instead_scale=False): with torch.no_grad(): in_size = input_tensor.shape[2:] if size_instead_scale: out_size = scale else: out_size = ( - np.uint32( - np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * - must_divide), - np.uint32( - np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * - must_divide)) - - output_tensor, _, _ = gan.test(input_tensor=input_tensor, - input_size=in_size, - output_size=out_size, - rand_affine=affine, - run_d_pred=False, - run_reconstruct=False) + np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), + np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide) + ) + + output_tensor, _, _ = gan.test( + input_tensor=input_tensor, + input_size=in_size, + output_size=out_size, + rand_affine=affine, + run_d_pred=False, + run_reconstruct=False + ) if return_tensor: return output_tensor[1] else: @@ -59,12 +54,9 @@ def concat_images(images, margin, input_spot): bottom_right_corner_w = int(top_left_corner_w + w_sizes[i]) if [i, j] == input_spot: - collage[top_left_corner_h - margin // 2:bottom_right_corner_h + - margin // 2, - top_left_corner_w - margin // 2:bottom_right_corner_w + - margin // 2, :] = [255, 0, 0] - collage[top_left_corner_h:bottom_right_corner_h, - top_left_corner_w:bottom_right_corner_w] = images[j][i] + collage[top_left_corner_h - margin // 2:bottom_right_corner_h + margin // 2, + top_left_corner_w - margin // 2:bottom_right_corner_w + margin // 2, :] = [255, 0, 0] + collage[top_left_corner_h:bottom_right_corner_h, top_left_corner_w:bottom_right_corner_w] = images[j][i] return collage @@ -72,15 +64,12 @@ def concat_images(images, margin, input_spot): def generate_images_for_collage(gan, input_tensor, scales, must_divide): # NOTE: scales here is different from in the other funcs: here we only need 1d scales. # Prepare output images list - output_images = [[[None] for _ in range(len(scales))] - for _ in range(len(scales))] + output_images = [[[None] for _ in range(len(scales))] for _ in range(len(scales))] # Run over all scales and test the network for each one for i, scale_h in enumerate(scales): for j, scale_w in enumerate(scales): - output_images[i][j] = test_one_scale(gan, input_tensor, - [scale_h, scale_w], - must_divide) + output_images[i][j] = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) return output_images @@ -91,100 +80,89 @@ def retarget_video(gan, input_tensor, scales, must_divide, output_dir_path): frame_shape[1] += (frame_shape[1] % 2) frames = np.zeros([len(scales), frame_shape[0], frame_shape[1], 3]) for i, (scale_h, scale_w) in enumerate(scales): - output_image = test_one_scale(gan, input_tensor, [scale_h, scale_w], - must_divide) - frames[i, 0:output_image.shape[0], - 0:output_image.shape[1], :] = output_image - frame_size = frame_shape[:2] - fourcc = cv2.VideoWriter_fourcc(*'MP4V') - writer = cv2.VideoWriter(output_dir_path + '/vid.mp4', fourcc, 20.0, - frame_size) - - frames = frames.astype(np.uint8)[::-1] + output_image = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) + frames[i, 0:output_image.shape[0], 0:output_image.shape[1], :] = output_image + writer = FFmpegWriter(output_dir_path + '/vid.mp4', verbosity=1, outputdict={'-b': '30000000', '-r': '100.0'}) + for i, _ in enumerate(scales): - for _ in range(3): - writer.write(frames[i, :, :, :]) - writer.release() + for j in range(3): + writer.writeFrame(frames[i, :, :, :]) + writer.close() def define_video_scales(scales): max_v, min_v, max_h, min_h = scales frames_per_resize = 10 - x = np.concatenate([ - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize), - np.linspace(min_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, 1, frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize), - np.linspace(min_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, 1, frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize) - ]) - y = np.concatenate([ - np.linspace(1, 1, frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, max_h, 2 * frames_per_resize), - np.linspace(max_h, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, max_h, 2 * frames_per_resize), - np.linspace(max_h, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, min_h, 2 * frames_per_resize) - ]) + x = np.concatenate( + [ + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize), + np.linspace(min_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, 1, frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize), + np.linspace(min_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, 1, frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize) + ] + ) + y = np.concatenate( + [ + np.linspace(1, 1, frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, max_h, 2 * frames_per_resize), + np.linspace(max_h, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, max_h, 2 * frames_per_resize), + np.linspace(max_h, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, min_h, 2 * frames_per_resize) + ] + ) return list(zip(x, y)) def generate_collage_and_outputs(conf, gan, input_tensor): - output_images = generate_images_for_collage(gan, input_tensor, - conf.collage_scales, - conf.must_divide) + output_images = generate_images_for_collage(gan, input_tensor, conf.collage_scales, conf.must_divide) for i in range(len(output_images)): for j in range(len(output_images)): - Image.fromarray(output_images[i][j], - 'RGB').save(conf.output_dir_path + - '/test_%d_%d.png' % (i, j)) + Image.fromarray(output_images[i][j], 'RGB').save(conf.output_dir_path + '/test_%d_%d.png' % (i, j)) input_spot = conf.collage_input_spot output_images[input_spot[0]][input_spot[1]] = util.tensor2im(input_tensor) collage = concat_images(output_images, margin=10, input_spot=input_spot) - Image.fromarray(np.uint8(collage), - 'RGB').save(conf.output_dir_path + '/test_collage.png') + Image.fromarray(np.uint8(collage), 'RGB').save(conf.output_dir_path + '/test_collage.png') def _make_homography_mask(in_mask, target_size, rand_h): theta = homography_based_on_top_corners_x_shift(rand_h) - target_size4d = torch.Size( - [in_mask.shape[0], in_mask.shape[1], target_size[0], target_size[1]]) - grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), - target_size4d) + target_size4d = torch.Size([in_mask.shape[0], in_mask.shape[1], target_size[0], target_size[1]]) + grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), target_size4d) out = f.grid_sample(in_mask, grid, mode='bilinear', padding_mode='border') return out def test_homo(conf, gan, input_tensor, must_divide=8): - shift_range = np.arange(conf.non_rect_shift_range[0], - conf.non_rect_shift_range[1], - conf.non_rect_shift_range[2]) + shift_range = np.arange(conf.non_rect_shift_range[0], conf.non_rect_shift_range[1], conf.non_rect_shift_range[2]) total = (len(conf.non_rect_scales) * len(shift_range))**2 ind = 0 for scale1 in conf.non_rect_scales: @@ -195,44 +173,41 @@ def test_homo(conf, gan, input_tensor, must_divide=8): ind += 1 shifts = (shift1, shift2) sz = input_tensor.shape - out_pad = np.uint8(255 * np.ones([ - np.uint32(np.floor(sz[2] * scale[0])), - np.uint32(np.floor(3 * sz[3] * scale[1])), 3 - ])) + out_pad = np.uint8( + 255 * + np.ones([np.uint32(np.floor(sz[2] * scale[0])), + np.uint32(np.floor(3 * sz[3] * scale[1])), 3]) + ) pad_l = np.abs(np.int(np.ceil(sz[3] * shifts[0]))) pad_r = np.abs(np.int(np.ceil(sz[3] * shifts[1]))) - in_mask = torch.zeros(sz[0], sz[1], sz[2], - pad_l + sz[3] + pad_r).cuda() - input_for_regular = torch.zeros( - sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() + in_mask = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() + input_for_regular = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() in_size = in_mask.shape[2:] - out_size = (np.uint32( - np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * - must_divide), - np.uint32( - np.floor(scale[1] * in_size[1] * 1.0 / - must_divide) * must_divide)) + out_size = ( + np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), + np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide) + ) if pad_r > 0: - in_mask[:, :, :, - pad_l:-pad_r] = torch.ones_like(input_tensor) + in_mask[:, :, :, pad_l:-pad_r] = torch.ones_like(input_tensor) input_for_regular[:, :, :, pad_l:-pad_r] = input_tensor else: - in_mask[:, :, :, - pad_l:] = torch.ones_like(input_tensor) + in_mask[:, :, :, pad_l:] = torch.ones_like(input_tensor) input_for_regular[:, :, :, pad_l:] = input_tensor - out = test_one_scale(gan, - input_tensor, - out_size, - conf.must_divide, - affine=shifts, - return_tensor=True, - size_instead_scale=True) + out = test_one_scale( + gan, + input_tensor, + out_size, + conf.must_divide, + affine=shifts, + return_tensor=True, + size_instead_scale=True + ) # regular = transform(input_tensor, out_size, shifts) out_mask = _make_homography_mask(in_mask, out_size, shifts) @@ -240,16 +215,16 @@ def test_homo(conf, gan, input_tensor, must_divide=8): # regular_out = util.tensor2im(out_mask * regular + 1 - out_mask) # out_pad[:, sz[3] - pad_l: sz[3] - pad_l + out_size[1], :] = out shift_str = "{1:0{0}d}_{3:0{2}d}".format( - 2 if shift1 >= 0 else 3, int(10 * shift1), - 2 if shift2 >= 0 else 3, int(10 * shift2)) + 2 if shift1 >= 0 else 3, int(10 * shift1), 2 if shift2 >= 0 else 3, int(10 * shift2) + ) # out = np.rot90(out, 3) # regular_out = np.rot90(regular_out, 3) Image.fromarray(out, 'RGB').save( - conf.output_dir_path + - '/scale_%02d_%02d_transform %s_ingan.png' % - (int(10 * scale1), int(10 * scale2), shift_str)) + conf.output_dir_path + '/scale_%02d_%02d_transform %s_ingan.png' % + (int(10 * scale1), int(10 * scale2), shift_str) + ) # Image.fromarray(regular_out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ref.png' % (scale1, scale2, shift_str)) print((ind, '/', total, 'scale:', scale, 'shift:', shifts)) @@ -265,9 +240,7 @@ def main(): [input_tensor] = util.read_data(conf) if conf.test_video: - retarget_video(gan, input_tensor, - define_video_scales(conf.test_vid_scales), 8, - conf.output_dir_path) + retarget_video(gan, input_tensor, define_video_scales(conf.test_vid_scales), 8, conf.output_dir_path) if conf.test_collage: generate_collage_and_outputs(conf, gan, input_tensor) if conf.test_non_rect: diff --git a/test_util.py b/test_util.py index af1ec9d..a6a2a91 100644 --- a/test_util.py +++ b/test_util.py @@ -13,7 +13,6 @@ def test_image(): def test_tensor2im(test_image): - tensor = torch.tensor(test_image).permute(2, 0, - 1).unsqueeze(0) / 255. * 2 - 1 + tensor = torch.tensor(test_image).permute(2, 0, 1).unsqueeze(0) / 255. * 2 - 1 img = tensor2im(tensor) assert np.allclose(img, test_image) diff --git a/train.py b/train.py index 4e8b094..77a454f 100644 --- a/train.py +++ b/train.py @@ -38,26 +38,19 @@ except KeyboardInterrupt: raise except Exception as e: - print( - 'Something went wrong in iteration %d, While testing or visualizing.' - % i) + print('Something went wrong in iteration %d, While testing or visualizing.' % i) print_exc() # Save snapshot when needed try: if i > 0 and not i % conf.save_snapshot_freq: - gan.save( - os.path.join(conf.output_dir_path, - 'checkpoint_%07d.pth.tar' % i)) + gan.save(os.path.join(conf.output_dir_path, 'checkpoint_%07d.pth.tar' % i)) del gan gan = InGAN(conf) - gan.resume( - os.path.join(conf.output_dir_path, - 'checkpoint_%07d.pth.tar' % i)) + gan.resume(os.path.join(conf.output_dir_path, 'checkpoint_%07d.pth.tar' % i)) visualizer.gan = gan except KeyboardInterrupt: raise except Exception as e: - print('Something went wrong in iteration %d, While saving snapshot.' % - i) + print('Something went wrong in iteration %d, While saving snapshot.' % i) print_exc() diff --git a/train_supp_mat.py b/train_supp_mat.py index 8d9ed49..68480da 100644 --- a/train_supp_mat.py +++ b/train_supp_mat.py @@ -41,10 +41,7 @@ def main(): for imgname in os.listdir(base_dir): full_img_name = os.path.join(base_dir, imgname) short_name = os.path.splitext(imgname)[0] - cmd = [ - 'python', 'train.py', '--input_image_path', full_img_name, - '--gpu_id', '0' - ] + cmd = ['python', 'train.py', '--input_image_path', full_img_name, '--gpu_id', '0'] for aname, aa in list(abl_args.items()): exp_name = '{}_{}'.format(short_name, aname) full_cmd = cmd + aa + ['--name', exp_name] diff --git a/util.py b/util.py index d144d54..7cd95e4 100644 --- a/util.py +++ b/util.py @@ -10,19 +10,15 @@ def read_data(conf): - input_images = [ - read_shave_tensorize(path, conf.must_divide) - for path in conf.input_image_path - ] + input_images = [read_shave_tensorize(path, conf.must_divide) for path in conf.input_image_path] return input_images def read_shave_tensorize(path, must_divide): input_np = (np.array(Image.open(path).convert('RGB')) / 255.0) - input_np_shaved = input_np[:(input_np.shape[0] // must_divide) * - must_divide, :(input_np.shape[1] // - must_divide) * must_divide, :] + input_np_shaved = input_np[:(input_np.shape[0] // must_divide) * must_divide, :(input_np.shape[1] // must_divide) * + must_divide, :] input_tensor = im2tensor(input_np_shaved) @@ -60,32 +56,30 @@ def im2tensor(image_numpy, int_flag=False, device=torch.device('cuda')): return torch.FloatTensor(transformed_image).unsqueeze(0).to(device) -def random_size(orig_size, - curriculum=True, - i=None, - iter_for_max_range=None, - must_divide=8.0, - min_scale=0.25, - max_scale=2.0, - max_transform_magniutude=0.3): - cur_max_scale = 1.0 + (max_scale - 1.0) * np.clip( - 1.0 * i / iter_for_max_range, 0, 1) if curriculum else max_scale - cur_min_scale = 1.0 + (min_scale - 1.0) * np.clip( - 1.0 * i / iter_for_max_range, 0, 1) if curriculum else min_scale - cur_max_transform_magnitude = (max_transform_magniutude * - np.clip(1.0 * i / iter_for_max_range, 0, 1) - if curriculum else max_transform_magniutude) +def random_size( + orig_size, + curriculum=True, + i=None, + iter_for_max_range=None, + must_divide=8.0, + min_scale=0.25, + max_scale=2.0, + max_transform_magniutude=0.3 +): + cur_max_scale = 1.0 + (max_scale - 1.0) * np.clip(1.0 * i / iter_for_max_range, 0, 1) if curriculum else max_scale + cur_min_scale = 1.0 + (min_scale - 1.0) * np.clip(1.0 * i / iter_for_max_range, 0, 1) if curriculum else min_scale + cur_max_transform_magnitude = ( + max_transform_magniutude * + np.clip(1.0 * i / iter_for_max_range, 0, 1) if curriculum else max_transform_magniutude + ) # set random transformation magnitude. scalar = affine, pair = homography. - random_affine = -cur_max_transform_magnitude + 2 * cur_max_transform_magnitude * np.random.rand( - 2) + random_affine = -cur_max_transform_magnitude + 2 * cur_max_transform_magnitude * np.random.rand(2) # set new size for the output image - new_size = np.array(orig_size) * ( - cur_min_scale + (cur_max_scale - cur_min_scale) * np.random.rand(2)) + new_size = np.array(orig_size) * (cur_min_scale + (cur_max_scale - cur_min_scale) * np.random.rand(2)) - return tuple(np.uint32( - np.ceil(new_size * 1.0 / must_divide) * must_divide)), random_affine + return tuple(np.uint32(np.ceil(new_size * 1.0 / must_divide) * must_divide)), random_affine def image_concat(g_preds, d_preds=None, size=None): @@ -101,12 +95,9 @@ def image_concat(g_preds, d_preds=None, size=None): img = (np.concatenate([d_pred] * 3, 2) - 128) * 2 import cv2 # d_pred_new = imresize(img, g_pred.shape[0:2], interp='nearest') - d_pred_new = cv2.resize(img, - dsize=g_pred.shape[0:2][::-1], - interpolation=cv2.INTER_NEAREST) + d_pred_new = cv2.resize(img, dsize=g_pred.shape[0:2][::-1], interpolation=cv2.INTER_NEAREST) con = np.concatenate([g_pred, d_pred_new], 0) - result[hsize - g_pred.shape[0]:hsize + - g_pred.shape[0], :g_pred.shape[1], :] = con + result[hsize - g_pred.shape[0]:hsize + g_pred.shape[0], :g_pred.shape[1], :] = con else: result[hsize - g_pred.shape[0]:, :, :] = g_pred results.append(np.uint8(np.round(result))) @@ -119,14 +110,10 @@ def save_image(image_tensor, image_path): image_pil.save(image_path) -def get_scale_weights(i, max_i, start_factor, input_shape, min_size, - num_scales_limit, scale_factor): - num_scales = np.min([ - np.int( - np.ceil( - np.log(np.min(input_shape) * 1.0 / min_size) / - np.log(scale_factor))), num_scales_limit - ]) +def get_scale_weights(i, max_i, start_factor, input_shape, min_size, num_scales_limit, scale_factor): + num_scales = np.min( + [np.int(np.ceil(np.log(np.min(input_shape) * 1.0 / min_size) / np.log(scale_factor))), num_scales_limit] + ) # if i > max_i * 2: # i = max_i * 2 @@ -153,9 +140,7 @@ def __init__(self, gan, conf, test_inputs): self.D_loss_fake = [None] * conf.max_iters self.test_inputs = test_inputs - self.test_input_sizes = [ - test_input.shape[2:] for test_input in test_inputs - ] + self.test_input_sizes = [test_input.shape[2:] for test_input in test_inputs] if conf.reconstruct_loss_stop_iter > 0: self.Rec_loss = [None] * conf.max_iters @@ -171,11 +156,8 @@ def recreate_fig(self): self.d_map_real = self.fig.add_subplot(gs[7, 7]) # First plot data - self.plot_gan_loss = self.gan_loss.plot([], [], 'b-', [], [], 'c--', - [], [], 'r--') - self.gan_loss.legend( - ('Generator loss', 'Discriminator loss (real image)', - 'Discriminator loss (fake image)')) + self.plot_gan_loss = self.gan_loss.plot([], [], 'b-', [], [], 'c--', [], [], 'r--') + self.gan_loss.legend(('Generator loss', 'Discriminator loss (real image)', 'Discriminator loss (fake image)')) self.gan_loss.set_ylim(0, 1) if self.conf.reconstruct_loss_stop_iter > 0: @@ -185,8 +167,7 @@ def recreate_fig(self): self.gan_loss.set_title('Gan Losses') self.reconstruct_loss.set_title('Reconstruction Loss') self.reconstruction.set_title('Reconstruction') - self.d_map_real.set_xlabel( - 'Current Discriminator \n map for real example') + self.d_map_real.set_xlabel('Current Discriminator \n map for real example') self.real_example.set_xlabel('Real example') self.result.set_title('Current result') @@ -200,32 +181,33 @@ def recreate_fig(self): def test_and_display(self, i): if not i % self.conf.print_freq and i > 0: - self.G_loss[i - - self.conf.print_freq:i] = self.gan.losses_G_gan.detach( - ).cpu().float().numpy().tolist() - self.D_loss_real[i - self.conf. - print_freq:i] = self.gan.losses_D_real.detach( - ).cpu().float().numpy().tolist() - self.D_loss_fake[i - self.conf. - print_freq:i] = self.gan.losses_D_fake.detach( - ).cpu().float().numpy().tolist() + self.G_loss[i - self.conf.print_freq:i] = self.gan.losses_G_gan.detach().cpu().float().numpy().tolist() + self.D_loss_real[i - + self.conf.print_freq:i] = self.gan.losses_D_real.detach().cpu().float().numpy().tolist() + self.D_loss_fake[i - + self.conf.print_freq:i] = self.gan.losses_D_fake.detach().cpu().float().numpy().tolist() if self.conf.reconstruct_loss_stop_iter > i: - self.Rec_loss[i - self.conf.print_freq: - i] = self.gan.losses_G_reconstruct.detach().cpu( - ).float().numpy().tolist() + self.Rec_loss[i - self.conf.print_freq:i] = self.gan.losses_G_reconstruct.detach().cpu().float().numpy( + ).tolist() if self.conf.reconstruct_loss_stop_iter < i: - print(( - 'iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, LR: %f' - % (i, self.G_loss[i - 1], - self.D_loss_real[i - 1], self.D_loss_fake[i - 1], - self.gan.lr_scheduler_G.get_lr()[0]))) + print( + ( + 'iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, LR: %f' % ( + i, self.G_loss[i - 1], self.D_loss_real[i - 1], self.D_loss_fake[i - 1], + self.gan.lr_scheduler_G.get_lr()[0] + ) + ) + ) else: - print(( - 'iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, Rec_loss: %f, LR: %f' - % (i, self.G_loss[i - 1], self.D_loss_real[i - 1], - self.D_loss_fake[i - 1], self.Rec_loss[i - 1], - self.gan.lr_scheduler_G.get_lr()[0]))) + print( + ( + 'iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, Rec_loss: %f, LR: %f' % ( + i, self.G_loss[i - 1], self.D_loss_real[i - 1], self.D_loss_fake[i - 1], + self.Rec_loss[i - 1], self.gan.lr_scheduler_G.get_lr()[0] + ) + ) + ) if not i % self.conf.display_freq and i > 0: plt.gcf().clear() @@ -247,55 +229,41 @@ def test_and_display(self, i): g_preds = [self.gan.input_tensor_noised, self.gan.G_pred] d_preds = [ - self.gan.D.forward(self.gan.input_tensor_noised.detach(), - self.gan.scale_weights), - self.gan.d_pred_fake + self.gan.D.forward(self.gan.input_tensor_noised.detach(), self.gan.scale_weights), self.gan.d_pred_fake ] reconstructs = self.gan.reconstruct input_size = self.gan.input_tensor_noised.shape[2:] - result = image_concat(tensor2im(g_preds), tensor2im(d_preds), - (input_size[0] * 2, input_size[1] * 2)) + result = image_concat(tensor2im(g_preds), tensor2im(d_preds), (input_size[0] * 2, input_size[1] * 2)) self.plot_gan_loss[0].set_data(list(range(i)), self.G_loss[:i]) - self.plot_gan_loss[1].set_data(list(range(i)), - self.D_loss_real[:i]) - self.plot_gan_loss[2].set_data(list(range(i)), - self.D_loss_fake[:i]) + self.plot_gan_loss[1].set_data(list(range(i)), self.D_loss_real[:i]) + self.plot_gan_loss[2].set_data(list(range(i)), self.D_loss_fake[:i]) self.gan_loss.set_xlim(0, i) if self.conf.reconstruct_loss_stop_iter > i: - self.plot_reconstruct_loss[0].set_data(list(range(i)), - self.Rec_loss[:i]) - self.reconstruct_loss.set_ylim(np.min(self.Rec_loss[:i]), - np.max(self.Rec_loss[:i])) + self.plot_reconstruct_loss[0].set_data(list(range(i)), self.Rec_loss[:i]) + self.reconstruct_loss.set_ylim(np.min(self.Rec_loss[:i]), np.max(self.Rec_loss[:i])) self.reconstruct_loss.set_xlim(0, i) self.result.imshow(np.clip(result, 0, 255), vmin=0, vmax=255) - self.real_example.imshow(np.clip( - tensor2im(self.gan.real_example[0:1, :, :, :]), 0, 255), - vmin=0, - vmax=255) - self.d_map_real.imshow(self.gan.d_pred_real[ - 0:1, :, :, :].detach().cpu().float().numpy().squeeze(), - cmap='gray', - vmin=0, - vmax=1) + self.real_example.imshow(np.clip(tensor2im(self.gan.real_example[0:1, :, :, :]), 0, 255), vmin=0, vmax=255) + self.d_map_real.imshow( + self.gan.d_pred_real[0:1, :, :, :].detach().cpu().float().numpy().squeeze(), + cmap='gray', + vmin=0, + vmax=1 + ) if self.conf.reconstruct_loss_stop_iter > i: - self.reconstruction.imshow(np.clip( - image_concat([tensor2im(reconstructs)]), 0, 255), - vmin=0, - vmax=255) + self.reconstruction.imshow(np.clip(image_concat([tensor2im(reconstructs)]), 0, 255), vmin=0, vmax=255) plt.savefig(self.conf.output_dir_path + '/monitor_%d' % i) - save_image(self.gan.G_pred, - self.conf.output_dir_path + '/result_iter_%d.png' % i) + save_image(self.gan.G_pred, self.conf.output_dir_path + '/result_iter_%d.png' % i) def prepare_result_dir(conf): # Create results directory - conf.output_dir_path += '/' + conf.name + strftime('_%b_%d_%H_%M_%S', - localtime()) + conf.output_dir_path += '/' + conf.name + strftime('_%b_%d_%H_%M_%S', localtime()) os.makedirs(conf.output_dir_path) # Put a copy of all *.py files in results path, to be able to reproduce experimental results @@ -304,24 +272,20 @@ def prepare_result_dir(conf): for py_file in glob.glob(local_dir + '/*.py'): copy(py_file, conf.output_dir_path) if conf.resume: - copy( - conf.resume, - os.path.join(conf.output_dir_path, - 'starting_checkpoint.pth.tar')) + copy(conf.resume, os.path.join(conf.output_dir_path, 'starting_checkpoint.pth.tar')) return conf.output_dir_path def homography_based_on_top_corners_x_shift(rand_h): p = np.array( - [[ - 1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), - -1. + rand_h[0] - ], [0, 0, 0, 1., 1., -1., 1., 1., -1.], - [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], - [0, 0, 0, -1, -1, -1, 1, 1, 1], [1, 0, -1, 0, 0, 0, 1, 0, -1], - [0, 0, 0, 1, 0, -1, 0, 0, 0], [-1, 0, -1, 0, 0, 0, 1, 0, 1], - [0, 0, 0, -1, 0, -1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1]], - dtype=np.float32) + [ + [1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), -1. + rand_h[0]], + [0, 0, 0, 1., 1., -1., 1., 1., -1.], [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], + [0, 0, 0, -1, -1, -1, 1, 1, 1], [1, 0, -1, 0, 0, 0, 1, 0, -1], [0, 0, 0, 1, 0, -1, 0, 0, 0], + [-1, 0, -1, 0, 0, 0, 1, 0, 1], [0, 0, 0, -1, 0, -1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1] + ], + dtype=np.float32 + ) b = np.zeros((9, 1), dtype=np.float32) b[8, 0] = 1. h = np.dot(np.linalg.inv(p), b) @@ -343,8 +307,7 @@ def homography_grid(theta, size): """ a = 1 b = 1 - y, x = torch.meshgrid((torch.linspace(-b, b, np.int(size[-2] * a)), - torch.linspace(-b, b, np.int(size[-1] * a)))) + y, x = torch.meshgrid((torch.linspace(-b, b, np.int(size[-2] * a)), torch.linspace(-b, b, np.int(size[-1] * a)))) n = np.int(size[-2] * a) * np.int(size[-1] * a) hxy = torch.ones(n, 3, dtype=torch.float) hxy[:, 0] = x.contiguous().view(-1) @@ -352,8 +315,7 @@ def homography_grid(theta, size): out = hxy[None, ...].cuda().matmul(theta.transpose(1, 2)) # normalize out = out[:, :, :2] / out[:, :, 2:] - return out.view(theta.shape[0], np.int(size[-2] * a), np.int(size[-1] * a), - 2) + return out.view(theta.shape[0], np.int(size[-2] * a), np.int(size[-1] * a), 2) def hist_match(source, template, mask_3ch): @@ -379,9 +341,7 @@ def hist_match(source, template, mask_3ch): template = template.ravel() # get the set of unique pixel values and their corresponding indices and # counts - s_values, bin_idx, s_counts = np.unique(source_masked, - return_inverse=True, - return_counts=True) + s_values, bin_idx, s_counts = np.unique(source_masked, return_inverse=True, return_counts=True) t_values, t_counts = np.unique(template, return_counts=True) # take the cumsum of the counts and normalize by the number of pixels to From f4a0454dcb726948ca22d1f5dc0114026a26c294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Migda=C5=82?= Date: Sat, 17 Jul 2021 22:24:33 +0200 Subject: [PATCH 13/13] a note on the Python 3.8 port --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4a6f71f..2b3f68e 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@ -**ported to Python 3.8 and PyTorch 1.9** # InGAN ### Official code for the paper "InGAN: Capturing and Retargeting the DNA of a Natural Image" Project page: http://www.wisdom.weizmann.ac.il/~vision/ingan/ (See our results and visual comparison to other methods) +Version ported to Python 3.8 and PyTorch 1.9 by [https://github.com/Bartolo1024](https://github.com/Bartolo1024) and used in [Level generation and style enhancement - deep learning for game development overview](https://arxiv.org/abs/2107.07397). + **Accepted ICCV'19 (Oral)** ---------- ![](/figs/fruits.gif)