diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml new file mode 100644 index 0000000..9ba0949 --- /dev/null +++ b/.github/workflows/pythonapp.yml @@ -0,0 +1,34 @@ +name: Python conda test + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: conda-incubator/setup-miniconda@v2 + with: + miniconda-version: "latest" + activate-environment: ingan + environment-file: environment.yml + python-version: 3.8 + auto-activate-base: false + - shell: bash -l {0} + run: | + conda info + conda list + - name: Test with pytest + shell: bash -l {0} + run: | + pytest . + - name: yapf + id: yapf + uses: diegovalenzuelaiturra/yapf-action@v0.0.1 + with: + args: . --recursive --diff + - name: Fail if yapf made changes + if: steps.yapf.outputs.exit-code == 2 + run: exit 1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8de7ad4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +examples/ +results/ +.idea/ diff --git a/.yapfignore b/.yapfignore new file mode 100644 index 0000000..fc95ba6 --- /dev/null +++ b/.yapfignore @@ -0,0 +1,2 @@ +results/ +examples/ diff --git a/InGAN.py b/InGAN.py index 05acdc6..ca4dc2c 100644 --- a/InGAN.py +++ b/InGAN.py @@ -29,10 +29,12 @@ def __init__(self, conf): self.real_example = torch.FloatTensor(1, 3, conf.output_crop_size, conf.output_crop_size).cuda() # Define networks - self.G = networks.Generator(conf.G_base_channels, conf.G_num_resblocks, conf.G_num_downscales, conf.G_use_bias, - conf.G_skip) - self.D = networks.MultiScaleDiscriminator(conf.output_crop_size, self.conf.D_max_num_scales, - self.conf.D_scale_factor, self.conf.D_base_channels) + self.G = networks.Generator( + conf.G_base_channels, conf.G_num_resblocks, conf.G_num_downscales, conf.G_use_bias, conf.G_skip + ) + self.D = networks.MultiScaleDiscriminator( + conf.output_crop_size, self.conf.D_max_num_scales, self.conf.D_scale_factor, self.conf.D_base_channels + ) self.GAN_loss_layer = 
networks.GANLoss() self.Reconstruct_loss = networks.WeightedMSELoss(use_L1=conf.use_L1) self.RandCrop = networks.RandomCrop([conf.input_crop_size, conf.input_crop_size], must_divide=conf.must_divide) @@ -89,15 +91,18 @@ def save(self, citer=None): filename = citer else: filename = 'snapshot-{:05d}.pth.tar'.format(citer) - torch.save({'G': self.G.state_dict(), - 'D': self.D.state_dict(), - 'optim_G': self.optimizer_G.state_dict(), - 'optim_D': self.optimizer_D.state_dict(), - 'sched_G': self.lr_scheduler_G.state_dict(), - 'sched_D': self.lr_scheduler_D.state_dict(), - 'loss': self.GAN_loss_layer.state_dict(), - 'iter': citer if citer else self.cur_iter}, - os.path.join(self.conf.output_dir_path, filename)) + torch.save( + { + 'G': self.G.state_dict(), + 'D': self.D.state_dict(), + 'optim_G': self.optimizer_G.state_dict(), + 'optim_D': self.optimizer_D.state_dict(), + 'sched_G': self.lr_scheduler_G.state_dict(), + 'sched_D': self.lr_scheduler_D.state_dict(), + 'loss': self.GAN_loss_layer.state_dict(), + 'iter': citer if citer else self.cur_iter + }, os.path.join(self.conf.output_dir_path, filename) + ) def resume(self, resume_path, test_flag=False): resume = torch.load(resume_path, map_location={'cuda:5': 'cuda:0'}) @@ -134,34 +139,44 @@ def resume(self, resume_path, test_flag=False): if len(missing): warnings.warn('Missing the following state dicts from checkpoint: {}'.format(', '.join(missing))) - print('resuming checkpoint {}'.format(self.conf.resume)) + print(('resuming checkpoint {}'.format(self.conf.resume))) def test(self, input_tensor, output_size, rand_affine, input_size, run_d_pred=True, run_reconstruct=True): with torch.no_grad(): - self.G_pred = self.G.forward(Variable(input_tensor.detach()), output_size=output_size, random_affine=rand_affine) + self.G_pred = self.G.forward( + Variable(input_tensor.detach()), output_size=output_size, random_affine=rand_affine + ) if run_d_pred: - scale_weights_for_output = get_scale_weights(i=self.cur_iter, - 
max_i=self.conf.D_scale_weights_iter_for_even_scales, - start_factor=self.conf.D_scale_weights_sigma, - input_shape=self.G_pred.shape[2:], - min_size=self.conf.D_min_input_size, - num_scales_limit=self.conf.D_max_num_scales, - scale_factor=self.conf.D_scale_factor) - scale_weights_for_input = get_scale_weights(i=self.cur_iter, - max_i=self.conf.D_scale_weights_iter_for_even_scales, - start_factor=self.conf.D_scale_weights_sigma, - input_shape=input_tensor.shape[2:], - min_size=self.conf.D_min_input_size, - num_scales_limit=self.conf.D_max_num_scales, - scale_factor=self.conf.D_scale_factor) - self.D_preds = [self.D.forward(Variable(input_tensor.detach()), scale_weights_for_input), - self.D.forward(Variable(self.G_pred.detach()), scale_weights_for_output)] + scale_weights_for_output = get_scale_weights( + i=self.cur_iter, + max_i=self.conf.D_scale_weights_iter_for_even_scales, + start_factor=self.conf.D_scale_weights_sigma, + input_shape=self.G_pred.shape[2:], + min_size=self.conf.D_min_input_size, + num_scales_limit=self.conf.D_max_num_scales, + scale_factor=self.conf.D_scale_factor + ) + scale_weights_for_input = get_scale_weights( + i=self.cur_iter, + max_i=self.conf.D_scale_weights_iter_for_even_scales, + start_factor=self.conf.D_scale_weights_sigma, + input_shape=input_tensor.shape[2:], + min_size=self.conf.D_min_input_size, + num_scales_limit=self.conf.D_max_num_scales, + scale_factor=self.conf.D_scale_factor + ) + self.D_preds = [ + self.D.forward(Variable(input_tensor.detach()), scale_weights_for_input), + self.D.forward(Variable(self.G_pred.detach()), scale_weights_for_output) + ] else: self.D_preds = None self.G_preds = [input_tensor, self.G_pred] - self.reconstruct = self.G.forward(self.G_pred, output_size=input_size, random_affine=-rand_affine) if run_reconstruct else None + self.reconstruct = self.G.forward( + self.G_pred, output_size=input_size, random_affine=-rand_affine + ) if run_reconstruct else None return self.G_preds, self.D_preds, 
self.reconstruct @@ -171,14 +186,16 @@ def train_g(self): self.optimizer_D.zero_grad() # Determine output size of G (dynamic change) - output_size, random_affine = random_size(orig_size=self.input_tensor.shape[2:], - curriculum=self.conf.curriculum, - i=self.cur_iter, - iter_for_max_range=self.conf.iter_for_max_range, - must_divide=self.conf.must_divide, - min_scale=self.conf.min_scale, - max_scale=self.conf.max_scale, - max_transform_magniutude=self.conf.max_transform_magnitude) + output_size, random_affine = random_size( + orig_size=self.input_tensor.shape[2:], + curriculum=self.conf.curriculum, + i=self.cur_iter, + iter_for_max_range=self.conf.iter_for_max_range, + must_divide=self.conf.must_divide, + min_scale=self.conf.min_scale, + max_scale=self.conf.max_scale, + max_transform_magniutude=self.conf.max_transform_magnitude + ) # Add noise to G input for better generalization (make it ignore the 1/255 binning) self.input_tensor_noised = self.input_tensor + (torch.rand_like(self.input_tensor) - 0.5) * 2.0 / 255 @@ -187,18 +204,22 @@ def train_g(self): self.G_pred = self.G.forward(self.input_tensor_noised, output_size=output_size, random_affine=random_affine) # Run generator result through discriminator forward pass - self.scale_weights = get_scale_weights(i=self.cur_iter, - max_i=self.conf.D_scale_weights_iter_for_even_scales, - start_factor=self.conf.D_scale_weights_sigma, - input_shape=self.G_pred.shape[2:], - min_size=self.conf.D_min_input_size, - num_scales_limit=self.conf.D_max_num_scales, - scale_factor=self.conf.D_scale_factor) + self.scale_weights = get_scale_weights( + i=self.cur_iter, + max_i=self.conf.D_scale_weights_iter_for_even_scales, + start_factor=self.conf.D_scale_weights_sigma, + input_shape=self.G_pred.shape[2:], + min_size=self.conf.D_min_input_size, + num_scales_limit=self.conf.D_max_num_scales, + scale_factor=self.conf.D_scale_factor + ) d_pred_fake = self.D.forward(self.G_pred, self.scale_weights) # If reconstruction-loss is used, run 
through decoder to reconstruct, then calculate reconstruction loss if self.conf.reconstruct_loss_stop_iter > self.cur_iter: - self.reconstruct = self.G.forward(self.G_pred, output_size=self.input_tensor.shape[2:], random_affine=-random_affine) + self.reconstruct = self.G.forward( + self.G_pred, output_size=self.input_tensor.shape[2:], random_affine=-random_affine + ) self.loss_G_reconstruct = self.criterionReconstruction(self.reconstruct, self.input_tensor, self.loss_mask) # Calculate generator loss, based on discriminator prediction on generator result @@ -224,9 +245,13 @@ def train_g(self): if self.cur_iter > self.conf.G_extra_inverse_train_start_iter: for _ in range(self.conf.G_extra_inverse_train): self.optimizer_G.zero_grad() - self.inverse = self.G.forward(self.G_pred.detach(), output_size=self.input_tensor.shape[2:], random_affine=-random_affine) - self.loss_G_inverse = (self.criterionReconstruction(self.inverse, self.input_tensor, self.loss_mask) * - self.conf.G_extra_inverse_train_ratio) + self.inverse = self.G.forward( + self.G_pred.detach(), output_size=self.input_tensor.shape[2:], random_affine=-random_affine + ) + self.loss_G_inverse = ( + self.criterionReconstruction(self.inverse, self.input_tensor, self.loss_mask) * + self.conf.G_extra_inverse_train_ratio + ) self.loss_G_inverse.backward() self.optimizer_G.step() diff --git a/README.md b/README.md index 09ccc23..2b3f68e 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ Project page: http://www.wisdom.weizmann.ac.il/~vision/ingan/ (See our results and visual comparison to other methods) +Version ported to Python 3.8 and PyTorch 1.9 by [https://github.com/Bartolo1024](https://github.com/Bartolo1024) and used in [Level generation and style enhancement - deep learning for game development overview](https://arxiv.org/abs/2107.07397). 
+ **Accepted ICCV'19 (Oral)** ---------- ![](/figs/fruits.gif) diff --git a/SceneScripts.py b/SceneScripts.py index f8dc142..a281b2c 100644 --- a/SceneScripts.py +++ b/SceneScripts.py @@ -5,249 +5,358 @@ def make_scene_script(script_name, min_v, max_v, min_h, max_h, max_t, repeat, sh l = np.linspace if script_name == 'vertical_grow_shrink': - size_v = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize)]) - size_h = np.concatenate([ - l(1, 1, frames_per_resize), - l(1, 1, 2 * frames_per_resize), - l(1, 1, frames_per_resize)]) + size_v = np.concatenate( + [l(1, max_v, frames_per_resize), + l(max_v, min_v, 2 * frames_per_resize), + l(min_v, 1, frames_per_resize)] + ) + size_h = np.concatenate( + [l(1, 1, frames_per_resize), + l(1, 1, 2 * frames_per_resize), + l(1, 1, frames_per_resize)] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'horizontal_grow_shrink': - size_v = np.concatenate([ - l(1, 1, frames_per_resize), - l(1, 1, 2 * frames_per_resize), - l(1, 1, frames_per_resize)]) - size_h = np.concatenate([ - l(1, max_h, frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + size_v = np.concatenate( + [l(1, 1, frames_per_resize), + l(1, 1, 2 * frames_per_resize), + l(1, 1, frames_per_resize)] + ) + size_h = np.concatenate( + [l(1, max_h, frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize)] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'horizontal_grow_shrink_slow': - size_v = np.concatenate([ - l(1, 1, 2 *frames_per_resize), - l(1, 1, 2 * frames_per_resize), - l(1, 1, frames_per_resize)]) - size_h = np.concatenate([ - l(1, max_h, 2 * frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + size_v = np.concatenate( + [l(1, 1, 2 * frames_per_resize), + l(1, 1, 2 * frames_per_resize), + l(1, 1, 
frames_per_resize)] + ) + size_h = np.concatenate( + [ + l(1, max_h, 2 * frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize) + ] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == '2d_grow_shrink': - size_v = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize)]) - size_h = np.concatenate([ - l(1, max_h, frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + size_v = np.concatenate( + [l(1, max_v, frames_per_resize), + l(max_v, min_v, 2 * frames_per_resize), + l(min_v, 1, frames_per_resize)] + ) + size_h = np.concatenate( + [l(1, max_h, frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize)] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'resize_round': - size_v = np.concatenate([ - l(1, 1, frames_per_resize), - l(1, max_v, frames_per_resize), - l(max_v, max_v, 2 * frames_per_resize), - l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize)]) - size_h = np.concatenate([ - l(1, max_h, frames_per_resize), - l(max_h, max_h, frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + size_v = np.concatenate( + [ + l(1, 1, frames_per_resize), + l(1, max_v, frames_per_resize), + l(max_v, max_v, 2 * frames_per_resize), + l(max_v, min_v, 2 * frames_per_resize), + l(min_v, 1, frames_per_resize) + ] + ) + size_h = np.concatenate( + [ + l(1, max_h, frames_per_resize), + l(max_h, max_h, frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize) + ] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'special_resize_round': - size_v = np.concatenate([ - l(1, 1, frames_per_resize/2), - l(1, max_v, 
frames_per_resize), - l(max_v, max_v, frames_per_resize), - l(max_v, max_v, 2 * frames_per_resize), - l(max_v, min_v, 2 * frames_per_resize), - l(min_v, 1, frames_per_resize)]) - - size_h = np.concatenate([ - l(1, max_h/2, frames_per_resize/2), - l(max_h/2, max_h/2, frames_per_resize), - l(max_h/2, max_h, frames_per_resize), - l(max_h, min_h, 2 * frames_per_resize), - l(min_h, min_h, 2 * frames_per_resize), - l(min_h, 1, frames_per_resize)]) + size_v = np.concatenate( + [ + l(1, 1, frames_per_resize / 2), + l(1, max_v, frames_per_resize), + l(max_v, max_v, frames_per_resize), + l(max_v, max_v, 2 * frames_per_resize), + l(max_v, min_v, 2 * frames_per_resize), + l(min_v, 1, frames_per_resize) + ] + ) + + size_h = np.concatenate( + [ + l(1, max_h / 2, frames_per_resize / 2), + l(max_h / 2, max_h / 2, frames_per_resize), + l(max_h / 2, max_h, frames_per_resize), + l(max_h, min_h, 2 * frames_per_resize), + l(min_h, min_h, 2 * frames_per_resize), + l(min_h, 1, frames_per_resize) + ] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'special_zoom': - size_v = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, min_v, frames_per_resize), - l(min_v, 1, frames_per_resize)]) - size_h = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, min_v, frames_per_resize), - l(min_v, 1, frames_per_resize)]) + size_v = np.concatenate( + [l(1, max_v, frames_per_resize), + l(max_v, min_v, frames_per_resize), + l(min_v, 1, frames_per_resize)] + ) + size_h = np.concatenate( + [l(1, max_v, frames_per_resize), + l(max_v, min_v, frames_per_resize), + l(min_v, 1, frames_per_resize)] + ) shift_l = [0 for _ in size_v] shift_r = [0 for _ in size_v] elif script_name == 'affine_dance': - shift_l = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, 0, frames_per_resize)]) - shift_r = np.concatenate([ - l(0, - max_t, frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 
0, frames_per_resize)]) - size_v = [1for _ in shift_l] + shift_l = np.concatenate( + [l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, 0, frames_per_resize)] + ) + shift_r = np.concatenate( + [l(0, -max_t, frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize)] + ) + size_v = [1 for _ in shift_l] size_h = [1 for _ in shift_l] elif script_name == 'trapezoids': - shift_l = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize)]) - shift_r = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize)]) - size_v = [1for _ in shift_l] + shift_l = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ] + ) + shift_r = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ] + ) + size_v = [1 for _ in shift_l] size_h = [1 for _ in shift_l] elif script_name == 'trapezoids_vresize': - shift_l = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize)]) - shift_r = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, max_t, 2 * frames_per_resize), - l(max_t, 0, frames_per_resize)]) - size_v = np.concatenate([ - l(1, max_v, frames_per_resize), - l(max_v, 1, frames_per_resize), - l(1, max_v, frames_per_resize), - l(max_v, 1, frames_per_resize), - l(1, max_v, frames_per_resize), - l(max_v, 1, frames_per_resize), - ]) - size_h = np.concatenate([ - l(1, 1, 
6*frames_per_resize)]) + shift_l = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ] + ) + shift_r = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, max_t, 2 * frames_per_resize), + l(max_t, 0, frames_per_resize) + ] + ) + size_v = np.concatenate( + [ + l(1, max_v, frames_per_resize), + l(max_v, 1, frames_per_resize), + l(1, max_v, frames_per_resize), + l(max_v, 1, frames_per_resize), + l(1, max_v, frames_per_resize), + l(max_v, 1, frames_per_resize), + ] + ) + size_h = np.concatenate([l(1, 1, 6 * frames_per_resize)]) elif script_name == 'flicker': - size_h = np.concatenate([ - l(1, 1, 6 * frames_per_resize)]) + size_h = np.concatenate([l(1, 1, 6 * frames_per_resize)]) size_v = size_h - shift_l = np.concatenate([ - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize),]) - shift_r = np.concatenate([ - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(-max_t, -max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize)]) + shift_l = np.concatenate( + [ + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + ] + ) + shift_r = np.concatenate( + [ + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(-max_t, -max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize) + ] + ) elif script_name == 'homography': - 
size_h = np.concatenate([ - l(1, 1, 6 * frames_per_resize)]) + size_h = np.concatenate([l(1, 1, 6 * frames_per_resize)]) size_v = size_h - shift_l = np.concatenate([ - l(0, max_t, frames_per_resize), - l(max_t, max_t, frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, - max_t, 2 * frames_per_resize), - l(- max_t, 0, frames_per_resize)]) - shift_r = np.concatenate([ - l(0, 0, frames_per_resize), - l(0, max_t, frames_per_resize), - l(max_t, max_t, 2 * frames_per_resize), - l(max_t, - max_t, 2 * frames_per_resize), - l(- max_t, 0, frames_per_resize)]) - - + shift_l = np.concatenate( + [ + l(0, max_t, frames_per_resize), + l(max_t, max_t, frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, -max_t, 2 * frames_per_resize), + l(-max_t, 0, frames_per_resize) + ] + ) + shift_r = np.concatenate( + [ + l(0, 0, frames_per_resize), + l(0, max_t, frames_per_resize), + l(max_t, max_t, 2 * frames_per_resize), + l(max_t, -max_t, 2 * frames_per_resize), + l(-max_t, 0, frames_per_resize) + ] + ) elif script_name == 'random': - stops = np.random.rand(10, 4) * np.array([max_v-min_v, max_h-min_h, 2*max_t, 2*max_t])[None, :] + np.array([min_v, min_h, -max_t, -max_t])[None, :] + stops = np.random.rand(10, 4) * np.array([max_v - min_v, max_h - min_h, 2 * max_t, 2 * max_t] + )[None, :] + np.array([min_v, min_h, -max_t, -max_t])[None, :] stops = np.vstack([stops, [1, 1, 0, 0]]) - print stops + print(stops) - size_v = np.concatenate([l(stop_0[0], stop_1[0], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops)]) + size_v = np.concatenate( + [ + l(stop_0[0], stop_1[0], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ] + ) - size_h = np.concatenate([l(stop_0[1], stop_1[1], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops)]) + size_h = np.concatenate( + [ + l(stop_0[1], stop_1[1], frames_per_resize) + for stop_0, stop_1 in 
zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ] + ) - shift_l = np.concatenate([l(stop_0[2], stop_1[2], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops)]) + shift_l = np.concatenate( + [ + l(stop_0[2], stop_1[2], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ] + ) - shift_r = np.concatenate([l(stop_0[3], stop_1[3], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops)]) + shift_r = np.concatenate( + [ + l(stop_0[3], stop_1[3], frames_per_resize) + for stop_0, stop_1 in zip(np.vstack(([1, 1, 0, 0], stops)), stops) + ] + ) elif script_name == 'random_trapezoids': stops_l = np.random.rand(11) * 2 * max_t - max_t stops_l[-1] = 0 stops_r = np.random.rand(11) * max_t * (stops_l / np.abs(stops_l)) - stops = zip(stops_l, stops_r) - print stops + stops = list(zip(stops_l, stops_r)) + print(stops) - size_h = np.concatenate([ - l(1, 1, 20 * frames_per_resize)]) + size_h = np.concatenate([l(1, 1, 20 * frames_per_resize)]) size_v = size_h - shift_l = np.concatenate([l(stop_0[0], stop_1[0], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops)]) + shift_l = np.concatenate( + [l(stop_0[0], stop_1[0], frames_per_resize) for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops)] + ) - shift_r = np.concatenate([l(stop_0[1], stop_1[1], frames_per_resize) - for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops)]) + shift_r = np.concatenate( + [l(stop_0[1], stop_1[1], frames_per_resize) for stop_0, stop_1 in zip(np.vstack(([0, 0], stops)), stops)] + ) - - return [[-1, -1, -1, -1]] * 20 + zip(size_v, size_h, shift_l, shift_r) * repeat if show_input else zip(size_v, size_h, shift_l, shift_r) * repeat + return [[-1, -1, -1, -1]] * 20 + list(zip(size_v, size_h, shift_l, shift_r)) * repeat if show_input else list( + zip(size_v, size_h, shift_l, shift_r) + ) * repeat INPUT_DICT = { - 'fruits': ['fruits_ss.png', 
'/experiment_old_code_with_homo_2/results/fruits_ss_geo_new_pad_Mar_16_18_00_17/checkpoint_0075000.pth.tar'], - 'farm_house': ['farm_house_s.png', '/results/farm_house_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_16_07_59/checkpoint_0050000.pth.tar'], - 'cab_building': ['cab_building_s.png', '/results/cab_building_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_10_25/checkpoint_0065000.pth.tar'], - 'capitol': ['capitol.png', '/results/capitol_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_13_22/checkpoint_0055000.pth.tar'], - 'rome': ['rome_s.png', '/results/rome_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_09_19/checkpoint_0045000.pth.tar'], - 'soldiers': ['china_soldiers.png', '/results/china_soldiers_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_46_09/checkpoint_0075000.pth.tar'], - 'corn': ['corn.png', '/results/corn_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_10_29_00/checkpoint_0075000.pth.tar'], - 'sushi': ['sushi.png', '/results/sushi_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_47_39/checkpoint_0075000.pth.tar'], - 'penguins': ['penguins.png', '/results/penguins_Nov_13_16_26_14/checkpoint_0075000.pth.tar'], - 'emojis': ['emojis3.png', '/results/emojis3_Nov_23_09_59_59/checkpoint_0075000.pth.tar'], - 'fish': ['input/fish.png', '/results/fish_plethora_75_Mar_18_03_36_25/checkpoint_0075000.pth.tar'], - 'ny': ['textures/ny.png', '/results/ny_texture_synth_Mar_19_04_51_14/checkpoint_0075000.pth.tar'], - 'metal_circles': ['metal_circles.jpg', '/results/metal_circles_Mar_26_20_04_11/checkpoint_0075000.pth.tar'], - 'quilt': ['quilt.png', '/results/quilt/checkpoint_0075000.pth.tar'], - 'sapa': ['sapa.png', '/results/sapa_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_44_59/checkpoint_0075000.pth.tar'], - 'nkorea': ['nkorea.png', 
'/results/nkorea_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_48_00/checkpoint_0075000.pth.tar'], - 'wood': ['wood.png', '/results/wood/checkpoint_0075000.pth.tar'], - 'starry': ['starry.png', '/results/starry/checkpoint_0075000.pth.tar'], - 'umbrella': ['umbrella.png', '/results/umbrella/checkpoint_0075000.pth.tar'], - 'fruits_old': ['fruits_ss.png', '/results/fruits_ss_256_COARSE2FINE_extraInv_2_30_until60_killReconstruct_20_Oct_24_12_35_33/checkpoint_0040000.pth.tar'], - 'peacock': ['scaled_nird/ours_1_scaled.jpg', '/results/ours_1/checkpoint_0050000.pth.tar'], - 'windows': ['scaled_nird/ours_2_scaled.jpg', '/results/ours_2/checkpoint_0050000.pth.tar'], - 'light_house': ['scaled_nird/ours_23_scaled.jpg', '/results/ours_23/checkpoint_0050000.pth.tar'], - 'hats': ['scaled_nird/ours_26_scaled.jpg', '/results/ours_26/checkpoint_0050000.pth.tar'], - 'nature': ['scaled_nird/ours_32_scaled.jpg', '/results/ours_32/checkpoint_0050000.pth.tar'], - + 'fruits': + [ + 'fruits_ss.png', + '/experiment_old_code_with_homo_2/results/fruits_ss_geo_new_pad_Mar_16_18_00_17/checkpoint_0075000.pth.tar' + ], + 'farm_house': + [ + 'farm_house_s.png', + '/results/farm_house_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_16_07_59/checkpoint_0050000.pth.tar' + ], + 'cab_building': + [ + 'cab_building_s.png', + '/results/cab_building_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_10_25/checkpoint_0065000.pth.tar' + ], + 'capitol': + [ + 'capitol.png', + '/results/capitol_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_13_22/checkpoint_0055000.pth.tar' + ], + 'rome': + [ + 'rome_s.png', + '/results/rome_s_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_Nov_03_18_09_19/checkpoint_0045000.pth.tar' + ], + 'soldiers': + [ + 'china_soldiers.png', + '/results/china_soldiers_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_46_09/checkpoint_0075000.pth.tar' + ], + 'corn': + [ 
+ 'corn.png', + '/results/corn_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_10_29_00/checkpoint_0075000.pth.tar' + ], + 'sushi': + [ + 'sushi.png', + '/results/sushi_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_47_39/checkpoint_0075000.pth.tar' + ], + 'penguins': ['penguins.png', '/results/penguins_Nov_13_16_26_14/checkpoint_0075000.pth.tar'], + 'emojis': ['emojis3.png', '/results/emojis3_Nov_23_09_59_59/checkpoint_0075000.pth.tar'], + 'fish': ['input/fish.png', '/results/fish_plethora_75_Mar_18_03_36_25/checkpoint_0075000.pth.tar'], + 'ny': ['textures/ny.png', '/results/ny_texture_synth_Mar_19_04_51_14/checkpoint_0075000.pth.tar'], + 'metal_circles': ['metal_circles.jpg', '/results/metal_circles_Mar_26_20_04_11/checkpoint_0075000.pth.tar'], + 'quilt': ['quilt.png', '/results/quilt/checkpoint_0075000.pth.tar'], + 'sapa': + [ + 'sapa.png', + '/results/sapa_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_09_44_59/checkpoint_0075000.pth.tar' + ], + 'nkorea': + [ + 'nkorea.png', + '/results/nkorea_L1_Dfactor_14_WeightsEqualeThenFine_25_LRdecay_20_curric_NOISE2G_Nov_05_07_48_00/checkpoint_0075000.pth.tar' + ], + 'wood': ['wood.png', '/results/wood/checkpoint_0075000.pth.tar'], + 'starry': ['starry.png', '/results/starry/checkpoint_0075000.pth.tar'], + 'umbrella': ['umbrella.png', '/results/umbrella/checkpoint_0075000.pth.tar'], + 'fruits_old': + [ + 'fruits_ss.png', + '/results/fruits_ss_256_COARSE2FINE_extraInv_2_30_until60_killReconstruct_20_Oct_24_12_35_33/checkpoint_0040000.pth.tar' + ], + 'peacock': ['scaled_nird/ours_1_scaled.jpg', '/results/ours_1/checkpoint_0050000.pth.tar'], + 'windows': ['scaled_nird/ours_2_scaled.jpg', '/results/ours_2/checkpoint_0050000.pth.tar'], + 'light_house': ['scaled_nird/ours_23_scaled.jpg', '/results/ours_23/checkpoint_0050000.pth.tar'], + 'hats': ['scaled_nird/ours_26_scaled.jpg', '/results/ours_26/checkpoint_0050000.pth.tar'], + 'nature': 
['scaled_nird/ours_32_scaled.jpg', '/results/ours_32/checkpoint_0050000.pth.tar'], } diff --git a/configs.py b/configs.py index 0438ce0..dec6496 100644 --- a/configs.py +++ b/configs.py @@ -11,81 +11,211 @@ def __init__(self): self.conf = None # Paths - self.parser.add_argument('--input_image_path', default=[os.path.dirname(os.path.abspath(__file__)) + '/examples/fruit/fruit.png'], nargs='+', help='path to one specific image file') - self.parser.add_argument('--output_dir_path', default=os.path.dirname(os.path.abspath(__file__)) + '/results', help='path to a directory to save results to') - self.parser.add_argument('--name', default='fruit', help='name of current experiment, to be used for saving the results') + self.parser.add_argument( + '--input_image_path', + default=[os.path.dirname(os.path.abspath(__file__)) + '/examples/fruit/fruit.png'], + nargs='+', + help='path to one specific image file' + ) + self.parser.add_argument( + '--output_dir_path', + default=os.path.dirname(os.path.abspath(__file__)) + '/results', + help='path to a directory to save results to' + ) + self.parser.add_argument( + '--name', default='fruit', help='name of current experiment, to be used for saving the results' + ) self.parser.add_argument('--resume', type=str, default=None, help='checkpoint to resume from') - self.parser.add_argument('--test_params_path', type=str, default=os.path.dirname(os.path.abspath(__file__)) + '/examples/fruit/checkpoint_0075000.pth.tar', help='checkpoint for testing') + self.parser.add_argument( + '--test_params_path', + type=str, + default=os.path.dirname(os.path.abspath(__file__)) + '/examples/fruit/checkpoint_0075000.pth.tar', + help='checkpoint for testing' + ) # Test - self.parser.add_argument('--test_collage', default=True, action='store_true', help='Create collage in test?') - self.parser.add_argument('--test_video', default=True, action='store_true', help='Create retarget-video in test?') - self.parser.add_argument('--test_non_rect', default=False, 
action='store_true', help='Produce non-rectangular transformations in test?') - self.parser.add_argument('--test_vid_scales', type=float, default=[2.2, 0.1, 2.2, 0.1], nargs='+', help='boundary scales for output video: [max_v, min_v, max_h, min_h]') - self.parser.add_argument('--collage_scales', type=float, default=[2.0, 1.25, 1.0, 0.66, 0.33], nargs='+', help='scales for collage (h=w, only one number)') - self.parser.add_argument('--collage_input_spot', type=float, default=[2, 2], nargs='+', help='replaces one spot in the collage with original input. must match a spot with scale 1.0') - self.parser.add_argument('--non_rect_shift_range', type=float, default=[-0.8, 1.0, 0.2], nargs='+', help='range for homography shifts for non rect transforms [min, max, step]') - self.parser.add_argument('--non_rect_scales', type=float, default=[0.7, 1.0], nargs='+', help='list of scales for non_rect outputs') + self.parser.add_argument('--test_collage', default=True, action='store_true', help='Create collage in test?') + self.parser.add_argument( + '--test_video', default=True, action='store_true', help='Create retarget-video in test?' + ) + self.parser.add_argument( + '--test_non_rect', + default=False, + action='store_true', + help='Produce non-rectangular transformations in test?' + ) + self.parser.add_argument( + '--test_vid_scales', + type=float, + default=[2.2, 0.1, 2.2, 0.1], + nargs='+', + help='boundary scales for output video: [max_v, min_v, max_h, min_h]' + ) + self.parser.add_argument( + '--collage_scales', + type=float, + default=[2.0, 1.25, 1.0, 0.66, 0.33], + nargs='+', + help='scales for collage (h=w, only one number)' + ) + self.parser.add_argument( + '--collage_input_spot', + type=float, + default=[2, 2], + nargs='+', + help='replaces one spot in the collage with original input. 
must match a spot with scale 1.0' + ) + self.parser.add_argument( + '--non_rect_shift_range', + type=float, + default=[-0.8, 1.0, 0.2], + nargs='+', + help='range for homography shifts for non rect transforms [min, max, step]' + ) + self.parser.add_argument( + '--non_rect_scales', type=float, default=[0.7, 1.0], nargs='+', help='list of scales for non_rect outputs' + ) # Architecture (Generator) self.parser.add_argument('--G_base_channels', type=int, default=64, help='# of base channels in G') self.parser.add_argument('--G_num_resblocks', type=int, default=6, help='# of resblocks in G\'s bottleneck') self.parser.add_argument('--G_num_downscales', type=int, default=3, help='# of downscaling layers in G') - self.parser.add_argument('--G_use_bias', type=bool, default=True, help='Determinhes whether bias is used in G\'s conv layers') - self.parser.add_argument('--G_skip', type=bool, default=True, help='Determines wether G uses skip connections (U-net)') + self.parser.add_argument( + '--G_use_bias', type=bool, default=True, help='Determinhes whether bias is used in G\'s conv layers' + ) + self.parser.add_argument( + '--G_skip', type=bool, default=True, help='Determines wether G uses skip connections (U-net)' + ) # Architecture (Discriminator) self.parser.add_argument('--D_base_channels', type=int, default=64, help='# of base channels in D') - self.parser.add_argument('--D_max_num_scales', type=int, default=99, help='Limits the # of scales for the multiscale D') - self.parser.add_argument('--D_scale_factor', type=float, default=1.4, help='Determines the downscaling factor for multiscale D') - self.parser.add_argument('--D_scale_weights_sigma', type=float, default=1.4, help='Determines the downscaling factor for multiscale D') - self.parser.add_argument('--D_min_input_size', type=int, default=13, help='Determines the downscaling factor for multiscale D') - self.parser.add_argument('--D_scale_weights_iter_for_even_scales', type=int, default=25000, help='Determines the 
downscaling factor for multiscale D') + self.parser.add_argument( + '--D_max_num_scales', type=int, default=99, help='Limits the # of scales for the multiscale D' + ) + self.parser.add_argument( + '--D_scale_factor', type=float, default=1.4, help='Determines the downscaling factor for multiscale D' + ) + self.parser.add_argument( + '--D_scale_weights_sigma', + type=float, + default=1.4, + help='Determines the downscaling factor for multiscale D' + ) + self.parser.add_argument( + '--D_min_input_size', type=int, default=13, help='Determines the downscaling factor for multiscale D' + ) + self.parser.add_argument( + '--D_scale_weights_iter_for_even_scales', + type=int, + default=25000, + help='Determines the downscaling factor for multiscale D' + ) # Optimization hyper-parameters self.parser.add_argument('--g_lr', type=float, default=0.00005, help='initial learning rate for generator') self.parser.add_argument('--d_lr', type=float, default=0.00005, help='initial learning rate for discriminator') - self.parser.add_argument('--lr_start_decay_iter', type=float, default=20000, help='iteration from which linear decay of lr starts until max_iter') + self.parser.add_argument( + '--lr_start_decay_iter', + type=float, + default=20000, + help='iteration from which linear decay of lr starts until max_iter' + ) self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') self.parser.add_argument('--curriculum', type=bool, default=True, help='Enable curriculum learning') - self.parser.add_argument('--iter_for_max_range', type=int, default=10000, help='In curriculum learning, when getting to this iteration all range is covered') + self.parser.add_argument( + '--iter_for_max_range', + type=int, + default=10000, + help='In curriculum learning, when getting to this iteration all range is covered' + ) # Sizes self.parser.add_argument('--input_crop_size', type=int, default=256, help='input is cropped to this size') 
self.parser.add_argument('--output_crop_size', type=int, default=256, help='output is cropped to this size') self.parser.add_argument('--max_scale', type=float, default=2.25, help='max retargeting scale') self.parser.add_argument('--min_scale', type=float, default=0.15, help='min retargeting scale') - self.parser.add_argument('--must_divide', type=int, default=8, help='In curriculum learning, when getting to this iteration all range is covered') - self.parser.add_argument('--max_transform_magnitude', type=float, default=0.0, help='max manitude of geometric transformation') + self.parser.add_argument( + '--must_divide', + type=int, + default=8, + help='In curriculum learning, when getting to this iteration all range is covered' + ) + self.parser.add_argument( + '--max_transform_magnitude', type=float, default=0.0, help='max manitude of geometric transformation' + ) # Crop Swap self.parser.add_argument('--crop_swap_min_size', type=int, default=32, help='swapping crops augmnetation') self.parser.add_argument('--crop_swap_max_size', type=int, default=256, help='swapping crops augmnetation') - self.parser.add_argument('--crop_swap_probability', type=float, default=0.0, help='probability for crop swapping to occur') + self.parser.add_argument( + '--crop_swap_probability', type=float, default=0.0, help='probability for crop swapping to occur' + ) # GPU self.parser.add_argument('--gpu_id', type=int, default=0, help='gpu id number') # Monitoring display frequencies - self.parser.add_argument('--display_freq', type=int, default=200, help='frequency of showing training results on screen') - self.parser.add_argument('--print_freq', type=int, default=20, help='frequency of showing training results on console') - self.parser.add_argument('--save_snapshot_freq', type=int, default=5000, help='frequency of saving the latest results') + self.parser.add_argument( + '--display_freq', type=int, default=200, help='frequency of showing training results on screen' + ) + 
self.parser.add_argument( + '--print_freq', type=int, default=20, help='frequency of showing training results on console' + ) + self.parser.add_argument( + '--save_snapshot_freq', type=int, default=5000, help='frequency of saving the latest results' + ) # Iterations self.parser.add_argument('--max_iters', type=int, default=75000, help='max # of iters') - self.parser.add_argument('--G_iters', type=int, default=1, help='# of sub-iters for the generator per each global iteration') - self.parser.add_argument('--D_iters', type=int, default=1, help='# of sub-iters for the discriminator per each global iteration') + self.parser.add_argument( + '--G_iters', type=int, default=1, help='# of sub-iters for the generator per each global iteration' + ) + self.parser.add_argument( + '--D_iters', type=int, default=1, help='# of sub-iters for the discriminator per each global iteration' + ) # Losses - self.parser.add_argument('--reconstruct_loss_proportion', type=float, default=0.1, help='relative part of reconstruct-loss (out of 1)') - self.parser.add_argument('--reconstruct_loss_stop_iter', type=int, default=200000, help='from this iter and on, reconstruct loss is deactivated') - self.parser.add_argument('--G_extra_inverse_train', type=int, default=1, help='number of extra training iters for G on inverse direction') - self.parser.add_argument('--G_extra_inverse_train_start_iter', type=int, default=10000, help='number of extra training iters for G on inverse direction') - self.parser.add_argument('--G_extra_inverse_train_ratio', type=int, default=1.0, help='number of extra training iters for G on inverse direction') - self.parser.add_argument('--use_L1', type=bool, default=True, help='Determine whether to use L1 or L2 for reconstruction') + self.parser.add_argument( + '--reconstruct_loss_proportion', + type=float, + default=0.1, + help='relative part of reconstruct-loss (out of 1)' + ) + self.parser.add_argument( + '--reconstruct_loss_stop_iter', + type=int, + default=200000, + 
help='from this iter and on, reconstruct loss is deactivated' + ) + self.parser.add_argument( + '--G_extra_inverse_train', + type=int, + default=1, + help='number of extra training iters for G on inverse direction' + ) + self.parser.add_argument( + '--G_extra_inverse_train_start_iter', + type=int, + default=10000, + help='number of extra training iters for G on inverse direction' + ) + self.parser.add_argument( + '--G_extra_inverse_train_ratio', + type=int, + default=1.0, + help='number of extra training iters for G on inverse direction' + ) + self.parser.add_argument( + '--use_L1', type=bool, default=True, help='Determine whether to use L1 or L2 for reconstruction' + ) # Misc - self.parser.add_argument('--create_code_copy', type=bool, default=True, help='when set to true, all .py files are saved to results directory to keep track') + self.parser.add_argument( + '--create_code_copy', + type=bool, + default=True, + help='when set to true, all .py files are saved to results directory to keep track' + ) def parse(self, create_dir_flag=True): # Parse arguments diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..0e480f2 --- /dev/null +++ b/environment.yml @@ -0,0 +1,21 @@ +name: ingan +channels: + - pytorch + - defaults +dependencies: + - cudatoolkit>=10.2 + - numpy + - pillow + - pip + - python>=3.8.10 + - pytorch>=1.9.0 + - torchvision>=0.4 + - scipy + - scikit-learn + - seaborn + - pip: + - opencv-python + - scikit-video + - ipython + - pytest + - yapf diff --git a/networks.py b/networks.py index 449639f..cec1ed5 100644 --- a/networks.py +++ b/networks.py @@ -34,16 +34,16 @@ def __init__(self, num_features): def forward(self, input_tensor): local_mean = self.get_local_mean(input_tensor) - print local_mean + print(local_mean) centered_input_tensor = input_tensor - local_mean - print centered_input_tensor - squared_diff = centered_input_tensor ** 2 - print squared_diff - local_std = self.get_var(squared_diff) ** 0.5 - print local_std + 
print(centered_input_tensor) + squared_diff = centered_input_tensor**2 + print(squared_diff) + local_std = self.get_var(squared_diff)**0.5 + print(local_std) normalized_tensor = centered_input_tensor / (local_std + 1e-8) - return normalized_tensor # * self.weight[None, :, None, None] + self.bias[None, :, None, None] + return normalized_tensor # * self.weight[None, :, None, None] + self.bias[None, :, None, None] normalization_layer = nn.BatchNorm2d # BatchReNorm2d # LocalNorm @@ -52,7 +52,6 @@ def forward(self, input_tensor): class GANLoss(nn.Module): """ Receiving the final layer form the discriminator and a boolean indicating whether the input to the discriminator is real or fake (generated by generator), this returns a patch""" - def __init__(self): super(GANLoss, self).__init__() @@ -80,7 +79,7 @@ def __init__(self, use_L1=False): def forward(self, input_tensor, target_tensor, loss_mask): if loss_mask is not None: - e = (target_tensor.detach() - input_tensor) ** 2 + e = (target_tensor.detach() - input_tensor)**2 e *= loss_mask return torch.sum(e) / torch.sum(loss_mask) else: @@ -105,7 +104,6 @@ def forward(self, input_tensor, target_tensor, scale_weights): class Generator(nn.Module): """ Architecture of the Generator, uses res-blocks """ - def __init__(self, base_channels=64, n_blocks=6, n_downsampling=3, use_bias=True, skip_flag=True): super(Generator, self).__init__() @@ -114,10 +112,10 @@ def __init__(self, base_channels=64, n_blocks=6, n_downsampling=3, use_bias=True # Entry block # First conv-block, no stride so image dims are kept and channels dim is expanded (pad-conv-norm-relu) - self.entry_block = nn.Sequential(nn.ReflectionPad2d(3), - nn.utils.spectral_norm(nn.Conv2d(3, base_channels, kernel_size=7, bias=use_bias)), - normalization_layer(base_channels), - nn.LeakyReLU(0.2, True)) + self.entry_block = nn.Sequential( + nn.ReflectionPad2d(3), nn.utils.spectral_norm(nn.Conv2d(3, base_channels, kernel_size=7, bias=use_bias)), + 
normalization_layer(base_channels), nn.LeakyReLU(0.2, True) + ) # Geometric transformation self.geo_transform = GeoTransform() @@ -131,7 +129,7 @@ def __init__(self, base_channels=64, n_blocks=6, n_downsampling=3, use_bias=True bottleneck_block = [] for _ in range(n_blocks): # noinspection PyUnboundLocalVariable - bottleneck_block += [ResnetBlock(base_channels * 2 ** n_downsampling, use_bias=use_bias)] + bottleneck_block += [ResnetBlock(base_channels * 2**n_downsampling, use_bias=use_bias)] self.bottleneck_block = nn.Sequential(*bottleneck_block) # Upscaling @@ -140,9 +138,7 @@ def __init__(self, base_channels=64, n_blocks=6, n_downsampling=3, use_bias=True # Final block # No stride so image dims are kept and channels dim shrinks to 3 (output image channels) - self.final_block = nn.Sequential(nn.ReflectionPad2d(3), - nn.Conv2d(base_channels, 3, kernel_size=7), - nn.Tanh()) + self.final_block = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(base_channels, 3, kernel_size=7), nn.Tanh()) def forward(self, input_tensor, output_size, random_affine): # A condition for having the output at same size as the scaled input is having even output_size @@ -173,20 +169,17 @@ def forward(self, input_tensor, output_size, random_affine): class ResnetBlock(nn.Module): """ A single Res-Block module """ - def __init__(self, dim, use_bias): super(ResnetBlock, self).__init__() # A res-block without the skip-connection, pad-conv-norm-relu-pad-conv-norm - self.conv_block = nn.Sequential(nn.utils.spectral_norm(nn.Conv2d(dim, dim // 4, kernel_size=1, bias=use_bias)), - normalization_layer(dim // 4), - nn.LeakyReLU(0.2, True), - nn.ReflectionPad2d(1), - nn.utils.spectral_norm(nn.Conv2d(dim // 4, dim // 4, kernel_size=3, bias=use_bias)), - normalization_layer(dim // 4), - nn.LeakyReLU(0.2, True), - nn.utils.spectral_norm(nn.Conv2d(dim // 4, dim, kernel_size=1, bias=use_bias)), - normalization_layer(dim)) + self.conv_block = nn.Sequential( + nn.utils.spectral_norm(nn.Conv2d(dim, dim // 4, 
kernel_size=1, bias=use_bias)), + normalization_layer(dim // 4), nn.LeakyReLU(0.2, True), nn.ReflectionPad2d(1), + nn.utils.spectral_norm(nn.Conv2d(dim // 4, dim // 4, kernel_size=3, bias=use_bias)), + normalization_layer(dim // 4), nn.LeakyReLU(0.2, True), + nn.utils.spectral_norm(nn.Conv2d(dim // 4, dim, kernel_size=1, bias=use_bias)), normalization_layer(dim) + ) def forward(self, input_tensor): # The skip connection is applied here @@ -203,8 +196,12 @@ def __init__(self, real_crop_size, max_n_scales=9, scale_factor=2, base_channels # We want the max num of scales to fit the size of the real examples. further scaling would create networks that # only train on fake examples - self.max_n_scales = np.min([np.int(np.ceil(np.log(np.min(real_crop_size) * 1.0 / self.min_size) - / np.log(self.scale_factor))), max_n_scales]) + self.max_n_scales = np.min( + [ + np.int(np.ceil(np.log(np.min(real_crop_size) * 1.0 / self.min_size) / np.log(self.scale_factor))), + max_n_scales + ] + ) # Prepare a list of all the networks for all the wanted scales self.nets = nn.ModuleList() @@ -218,37 +215,42 @@ def make_net(self): net = [] # Entry block - net += [nn.utils.spectral_norm(nn.Conv2d(3, base_channels, kernel_size=3, stride=1)), - nn.BatchNorm2d(base_channels), - nn.LeakyReLU(0.2, True)] + net += [ + nn.utils.spectral_norm(nn.Conv2d(3, base_channels, kernel_size=3, stride=1)), + nn.BatchNorm2d(base_channels), + nn.LeakyReLU(0.2, True) + ] # Downscaling blocks # A sequence of strided conv-blocks. 
Image dims shrink by 2, channels dim expands by 2 at each block - net += [nn.utils.spectral_norm(nn.Conv2d(base_channels, base_channels * 2, kernel_size=3, stride=2)), - nn.BatchNorm2d(base_channels * 2), - nn.LeakyReLU(0.2, True)] + net += [ + nn.utils.spectral_norm(nn.Conv2d(base_channels, base_channels * 2, kernel_size=3, stride=2)), + nn.BatchNorm2d(base_channels * 2), + nn.LeakyReLU(0.2, True) + ] # Regular conv-block - net += [nn.utils.spectral_norm(nn.Conv2d(in_channels=base_channels * 2, - out_channels=base_channels * 2, - kernel_size=3, - bias=True)), - nn.BatchNorm2d(base_channels * 2), - nn.LeakyReLU(0.2, True)] + net += [ + nn.utils.spectral_norm( + nn.Conv2d(in_channels=base_channels * 2, out_channels=base_channels * 2, kernel_size=3, bias=True) + ), + nn.BatchNorm2d(base_channels * 2), + nn.LeakyReLU(0.2, True) + ] # Additional 1x1 conv-blocks for _ in range(self.extra_conv_layers): - net += [nn.utils.spectral_norm(nn.Conv2d(in_channels=base_channels * 2, - out_channels=base_channels * 2, - kernel_size=3, - bias=True)), - nn.BatchNorm2d(base_channels * 2), - nn.LeakyReLU(0.2, True)] + net += [ + nn.utils.spectral_norm( + nn.Conv2d(in_channels=base_channels * 2, out_channels=base_channels * 2, kernel_size=3, bias=True) + ), + nn.BatchNorm2d(base_channels * 2), + nn.LeakyReLU(0.2, True) + ] # Final conv-block # Ends with a Sigmoid to get a range of 0-1 - net += nn.Sequential(nn.utils.spectral_norm(nn.Conv2d(base_channels * 2, 1, kernel_size=1)), - nn.Sigmoid()) + net += nn.Sequential(nn.utils.spectral_norm(nn.Conv2d(base_channels * 2, 1, kernel_size=1)), nn.Sigmoid()) # Make it a valid layers sequence and return return nn.Sequential(*net) @@ -258,12 +260,12 @@ def forward(self, input_tensor, scale_weights): map_size = aggregated_result_maps_from_all_scales.shape[2:] # Run all nets over all scales and aggregate the interpolated results - for net, scale_weight, i in zip(self.nets[1:], scale_weights[1:], range(1, len(scale_weights))): + for net, 
scale_weight, i in zip(self.nets[1:], scale_weights[1:], list(range(1, len(scale_weights)))): downscaled_image = f.interpolate(input_tensor, scale_factor=self.scale_factor**(-i), mode='bilinear') result_map_for_current_scale = net(downscaled_image) - upscaled_result_map_for_current_scale = f.interpolate(result_map_for_current_scale, - size=map_size, - mode='bilinear') + upscaled_result_map_for_current_scale = f.interpolate( + result_map_for_current_scale, size=map_size, mode='bilinear' + ) aggregated_result_maps_from_all_scales += upscaled_result_map_for_current_scale * scale_weight return aggregated_result_maps_from_all_scales @@ -279,18 +281,21 @@ def __init__(self, n_layers, scale=0.5, base_channels=64, use_bias=True): in_channel_power = scale > 1 out_channel_power = scale < 1 - i_range = range(n_layers) if scale < 1 else range(n_layers-1, -1, -1) + i_range = list(range(n_layers)) if scale < 1 else list(range(n_layers - 1, -1, -1)) for i in i_range: - self.conv_layers[i] = nn.Sequential(nn.ReflectionPad2d(1), - nn.utils.spectral_norm(nn.Conv2d( - in_channels=base_channels * 2 ** (i + in_channel_power), - out_channels=base_channels * 2 ** (i + out_channel_power), - kernel_size=3, - stride=1, - bias=use_bias)), - normalization_layer(base_channels * 2 ** (i + out_channel_power)), - nn.LeakyReLU(0.2, True)) + self.conv_layers[i] = nn.Sequential( + nn.ReflectionPad2d(1), + nn.utils.spectral_norm( + nn.Conv2d( + in_channels=base_channels * 2**(i + in_channel_power), + out_channels=base_channels * 2**(i + out_channel_power), + kernel_size=3, + stride=1, + bias=use_bias + ) + ), normalization_layer(base_channels * 2**(i + out_channel_power)), nn.LeakyReLU(0.2, True) + ) self.add_module("conv_%d" % i, self.conv_layers[i]) if scale > 1: @@ -313,7 +318,7 @@ def forward(self, input_tensor, pyramid=None, return_all_scales=False, skip=Fals feature_map = conv_layer(feature_map) if skip: - feature_map = feature_map + pyramid[-i-2] + feature_map = feature_map + pyramid[-i - 2] 
if self.scale < 1.0: feature_map = self.max_pool(feature_map) @@ -336,16 +341,20 @@ def __init__(self, crop_size, return_pos=False, must_divide=4.0): def forward(self, input_tensors, crop_size=None): im_v_sz, im_h_sz = input_tensors[0].shape[2:] if crop_size is None: - cr_v_sz, cr_h_sz = np.clip(self.crop_size, [0, 0], [im_v_sz-1, im_h_sz-1]) - cr_v_sz, cr_h_sz = np.uint32(np.floor(np.array([cr_v_sz, cr_h_sz]) - * 1.0 / self.must_divide) * self.must_divide) + cr_v_sz, cr_h_sz = np.clip(self.crop_size, [0, 0], [im_v_sz - 1, im_h_sz - 1]) + cr_v_sz, cr_h_sz = np.uint32( + np.floor(np.array([cr_v_sz, cr_h_sz]) * 1.0 / self.must_divide) * self.must_divide + ) else: cr_v_sz, cr_h_sz = crop_size top_left_v, top_left_h = [np.random.randint(0, im_v_sz - cr_v_sz), np.random.randint(0, im_h_sz - cr_h_sz)] - out_tensors = [input_tensor[:, :, top_left_v:top_left_v + cr_v_sz, top_left_h:top_left_h + cr_h_sz] - if input_tensor is not None else None for input_tensor in input_tensors] + out_tensors = [ + input_tensor[:, :, top_left_v:top_left_v + cr_v_sz, + top_left_h:top_left_h + cr_h_sz] if input_tensor is not None else None + for input_tensor in input_tensors + ] return (out_tensors, (top_left_v, top_left_h)) if self.return_pos else out_tensors @@ -377,14 +386,18 @@ def forward(self, input_tensor): # Creating a mask. 
this is drawing a line in width 2*mask_width over the boundaries of the cropped image loss_mask = torch.ones_like(input_tensor) mw = self.mask_width - loss_mask[:, :, top_left_v_1:top_left_v_1+cr_v_sz, top_left_h_1-mw:top_left_h_1+mw] = 0 - loss_mask[:, :, top_left_v_1-mw:top_left_v_1+mw, top_left_h_1:top_left_h_1+cr_h_sz] = 0 - loss_mask[:, :, top_left_v_1:top_left_v_1+cr_v_sz, top_left_h_1+cr_h_sz-mw:top_left_h_1+cr_h_sz+mw] = 0 - loss_mask[:, :, top_left_v_1+cr_v_sz-mw:top_left_v_1+cr_v_sz+mw, top_left_h_1:top_left_h_1+cr_h_sz] = 0 - loss_mask[:, :, top_left_v_2:top_left_v_2+cr_v_sz, top_left_h_2-mw:top_left_h_2+mw] = 0 - loss_mask[:, :, top_left_v_2-mw:top_left_v_2+mw, top_left_h_2:top_left_h_2+cr_h_sz] = 0 - loss_mask[:, :, top_left_v_2:top_left_v_2+cr_v_sz, top_left_h_2+cr_h_sz-mw:top_left_h_2+cr_h_sz+mw] = 0 - loss_mask[:, :, top_left_v_2+cr_v_sz-mw:top_left_v_2+cr_v_sz+mw, top_left_h_2:top_left_h_2+cr_h_sz] = 0 + loss_mask[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, top_left_h_1 - mw:top_left_h_1 + mw] = 0 + loss_mask[:, :, top_left_v_1 - mw:top_left_v_1 + mw, top_left_h_1:top_left_h_1 + cr_h_sz] = 0 + loss_mask[:, :, top_left_v_1:top_left_v_1 + cr_v_sz, + top_left_h_1 + cr_h_sz - mw:top_left_h_1 + cr_h_sz + mw] = 0 + loss_mask[:, :, top_left_v_1 + cr_v_sz - mw:top_left_v_1 + cr_v_sz + mw, + top_left_h_1:top_left_h_1 + cr_h_sz] = 0 + loss_mask[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, top_left_h_2 - mw:top_left_h_2 + mw] = 0 + loss_mask[:, :, top_left_v_2 - mw:top_left_v_2 + mw, top_left_h_2:top_left_h_2 + cr_h_sz] = 0 + loss_mask[:, :, top_left_v_2:top_left_v_2 + cr_v_sz, + top_left_h_2 + cr_h_sz - mw:top_left_h_2 + cr_h_sz + mw] = 0 + loss_mask[:, :, top_left_v_2 + cr_v_sz - mw:top_left_v_2 + cr_v_sz + mw, + top_left_h_2:top_left_h_2 + cr_h_sz] = 0 return output_tensor, loss_mask @@ -397,7 +410,10 @@ def forward(self, input_tensor, target_size, shifts): sz = input_tensor.shape theta = homography_based_on_top_corners_x_shift(shifts) - pad = 
f.pad(input_tensor, (np.abs(np.int(np.ceil(sz[3] * shifts[0]))), np.abs(np.int(np.ceil(-sz[3] * shifts[1]))), 0, 0), 'reflect') + pad = f.pad( + input_tensor, + (np.abs(np.int(np.ceil(sz[3] * shifts[0]))), np.abs(np.int(np.ceil(-sz[3] * shifts[1]))), 0, 0), 'reflect' + ) target_size4d = torch.Size([pad.shape[0], pad.shape[1], target_size[0], target_size[1]]) grid = homography_grid(theta.expand(pad.shape[0], -1, -1), target_size4d) diff --git a/non_rect.py b/non_rect.py index 4652ac3..47a770d 100644 --- a/non_rect.py +++ b/non_rect.py @@ -66,15 +66,15 @@ def homography_based_on_top_corners_x_shift(rand_h): # [0, 0, 0, -1, 0, -1, 0, 0, 0], # [0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=np.float32) # play with top left and bottom right - p = np.array([[1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), -1. + rand_h[0]], - [0, 0, 0, 1., 1., -1., 1., 1., -1.], - [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], - [0, 0, 0, -1, -1, -1, 1, 1, 1], - [1, 0, -1, 0, 0, 0, 1, 0, -1], - [0, 0, 0, 1, 0, -1, 0, 0, 0], - [-1, 0, -1, 0, 0, 0, 1, 0, 1], - [0, 0, 0, -1, 0, -1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=np.float32) + p = np.array( + [ + [1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), -1. + rand_h[0]], + [0, 0, 0, 1., 1., -1., 1., 1., -1.], [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], + [0, 0, 0, -1, -1, -1, 1, 1, 1], [1, 0, -1, 0, 0, 0, 1, 0, -1], [0, 0, 0, 1, 0, -1, 0, 0, 0], + [-1, 0, -1, 0, 0, 0, 1, 0, 1], [0, 0, 0, -1, 0, -1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1] + ], + dtype=np.float32 + ) b = np.zeros((9, 1), dtype=np.float32) b[8, 0] = 1. 
h = np.dot(np.linalg.inv(p), b) @@ -87,11 +87,13 @@ def apply_resize_and_radial(x, target_size, rand_r): out = f.grid_sample(x, grid, mode='bilinear', padding_mode='border') return out + def make_radial_scale_grid(rand_r, size4d): y, x = torch.meshgrid((torch.linspace(-1., 1., size4d[-2]), torch.linspace(-1., 1., size4d[-1]))) theta = torch.atan2(x, y) r = torch.sqrt() + ''' def test_time(): def _make_pink_noise(sz_): @@ -156,4 +158,4 @@ def _make_homography_mask(in_mask, target_size, rand_h): frame = util.tensor2im(out*out_mask - 1 + out_mask) writer.writeFrame(frame) writer.close() -''' \ No newline at end of file +''' diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..adc2431 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,8 @@ +[flake8] +exclude = .git,*migrations*,build*,old* +max-line-length = 120 +ignore=W391 + +[yapf] +based_on_style = facebook +column_limit = 120 diff --git a/supp_video.py b/supp_video.py index 535b194..82febb4 100644 --- a/supp_video.py +++ b/supp_video.py @@ -6,24 +6,49 @@ from non_rect import * from SceneScripts import * - FRAME_SHAPE = [500, 1000] MUST_DIVIDE = 8 VIDEO_SCRIPT = [ # [nameses, script_name, script_params=(min_v, max_v, min_h, max_h, max_t, repeat)] -[[['fruits'], ['fruits_old'], ['fruits_old'], ['fruits'], ['fruits']], ['horizontal_grow_shrink_slow', 'vertical_grow_shrink', 'resize_round', 'affine_dance', 'random'], [[0.55, None, 0.55, None, None, 1], [0.3, 1.8, 0.3, 2.0, None, 1, False], [0.3, 1.8, 0.3, 2.0, None, 1, False], [None, None, None, None, 0.45, 1, False], [0.3, 1.3, 0.3, 1.6, 0.45, 1, False]]], -['farm_house', 'special_resize_round', [0.45, None, 0.45, None, None, 2]], -['cab_building', 'resize_round', [0.5, None, 0.3, 2.5, None, 2]], -['rome', 'horizontal_grow_shrink', [0.3, None, 0.3, None, None, 3]], -[[['peacock', 'windows']], 'resize_round', [0.5, 2, 0.5, 1.75, None, 3]], -[[['soldiers', 'penguins']], 'horizontal_grow_shrink', [0.3, None, 0.3, None, None, 3]], -[[['nkorea', 'sapa']], 
'horizontal_grow_shrink', [0.15, None, 0.15, None, None, 3]], -[[['quilt']] * 5, ['horizontal_grow_shrink', 'vertical_grow_shrink', 'resize_round', 'affine_dance', 'random'], [[0.55, None, 0.55, None, None, 1], [0.3, None, 0.3, None, None, 1, False], [0.3, None, 0.3, None, None, 1, False], [None, None, None, None, 0.45, 1, False], [0.6, 1.6, 0.6, 1.75, 0.55, 1, False]]], -[[['umbrella'], ['umbrella'], ['umbrella']], ['horizontal_grow_shrink', 'resize_round', 'trapezoids'], [[0.55, None, 0.55, None, None, 1], [0.55, None, 0.55, None, None, 1, False], [1, 1, 0.8, 1.2, 0.3, 1, False]]], -[[['metal_circles']] * 5, ['vertical_grow_shrink', 'random'], [[0.15, None, 0.55, None, None, 2], [0.15, 1.8, 0.15, 1.45, 0.55, 1, False]]], -[[['fish'], ['fish']], ['affine_dance', 'random'], [[1, 1, 1, 1, 0.4, 1], [1, 1, 1, 1, 0.5, 1, False]]], -['wood', 'special_zoom', [0.3, None, 0.3, None, None, 2]], -['ny', 'affine_dance', [None, None, None, None, 0.3, 2]], -['sushi', 'resize_round', [0.5, None, 0.3, None, None, 1]], + [[['fruits'], ['fruits_old'], ['fruits_old'], ['fruits'], ['fruits']], + [ + 'horizontal_grow_shrink_slow', 'vertical_grow_shrink', 'resize_round', + 'affine_dance', 'random' + ], + [[0.55, None, 0.55, None, None, 1], [0.3, 1.8, 0.3, 2.0, None, 1, False], + [0.3, 1.8, 0.3, 2.0, None, 1, False], + [None, None, None, None, 0.45, 1, False], + [0.3, 1.3, 0.3, 1.6, 0.45, 1, False]]], + ['farm_house', 'special_resize_round', [0.45, None, 0.45, None, None, 2]], + ['cab_building', 'resize_round', [0.5, None, 0.3, 2.5, None, 2]], + ['rome', 'horizontal_grow_shrink', [0.3, None, 0.3, None, None, 3]], + [[['peacock', 'windows']], 'resize_round', [0.5, 2, 0.5, 1.75, None, 3]], + [[['soldiers', 'penguins']], 'horizontal_grow_shrink', + [0.3, None, 0.3, None, None, 3]], + [[['nkorea', 'sapa']], 'horizontal_grow_shrink', + [0.15, None, 0.15, None, None, 3]], + [[['quilt']] * 5, + [ + 'horizontal_grow_shrink', 'vertical_grow_shrink', 'resize_round', + 'affine_dance', 'random' + 
], + [[0.55, None, 0.55, None, None, 1], + [0.3, None, 0.3, None, None, 1, False], + [0.3, None, 0.3, None, None, 1, False], + [None, None, None, None, 0.45, 1, False], + [0.6, 1.6, 0.6, 1.75, 0.55, 1, False]]], + [[['umbrella'], ['umbrella'], ['umbrella']], + ['horizontal_grow_shrink', 'resize_round', 'trapezoids'], + [[0.55, None, 0.55, None, None, 1], + [0.55, None, 0.55, None, None, 1, False], + [1, 1, 0.8, 1.2, 0.3, 1, False]]], + [[['metal_circles']] * 5, ['vertical_grow_shrink', 'random'], + [[0.15, None, 0.55, None, None, 2], + [0.15, 1.8, 0.15, 1.45, 0.55, 1, False]]], + [[['fish'], ['fish']], ['affine_dance', 'random'], + [[1, 1, 1, 1, 0.4, 1], [1, 1, 1, 1, 0.5, 1, False]]], + ['wood', 'special_zoom', [0.3, None, 0.3, None, None, 2]], + ['ny', 'affine_dance', [None, None, None, None, 0.3, 2]], + ['sushi', 'resize_round', [0.5, None, 0.3, None, None, 1]], ] @@ -41,23 +66,29 @@ def generate_one_frame(gan, input_tensor, frame_shape, scale, geo_shifts, center else: out_mask, out_size = prepare_geometric(base_sz, scale, geo_shifts) - output_tensor, _, _ = gan.test(input_tensor=input_tensor, - input_size=in_size, - output_size=out_size, - rand_affine=geo_shifts, - run_d_pred=False, - run_reconstruct=False) + output_tensor, _, _ = gan.test( + input_tensor=input_tensor, + input_size=in_size, + output_size=out_size, + rand_affine=geo_shifts, + run_d_pred=False, + run_reconstruct=False + ) out = out_mask * output_tensor[1] - 1 + out_mask margin = np.uint16((frame_shape - np.array(out_size)) / 2) if center else [0, 0] - out_pad[margin[0]:margin[0] + out_size[0], margin[1]:margin[1] + out_size[1], :] = util.hist_match(util.tensor2im(out), util.tensor2im(input_tensor), util.tensor2im(out_mask)) + out_pad[margin[0]:margin[0] + out_size[0], margin[1]:margin[1] + out_size[1], :] = util.hist_match( + util.tensor2im(out), util.tensor2im(input_tensor), util.tensor2im(out_mask) + ) return out_pad def generate_one_scene(gan, input_tensor, scene_script, frame_shape, center): 
frames = [] for i, (scale_v, scale_h, shift_l, shift_r) in enumerate(scene_script): - output_image = generate_one_frame(gan, input_tensor, frame_shape, [scale_v, scale_h], [shift_l, shift_r], center) + output_image = generate_one_frame( + gan, input_tensor, frame_shape, [scale_v, scale_h], [shift_l, shift_r], center + ) frames.append(output_image) return np.stack(frames, axis=0) @@ -93,38 +124,57 @@ def generate_full_video(video_script, frame_shape): cur_scene_script_param = scene_script_param[:] if scene_script_param[1] is None: cur_scene_script_param[1] = cur_frame_shape[0] * 1.0 / input_tensor.shape[2] - print 'max scale vertical:', cur_scene_script_param[1] + print('max scale vertical:', cur_scene_script_param[1]) if cur_scene_script_param[3] is None: cur_scene_script_param[3] = cur_frame_shape[1] * 1.0 / input_tensor.shape[3] - print 'max scale horizontal:', cur_scene_script_param[3] + print('max scale horizontal:', cur_scene_script_param[3]) scene_script = make_scene_script(scene_script_name, *cur_scene_script_param) center = (cur_scene_script_param[4] is not None) - - scene = generate_one_scene(gan, input_tensor, scene_script, np.array([cur_frame_shape[0], cur_frame_shape[1]]), center) + scene = generate_one_scene( + gan, input_tensor, scene_script, np.array([cur_frame_shape[0], cur_frame_shape[1]]), center + ) partial_screen_scenes.append(scene) - print 'Done with %s, (scene %d/%d)' % (name, i + 1, n_scenes) - + print('Done with %s, (scene %d/%d)' % (name, i + 1, n_scenes)) - scene = np.concatenate(partial_screen_scenes, axis=concat_axis) if len(partial_screen_scenes) > 1 else partial_screen_scenes[0] + scene = np.concatenate(partial_screen_scenes, + axis=concat_axis) if len(partial_screen_scenes) > 1 else partial_screen_scenes[0] scenes.append(scene) scene = np.concatenate(scenes, axis=0) - outputdict = {'-b:v': '30000000', '-r': '100.0', - '-vf': 'drawtext="text=\'Input 
image\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)*7/8:enable=\'between(t,0,2)\'"', - '-preset': 'slow', '-profile:v': 'high444', '-level:v': '4.0', '-crf': '22'} + outputdict = { + '-b:v': + '30000000', + '-r': + '100.0', + '-vf': + 'drawtext="text=\'Input image\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)*7/8:enable=\'between(t,0,2)\'"', + '-preset': + 'slow', + '-profile:v': + 'high444', + '-level:v': + '4.0', + '-crf': + '22' + } if len(names) > 1: - outputdict['-vf'] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,2)\'"' + outputdict[ + '-vf' + ] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,2)\'"' if not scene_script_params[-1]: - outputdict['-vf'] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,0)\'"' + outputdict[ + '-vf' + ] = 'drawtext="text=\'Input images\':fontcolor=red:fontsize=48:x=(w-text_w)/2:y=(h-text_h)/2.5:enable=\'between(t,0,0)\'"' - writer = FFmpegWriter(conf.output_dir_path + '/vid%d_%s.mp4' % (i, '_'.join(names)), verbosity=1, - outputdict=outputdict) + writer = FFmpegWriter( + conf.output_dir_path + '/vid%d_%s.mp4' % (i, '_'.join(names)), verbosity=1, outputdict=outputdict + ) for frame in scene: for j in range(3): writer.writeFrame(frame) @@ -136,8 +186,10 @@ def prepare_geometric(base_sz, scale, geo_shifts): pad_r = np.abs(np.int(np.ceil(base_sz[3] * geo_shifts[1]))) in_mask = torch.zeros(base_sz[0], base_sz[1], base_sz[2], pad_l + base_sz[3] + pad_r).cuda() in_size = in_mask.shape[2:] - out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / MUST_DIVIDE) * MUST_DIVIDE), - np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / MUST_DIVIDE) * MUST_DIVIDE)) + out_size = ( + np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / MUST_DIVIDE) * MUST_DIVIDE), + np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / MUST_DIVIDE) * 
MUST_DIVIDE) + ) if pad_r > 0: in_mask[:, :, :, pad_l:-pad_r] = torch.ones(base_sz) else: diff --git a/test.py b/test.py index 0cc38b6..569cdf8 100644 --- a/test.py +++ b/test.py @@ -1,226 +1,259 @@ -from networks import GeoTransform -from PIL import Image -import util -from InGAN import InGAN -from configs import Config -from traceback import print_exc -from skvideo.io import FFmpegWriter -import os -from non_rect import * - - -def test_one_scale(gan, input_tensor, scale, must_divide, affine=None, return_tensor=False, size_instead_scale=False): - with torch.no_grad(): - in_size = input_tensor.shape[2:] - if size_instead_scale: - out_size = scale - else: - out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), - np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide)) - - output_tensor, _, _ = gan.test(input_tensor=input_tensor, - input_size=in_size, - output_size=out_size, - rand_affine=affine, - run_d_pred=False, - run_reconstruct=False) - if return_tensor: - return output_tensor[1] - else: - return util.tensor2im(output_tensor[1]) - - -def concat_images(images, margin, input_spot): - h_sizes = [im.shape[0] for im in zip(*images)[0]] - w_sizes = [im.shape[1] for im in images[0]] - h_total_size = np.sum(h_sizes) + margin * (len(images) - 1) - w_total_size = np.sum(w_sizes) + margin * (len(images) - 1) - - collage = np.ones([h_total_size, w_total_size, 3]) * 255 - for i in range(len(images)): - for j in range(len(images)): - top_left_corner_h = int(np.sum(h_sizes[:j]) + j * margin) - top_left_corner_w = int(np.sum(w_sizes[:i]) + i * margin) - bottom_right_corner_h = int(top_left_corner_h + h_sizes[j]) - bottom_right_corner_w = int(top_left_corner_w + w_sizes[i]) - - if [i, j] == input_spot: - collage[top_left_corner_h - margin/2: bottom_right_corner_h + margin/2, - top_left_corner_w - margin/2: bottom_right_corner_w + margin/2, - :] = [255, 0, 0] - collage[top_left_corner_h:bottom_right_corner_h, 
top_left_corner_w:bottom_right_corner_w] = images[j][i] - - return collage - - -def generate_images_for_collage(gan, input_tensor, scales, must_divide): - # NOTE: scales here is different from in the other funcs: here we only need 1d scales. - # Prepare output images list - output_images = [[[None] for _ in range(len(scales))] for _ in range(len(scales))] - - # Run over all scales and test the network for each one - for i, scale_h in enumerate(scales): - for j, scale_w in enumerate(scales): - output_images[i][j] = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) - return output_images - - -def retarget_video(gan, input_tensor, scales, must_divide, output_dir_path): - max_scale = np.max(np.array(scales)) - frame_shape = np.uint32(np.array(input_tensor.shape[2:]) * max_scale) - frame_shape[0] += (frame_shape[0] % 2) - frame_shape[1] += (frame_shape[1] % 2) - frames = np.zeros([len(scales), frame_shape[0], frame_shape[1], 3]) - for i, (scale_h, scale_w) in enumerate(scales): - output_image = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) - frames[i, 0:output_image.shape[0], 0:output_image.shape[1], :] = output_image - writer = FFmpegWriter(output_dir_path + '/vid.mp4', verbosity=1, outputdict={'-b': '30000000', '-r': '100.0'}) - - for i, _ in enumerate(scales): - for j in range(3): - writer.writeFrame(frames[i, :, :, :]) - writer.close() - - -def define_video_scales(scales): - max_v, min_v, max_h, min_h = scales - frames_per_resize = 10 - - x = np.concatenate([ - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize), - np.linspace(min_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, 1, frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize), - np.linspace(min_v, 
max_v, 2 * frames_per_resize), - np.linspace(max_v, 1, frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_v, frames_per_resize), - np.linspace(max_v, max_v, 2 * frames_per_resize), - np.linspace(max_v, min_v, 2 * frames_per_resize)]) - y = np.concatenate([ - np.linspace(1, 1, frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, 2 * frames_per_resize), - np.linspace(1, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, max_h, 2 * frames_per_resize), - np.linspace(max_h, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, max_h, 2 * frames_per_resize), - np.linspace(max_h, 1, frames_per_resize), - np.linspace(1, max_h, frames_per_resize), - np.linspace(max_h, max_h, frames_per_resize), - np.linspace(max_h, min_h, 2 * frames_per_resize), - np.linspace(min_h, min_h, 2 * frames_per_resize)]) - - return zip(x, y) - - -def generate_collage_and_outputs(conf, gan, input_tensor): - output_images = generate_images_for_collage(gan, input_tensor, conf.collage_scales, conf.must_divide) - - for i in range(len(output_images)): - for j in range(len(output_images)): - Image.fromarray(output_images[i][j], 'RGB').save(conf.output_dir_path + '/test_%d_%d.png' % (i, j)) - - input_spot = conf.collage_input_spot - output_images[input_spot[0]][input_spot[1]] = util.tensor2im(input_tensor) - - collage = concat_images(output_images, margin=10, input_spot=input_spot) - - Image.fromarray(np.uint8(collage), 'RGB').save(conf.output_dir_path + '/test_collage.png') - - -def _make_homography_mask(in_mask, target_size, rand_h): - theta = homography_based_on_top_corners_x_shift(rand_h) - target_size4d = torch.Size([in_mask.shape[0], in_mask.shape[1], target_size[0], target_size[1]]) - grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), target_size4d) - out 
= f.grid_sample(in_mask, grid, mode='bilinear', padding_mode='border') - return out - - -def test_homo(conf, gan, input_tensor, must_divide=8): - shift_range = np.arange(conf.non_rect_shift_range[0], conf.non_rect_shift_range[1], conf.non_rect_shift_range[2]) - total = (len(conf.non_rect_scales)*len(shift_range))**2 - ind = 0 - for scale1 in conf.non_rect_scales: - for scale2 in conf.non_rect_scales: - scale = [scale1, scale2] - for shift1 in shift_range: - for shift2 in shift_range: - ind += 1 - shifts = (shift1, shift2) - sz = input_tensor.shape - out_pad = np.uint8(255*np.ones([np.uint32(np.floor(sz[2]*scale[0])), np.uint32(np.floor(3*sz[3]*scale[1])), 3])) - - pad_l = np.abs(np.int(np.ceil(sz[3] * shifts[0]))) - pad_r = np.abs(np.int(np.ceil(sz[3] * shifts[1]))) - - in_mask = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() - input_for_regular = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() - - in_size = in_mask.shape[2:] - - out_size = (np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), - np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide)) - - if pad_r > 0: - in_mask[:,:, :, pad_l:-pad_r] = torch.ones_like(input_tensor) - input_for_regular[:, :, :, pad_l:-pad_r] = input_tensor - else: - in_mask[:, :, :, pad_l:] = torch.ones_like(input_tensor) - input_for_regular[:, :, :, pad_l:] = input_tensor - - out = test_one_scale(gan, input_tensor, out_size, conf.must_divide, affine=shifts, return_tensor=True, size_instead_scale=True) - # regular = transform(input_tensor, out_size, shifts) - out_mask = _make_homography_mask(in_mask, out_size, shifts) - - out = util.tensor2im(out_mask * out + 1 - out_mask) - # regular_out = util.tensor2im(out_mask * regular + 1 - out_mask) - # out_pad[:, sz[3] - pad_l: sz[3] - pad_l + out_size[1], :] = out - shift_str = "{1:0{0}d}_{3:0{2}d}".format(2 if shift1>=0 else 3, int(10*shift1), 2 if shift2>=0 else 3, int(10*shift2)) - - # out = np.rot90(out, 3) - # 
regular_out = np.rot90(regular_out, 3) - - Image.fromarray(out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ingan.png' % (int(10*scale1), int(10*scale2), shift_str)) - # Image.fromarray(regular_out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ref.png' % (scale1, scale2, shift_str)) - print ind, '/', total, 'scale:', scale, 'shift:', shifts - - -def main(): - conf = Config().parse(create_dir_flag=False) - conf.name = 'TEST_' + conf.name - conf.output_dir_path = util.prepare_result_dir(conf) - gan = InGAN(conf) - - try: - gan.resume(conf.test_params_path, test_flag=True) - [input_tensor] = util.read_data(conf) - - if conf.test_video: - retarget_video(gan, input_tensor, define_video_scales(conf.test_vid_scales), 8, conf.output_dir_path) - if conf.test_collage: - generate_collage_and_outputs(conf, gan, input_tensor) - if conf.test_non_rect: - test_homo(conf, gan, input_tensor) - - print 'Done with %s' % conf.input_image_path - - except KeyboardInterrupt: - raise - except Exception as e: - # print 'Something went wrong with %s (%d/%d), iter %dk' % (input_image_path, i, n_files, snapshot_iter) - print_exc() - - -if __name__ == '__main__': - main() +import os +import torch +import numpy as np +from PIL import Image +from skvideo.io import FFmpegWriter +import util +from InGAN import InGAN +from configs import Config +from traceback import print_exc +from networks import GeoTransform +from non_rect import ( + apply_resize_and_radial, homography_based_on_top_corners_x_shift, apply_resize_and_homograhpy, homography_grid, + apply_resize_and_affine +) + + +def test_one_scale(gan, input_tensor, scale, must_divide, affine=None, return_tensor=False, size_instead_scale=False): + with torch.no_grad(): + in_size = input_tensor.shape[2:] + if size_instead_scale: + out_size = scale + else: + out_size = ( + np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), + np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / 
must_divide) * must_divide) + ) + + output_tensor, _, _ = gan.test( + input_tensor=input_tensor, + input_size=in_size, + output_size=out_size, + rand_affine=affine, + run_d_pred=False, + run_reconstruct=False + ) + if return_tensor: + return output_tensor[1] + else: + return util.tensor2im(output_tensor[1]) + + +def concat_images(images, margin, input_spot): + h_sizes = [im.shape[0] for im in list(zip(*images))[0]] + w_sizes = [im.shape[1] for im in images[0]] + h_total_size = np.sum(h_sizes) + margin * (len(images) - 1) + w_total_size = np.sum(w_sizes) + margin * (len(images) - 1) + + collage = np.ones([h_total_size, w_total_size, 3]) * 255 + for i in range(len(images)): + for j in range(len(images)): + top_left_corner_h = int(np.sum(h_sizes[:j]) + j * margin) + top_left_corner_w = int(np.sum(w_sizes[:i]) + i * margin) + bottom_right_corner_h = int(top_left_corner_h + h_sizes[j]) + bottom_right_corner_w = int(top_left_corner_w + w_sizes[i]) + + if [i, j] == input_spot: + collage[top_left_corner_h - margin // 2:bottom_right_corner_h + margin // 2, + top_left_corner_w - margin // 2:bottom_right_corner_w + margin // 2, :] = [255, 0, 0] + collage[top_left_corner_h:bottom_right_corner_h, top_left_corner_w:bottom_right_corner_w] = images[j][i] + + return collage + + +def generate_images_for_collage(gan, input_tensor, scales, must_divide): + # NOTE: scales here is different from in the other funcs: here we only need 1d scales. 
+ # Prepare output images list + output_images = [[[None] for _ in range(len(scales))] for _ in range(len(scales))] + + # Run over all scales and test the network for each one + for i, scale_h in enumerate(scales): + for j, scale_w in enumerate(scales): + output_images[i][j] = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) + return output_images + + +def retarget_video(gan, input_tensor, scales, must_divide, output_dir_path): + max_scale = np.max(np.array(scales)) + frame_shape = np.uint32(np.array(input_tensor.shape[2:]) * max_scale) + frame_shape[0] += (frame_shape[0] % 2) + frame_shape[1] += (frame_shape[1] % 2) + frames = np.zeros([len(scales), frame_shape[0], frame_shape[1], 3]) + for i, (scale_h, scale_w) in enumerate(scales): + output_image = test_one_scale(gan, input_tensor, [scale_h, scale_w], must_divide) + frames[i, 0:output_image.shape[0], 0:output_image.shape[1], :] = output_image + writer = FFmpegWriter(output_dir_path + '/vid.mp4', verbosity=1, outputdict={'-b': '30000000', '-r': '100.0'}) + + for i, _ in enumerate(scales): + for j in range(3): + writer.writeFrame(frames[i, :, :, :]) + writer.close() + + +def define_video_scales(scales): + max_v, min_v, max_h, min_h = scales + frames_per_resize = 10 + + x = np.concatenate( + [ + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize), + np.linspace(min_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, 1, frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, min_v, 2 * frames_per_resize), + np.linspace(min_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, 1, frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_v, frames_per_resize), + np.linspace(max_v, max_v, 2 * frames_per_resize), + np.linspace(max_v, min_v, 2 
* frames_per_resize) + ] + ) + y = np.concatenate( + [ + np.linspace(1, 1, frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, 2 * frames_per_resize), + np.linspace(1, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, max_h, 2 * frames_per_resize), + np.linspace(max_h, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, max_h, 2 * frames_per_resize), + np.linspace(max_h, 1, frames_per_resize), + np.linspace(1, max_h, frames_per_resize), + np.linspace(max_h, max_h, frames_per_resize), + np.linspace(max_h, min_h, 2 * frames_per_resize), + np.linspace(min_h, min_h, 2 * frames_per_resize) + ] + ) + + return list(zip(x, y)) + + +def generate_collage_and_outputs(conf, gan, input_tensor): + output_images = generate_images_for_collage(gan, input_tensor, conf.collage_scales, conf.must_divide) + + for i in range(len(output_images)): + for j in range(len(output_images)): + Image.fromarray(output_images[i][j], 'RGB').save(conf.output_dir_path + '/test_%d_%d.png' % (i, j)) + + input_spot = conf.collage_input_spot + output_images[input_spot[0]][input_spot[1]] = util.tensor2im(input_tensor) + + collage = concat_images(output_images, margin=10, input_spot=input_spot) + + Image.fromarray(np.uint8(collage), 'RGB').save(conf.output_dir_path + '/test_collage.png') + + +def _make_homography_mask(in_mask, target_size, rand_h): + theta = homography_based_on_top_corners_x_shift(rand_h) + target_size4d = torch.Size([in_mask.shape[0], in_mask.shape[1], target_size[0], target_size[1]]) + grid = homography_grid(theta.expand(in_mask.shape[0], -1, -1), target_size4d) + out = torch.nn.functional.grid_sample(in_mask, grid, mode='bilinear', padding_mode='border') + return out + + +def test_homo(conf, gan, input_tensor, must_divide=8): + shift_range = np.arange(conf.non_rect_shift_range[0],
conf.non_rect_shift_range[1], conf.non_rect_shift_range[2]) + total = (len(conf.non_rect_scales) * len(shift_range))**2 + ind = 0 + for scale1 in conf.non_rect_scales: + for scale2 in conf.non_rect_scales: + scale = [scale1, scale2] + for shift1 in shift_range: + for shift2 in shift_range: + ind += 1 + shifts = (shift1, shift2) + sz = input_tensor.shape + out_pad = np.uint8( + 255 * + np.ones([np.uint32(np.floor(sz[2] * scale[0])), + np.uint32(np.floor(3 * sz[3] * scale[1])), 3]) + ) + + pad_l = np.abs(np.int(np.ceil(sz[3] * shifts[0]))) + pad_r = np.abs(np.int(np.ceil(sz[3] * shifts[1]))) + + in_mask = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() + input_for_regular = torch.zeros(sz[0], sz[1], sz[2], pad_l + sz[3] + pad_r).cuda() + + in_size = in_mask.shape[2:] + + out_size = ( + np.uint32(np.floor(scale[0] * in_size[0] * 1.0 / must_divide) * must_divide), + np.uint32(np.floor(scale[1] * in_size[1] * 1.0 / must_divide) * must_divide) + ) + + if pad_r > 0: + in_mask[:, :, :, pad_l:-pad_r] = torch.ones_like(input_tensor) + input_for_regular[:, :, :, pad_l:-pad_r] = input_tensor + else: + in_mask[:, :, :, pad_l:] = torch.ones_like(input_tensor) + input_for_regular[:, :, :, pad_l:] = input_tensor + + out = test_one_scale( + gan, + input_tensor, + out_size, + conf.must_divide, + affine=shifts, + return_tensor=True, + size_instead_scale=True + ) + # regular = transform(input_tensor, out_size, shifts) + out_mask = _make_homography_mask(in_mask, out_size, shifts) + + out = util.tensor2im(out_mask * out + 1 - out_mask) + # regular_out = util.tensor2im(out_mask * regular + 1 - out_mask) + # out_pad[:, sz[3] - pad_l: sz[3] - pad_l + out_size[1], :] = out + shift_str = "{1:0{0}d}_{3:0{2}d}".format( + 2 if shift1 >= 0 else 3, int(10 * shift1), 2 if shift2 >= 0 else 3, int(10 * shift2) + ) + + # out = np.rot90(out, 3) + # regular_out = np.rot90(regular_out, 3) + + Image.fromarray(out, 'RGB').save( + conf.output_dir_path + '/scale_%02d_%02d_transform 
%s_ingan.png' % + (int(10 * scale1), int(10 * scale2), shift_str) + ) + # Image.fromarray(regular_out, 'RGB').save(conf.output_dir_path + '/scale_%02d_%02d_transform %s_ref.png' % (scale1, scale2, shift_str)) + print(ind, '/', total, 'scale:', scale, 'shift:', shifts) + + +def main(): + conf = Config().parse(create_dir_flag=False) + conf.name = 'TEST_' + conf.name + conf.output_dir_path = util.prepare_result_dir(conf) + gan = InGAN(conf) + + try: + gan.resume(conf.test_params_path, test_flag=True) + [input_tensor] = util.read_data(conf) + + if conf.test_video: + retarget_video(gan, input_tensor, define_video_scales(conf.test_vid_scales), 8, conf.output_dir_path) + if conf.test_collage: + generate_collage_and_outputs(conf, gan, input_tensor) + if conf.test_non_rect: + test_homo(conf, gan, input_tensor) + + print('Done with %s' % conf.input_image_path) + + except KeyboardInterrupt: + raise + except Exception as e: + # print 'Something went wrong with %s (%d/%d), iter %dk' % (input_image_path, i, n_files, snapshot_iter) + print_exc() + + +if __name__ == '__main__': + main() diff --git a/test_util.py b/test_util.py new file mode 100644 index 0000000..a6a2a91 --- /dev/null +++ b/test_util.py @@ -0,0 +1,18 @@ +from pytest import fixture +import torch +import numpy as np +from PIL import Image +from util import tensor2im, im2tensor + + +@fixture +def test_image(): + img = Image.open('examples/fruit/fruit.png') + img = np.array(img) + return img + + +def test_tensor2im(test_image): + tensor = torch.tensor(test_image).permute(2, 0, 1).unsqueeze(0) / 255.
* 2 - 1 + img = tensor2im(tensor) + assert np.allclose(img, test_image) diff --git a/train.py b/train.py index 7759b36..77a454f 100644 --- a/train.py +++ b/train.py @@ -4,7 +4,6 @@ from util import Visualizer, read_data from traceback import print_exc - # Load configuration conf = Config().parse() @@ -30,7 +29,7 @@ except KeyboardInterrupt: raise except Exception as e: - print 'Something went wrong in iteration %d, While training.' % i + print('Something went wrong in iteration %d, While training.' % i) print_exc() # Take care of all testing, saving and presenting of current results and status @@ -39,7 +38,7 @@ except KeyboardInterrupt: raise except Exception as e: - print 'Something went wrong in iteration %d, While testing or visualizing.' % i + print('Something went wrong in iteration %d, While testing or visualizing.' % i) print_exc() # Save snapshot when needed @@ -53,5 +52,5 @@ except KeyboardInterrupt: raise except Exception as e: - print 'Something went wrong in iteration %d, While saving snapshot.' % i + print('Something went wrong in iteration %d, While saving snapshot.' 
% i) print_exc() diff --git a/train_supp_mat.py b/train_supp_mat.py index 2b7a25d..68480da 100644 --- a/train_supp_mat.py +++ b/train_supp_mat.py @@ -1,6 +1,6 @@ import os import threading -import Queue +import queue import subprocess base_dir = './side/' @@ -27,7 +27,7 @@ def run(self): while True: try: exp_name, item = self.inQ.get() - except Queue.Empty: + except queue.Empty: break # verify that this experiment was not executed already if experiment_was_not_already_exec(exp_name): @@ -36,13 +36,13 @@ def run(self): def main(): - q = Queue.Queue() + q = queue.Queue() workers = [Worker(q, gpu_id) for gpu_id in [0, 1]] for imgname in os.listdir(base_dir): full_img_name = os.path.join(base_dir, imgname) short_name = os.path.splitext(imgname)[0] cmd = ['python', 'train.py', '--input_image_path', full_img_name, '--gpu_id', '0'] - for aname, aa in abl_args.items(): + for aname, aa in list(abl_args.items()): exp_name = '{}_{}'.format(short_name, aname) full_cmd = cmd + aa + ['--name', exp_name] q.put((exp_name, full_cmd)) diff --git a/util.py b/util.py index af1b09d..7cd95e4 100644 --- a/util.py +++ b/util.py @@ -5,7 +5,7 @@ import glob from time import strftime, localtime from shutil import copy -from scipy.misc import imresize +# from scipy.misc import imresize import torch @@ -17,9 +17,8 @@ def read_data(conf): def read_shave_tensorize(path, must_divide): input_np = (np.array(Image.open(path).convert('RGB')) / 255.0) - input_np_shaved = input_np[:(input_np.shape[0] // must_divide) * must_divide, - :(input_np.shape[1] // must_divide) * must_divide, - :] + input_np_shaved = input_np[:(input_np.shape[0] // must_divide) * must_divide, :(input_np.shape[1] // must_divide) * + must_divide, :] input_tensor = im2tensor(input_np_shaved) @@ -48,21 +47,31 @@ def tensor2im(image_tensors, imtype=np.uint8): return image_numpys -def im2tensor(image_numpy, int_flag=False): +def im2tensor(image_numpy, int_flag=False, device=torch.device('cuda')): # the int flag indicates whether the 
input image is integer (and [0,255]) or float ([0,1]) if int_flag: image_numpy /= 255.0 # Undo the tensor shifting (see tensor2im function) transformed_image = np.transpose(image_numpy, (2, 0, 1)) * 2.0 - 1.0 - return torch.FloatTensor(transformed_image).unsqueeze(0).cuda() - - -def random_size(orig_size, curriculum=True, i=None, iter_for_max_range=None, must_divide=8.0, - min_scale=0.25, max_scale=2.0, max_transform_magniutude=0.3): + return torch.FloatTensor(transformed_image).unsqueeze(0).to(device) + + +def random_size( + orig_size, + curriculum=True, + i=None, + iter_for_max_range=None, + must_divide=8.0, + min_scale=0.25, + max_scale=2.0, + max_transform_magniutude=0.3 +): cur_max_scale = 1.0 + (max_scale - 1.0) * np.clip(1.0 * i / iter_for_max_range, 0, 1) if curriculum else max_scale cur_min_scale = 1.0 + (min_scale - 1.0) * np.clip(1.0 * i / iter_for_max_range, 0, 1) if curriculum else min_scale - cur_max_transform_magnitude = (max_transform_magniutude * np.clip(1.0 * i / iter_for_max_range, 0, 1) - if curriculum else max_transform_magniutude) + cur_max_transform_magnitude = ( + max_transform_magniutude * + np.clip(1.0 * i / iter_for_max_range, 0, 1) if curriculum else max_transform_magniutude + ) # set random transformation magnitude. scalar = affine, pair = homography. 
random_affine = -cur_max_transform_magnitude + 2 * cur_max_transform_magnitude * np.random.rand(2) @@ -83,9 +92,12 @@ def image_concat(g_preds, d_preds=None, size=None): dsize = g_pred.shape[1] if size is None or size[1] is None else size[1] result = np.ones([(1 + (d_pred is not None)) * hsize, dsize, 3]) * 255 if d_pred is not None: - d_pred_new = imresize((np.concatenate([d_pred] * 3, 2) - 128) * 2, g_pred.shape[0:2], interp='nearest') - result[hsize-g_pred.shape[0]:hsize+g_pred.shape[0], :g_pred.shape[1], :] = np.concatenate([g_pred, - d_pred_new], 0) + img = (np.concatenate([d_pred] * 3, 2) - 128) * 2 + import cv2 + # d_pred_new = imresize(img, g_pred.shape[0:2], interp='nearest') + d_pred_new = cv2.resize(img, dsize=g_pred.shape[0:2][::-1], interpolation=cv2.INTER_NEAREST) + con = np.concatenate([g_pred, d_pred_new], 0) + result[hsize - g_pred.shape[0]:hsize + g_pred.shape[0], :g_pred.shape[1], :] = con else: result[hsize - g_pred.shape[0]:, :, :] = g_pred results.append(np.uint8(np.round(result))) @@ -99,15 +111,16 @@ def save_image(image_tensor, image_path): def get_scale_weights(i, max_i, start_factor, input_shape, min_size, num_scales_limit, scale_factor): - num_scales = np.min([np.int(np.ceil(np.log(np.min(input_shape) * 1.0 / min_size) - / np.log(scale_factor))), num_scales_limit]) + num_scales = np.min( + [np.int(np.ceil(np.log(np.min(input_shape) * 1.0 / min_size) / np.log(scale_factor))), num_scales_limit] + ) # if i > max_i * 2: # i = max_i * 2 - factor = start_factor ** ((max_i - i) * 1.0 / max_i) + factor = start_factor**((max_i - i) * 1.0 / max_i) - un_normed_weights = factor ** np.arange(num_scales) + un_normed_weights = factor**np.arange(num_scales) weights = un_normed_weights / np.sum(un_normed_weights) # # np.clip(i, 0, max_i) @@ -143,9 +156,7 @@ def recreate_fig(self): self.d_map_real = self.fig.add_subplot(gs[7, 7]) # First plot data - self.plot_gan_loss = self.gan_loss.plot([], [], 'b-', - [], [], 'c--', - [], [], 'r--') + 
self.plot_gan_loss = self.gan_loss.plot([], [], 'b-', [], [], 'c--', [], [], 'r--') self.gan_loss.legend(('Generator loss', 'Discriminator loss (real image)', 'Discriminator loss (fake image)')) self.gan_loss.set_ylim(0, 1) @@ -170,20 +181,33 @@ def recreate_fig(self): def test_and_display(self, i): if not i % self.conf.print_freq and i > 0: - self.G_loss[i-self.conf.print_freq:i] = self.gan.losses_G_gan.detach().cpu().float().numpy().tolist() - self.D_loss_real[i-self.conf.print_freq:i] = self.gan.losses_D_real.detach().cpu().float().numpy().tolist() - self.D_loss_fake[i-self.conf.print_freq:i] = self.gan.losses_D_fake.detach().cpu().float().numpy().tolist() + self.G_loss[i - self.conf.print_freq:i] = self.gan.losses_G_gan.detach().cpu().float().numpy().tolist() + self.D_loss_real[i - + self.conf.print_freq:i] = self.gan.losses_D_real.detach().cpu().float().numpy().tolist() + self.D_loss_fake[i - + self.conf.print_freq:i] = self.gan.losses_D_fake.detach().cpu().float().numpy().tolist() if self.conf.reconstruct_loss_stop_iter > i: - self.Rec_loss[i-self.conf.print_freq:i] = self.gan.losses_G_reconstruct.detach().cpu().float().numpy().tolist() + self.Rec_loss[i - self.conf.print_freq:i] = self.gan.losses_G_reconstruct.detach().cpu().float().numpy( + ).tolist() if self.conf.reconstruct_loss_stop_iter < i: - print('iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, LR: %f' % - (i, self.G_loss[i-1], self.D_loss_real[i-1], self.D_loss_fake[i-1], - self.gan.lr_scheduler_G.get_lr()[0])) + print( + ( + 'iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, LR: %f' % ( + i, self.G_loss[i - 1], self.D_loss_real[i - 1], self.D_loss_fake[i - 1], + self.gan.lr_scheduler_G.get_lr()[0] + ) + ) + ) else: - print('iter: %d, G_loss: %f, D_loss_real: %f, D_loss_fake: %f, Rec_loss: %f, LR: %f' % - (i, self.G_loss[i-1], self.D_loss_real[i-1], self.D_loss_fake[i-1], self.Rec_loss[i-1], - self.gan.lr_scheduler_G.get_lr()[0])) + print( + ( + 'iter: %d, G_loss: %f, D_loss_real: %f, 
D_loss_fake: %f, Rec_loss: %f, LR: %f' % ( + i, self.G_loss[i - 1], self.D_loss_real[i - 1], self.D_loss_fake[i - 1], + self.Rec_loss[i - 1], self.gan.lr_scheduler_G.get_lr()[0] + ) + ) + ) if not i % self.conf.display_freq and i > 0: plt.gcf().clear() @@ -204,26 +228,31 @@ def test_and_display(self, i): # g_preds, d_preds, reconstructs = self.gan.test(test_input, output_size, rand_h, test_input_size) g_preds = [self.gan.input_tensor_noised, self.gan.G_pred] - d_preds = [self.gan.D.forward(self.gan.input_tensor_noised.detach(), self.gan.scale_weights), - self.gan.d_pred_fake] + d_preds = [ + self.gan.D.forward(self.gan.input_tensor_noised.detach(), self.gan.scale_weights), self.gan.d_pred_fake + ] reconstructs = self.gan.reconstruct input_size = self.gan.input_tensor_noised.shape[2:] - result = image_concat(tensor2im(g_preds), tensor2im(d_preds), (input_size[0]*2, input_size[1]*2)) - self.plot_gan_loss[0].set_data(range(i), self.G_loss[:i]) - self.plot_gan_loss[1].set_data(range(i), self.D_loss_real[:i]) - self.plot_gan_loss[2].set_data(range(i), self.D_loss_fake[:i]) + result = image_concat(tensor2im(g_preds), tensor2im(d_preds), (input_size[0] * 2, input_size[1] * 2)) + self.plot_gan_loss[0].set_data(list(range(i)), self.G_loss[:i]) + self.plot_gan_loss[1].set_data(list(range(i)), self.D_loss_real[:i]) + self.plot_gan_loss[2].set_data(list(range(i)), self.D_loss_fake[:i]) self.gan_loss.set_xlim(0, i) if self.conf.reconstruct_loss_stop_iter > i: - self.plot_reconstruct_loss[0].set_data(range(i), self.Rec_loss[:i]) + self.plot_reconstruct_loss[0].set_data(list(range(i)), self.Rec_loss[:i]) self.reconstruct_loss.set_ylim(np.min(self.Rec_loss[:i]), np.max(self.Rec_loss[:i])) self.reconstruct_loss.set_xlim(0, i) self.result.imshow(np.clip(result, 0, 255), vmin=0, vmax=255) self.real_example.imshow(np.clip(tensor2im(self.gan.real_example[0:1, :, :, :]), 0, 255), vmin=0, vmax=255) - self.d_map_real.imshow(self.gan.d_pred_real[0:1, :, :, 
:].detach().cpu().float().numpy().squeeze(), - cmap='gray', vmin=0, vmax=1) + self.d_map_real.imshow( + self.gan.d_pred_real[0:1, :, :, :].detach().cpu().float().numpy().squeeze(), + cmap='gray', + vmin=0, + vmax=1 + ) if self.conf.reconstruct_loss_stop_iter > i: self.reconstruction.imshow(np.clip(image_concat([tensor2im(reconstructs)]), 0, 255), vmin=0, vmax=255) @@ -247,17 +276,16 @@ def prepare_result_dir(conf): return conf.output_dir_path - def homography_based_on_top_corners_x_shift(rand_h): - p = np.array([[1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), -1. + rand_h[0]], - [0, 0, 0, 1., 1., -1., 1., 1., -1.], - [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], - [0, 0, 0, -1, -1, -1, 1, 1, 1], - [1, 0, -1, 0, 0, 0, 1, 0, -1], - [0, 0, 0, 1, 0, -1, 0, 0, 0], - [-1, 0, -1, 0, 0, 0, 1, 0, 1], - [0, 0, 0, -1, 0, -1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=np.float32) + p = np.array( + [ + [1., 1., -1, 0, 0, 0, -(-1. + rand_h[0]), -(-1. + rand_h[0]), -1. + rand_h[0]], + [0, 0, 0, 1., 1., -1., 1., 1., -1.], [-1., -1., -1, 0, 0, 0, 1 + rand_h[1], 1 + rand_h[1], 1 + rand_h[1]], + [0, 0, 0, -1, -1, -1, 1, 1, 1], [1, 0, -1, 0, 0, 0, 1, 0, -1], [0, 0, 0, 1, 0, -1, 0, 0, 0], + [-1, 0, -1, 0, 0, 0, 1, 0, 1], [0, 0, 0, -1, 0, -1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1] + ], + dtype=np.float32 + ) b = np.zeros((9, 1), dtype=np.float32) b[8, 0] = 1. 
h = np.dot(np.linalg.inv(p), b) @@ -279,7 +307,7 @@ def homography_grid(theta, size): """ a = 1 b = 1 - y, x = torch.meshgrid((torch.linspace(-b, b, np.int(size[-2]*a)), torch.linspace(-b, b, np.int(size[-1]*a)))) + y, x = torch.meshgrid((torch.linspace(-b, b, np.int(size[-2] * a)), torch.linspace(-b, b, np.int(size[-1] * a)))) n = np.int(size[-2] * a) * np.int(size[-1] * a) hxy = torch.ones(n, 3, dtype=torch.float) hxy[:, 0] = x.contiguous().view(-1) @@ -287,7 +315,7 @@ def homography_grid(theta, size): out = hxy[None, ...].cuda().matmul(theta.transpose(1, 2)) # normalize out = out[:, :, :2] / out[:, :, 2:] - return out.view(theta.shape[0], np.int(size[-2]*a), np.int(size[-1]*a), 2) + return out.view(theta.shape[0], np.int(size[-2] * a), np.int(size[-1] * a), 2) def hist_match(source, template, mask_3ch): @@ -313,8 +341,7 @@ def hist_match(source, template, mask_3ch): template = template.ravel() # get the set of unique pixel values and their corresponding indices and # counts - s_values, bin_idx, s_counts = np.unique(source_masked, return_inverse=True, - return_counts=True) + s_values, bin_idx, s_counts = np.unique(source_masked, return_inverse=True, return_counts=True) t_values, t_counts = np.unique(template, return_counts=True) # take the cumsum of the counts and normalize by the number of pixels to