diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7acf623 --- /dev/null +++ b/.gitignore @@ -0,0 +1,186 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + + +# custom files +Checkpoints/ +Images/ +__pycache__/ +dataset/ +libs/__pycache__/utils.cpython-310.pyc +libs/__pycache__/vgg16.cpython-310.pyc +static/output.png +.DS_Store +comparison.png diff --git a/README.md b/README.md index 0329155..ad614a0 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ## Introduction -Animation movie companies like Pixar and Dreamworks render their 3d scenes using a technique called Pathtracing which enables them to create high quality photorealistic frames. Pathtracing involves shooting 1000’s of rays into a pixel randomly(Monte Carlo) which will then hit the objects in the scene and based on the reflective property of the object the rays reflect or refract or get absorbed. The colors returned by these rays are averaged to get the color of the pixel and this process is repeated for all the pixels. Due to the computational complexity it might take 8-16 hours to render a single frame. +Animation movie companies like Pixar and Dreamworks render their 3d scenes using a technique called Pathtracing which enables them to create high quality photorealistic frames. Pathtracing involves shooting 1000's of rays into a pixel randomly(Monte Carlo) which will then hit the objects in the scene and based on the reflective property of the object the rays reflect or refract or get absorbed. The colors returned by these rays are averaged to get the color of the pixel and this process is repeated for all the pixels. Due to the computational complexity it might take 8-16 hours to render a single frame. We are proposing a neural network based solution for reducing 8-16 hours to a couple of seconds using a Generative Adversarial Network. 
The main idea behind this proposed method is to render using small number of samples per pixel (let say 4 spp or 8 spp instead of 32K spp) and pass the noisy image to our network, which will generate a photorealistic image with high quality. @@ -11,8 +11,6 @@ We are proposing a neural network based solution for reducing 8-16 hours to a co [![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/Yh_Bsoe-Qj4/0.jpg)](https://www.youtube.com/watch?v=Yh_Bsoe-Qj4) - - #### Table of Contents * [Installation](#installation) @@ -25,24 +23,65 @@ We are proposing a neural network based solution for reducing 8-16 hours to a co ## Installation -To run the project you will need: - * python 3.5 - * tensorflow (v1.1 or v1.0) - * PIL - * [CKPT FILE](https://uofi.box.com/shared/static/21a5jwdiqpnx24c50cyolwzwycnr3fwe.gz) - * [Dataset](https://uofi.box.com/shared/static/gy0t3vgwtlk1933xbtz1zvhlakkdac3n.zip) +### Prerequisites +* Python 3.10-3.12 (the range supported by the package versions pinned in requirements.txt) +* pip (Python package installer) +* virtualenv (recommended) + +### Setup Steps + +1. Clone the repository: +```bash +git clone https://github.com/yourusername/ImageDenoisingGAN.git +cd ImageDenoisingGAN +``` + +2. Create and activate a virtual environment: +```bash +# Create virtual environment +python -m venv venv + +# Activate virtual environment +# On Windows: +venv\Scripts\activate +# On macOS/Linux: +source venv/bin/activate +``` + +3. Install dependencies: +```bash +pip install -r requirements.txt +``` + +4. 
Download required files: +* [CKPT FILE](https://uofi.box.com/shared/static/21a5jwdiqpnx24c50cyolwzwycnr3fwe.gz) +* [Dataset](https://uofi.box.com/shared/static/gy0t3vgwtlk1933xbtz1zvhlakkdac3n.zip) (only if you want to train) + +### Required Files Structure +``` +ImageDenoisingGAN/ +├── venv/ # Virtual environment (created during setup) +├── Checkpoints/ # Extracted CKPT files go here +├── dataset/ # Dataset folder (if training) +├── static/ # Output images +└── requirements.txt # Project dependencies +``` ## Running -Once you have all the depenedencies ready, do the folowing: - -Download the dataset extract it to a folder named 'dataset' (ONLY if you want to train, not needed to run). +Once you have all the dependencies ready, do the following: -Extract the CKPT files to a folder named 'Checkpoints' +1. Extract the CKPT files to a folder named 'Checkpoints' -Run main.py -- python3 main.py +2. Run the application: +```bash +# Make sure your virtual environment is activated +python main.py +``` -Go to the browser, if you are running it on a server then [ip-address]:8888, if you are on your local machine then localhost:8888 +3. Access the application: +* If running locally: http://localhost:80 +* If running on a server: http://[server-ip]:80 ## Dataset We picked random 40 images from pixar movies, added gaussian noise of different standard deviation, 5 sets of 5 different standard deviation making a total of 1000 images for the training set. For validation we used 10 images completely different from the training set and added gaussian noise. For testing we had both added gaussian images and real noisy images. 
diff --git a/conv_helper.py b/conv_helper.py index a5b1f13..d946e7d 100644 --- a/conv_helper.py +++ b/conv_helper.py @@ -1,39 +1,36 @@ import tensorflow as tf -import tensorflow.contrib.slim as slim - from utils import * -def conv_layer(input_image, ksize, in_channels, out_channels, stride, scope_name, activation_function=lrelu, reuse=False): - with tf.variable_scope(scope_name): - filter = tf.Variable(tf.random_normal([ksize, ksize, in_channels, out_channels], stddev=0.03)) - output = tf.nn.conv2d(input_image, filter, strides=[1, stride, stride, 1], padding='SAME') - output = slim.batch_norm(output) - if activation_function: - output = activation_function(output) - return output, filter - -def residual_layer(input_image, ksize, in_channels, out_channels, stride, scope_name): - with tf.variable_scope(scope_name): - output, filter = conv_layer(input_image, ksize, in_channels, out_channels, stride, scope_name+"_conv1") - output, filter = conv_layer(output, ksize, out_channels, out_channels, stride, scope_name+"_conv2") - output = tf.add(output, tf.identity(input_image)) - return output, filter - -def transpose_deconvolution_layer(input_tensor, used_weights, new_shape, stride, scope_name): - with tf.varaible_scope(scope_name): - output = tf.nn.conv2d_transpose(input_tensor, used_weights, output_shape=new_shape, strides=[1, stride, stride, 1], padding='SAME') - output = tf.nn.relu(output) - return output +class ResidualBlock(tf.keras.layers.Layer): + def __init__(self, out_channels, ksize, stride, name=None): + super(ResidualBlock, self).__init__(name=name) + self.conv1 = tf.keras.layers.Conv2D(out_channels, ksize, strides=stride, padding='same') + self.conv2 = tf.keras.layers.Conv2D(out_channels, ksize, strides=stride, padding='same') + self.bn1 = tf.keras.layers.BatchNormalization() + self.bn2 = tf.keras.layers.BatchNormalization() + self.activation = tf.keras.layers.LeakyReLU(0.2) + self.add = tf.keras.layers.Add() -def resize_deconvolution_layer(input_tensor, 
new_shape, scope_name): - with tf.variable_scope(scope_name): - output = tf.image.resize_images(input_tensor, (new_shape[1], new_shape[2]), method=1) - output, unused_weights = conv_layer(output, 3, new_shape[3]*2, new_shape[3], 1, scope_name+"_deconv") - return output + def call(self, inputs): + x = self.conv1(inputs) + x = self.bn1(x) + x = self.activation(x) + + x = self.conv2(x) + x = self.bn2(x) + x = self.activation(x) + + return self.add([x, inputs]) -def deconvolution_layer(input_tensor, new_shape, scope_name): - return resize_deconvolution_layer(input_tensor, new_shape, scope_name) +class DeconvolutionBlock(tf.keras.layers.Layer): + def __init__(self, out_channels, name=None): + super(DeconvolutionBlock, self).__init__(name=name) + self.conv = tf.keras.layers.Conv2D(out_channels, 3, padding='same') + self.bn = tf.keras.layers.BatchNormalization() + self.activation = tf.keras.layers.LeakyReLU(0.2) -def output_between_zero_and_one(output): - output +=1 - return output/2 + def call(self, inputs, target_height, target_width): + x = tf.image.resize(inputs, (target_height, target_width), method='bilinear') + x = self.conv(x) + x = self.bn(x) + return self.activation(x) diff --git a/main.py b/main.py index c488a8f..553bc02 100644 --- a/main.py +++ b/main.py @@ -1,10 +1,8 @@ from flask import Flask, render_template, request, jsonify, send_file import numpy as np -import scipy.misc -import base64 from io import BytesIO from test import * -import time +import cv2 app = Flask(__name__) @@ -12,15 +10,22 @@ def index(): return render_template("index.html") - @app.route('/denoisify', methods=['GET', 'POST']) def denoisify(): if request.method == "POST": inputImg = request.files['file'] - outputImg = denoise(inputImg) - scipy.misc.imsave('static/output.png', outputImg) - return jsonify(result="Success") + # Convert the file object to a numpy array using OpenCV + input_array = cv2.imdecode(np.frombuffer(inputImg.read(), np.uint8), cv2.IMREAD_COLOR) + input_array = 
cv2.cvtColor(input_array, cv2.COLOR_BGR2RGB) # Convert BGR to RGB + outputImg = denoise(input_array) + print(f"Out image: {outputImg}") + + # Save the output image using OpenCV + output_image = (outputImg * 255).astype(np.uint8) # Scale to 0-255 range + output_image = cv2.cvtColor(output_image, cv2.COLOR_RGB2BGR) # Convert RGB to BGR for saving + cv2.imwrite('static/output.png', output_image) + return jsonify(result="Success") if __name__=="__main__": app.run(host="0.0.0.0",port="80") diff --git a/model.py b/model.py index f075e44..83a8976 100644 --- a/model.py +++ b/model.py @@ -1,38 +1,103 @@ import numpy as np import tensorflow as tf -import tensorflow.contrib.slim as slim +from utils import * +from conv_helper import ResidualBlock, DeconvolutionBlock +class Generator(tf.keras.Model): + def __init__(self): + super(Generator, self).__init__() + self.conv1 = tf.keras.layers.Conv2D(32, 9, padding='same') + self.conv2 = tf.keras.layers.Conv2D(64, 3, padding='same') + self.conv3 = tf.keras.layers.Conv2D(128, 3, padding='same') + self.conv4 = tf.keras.layers.Conv2D(3, 9, padding='same', activation='tanh') + + self.bn1 = tf.keras.layers.BatchNormalization() + self.bn2 = tf.keras.layers.BatchNormalization() + self.bn3 = tf.keras.layers.BatchNormalization() + + self.activation = tf.keras.layers.LeakyReLU(0.2) + + # Create residual blocks + self.res_blocks = [ResidualBlock(128, 3, 1, name=f"g_res_{i}") for i in range(3)] + + # Create deconvolution blocks + self.deconv1 = DeconvolutionBlock(64, name='g_deconv1') + self.deconv2 = DeconvolutionBlock(32, name='g_deconv2') -from utils import * -from conv_helper import * + # Add layer for final normalization + self.add1 = tf.keras.layers.Add() + self.add2 = tf.keras.layers.Add() + + def normalize_output(self, x): + return (x + 1.0) / 2.0 + + def call(self, inputs): + x = self.conv1(inputs) + x = self.bn1(x) + x = self.activation(x) + conv1 = x + + x = self.conv2(x) + x = self.bn2(x) + x = self.activation(x) + + x = 
self.conv3(x) + x = self.bn3(x) + x = self.activation(x) + + # Residual blocks + for res_block in self.res_blocks: + x = res_block(x) + # Deconvolution layers + x = self.deconv1(x, target_height=128, target_width=128) + x = self.deconv2(x, target_height=256, target_width=256) + x = self.add1([x, conv1]) -def generator(input): - conv1, conv1_weights = conv_layer(input, 9, 3, 32, 1, "g_conv1") - conv2, conv2_weights = conv_layer(conv1, 3, 32, 64, 1, "g_conv2") - conv3, conv3_weights = conv_layer(conv2, 3, 64, 128, 1, "g_conv3") + x = self.conv4(x) + x = self.add2([x, inputs]) + return self.normalize_output(x) - res1, res1_weights = residual_layer(conv3, 3, 128, 128, 1, "g_res1") - res2, res2_weights = residual_layer(res1, 3, 128, 128, 1, "g_res2") - res3, res3_weights = residual_layer(res2, 3, 128, 128, 1, "g_res3") +class Discriminator(tf.keras.Model): + def __init__(self): + super(Discriminator, self).__init__() + self.conv1 = tf.keras.layers.Conv2D(48, 4, strides=2, padding='same') + self.conv2 = tf.keras.layers.Conv2D(96, 4, strides=2, padding='same') + self.conv3 = tf.keras.layers.Conv2D(192, 4, strides=2, padding='same') + self.conv4 = tf.keras.layers.Conv2D(384, 4, padding='same') + self.conv5 = tf.keras.layers.Conv2D(1, 4, padding='same', activation='sigmoid') + + self.bn1 = tf.keras.layers.BatchNormalization() + self.bn2 = tf.keras.layers.BatchNormalization() + self.bn3 = tf.keras.layers.BatchNormalization() + self.bn4 = tf.keras.layers.BatchNormalization() + + self.activation = tf.keras.layers.LeakyReLU(0.2) - deconv1 = deconvolution_layer(res3, [BATCH_SIZE, 128, 128, 64], 'g_deconv1') - deconv2 = deconvolution_layer(deconv1, [BATCH_SIZE, 256, 256, 32], "g_deconv2") + def call(self, inputs): + x = self.conv1(inputs) + x = self.bn1(x) + x = self.activation(x) - deconv2 = deconv2 + conv1 + x = self.conv2(x) + x = self.bn2(x) + x = self.activation(x) - conv4, conv4_weights = conv_layer(deconv2, 9, 32, 3, 1, "g_conv5", activation_function=tf.nn.tanh) + x = 
self.conv3(x) + x = self.bn3(x) + x = self.activation(x) - conv4 = conv4 + input - output = output_between_zero_and_one(conv4) + x = self.conv4(x) + x = self.bn4(x) + x = self.activation(x) - return output + return self.conv5(x) -def discriminator(input, reuse=False): - conv1, conv1_weights = conv_layer(input, 4, 3, 48, 2, "d_conv1", reuse=reuse) - conv2, conv2_weights = conv_layer(conv1, 4, 48, 96, 2, "d_conv2", reuse=reuse) - conv3, conv3_weights = conv_layer(conv2, 4, 96, 192, 2, "d_conv3", reuse=reuse) - conv4, conv4_weights = conv_layer(conv3, 4, 192, 384, 1, "d_conv4", reuse=reuse) - conv5, conv5_weights = conv_layer(conv4, 4, 384, 1, 1, "d_conv5", activation_function=tf.nn.sigmoid, reuse=reuse) +# Create model instances +generator = Generator() +discriminator = Discriminator() - return conv5 +# Build the models with a sample input to initialize weights +dummy_input = tf.zeros((1, 256, 256, 3)) +_ = generator(dummy_input) +_ = discriminator(dummy_input) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f015153 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,56 @@ +absl-py==2.2.2 +astunparse==1.6.3 +blinker==1.9.0 +certifi==2025.4.26 +charset-normalizer==3.4.2 +click==8.2.0 +contourpy==1.3.2 +cycler==0.12.1 +Flask==3.1.0 +flatbuffers==25.2.10 +fonttools==4.58.0 +gast==0.6.0 +google-pasta==0.2.0 +grpcio==1.71.0 +h5py==3.13.0 +idna==3.10 +imageio==2.37.0 +itsdangerous==2.2.0 +Jinja2==3.1.6 +keras==3.9.2 +kiwisolver==1.4.8 +lazy_loader==0.4 +libclang==18.1.1 +Markdown==3.8 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib==3.10.3 +mdurl==0.1.2 +ml_dtypes==0.5.1 +namex==0.0.9 +networkx==3.4.2 +numpy==2.1.3 +opencv-python==4.11.0.86 +opt_einsum==3.4.0 +optree==0.15.0 +packaging==25.0 +pillow==11.2.1 +protobuf==5.29.4 +Pygments==2.19.1 +pyparsing==3.2.3 +python-dateutil==2.9.0.post0 +requests==2.32.3 +rich==14.0.0 +scikit-image==0.25.2 +scipy==1.15.3 +six==1.17.0 +tensorboard==2.19.0 +tensorboard-data-server==0.7.2 
+tensorflow==2.19.0 +tensorflow-io-gcs-filesystem==0.37.1 +termcolor==3.1.0 +tifffile==2025.5.10 +typing_extensions==4.13.2 +urllib3==2.4.0 +Werkzeug==3.1.3 +wrapt==1.17.2 diff --git a/static/output.png b/static/output.png deleted file mode 100644 index 107e8a5..0000000 Binary files a/static/output.png and /dev/null differ diff --git a/test.py b/test.py index 37450a8..748bc57 100644 --- a/test.py +++ b/test.py @@ -1,46 +1,40 @@ import time - import tensorflow as tf import numpy as np - +import cv2 from utils import * -from model import * - +from model import generator, discriminator from skimage import measure - - def test(image): - tf.reset_default_graph() - - global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step') - - gen_in = tf.placeholder(shape=[None, BATCH_SHAPE[1], BATCH_SHAPE[2], BATCH_SHAPE[3]], dtype=tf.float32, name='generated_image') - real_in = tf.placeholder(shape=[None, BATCH_SHAPE[1], BATCH_SHAPE[2], BATCH_SHAPE[3]], dtype=tf.float32, name='groundtruth_image') - - Gz = generator(gen_in) - + tf.keras.backend.clear_session() - - init = tf.global_variables_initializer() - with tf.Session() as sess: - sess.run(init) - - saver = initialize(sess) - initial_step = global_step.eval() - - start_time = time.time() - n_batches = 200 - total_iteration = n_batches * N_EPOCHS - - image = sess.run(tf.map_fn(lambda img: tf.image.per_image_standardization(img), image)) - image = sess.run(Gz, feed_dict={gen_in: image}) - image = np.resize(image[0][56:, :, :], [144, 256, 3]) - imsave('output', image) - return image + # Process image + image = tf.image.per_image_standardization(image) + image = generator.predict(image) + image = np.resize(image[0][56:, :, :], [144, 256, 3]) + imsave('output', image) + return image def denoise(image): - image = scipy.misc.imread(image, mode='RGB').astype('float32') + # Handle both file paths and numpy arrays + if isinstance(image, str): + # If image is a file path + image = cv2.imread(image) + image = 
cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert BGR to RGB + image = image.astype('float32') + elif isinstance(image, np.ndarray): + # If image is already a numpy array, ensure it's in the right format + if image.dtype != np.float32: + image = image.astype('float32') + if image.max() > 1.0: + image = image / 255.0 + # Ensure 3 channels (RGB) + if image.shape[-1] == 4: # If RGBA + image = image[..., :3] # Take only RGB channels + else: + raise ValueError("Input must be either a file path (str) or a numpy array") + npad = ((56, 56), (0, 0), (0, 0)) image = np.pad(image, pad_width=npad, mode='constant', constant_values=0) image = np.expand_dims(image, axis=0) @@ -48,10 +42,11 @@ def denoise(image): output = test(image) return output - - if __name__=='__main__': - image = scipy.misc.imread(sys.argv[-1], mode='RGB').astype('float32') + image = cv2.imread(sys.argv[-1]) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert BGR to RGB + image = image.astype('float32') / 255.0 + npad = ((56, 56), (0, 0), (0, 0)) image = np.pad(image, pad_width=npad, mode='constant', constant_values=0) image = np.expand_dims(image, axis=0) diff --git a/train.py b/train.py index 5d8544b..6223a25 100644 --- a/train.py +++ b/train.py @@ -1,15 +1,11 @@ import time - import tensorflow as tf import numpy as np - +import cv2 from utils import * from model import * - from skimage import measure - - def train(): tf.reset_default_graph() @@ -39,7 +35,6 @@ def train(): d_solver = tf.train.AdamOptimizer(LEARNING_RATE).minimize(d_loss, var_list=d_vars, global_step=global_step) g_solver = tf.train.AdamOptimizer(LEARNING_RATE).minimize(g_loss, var_list=g_vars) - init = tf.global_variables_initializer() with tf.Session() as sess: sess.run(init) @@ -53,7 +48,6 @@ def train(): validation_batch = sess.run(tf.map_fn(lambda img: tf.image.per_image_standardization(img), validation)) - for index in range(initial_step, total_iteration): input_batch = load_next_training_batch() training_batch, 
groundtruth_batch = np.split(input_batch, 2, axis=2) @@ -61,21 +55,20 @@ def train(): training_batch = sess.run(tf.map_fn(lambda img: tf.image.per_image_standardization(img), training_batch)) groundtruth_batch = sess.run(tf.map_fn(lambda img: tf.image.per_image_standardization(img), groundtruth_batch)) - _, d_loss_cur = sess.run([d_solver, d_loss], feed_dict={gen_in: training_batch, real_in: groundtruth_batch}) _, g_loss_cur = sess.run([g_solver, g_loss], feed_dict={gen_in: training_batch, real_in: groundtruth_batch}) - - - if(index + 1) % SKIP_STEP == 0: - saver.save(sess, CKPT_DIR, index) image = sess.run(Gz, feed_dict={gen_in: validation_batch}) image = np.resize(image[7][56:, :, :], [144, 256, 3]) imsave('val_%d' % (index+1), image) - image = scipy.misc.imread(IMG_DIR+'val_%d.png' % (index+1), mode='RGB').astype('float32') + + # Load the saved image using OpenCV + image = cv2.imread(os.path.join(IMG_DIR, f'val_{index+1}.png')) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype('float32') / 255.0 + psnr = measure.compare_psnr(metrics_image, image, data_range=255) ssim = measure.compare_ssim(metrics_image, image, multichannel=True, data_range=255, win_size=11) @@ -83,8 +76,6 @@ def train(): "Step {}/{} Gen Loss: ".format(index + 1, total_iteration) + str(g_loss_cur) + " Disc Loss: " + str( d_loss_cur)+ " PSNR: "+str(psnr)+" SSIM: "+str(ssim)) - - if __name__=='__main__': training_dir_list = training_dataset_init() validation = load_validation() diff --git a/utils.py b/utils.py index 6a105cd..4cc72bf 100644 --- a/utils.py +++ b/utils.py @@ -2,18 +2,14 @@ import re import sys import glob -import scipy.misc from itertools import cycle import numpy as np import tensorflow as tf - +import cv2 from libs import vgg16 -from PIL import Image - - LEARNING_RATE = 0.002 BATCH_SIZE = 5 BATCH_SHAPE = [BATCH_SIZE, 256, 256, 3] @@ -31,7 +27,8 @@ PIXEL_LOSS_FACTOR = 1.0 STYLE_LOSS_FACTOR = 1.0 SMOOTH_LOSS_FACTOR = 1.0 -metrics_image = 
scipy.misc.imread(METRICS_SET_DIR+'gt.png', mode='RGB').astype('float32') +metrics_image = cv2.imread(METRICS_SET_DIR+'gt.png') +metrics_image = cv2.cvtColor(metrics_image, cv2.COLOR_BGR2RGB).astype('float32') def initialize(sess): @@ -72,14 +69,14 @@ def load_next_training_batch(): def load_validation(): filelist = sorted(glob.glob(VALIDATION_SET_DIR + '/*.png'), key=alphanum_key) - validation = np.array([np.array(scipy.misc.imread(fname, mode='RGB').astype('float32')) for fname in filelist]) + validation = np.array([cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB).astype('float32') for fname in filelist]) npad = ((0, 0), (56, 56), (0, 0), (0, 0)) validation = np.pad(validation, pad_width=npad, mode='constant', constant_values=0) return validation def training_dataset_init(): filelist = sorted(glob.glob(TRAINING_SET_DIR + '/*.png'), key=alphanum_key) - batch = np.array([np.array(scipy.misc.imread(fname, mode='RGB').astype('float32')) for fname in filelist]) + batch = np.array([cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB).astype('float32') for fname in filelist]) batch = split(batch, BATCH_SIZE) training_dir_list = get_training_dir_list() global pool @@ -88,26 +85,28 @@ def training_dataset_init(): def imsave(filename, image): - scipy.misc.imsave(IMG_DIR+filename+'.png', image) + # Create Images directory if it doesn't exist + if not os.path.exists(IMG_DIR): + os.makedirs(IMG_DIR) + # Convert RGB to BGR for OpenCV + image_bgr = cv2.cvtColor(np.uint8(image), cv2.COLOR_RGB2BGR) + cv2.imwrite(IMG_DIR+filename+'.png', image_bgr) def merge_images(file1, file2): """Merge two images into one, displayed side by side - :param file1: path to first image file - :param file2: path to second image file - :return: the merged Image object + :param file1: first image array + :param file2: second image array + :return: the merged image array """ - image1 = Image.fromarray(np.uint8(file1)) - image2 = Image.fromarray(np.uint8(file2)) - - (width1, height1) = image1.size - 
(width2, height2) = image2.size + height1, width1 = file1.shape[:2] + height2, width2 = file2.shape[:2] result_width = width1 + width2 result_height = max(height1, height2) - result = Image.new('RGB', (result_width, result_height)) - result.paste(im=image1, box=(0, 0)) - result.paste(im=image2, box=(width1, 0)) + result = np.zeros((result_height, result_width, 3), dtype=np.uint8) + result[:height1, :width1] = file1 + result[:height2, width1:width1+width2] = file2 return result @@ -141,9 +140,7 @@ def lrelu(x, leak=0.2, name='lrelu'): return f1 * x + f2 * abs(x) def RGB_TO_BGR(img): - img_channel_swap = img[..., ::-1] - # img_channel_swap_1 = tf.reverse(img, axis=[-1]) - return img_channel_swap + return cv2.cvtColor(img, cv2.COLOR_RGB2BGR) def get_pixel_loss(target,prediction):