diff --git a/.gitignore b/.gitignore
index 6f30fbc..3b96e90 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,108 @@
-*.pyc
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+Pipfile.lock
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Project specific
+saved_models/
 *.npz
+*.pth
+*.pt
+*.pkl
+*.pickle
+test_images/
+VOCdevkit/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
diff --git a/Pipfile b/Pipfile
index f4d4501..b2bd1ac 100644
--- a/Pipfile
+++ b/Pipfile
@@ -14,4 +14,4 @@ ipython = "*"
 scipy = "*"
 
 [requires]
-python_version = "3.6"
+python_version = "3.8"
diff --git a/README.md b/README.md
index 118d3b7..ddde119 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Approximate Convolutional Sparse Coding (ACSC)
 
-A pytorch implementation of a ACSC model based on **Lerned Convolutional Sparse Coding** model proposed [here](https://arxiv.org/abs/1711.00328) and or [here](https://ieeexplore.ieee.org/abstract/document/8462313).
+A PyTorch implementation of an ACSC model based on the **Learned Convolutional Sparse Coding** model proposed [here](https://arxiv.org/abs/1711.00328) and [here](https://ieeexplore.ieee.org/abstract/document/8462313).
 
 ## ACSC block description
diff --git a/analyze_model.py b/analyze_model.py
index 843208c..efa60d0 100644
--- a/analyze_model.py
+++ b/analyze_model.py
@@ -48,13 +48,13 @@ def plot_dict(model, save_path):
 
     my_subplot(cd, [kers_per_row, kers_per_col], 'conv-dictionary', save_path)
 
-def evaluate_thrshold(model, save_path, name):
-    thrshold_avg = [float(model.softthrsh0.thrshold.mean())]
+def evaluate_threshold(model, save_path, name):
+    threshold_avg = [float(model.softthrsh0.threshold.mean())]
     for thrsh in model.softthrsh1:
-        thrshold_avg.append(float(thrsh.thrshold.mean()))
+        threshold_avg.append(float(thrsh.threshold.mean()))
 
-    plt.plot(range(len(thrshold_avg)), thrshold_avg, '*')
+    plt.plot(range(len(threshold_avg)), threshold_avg, '*')
     plt.savefig(os.path.join(save_path, name))
     plt.clf()
 
@@ -62,7 +62,7 @@ def evaluate_csc(model, img_n, save_path, im_name):
     """Plot CSC
     """
     sparse_code_delta = []
-    for csc, csc_res, lista_iter in model.forward_enc_generataor(img_n.unsqueeze(0)):
+    for csc, csc_res, lista_iter in model.forward_enc_generator(img_n.unsqueeze(0)):
         _, depth, rows, cols = csc.shape
         sc_per_col = int(np.sqrt(depth))
         sc_per_row = sc_per_col + (depth - sc_per_col**2)
@@ -94,7 +94,7 @@ def evaluate(args):
 
     plot_dict(model, log_dir)
     evaluate_csc(model, testset[7][0], log_dir, testset.image_filenames[7])
-    evaluate_thrshold(model, log_dir, 'thrshold')
+    evaluate_threshold(model, log_dir, 'threshold')
 
 def main():
     """Run test on trained model.
diff --git a/common.py b/common.py
index 3d1b765..f7b470b 100644
--- a/common.py
+++ b/common.py
@@ -13,7 +13,7 @@ def to_np(_x):
     return _x.data.cpu().numpy()
 
 def I(_x): return _x
 
-def normilize(_x, _val=255, shift=0):
+def normalize(_x, _val=255, shift=0):
     return (_x - shift)/ _val
 
 def count_parameters(model):
@@ -48,21 +48,6 @@ def init_model_dir(path, name):
     os.mkdir(full_path)
     return full_path
 
-    '''
-    Either string defining an activation function or module (e.g. nn.ReLU)
-    '''
-    if isinstance(act_fun, str):
-        if act_fun == 'LeakyReLU':
-            return nn.LeakyReLU(0.2, inplace=True)
-        elif act_fun == 'ELU':
-            return nn.ELU()
-        elif act_fun == 'none':
-            return nn.Sequential()
-        else:
-            assert False
-    else:
-        return act_fun()
-
 def flip(x, dim):
     dim = x.dim() + dim if dim < 0 else dim
@@ -113,7 +98,7 @@ def delete_pixels(ins, is_training, sample_prob=0.3):
         return ins * mask + (1 - mask)
     return ins
 
-def reconsturction_loss(distance='l1', use_cuda=True):
+def reconstruction_loss(distance='l1', use_cuda=True):
 
     if distance == 'l1':
         dist = nn.L1Loss()
@@ -124,8 +109,6 @@
     else:
         raise ValueError(f"unidentified value {distance}")
 
-    #if use_cuda:
-    #    dist = dist.cuda()
     return dist
 
 def get_criterion(losses_types, factors, use_cuda=True):
@@ -138,13 +121,10 @@
     """
     losses = []
     for loss_type in losses_types:
-        losses.append(reconsturction_loss(loss_type))
-
-    #if use_cuda:
-    #    losses = [l.cuda() for l in losses]
+        losses.append(reconstruction_loss(loss_type))
 
     def total_loss(results, targets):
-        """Cacluate total loss
+        """Calculate total loss
         total_loss = sum_i losses_i(results_i, targets_i)
         Args:
             results(tensor): nn outputs.
@@ -181,20 +161,19 @@ def clean(save_path, save_count=10):
             print('removing', f)
             os.remove(f)
 
-def save_train(path, model, optimizer, schedular=None, epoch=None):
+def save_train(path, model, optimizer, scheduler=None, epoch=None):
     state = {
         'model': model.state_dict(),
         'optimizer': optimizer.state_dict(),
     }
-    #TODO(hillel): fix this so we can save schedular state
-    #if schedular is not None:
-    #    state['schedular'] = schedular.state_dict()
+    if scheduler is not None:
+        state['scheduler'] = scheduler.state_dict()
    if epoch is not None:
         state['epoch'] = epoch
     torch.save(state, os.path.join(path, 'epoch_{}'.format(epoch)))
     return os.path.join(path, 'epoch_{}'.format(epoch))
 
-def load_train(path, model, optimizer, schedular=None):
+def load_train(path, model, optimizer, scheduler=None):
     state = torch.load(path)
 
     pretrained = state['model']
@@ -205,12 +184,12 @@
         except Exception as e:
             print(f'did not restore optimizer due to error {e}')
     else:
-        print('Optimizer not inilized since no data for it exists in supplied path')
-    if schedular is not None:
-        if 'schedular' in state:
-            schedular.load_state_dict(state['schedular'])
+        print('Optimizer not initialized since no data for it exists in the supplied path')
+    if scheduler is not None:
+        if 'scheduler' in state:
+            scheduler.load_state_dict(state['scheduler'])
         else:
-            print('Schedular not inilized since no data for it exists in supplied path')
+            print('Scheduler not initialized since no data for it exists in the supplied path')
     if 'epoch' in state:
         e = state['epoch']
     else:
@@ -224,10 +203,9 @@
 def load_eval(path, model):
     state = torch.load(path, map_location='cpu')
 
     pretrained = state['model']
-    current = model.state_dict()
 
-    # very dangerous!!!
-    pretrained = {k:v for k, v in zip(current.keys(), pretrained.values())}
+    # Load state dict with strict=False to allow for model architecture changes.
+    # This will warn about missing or unexpected keys.
     model.load_state_dict(pretrained, strict=False)
     model.eval()
diff --git a/convsparse_net.py b/convsparse_net.py
index fc7c057..6b8c57f 100644
--- a/convsparse_net.py
+++ b/convsparse_net.py
@@ -97,9 +97,9 @@ def forward_enc(self, inputs):
             csc = self.softthrsh1[lyr](csc + sc_residual)
         return csc
 
-    def forward_enc_generataor(self, inputs):
-        """forwar encoder generator
-        Use for debug and anylize model.
+    def forward_enc_generator(self, inputs):
+        """Forward encoder generator.
+        Used for debugging and analyzing the model.
         """
         csc = self.softthrsh0(self.encode_conv0(inputs))
 
@@ -135,9 +135,8 @@ def __init__(self, _lambd):
         self._lambd = _lambd
 
     @property
-    def thrshold(self):
+    def threshold(self):
         return self._lambd
-#        self._lambd.register_hook(print)
 
     def forward(self, inputs):
         """ sign(inputs) * (abs(inputs) - thrshold)"""
diff --git a/datasets.py b/datasets.py
index 8c5d73c..ea47321 100644
--- a/datasets.py
+++ b/datasets.py
@@ -1,5 +1,4 @@
 from __future__ import division
-from torch.autograd import Variable
 import torch.utils.data as data
 from functools import partial
 import torch
@@ -14,7 +13,7 @@ def is_image_file(filename):
 
 def load_img(filepath, convert='L'):
     img = np.array(Image.open(filepath).convert(convert))
-    img = Variable(torch.from_numpy(img[None,...]),requires_grad=False).float()
+    img = torch.from_numpy(img[None,...]).float()
     return img
 
 def find_file_in_folder(folder, file_name):
@@ -110,7 +109,7 @@ def __init__(self, npz_path, key, pre_transform, inputs_transform, use_cuda=True
         self._inputs_transform = inputs_transform
 
     def __getitem__(self, index):
-        _targets = Variable(torch.from_numpy(self._targets[index]).float(), requires_grad=False)
+        _targets = torch.from_numpy(self._targets[index]).float()
         _inputs = self._inputs_transform(_targets)
         if self._use_cuda:
             _targets = _targets.cuda()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c424cff
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+# Core dependencies
+torch>=1.7.0
+torchvision>=0.8.0
+numpy>=1.19.0
+matplotlib>=3.3.0
+scipy>=1.5.0
+Pillow>=8.3.2
+ipython>=7.0.0
+
+# Development dependencies
+pybm3d>=3.0.0
diff --git a/test_denoise.py b/test_denoise.py
index 7b1a30c..d5a0092 100644
--- a/test_denoise.py
+++ b/test_denoise.py
@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 import common
-from common import gaussian, normilize, nhwc_to_nchw, to_np
+from common import gaussian, normalize, nhwc_to_nchw, to_np
 import numpy as np
 from datasets import DatasetFromFolder
 from torch.utils.data import DataLoader
@@ -10,7 +10,7 @@
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import pybm3d
-import scipy.misc
+from PIL import Image
 from convsparse_net import LISTAConvDict
 from datasets import DatasetFromNPZ
 import arguments
@@ -28,14 +28,13 @@ def plot_res(img, img_n, res, name, log_path, other_res=None):
     def im_path(typ):
         return os.path.join(log_path, '{}_{}.png'.format(typ, name))
 
-    scipy.misc.toimage(img * 255, cmin=0.0, cmax=255).save(im_path('orig'))
-    scipy.misc.toimage(img_n * 255, cmin=0.0, cmax=255).save(im_path('noisy'))
-    scipy.misc.toimage(res * 255, cmin=0.0, cmax=255).save(im_path('ours'))
+    Image.fromarray(np.clip(img * 255, 0, 255).astype(np.uint8)).save(im_path('orig'))
+    Image.fromarray(np.clip(img_n * 255, 0, 255).astype(np.uint8)).save(im_path('noisy'))
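+    # Unlike scipy.misc.toimage, PIL does not rescale or clip values on its own,
+    # so arrays are clipped to [0, 255] and cast to uint8 before saving.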
+    Image.fromarray(np.clip(res * 255, 0, 255).astype(np.uint8)).save(im_path('ours'))
 
     if other_res is not None:
         sub_typ = 221
-        scipy.misc.toimage(other_res * 255, cmin=0.0,
-                           cmax=255).save(im_path('other'))
+        Image.fromarray(np.clip(other_res * 255, 0, 255).astype(np.uint8)).save(im_path('other'))
     else:
         sub_typ = 131
 
@@ -78,10 +77,10 @@ def restore_model(model_args, saved_model_path):
 
 def create_famous_dataset(test_path, noise, pad):
     def pre_process_fn(_x):
-        return normilize(_x, 255)
+        return normalize(_x, 255)
 
     def input_process_fn(_x):
-        return gaussian(_x, is_training=True, mean=0, stddev=normilize(noise, 255))
+        return gaussian(_x, is_training=True, mean=0, stddev=normalize(noise, 255))
 
     return DatasetFromFolder(
         test_path,
@@ -93,10 +92,10 @@ def input_process_fn(_x):
 
 def create_test_dataset(test_path, noise, pad):
     def pre_process_fn(_x):
-        return normilize(_x, 255)
+        return normalize(_x, 255)
 
     def input_process_fn(_x):
-        return gaussian(_x, is_training=True, mean=0, stddev=normilize(noise, 255))
+        return gaussian(_x, is_training=True, mean=0, stddev=normalize(noise, 255))
 
     file_of_filenames =\
         os.path.join(common.project_dir(), 'pascal2010_test_imgs.txt')
@@ -109,7 +108,7 @@ def input_process_fn(_x):
         inputs_transform=input_process_fn
     )
 
-def avarge_psnr_testset(model, test_loader, border, noise):
+def average_psnr_testset(model, test_loader, border, noise):
 
     padder = nn.ReflectionPad2d(border)
@@ -128,29 +127,30 @@ def _bm3d(_img_n):
     print('running avg psnr avg_over image count')
     img_count = 0
-    for img, img_n in test_loader:
+    with torch.no_grad():
+        for img, img_n in test_loader:
 
-        img = padder(img)
-        img_n = padder(img_n)
+            img = padder(img)
+            img_n = padder(img_n)
 
-        output, _ = model(img_n)
+            output, _ = model(img_n)
 
-        np_img = _to_np(img)
-        np_output = np.clip(_to_np(output), 0, 1)
-        bm3d_img = np.clip(_bm3d(img_n), 0, 1)
+            np_img = _to_np(img)
+            np_output = np.clip(_to_np(output), 0, 1)
+            bm3d_img = np.clip(_bm3d(img_n), 0, 1)
 
-        bm3d_psnr += common.psnr(np_img, bm3d_img)
-        ours_psnr += common.psnr(np_img, np_output)
+            bm3d_psnr += common.psnr(np_img, bm3d_img)
+            ours_psnr += common.psnr(np_img, np_output)
 
-        img_count += 1
-        if img_count == avg_over:
-            break
+            img_count += 1
+            if img_count == avg_over:
+                break
 
     bm3d_psnr = bm3d_psnr / img_count
     ours_psnr = ours_psnr / img_count
-    print(f'testset avargs of {img_count} psnr ours - {ours_psnr}, bm3d - {bm3d_psnr}')
+    print(f'testset average over {img_count} images: psnr ours - {ours_psnr}, bm3d - {bm3d_psnr}')
     return ours_psnr, bm3d_psnr
 
-def famous_images_teset(model, test_loader, image_names, border, noise):
+def famous_images_test(model, test_loader, image_names, border, noise):
     """Run and save tests on specific images.
""" padder = nn.ReflectionPad2d(border) @@ -167,28 +167,29 @@ def _bm3d(x): psnrs = [] res_array = [] idx = 0 - for test_data, test_name in zip(test_loader, image_names): + with torch.no_grad(): + for test_data, test_name in zip(test_loader, image_names): - img, img_n = test_data - img = padder(img) - img_n = padder(img_n) + img, img_n = test_data + img = padder(img) + img_n = padder(img_n) - output, _ = model(img_n) + output, _ = model(img_n) - np_img = _to_np(img) - np_output = np.clip(_to_np(output), 0, 1) - np_img_n = _to_np(img_n) + np_img = _to_np(img) + np_output = np.clip(_to_np(output), 0, 1) + np_img_n = _to_np(img_n) - bm3d_img = _bm3d(img_n) + bm3d_img = _bm3d(img_n) - bm3d_psnr = common.psnr(np_img, bm3d_img) - ours_psnr = common.psnr(np_img, np_output, False) - psnrs.append({'ours': ours_psnr, 'bm3d': bm3d_psnr}) - res_array.append((np_img, np_img_n, np_output, bm3d_img)) + bm3d_psnr = common.psnr(np_img, bm3d_img) + ours_psnr = common.psnr(np_img, np_output, False) + psnrs.append({'ours': ours_psnr, 'bm3d': bm3d_psnr}) + res_array.append((np_img, np_img_n, np_output, bm3d_img)) - print('Test Image {} psnr ours {} bm3d {}'.format(test_name, ours_psnr, - bm3d_psnr)) - idx += 1 + print('Test Image {} psnr ours {} bm3d {}'.format(test_name, ours_psnr, + bm3d_psnr)) + idx += 1 print('Avg famous psnr ours: {} other: {}'.format(np.mean([p['ours'] for p in psnrs]), np.mean([p['bm3d'] for p in psnrs]))) @@ -204,17 +205,17 @@ def test(args, saved_model_path, noise, famous_path, testset_path=None): if USE_CUDA: model = model.cuda() - norm_noise = common.normilize(noise, 255) + norm_noise = common.normalize(noise, 255) padding = 20 if testset_path is not None and os.path.isdir(testset_path): testset = create_test_dataset(testset_path, noise, padding) test_loader = DataLoader(testset) - ours_psnr, bm3d_psnr = avarge_psnr_testset(model, test_loader, - padding, norm_noise) + ours_psnr, bm3d_psnr = average_psnr_testset(model, test_loader, + padding, norm_noise) else: - print('testset path was not provided or does not exsist on machine' - +' skipping to famouse images testset') + print('testset path was not provided or does not exist on machine' + +' skipping to famous images testset') ours_psnr = bm3d_psnr = 0 testset = create_famous_dataset(famous_path, noise, padding) @@ -222,7 +223,7 @@ def test(args, saved_model_path, noise, famous_path, testset_path=None): famous_loader = DataLoader(testset) fam_psnrs, fam_res_array =\ - famous_images_teset( + famous_images_test( model, famous_loader, file_names, diff --git a/train_denoise.py b/train_denoise.py index 9dbb2c3..fe0e078 100644 --- a/train_denoise.py +++ b/train_denoise.py @@ -14,8 +14,8 @@ from convsparse_net import LISTAConvDict import common from common import save_train, load_train, clean, to_np -from common import gaussian, normilize, nhwc_to_nchw -from common import reconsturction_loss, init_model_dir +from common import gaussian, normalize, nhwc_to_nchw +from common import reconstruction_loss, init_model_dir from test_denoise import plot_res from datasets import DatasetFromNPZ import arguments @@ -32,9 +32,9 @@ def _pprint(stuff): def get_train_valid_loaders(dataset_path, batch_size, noise): def pre_process_fn(_x): - return normilize(nhwc_to_nchw(_x), 255) + return normalize(nhwc_to_nchw(_x), 255) def input_process_fn(_x): - return gaussian(_x, is_training=True, mean=0, stddev=normilize(noise, 255)) + return gaussian(_x, is_training=True, mean=0, stddev=normalize(noise, 255)) train_loader = DatasetFromNPZ(npz_path=dataset_path, 
                                   key='TRAIN',
                                   use_cuda=USE_CUDA,
@@ -64,8 +64,6 @@ def step(model, img, img_n, optimizer, criterion):
     loss = criterion(results, targets)
     loss.backward()
 
-    # torch.nn.utils.clip_grad_value_(model.decode_conv1.parameters(), 15)
-
     optimizer.step()
 
     return float(loss), output.cpu()
@@ -88,39 +86,39 @@ def maybe_save_model(
         clean(save_path, save_count=10)
     elif curr_val < max(other_values) - 1:
         load_train(path, model, opt)
-        schd.step()
-        print(f'model diverge reloded last model state current lr {schd.get_lr()}')
+        print(f'model diverged, reloaded last model state, current lr {schd.get_last_lr()}')
     return path
 
 def run_valid(model, data_loader, criterion, logdir, name, should_plot=False):
     """
-    Run over whole valid set calculate psnr and critirion loss.
+    Run over the whole validation set and calculate psnr and criterion loss.
     """
     loss = 0
     psnr = 0
     def _to_np(x):
         return to_np(x)[..., 20:-20, 20:-20]
 
-    for img, img_n in data_loader:
-        _out, _ = model(img_n)
-        loss += float(criterion(img.data[..., 20:-20, 20:-20],
-                                _out.data[..., 20:-20, 20:-20]))
-
-        np_output = np.clip(_to_np(_out), 0, 1)
-        np_img = _to_np(img)
-        psnr += common.psnr(np_img, np_output)
-
-    img, img_n = data_loader.dataset[0]
-    output, _ = model(img_n.unsqueeze(0))
-    if should_plot:
-        plot_res(
-            _to_np(img),
-            _to_np(img_n),
-            _to_np(output),
-            name,
-            logdir
-        )
+    with torch.no_grad():
+        for img, img_n in data_loader:
+            _out, _ = model(img_n)
+            loss += float(criterion(img.data[..., 20:-20, 20:-20],
+                                    _out.data[..., 20:-20, 20:-20]))
+
+            np_output = np.clip(_to_np(_out), 0, 1)
+            np_img = _to_np(img)
+            psnr += common.psnr(np_img, np_output)
+
+        img, img_n = data_loader.dataset[0]
+        output, _ = model(img_n.unsqueeze(0))
+        if should_plot:
+            plot_res(
+                _to_np(img),
+                _to_np(img_n),
+                _to_np(output),
+                name,
+                logdir
+            )
     return loss / len(data_loader), psnr / len(data_loader)
 
 def plot_losses(train_loss, valid_loss, valid_psnr, path):
@@ -139,29 +137,14 @@ def train(model, args):
 
-    #optimizer = optim.Adam(
-    #    [
-    #        {'params': model.softthrsh0.parameters()},
-    #        {'params': model.softthrsh1.parameters()},
-    #        {'params': model.encode_conv0.parameters()},
-    #        {'params': model.encode_conv1.parameters()},
-    #        {'params': model.decode_conv1.parameters(), 'lr':
-    #         args['learning_rate']},
-    #    ],
-    #    lr=args['learning_rate']
-    #)
     optimizer = optim.Adam(
         model.parameters(),
         lr=args['learning_rate']
     )
-    break_down_sum = sum(map(common.count_parameters, [model.softthrsh0, model.encode_conv0,
-                                                       model.softthrsh1, model.encode_conv1,
-                                                       model.decode_conv1]))
-    # ReduceLROnPlateau(optimizer, 'min', verbose=True)
     train_loader, valid_loader = get_train_valid_loaders(args['dataset_path'],
                                                          args['batch_size'], args['noise'])
-    valid_loss = reconsturction_loss(use_cuda=True)
+    valid_loss = reconstruction_loss(use_cuda=True)
     criterion = common.get_criterion(
         losses_types=['l1', 'l2'] , #, 'msssim'],
@@ -181,9 +164,6 @@
     valid_every = int(0.1 * len(train_loader))
 
     gamma = 0.1
-    #if model.ista_iters < 20 else\
-    #    0.1 * (20 / args['noise']) * (1 / model.ista_iters)**0.5
-
     scheduler = lr_scheduler.StepLR(optimizer, step_size=3, gamma=gamma)
 
     if args.get('load_path', '') != '':
@@ -212,8 +192,6 @@
                 itr == valid_every
             )
 
-            scheduler.step(_v_loss)
-
             model_path = maybe_save_model(
                 model,
                 optimizer,
@@ -232,6 +210,9 @@
                     running_loss / valid_every, _v_loss, _v_psnr))
                 running_loss = 0
 
+        # Step the scheduler at the end of each epoch
+        scheduler.step()
+
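+    # StepLR decays the learning rate by `gamma` after every `step_size` calls
+    # to scheduler.step(); with the per-epoch step above, that is every
+    # `step_size` epochs.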
     plot_losses(_train_loss, _valid_loss, _valid_psnr, args['save_dir'])
 
     return model_path, _valid_loss[-1], _valid_psnr[-1]