diff --git a/.gitignore b/.gitignore index 54f011d6..1750c7e8 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ /General_Deep_Q_RL.sln /General_Deep_Q_RL/theano.py /General_Deep_Q_RL/plot.png +.DS_Store diff --git a/deer/learning_algos/CRAR_pytorch.py b/deer/learning_algos/CRAR_pytorch.py new file mode 100644 index 00000000..6f7131f5 --- /dev/null +++ b/deer/learning_algos/CRAR_pytorch.py @@ -0,0 +1,630 @@ +""" +Code for the CRAR learning algorithm using Keras + +""" + +import numpy as np +np.set_printoptions(threshold=np.nan) +from keras.optimizers import SGD,RMSprop +from keras import backend as K +from ..base_classes import LearningAlgo +from .NN_CRAR_pytorch import NN # Default Neural network used +#import tensorflow as tf +#config = tf.ConfigProto() +#config.gpu_options.allow_growth=True +#sess = tf.Session(config=config) +import copy +import torch +import torch.nn.functional as F +import torch.optim as optim +import pdb + +def mean_squared_error_p(y_true, y_pred): + """ Modified mean square error that clips + """ + return K.clip(K.max( K.square( y_pred - y_true ) , axis=-1 )-1,0.,100.) # = modified mse error L_inf + #return K.clip(K.mean( K.square( y_pred - y_true ) , axis=-1 )-1,0.,100.) # = modified mse error L_2 + + +def mean_squared_error_p_pytorch(y_pred): + """ Modified mean square error that clips + """ + return torch.sum(torch.clamp( (torch.max((y_pred)**2,dim=-1)[0] - 1), 0., 100.)) # = modified mse error L_inf + +def exp_dec_error(y_true, y_pred): + return K.exp( - 5.*K.sqrt( K.clip(K.sum(K.square(y_pred), axis=-1, keepdims=True),0.000001,10) ) ) + + +def exp_dec_error_pytorch(y_pred): + return torch.mean(torch.exp( - 5.*torch.sqrt( torch.clamp(torch.sum(y_pred**2, dim=-1),0.000001,10) ) )) + + + +def cosine_proximity2(y_true, y_pred): + """ This loss is similar to the native cosine_proximity loss from Keras + but it differs by the fact that only the two first components of the two vectors are used + """ + y_true = K.l2_normalize(y_true[:,0:2], axis=-1) + y_pred = K.l2_normalize(y_pred[:,0:2], axis=-1) + return -K.sum(y_true * y_pred, axis=-1) + + +def cosine_proximity2_pytorch(y_true, y_pred): + """ This loss is similar to the native cosine_proximity loss from Keras + but it differs by the fact that only the two first components of the two vectors are used + """ + + y_true = F.normalize(y_true[:,0:2],p=2,dim=-1) + y_pred = F.normalize(y_pred[:,0:2],p=2,dim=-1) + return -torch.sum(y_true * y_pred, dim=-1) + + +# def loss_diff_s_s_(y_true, y_pred): +# return K.square( 1. - K.sqrt( K.clip( K.sum(y_pred,axis=-1,keepdims=True), 0.000001 , 1. ) ) ) # tend to increase y_pred --> loss -1 + +class CRAR(LearningAlgo): + """ + Combined Reinforcement learning via Abstract Representations (CRAR) using Keras + + Parameters + ----------- + environment : object from class Environment + The environment in which the agent evolves. + rho : float + Parameter for rmsprop. Default : 0.9 + rms_epsilon : float + Parameter for rmsprop. Default : 0.0001 + momentum : float + Momentum for SGD. Default : 0 + clip_norm : float + The gradient tensor will be clipped to a maximum L2 norm given by this value. + freeze_interval : int + Period during which the target network is freezed and after which the target network is updated. Default : 1000 + batch_size : int + Number of tuples taken into account for each iteration of gradient descent. Default : 32 + update_rule: str + {sgd,rmsprop}. Default : rmsprop + random_state : numpy random number generator + Set the random seed. 
+ double_Q : bool, optional + Activate or not the double_Q learning. + More informations in : Hado van Hasselt et al. (2015) - Deep Reinforcement Learning with Double Q-learning. + neural_network : object, optional + Default is deer.learning_algos.NN_keras + """ + + def __init__(self, environment, rho=0.9, rms_epsilon=0.0001, momentum=0, clip_norm=0, freeze_interval=1000, batch_size=32, update_rule="rmsprop", random_state=np.random.RandomState(), double_Q=False, neural_network=NN, **kwargs): + """ Initialize the environment + + """ + LearningAlgo.__init__(self,environment, batch_size) + + self._rho = rho + self._rms_epsilon = rms_epsilon + self._momentum = momentum + self._clip_norm = clip_norm + self._update_rule = update_rule + self._freeze_interval = freeze_interval + self._double_Q = double_Q + self._random_state = random_state + self.update_counter = 0 + self._high_int_dim = kwargs.get('high_int_dim',False) + self._internal_dim = kwargs.get('internal_dim',2) + self.loss_interpret=0 + self.loss_T=0 + self.lossR=0 + self.loss_Q=0 + self.loss_disentangle_t=0 + self.loss_disambiguate1=0 + self.loss_disambiguate2=0 + self.loss_gamma=0 + + self.learn_and_plan = neural_network(self._batch_size, self._input_dimensions, self._n_actions, self._random_state, high_int_dim=self._high_int_dim, internal_dim=self._internal_dim) + + + self.encoder = self.learn_and_plan.encoder_model() + self.encoder_diff = self.learn_and_plan.encoder_diff_model + + + self.R = self.learn_and_plan.float_model() + self.Q = self.learn_and_plan.Q_model() + self.gamma = self.learn_and_plan.float_model() + self.transition = self.learn_and_plan.transition_model() + + self.full_Q=self.learn_and_plan.full_Q_model + + + # used to fit rewards + self.full_R = self.learn_and_plan.full_float_model + + + # used to fit gamma + self.full_gamma = self.learn_and_plan.full_float_model + + + # used to fit transitions + self.diff_Tx_x_ = self.learn_and_plan.diff_Tx_x_ + + # constraint on consecutive t + self.diff_s_s_ = self.learn_and_plan.encoder_diff_model + + + # used to force features variations + if(self._high_int_dim==False): + self.force_features=self.learn_and_plan.force_features + + + self.all_models = [self.encoder,self.R,self.Q,self.gamma,self.transition] + + # Compile all models + self._compile() + + + # Instantiate the same neural network as a target network. + self.learn_and_plan_target = neural_network(self._batch_size, self._input_dimensions, self._n_actions, self._random_state, high_int_dim=self._high_int_dim, internal_dim=self._internal_dim) + self.encoder_target = self.learn_and_plan_target.encoder_model() + self.Q_target = self.learn_and_plan_target.Q_model() + self.R_target = self.learn_and_plan_target.float_model() + self.gamma_target = self.learn_and_plan_target.float_model() + self.transition_target = self.learn_and_plan_target.transition_model() + + self.full_Q_target = self.learn_and_plan_target.full_Q_model + + + self.all_models_target = [self.encoder_target,self.R_target,self.Q_target,self.gamma_target,self.transition_target] + + self._resetQHat() + + + + def train(self, states_val, actions_val, rewards_val, next_states_val, terminals_val): + """ + Train CRAR from one batch of data. + + Parameters + ----------- + states_val : numpy array of objects + Each object is a numpy array that relates to one of the observations + with size [batch_size * history size * size of punctual observation (which is 2D,1D or scalar)]). 
+ actions_val : numpy array of integers with size [self._batch_size] + actions[i] is the action taken after having observed states[:][i]. + rewards_val : numpy array of floats with size [self._batch_size] + rewards[i] is the reward obtained for taking actions[i-1]. + next_states_val : numpy array of objects + Each object is a numpy array that relates to one of the observations + with size [batch_size * history size * size of punctual observation (which is 2D,1D or scalar)]). + terminals_val : numpy array of booleans with size [self._batch_size] + terminals[i] is True if the transition leads to a terminal state and False otherwise + + Returns + ------- + Average loss of the batch training for the Q-values (RMSE) + Individual (square) losses for the Q-values for each tuple + """ + + onehot_actions = np.zeros((self._batch_size, self._n_actions)) + onehot_actions[np.arange(self._batch_size), actions_val] = 1 + onehot_actions_rand = np.zeros((self._batch_size, self._n_actions)) + onehot_actions_rand[np.arange(self._batch_size), np.random.randint(0,2,(32))] = 1 + states_val=list(states_val) + next_states_val=list(next_states_val) + + + states_val = torch.from_numpy(states_val[0]).float() + next_states_val = torch.from_numpy(next_states_val[0]).float() + onehot_actions = torch.from_numpy(onehot_actions).float() + terminals_val = torch.from_numpy(terminals_val[:,None].astype(np.int32)).float() + rewards_val = torch.from_numpy(rewards_val[:,None].astype(np.int32)).float() + + Es_=self.encoder.predict(next_states_val).detach() + Es=self.encoder.predict(states_val).detach() + ETs=self.transition.predict(torch.cat((Es,onehot_actions),-1)).detach() + R=self.R.predict(torch.cat((Es,onehot_actions),-1)).detach() + + if(self.update_counter%500==0): + print ("Printing a few elements useful for debugging:") + #print ("states_val[0][0]") + #print (states_val[0][0]) + #print ("next_states_val[0][0]") + #print (next_states_val[0][0]) + print ("actions_val[0], rewards_val[0], terminals_val[0]") + print (actions_val[0], rewards_val[0], terminals_val[0]) + print ("Es[0],ETs[0],Es_[0]") + + # if(Es.ndim==4): + # print (np.transpose(Es, (0, 3, 1, 2))[0],np.transpose(ETs, (0, 3, 1, 2))[0],np.transpose(Es_, (0, 3, 1, 2))[0]) # data_format='channels_last' --> 'channels_first' + # else: + print (Es[0],ETs[0],Es_[0]) + print ("R[0]") + print (R[0]) + + self.optimizer_diff_Tx_x_.zero_grad() + out = self.diff_Tx_x_(states_val,next_states_val,onehot_actions,(1-terminals_val),self.encoder,self.transition) + loss = torch.nn.MSELoss() + loss_val = loss(out,torch.zeros_like(Es)) + self.loss_T+= loss_val.data.numpy() + loss_val.backward() + for param in list(self.transition.parameters()): + param.grad.data.clamp_(-1, 1) + self.optimizer_diff_Tx_x_.step() + + + + self.optimizer_full_R.zero_grad() + out = self.full_R(states_val,onehot_actions,self.encoder,self.R) + loss = torch.nn.MSELoss() + loss_val = loss(out,rewards_val) + self.lossR+= loss_val.data.numpy() + loss_val.backward() + for param in list(self.encoder.parameters()) + list(self.R.parameters()): + param.grad.data.clamp_(-1, 1) + self.optimizer_full_R.step() + + + self.optimizer_full_gamma.zero_grad() + out = self.full_gamma(states_val,onehot_actions,self.encoder,self.gamma) + loss = torch.nn.MSELoss() + loss_val = loss(out,(1-terminals_val[:])*self._df) + self.loss_gamma+= loss_val.data.numpy() + loss_val.backward() + for param in list(self.encoder.parameters()) + list(self.gamma.parameters()): + param.grad.data.clamp_(-1, 1) + self.optimizer_full_gamma.step() + + + + 
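(Aside, not part of the patch.) The transition, reward and discount updates above, and the Q-learning update further below, all repeat the same seven-line pattern: zero the relevant optimizer, run the forward pass, take an MSE loss, backpropagate, clamp each gradient element-wise to [-1, 1] and step. As a minimal sketch only, with a hypothetical helper name `fit_mse_head`, the MSE-based updates could be factored like this:

```python
import torch
import torch.nn.functional as F

def fit_mse_head(optimizer, parameters, prediction, target):
    """One MSE gradient step with the same element-wise gradient clipping
    used throughout CRAR.train(). Returns the scalar loss for logging."""
    optimizer.zero_grad()
    loss_val = F.mse_loss(prediction, target)
    loss_val.backward()
    for param in parameters:
        if param.grad is not None:        # skip parameters untouched by this head
            param.grad.data.clamp_(-1, 1)
    optimizer.step()
    return loss_val.item()
```

With such a helper, the discount-factor update above would read, for example, `self.loss_gamma += fit_mse_head(self.optimizer_full_gamma, list(self.encoder.parameters()) + list(self.gamma.parameters()), out, (1 - terminals_val) * self._df)`.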
L_infinity ball of radius 1 loss + self.optimizer_encoder.zero_grad() + out = self.encoder(states_val) + loss_val = mean_squared_error_p_pytorch(out) + self.loss_disambiguate1+= loss_val.data.numpy() + loss_val.backward() + for param in list(self.encoder.parameters()): + param.grad.data.clamp_(-1, 1) + self.optimizer_encoder.step() + + + + + # This one is very important + # Entropy maximization loss (through exponential) between two random states + def roll(x, n): + return torch.cat((x[-n:], x[:-n])) + rolled = roll(states_val,-31) + self.optimizer_encoder_diff.zero_grad() + out = self.encoder_diff(self.encoder,states_val,rolled) + loss_val = exp_dec_error_pytorch(out) + + self.loss_disambiguate2+= loss_val.data.numpy() + loss_val.backward() + for param in list(self.encoder.parameters()): + param.grad.data.clamp_(-1, 1) + self.optimizer_encoder_diff.step() + + + + + # Not so much this one + # Entropy maximization loss (through exponential) between two consecutive states + self.optimizer_diff_s_s_.zero_grad() + out = self.diff_s_s_(self.encoder,states_val,next_states_val) + loss_val = exp_dec_error_pytorch(out) + self.loss_disentangle_t+= loss_val.data.numpy() + loss_val.backward() + for param in list(self.encoder.parameters()): + param.grad.data.clamp_(-1, 1) + self.optimizer_diff_s_s_.step() + + + # Q Learning loss + if self.update_counter % self._freeze_interval == 0: + self._resetQHat() + next_q_vals = self.full_Q_target(next_states_val,self.encoder_target,self.Q_target).detach() + max_next_q_vals=torch.max(next_q_vals, dim=1)[0] + not_terminals= (1 - terminals_val) + target = rewards_val + not_terminals * self._df * max_next_q_vals[:,None] + + self.optimizer_full_Q.zero_grad() + q_vals=self.full_Q(states_val,self.encoder,self.Q).gather(1, torch.from_numpy(actions_val.astype(int)[:,None])) + loss = torch.nn.MSELoss() + loss_val = loss(q_vals,target) + loss = loss_val.data.numpy() + self.loss_Q+= loss + loss_val.backward() + for param in list(self.encoder.parameters()) + list(self.Q.parameters()): + param.grad.data.clamp_(-1, 1) + self.optimizer_full_Q.step() + + + + + + + if(self.update_counter%500==0): + print ("self.loss_T/500., self.lossR/500., self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.") + print (self.loss_T/500., self.lossR/500.,self.loss_gamma/500., self.loss_Q/500., self.loss_disentangle_t/500., self.loss_disambiguate1/500., self.loss_disambiguate2/500.) + + if(self._high_int_dim==False): + print ("self.loss_interpret/500.") + print (self.loss_interpret/500.) 
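+            # The seven running sums printed above are, in order: the transition-consistency
+            # loss (loss_T), reward loss (lossR), discount loss (loss_gamma), Q-learning
+            # loss (loss_Q), the entropy term between consecutive states (loss_disentangle_t),
+            # the L_inf contraction penalty (loss_disambiguate1) and the entropy term between
+            # random states (loss_disambiguate2); all are averaged over the last 500 updates
+            # and reset to zero just below. loss_interpret tracks the feature-forcing
+            # objective, whose optimizer is commented out in this port, so it stays at zero.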
+ + self.lossR=0 + self.loss_gamma=0 + self.loss_Q=0 + self.loss_T=0 + self.loss_interpret=0 + + self.loss_disentangle_t=0 + self.loss_disambiguate1=0 + self.loss_disambiguate2=0 + + + if(self.update_counter%100==0): + print ("Number of training steps:"+str(self.update_counter)+".") + + self.update_counter += 1 + + + + + return np.sqrt(loss),(q_vals.detach()-target)**2 + + + def _compile(self): + """ Compile all the optimizers for the different losses + """ + + + if (self._update_rule=="rmsprop"): + self.optimizer_full_Q=optim.RMSprop(list(self.encoder.parameters()) + list(self.Q.parameters()), lr=self._lr, alpha=self._rho, eps=self._rms_epsilon) + self.optimizer_diff_Tx_x_=optim.RMSprop( list(self.encoder.parameters()) +list(self.transition.parameters()), lr=self._lr, alpha=self._rho, eps=self._rms_epsilon) # Different optimizers for each network; + self.optimizer_full_R=optim.RMSprop(list(self.encoder.parameters()) + list(self.R.parameters()), lr=self._lr, alpha=self._rho, eps=self._rms_epsilon) # to possibly modify them separately + self.optimizer_full_gamma=optim.RMSprop(list(self.encoder.parameters()) + list(self.gamma.parameters()), lr=self._lr, alpha=self._rho, eps=self._rms_epsilon) + self.optimizer_encoder=optim.RMSprop(self.encoder.parameters(), lr=self._lr, alpha=self._rho, eps=self._rms_epsilon) + self.optimizer_encoder_diff=optim.RMSprop(self.encoder.parameters(), lr=self._lr, alpha=self._rho, eps=self._rms_epsilon) + self.optimizer_diff_s_s_=optim.RMSprop(self.encoder.parameters(), lr=self._lr, alpha=self._rho, eps=self._rms_epsilon) + # self.optimizer_force_features=optim.RMSprop(list(self.encoder.parameters()) + list(self.transition.parameters()), lr=self._lr, alpha=self._rho, eps=self._rms_epsilon) # This never gets updated + + else: + raise Exception('The update_rule '+self._update_rule+' is not implemented.') + + self.optimizers = [self.optimizer_full_Q,self.optimizer_diff_Tx_x_, + self.optimizer_full_R,self.optimizer_full_gamma, + self.optimizer_encoder,self.optimizer_encoder_diff, + self.optimizer_diff_s_s_ ] + + + def qValues(self, state_val): + """ Get the q values for one pseudo-state (without planning) + + Arguments + --------- + state_val : array of objects (or list of objects) + Each object is a numpy array that relates to one of the observations + with size [1 * history size * size of punctual observation (which is 2D,1D or scalar)]). + + Returns + ------- + The q values for the provided pseudo state + """ + copy_state=copy.deepcopy(state_val) #Required! + + return self.full_Q.predict([np.expand_dims(state,axis=0) for state in copy_state])[0] + + def qValues_planning(self, state_val, R, gamma, T, Q, d=5): + """ Get the average Q-values up to planning depth d for one pseudo-state. + + Arguments + --------- + state_val : array of objects (or list of objects) + Each object is a numpy array that relates to one of the observations + with size [1 * history size * size of punctual observation (which is 2D,1D or scalar)]). 
+ R : float_model + Model that fits the reward + gamma : float_model + Model that fits the discount factor + T : transition_model + Model that fits the transition between abstract representation + Q : Q_model + Model that fits the optimal Q-value + d : int + planning depth + + Returns + ------- + The average q values with planning depth up to d for the provided pseudo-state + """ + encoded_x = self.encoder.predict(state_val) + +# ## DEBUG PURPOSES +# print ( "self.full_Q.predict(state_val)[0]" ) +# print ( self.full_Q.predict(state_val)[0] ) +# identity_matrix = np.diag(np.ones(self._n_actions)) +# if(encoded_x.ndim==2): +# tile3_encoded_x=np.tile(encoded_x,(self._n_actions,1)) +# elif(encoded_x.ndim==4): +# tile3_encoded_x=np.tile(encoded_x,(self._n_actions,1,1,1)) +# else: +# print ("error") +# +# repeat_identity=np.repeat(identity_matrix,len(encoded_x),axis=0) +# ##print tile3_encoded_x +# ##print repeat_identity +# r_vals_d0=np.array(R.predict([tile3_encoded_x,repeat_identity])) +# #print "r_vals_d0" +# #print r_vals_d0 +# r_vals_d0=r_vals_d0.flatten() +# print "r_vals_d0" +# print r_vals_d0 +# next_x_predicted=T.predict([tile3_encoded_x,repeat_identity]) +# #print "next_x_predicted" +# #print next_x_predicted +# one_hot_first_action=np.zeros((1,self._n_actions)) +# one_hot_first_action[0]=1 +# next_x_predicted=T.predict([next_x_predicted[0:1],one_hot_first_action]) +# next_x_predicted=T.predict([next_x_predicted[0:1],one_hot_first_action]) +# next_x_predicted=T.predict([next_x_predicted[0:1],one_hot_first_action]) +# #print "next_x_predicted action 0 t4" +# #print next_x_predicted +# ## END DEBUG PURPOSES + + QD_plan=0 + for i in range(d+1): + Qd=self.qValues_planning_abstr(encoded_x, R, gamma, T, Q, d=i, branching_factor=[self._n_actions,2,2,2,2,2,2,2]).reshape(len(encoded_x),-1) + print ("Qd,i") + print (Qd,i) + QD_plan+=Qd + QD_plan=QD_plan/(d+1) + + print ("QD_plan") + print (QD_plan) + + return QD_plan + + def qValues_planning_abstr(self, state_abstr_val, R, gamma, T, Q, d, branching_factor=None): + """ Get the q values for pseudo-state(s) with a planning depth d. + This function is called recursively by decreasing the depth d at every step. + + Arguments + --------- + state_abstr_val : internal state(s). + R : float_model + Model that fits the reward + gamma : float_model + Model that fits the discount factor + T : transition_model + Model that fits the transition between abstract representation + Q : Q_model + Model that fits the optimal Q-value + d : int + planning depth + + Returns + ------- + The Q-values with planning depth d for the provided encoded state(s) + """ + #if(branching_factor==None or branching_factor>self._n_actions): + # branching_factor=self._n_actions + + + + n=len(state_abstr_val) + identity_matrix = np.identity(self._n_actions) + + this_branching_factor=branching_factor.pop(0) + if (n==1): + # We require that the first branching factor is self._n_actions so that this function return values + # with the right dimension (=self._n_actions). 
+ this_branching_factor=self._n_actions + + if (d==0): + if(this_branching_factor1, it provides the possibility to consider a sequence of transitions between s1 and s2 + (input a is then a list of actions) + + Returns + ------- + model with output Tx (= model estimate of x') + + """ + + + enc_s1 = encoder_model(s1) + enc_s2 = encoder_model(s2) + + Tx = transition_model(torch.cat((enc_s1,action),-1)) + + + return (Tx - enc_s2)*(not_terminal) + + def force_features(self,s1,s2,action,encoder_model,transition_model,plan_depth=0): + """ Instantiate a Keras model that provides the vector of the transition at E(s1). It is calculated as the different between E(s1) and E(T(s1)). + Used to force the directions of the transitions. + + The model takes the four following inputs: + s1 : list of objects + Each object is a numpy array that relates to one of the observations + with size [batch_size * history size * size of punctual observation (which is 2D,1D or scalar)]). + a : list of ints with length (plan_depth+1) + the action(s) considered at s1 + + Parameters + ----------- + encoder_model: instantiation of a Keras model for the encoder (E) + transition_model: instantiation of a Keras model for the transition (T) + plan_depth: if>1, it provides the possibility to consider a sequence of transitions between s1 and s2 + (input a is then a list of actions) + + Returns + ------- + model with output E(s1)-T(E(s1)) + + """ + + + enc_s1 = encoder_model(s1) + enc_s2 = encoder_model(s2) + + Tx = transition_model(torch.cat((enc_s1,action),-1)) + + + return (Tx - enc_s2) + + + def float_model(self): + """ Instantiate a Keras model for fitting a float from x. + + The model takes the following inputs: + x : internal state + a : int + the action considered at x + + Parameters + ----------- + + Returns + ------- + model that outputs a float + + """ + + + class FloatModel(nn.Module): + def __init__(self,internal_dim,n_actions): + super(FloatModel, self).__init__() + self.lin1 = nn.Linear(internal_dim+n_actions, 10) + self.lin2 = nn.Linear(10, 50) + self.lin3 = nn.Linear(50, 20) + self.lin4 = nn.Linear(20, 1) + + def forward(self, x): + + x = torch.tanh(self.lin1(x)) + x = torch.tanh(self.lin2(x)) + x = torch.tanh(self.lin3(x)) + x = self.lin4(x) + return x + def predict(self, x): + return self.forward(x) + model = FloatModel(self.internal_dim,self._n_actions) + + + + return model + + def full_float_model(self,x,action,encoder_model,float_model,plan_depth=0,transition_model=None): + """ Instantiate a Keras model for fitting a float from s. + + The model takes the four following inputs: + s : list of objects + Each object is a numpy array that relates to one of the observations + with size [batch_size * history size * size of punctual observation (which is 2D,1D or scalar)]). + a : list of ints with length (plan_depth+1) + the action(s) considered at s + + Parameters + ----------- + encoder_model: instantiation of a Keras model for the encoder (E) + float_model: instantiation of a Keras model for fitting a float from x + plan_depth: if>1, it provides the possibility to consider a sequence of transitions following s + (input a is then a list of actions) + transition_model: instantiation of a Keras model for the transition (T) + + Returns + ------- + model with output the reward r + """ + + + enc_x = encoder_model(x) + reward_pred = float_model(torch.cat((enc_x,action),-1)) + return reward_pred + + def Q_model(self): + """ Instantiate a a Keras model for the Q-network from x. 
+ + The model takes the following inputs: + x : internal state + + Parameters + ----------- + + Returns + ------- + model that outputs the Q-values for each action + """ + + + + class QFunction(nn.Module): + def __init__(self,internal_dim,n_actions): + super(QFunction, self).__init__() + self.lin1 = nn.Linear(internal_dim, 20) + self.lin2 = nn.Linear(20, 50) + self.lin3 = nn.Linear(50, 20) + self.lin4 = nn.Linear(20, n_actions) + + def forward(self, x): + x = torch.tanh(self.lin1(x)) + x = torch.tanh(self.lin2(x)) + x = torch.tanh(self.lin3(x)) + x = self.lin4(x) + return x + def predict(self, x): + return self.forward(x) + + model = QFunction(self.internal_dim,self._n_actions) + + + + + return model + + + def full_Q_model(self, x, encoder_model, Q_model, plan_depth=0, transition_model=None, R_model=None, discount_model=None): + """ Instantiate a a Keras model for the Q-network from s. + + The model takes the following inputs: + s : list of objects + Each object is a numpy array that relates to one of the observations + with size [batch_size * history size * size of punctual observation (which is 2D,1D or scalar)]). + a : list of ints with length plan_depth; if plan_depth=0, there isn't any input for a. + the action(s) considered at s + + Parameters + ----------- + encoder_model: instantiation of a Keras model for the encoder (E) + Q_model: instantiation of a Keras model for the Q-network from x. + plan_depth: if>1, it provides the possibility to consider a sequence of transitions following s + (input a is then a list of actions) + transition_model: instantiation of a Keras model for the transition (T) + R_model: instantiation of a Keras model for the reward + discount_model: instantiation of a Keras model for the discount + + Returns + ------- + model with output the Q-values + """ + + out = encoder_model(x) + Q_estim= Q_model(out) + + return Q_estim + +if __name__ == '__main__': + pass + \ No newline at end of file diff --git a/examples/test_CRAR/run_simple_maze_pytorch.py b/examples/test_CRAR/run_simple_maze_pytorch.py new file mode 100644 index 00000000..2c242b43 --- /dev/null +++ b/examples/test_CRAR/run_simple_maze_pytorch.py @@ -0,0 +1,199 @@ +"""Simple maze launcher +""" + +import sys +import logging +import numpy as np +from joblib import hash, dump +import os +import pdb + +from deer.default_parser import process_args +from deer.agent import NeuralAgent +from deer.learning_algos.CRAR_pytorch import CRAR +from simple_maze_env_pytorch import MyEnv as simple_maze_env +import deer.experiment.base_controllers as bc + +from deer.policies import EpsilonGreedyPolicy + + +class Defaults: + # ---------------------- + # Experiment Parameters + # ---------------------- + STEPS_PER_EPOCH = 5000 + EPOCHS = 50 + STEPS_PER_TEST = 1000 + PERIOD_BTW_SUMMARY_PERFS = 1 + + # ---------------------- + # Environment Parameters + # ---------------------- + FRAME_SKIP = 2 + + # ---------------------- + # DQN Agent parameters: + # ---------------------- + UPDATE_RULE = 'rmsprop' + LEARNING_RATE = 0.0005 + LEARNING_RATE_DECAY = 0.9 + DISCOUNT = 0.9 + DISCOUNT_INC = 1 + DISCOUNT_MAX = 0.99 + RMS_DECAY = 0.9 + RMS_EPSILON = 0.0001 + MOMENTUM = 0 + CLIP_NORM = 1.0 + EPSILON_START = 1.0 + EPSILON_MIN = 1.0 + EPSILON_DECAY = 10000 + UPDATE_FREQUENCY = 1 + REPLAY_MEMORY_SIZE = 1000000 + BATCH_SIZE = 32 + FREEZE_INTERVAL = 1000 + DETERMINISTIC = False + + +HIGHER_DIM_OBS = False + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + # --- Parse parameters --- + parameters = 
process_args(sys.argv[1:], Defaults) + if parameters.deterministic: + rng = np.random.RandomState(123456) + else: + rng = np.random.RandomState() + + # --- Instantiate environment --- + env = simple_maze_env(rng, higher_dim_obs=HIGHER_DIM_OBS) + + # --- Instantiate learning_algo --- + learning_algo = CRAR( + env, + parameters.rms_decay, + parameters.rms_epsilon, + parameters.momentum, + parameters.clip_norm, + parameters.freeze_interval, + parameters.batch_size, + parameters.update_rule, + rng, + high_int_dim=False, + internal_dim=2) + + test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.) + + # --- Instantiate agent --- + agent = NeuralAgent( + env, + learning_algo, + parameters.replay_memory_size, + max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))), + parameters.batch_size, + rng, + test_policy=test_policy) + + # --- Create unique filename for FindBestController --- + h = hash(vars(parameters), hash_name="sha1") + fname = "test_" + h + print("The parameters hash is: {}".format(h)) + print("The parameters are: {}".format(parameters)) + + # As for the discount factor and the learning rate, one can update periodically the parameter of the epsilon-greedy + # policy implemented by the agent. This controllers has a bit more capabilities, as it allows one to choose more + # precisely when to update epsilon: after every X action, episode or epoch. This parameter can also be reset every + # episode or epoch (or never, hence the resetEvery='none'). + agent.attach(bc.EpsilonController( + initial_e=parameters.epsilon_start, + e_decays=parameters.epsilon_decay, + e_min=parameters.epsilon_min, + evaluate_on='action', + periodicity=1, + reset_every='none')) + + + agent.run(10, 500) + print("end gathering data") + + # --- Bind controllers to the agent --- + # Before every training epoch (periodicity=1), we want to print a summary of the agent's epsilon, discount and + # learning rate as well as the training epoch number. + agent.attach(bc.VerboseController( + evaluate_on='epoch', + periodicity=1)) + + # Every epoch end, one has the possibility to modify the learning rate using a LearningRateController. Here we + # wish to update the learning rate after every training epoch (periodicity=1), according to the parameters given. + agent.attach(bc.LearningRateController( + initial_learning_rate=parameters.learning_rate, + learning_rate_decay=parameters.learning_rate_decay, + periodicity=1)) + + # Same for the discount factor. + agent.attach(bc.DiscountFactorController( + initial_discount_factor=parameters.discount, + discount_factor_growth=parameters.discount_inc, + discount_factor_max=parameters.discount_max, + periodicity=1)) + + # During training epochs, we want to train the agent after every [parameters.update_frequency] action it takes. + # Plus, we also want to display after each training episode (!= than after every training) the average bellman + # residual and the average of the V values obtained during the last episode, hence the two last arguments. + agent.attach(bc.TrainerController( + evaluate_on='action', + periodicity=parameters.update_frequency, + show_episode_avg_V_value=True, + show_avg_Bellman_residual=True)) + + # We wish to discover, among all versions of our neural network (i.e., after every training epoch), which one + # seems to generalize the better, thus which one has the highest validation score. Here, we do not care about the + # "true generalization score", or "test score". 
+ # To achieve this goal, one can use the FindBestController along with an InterleavedTestEpochControllers. It is + # important that the validationID is the same than the id argument of the InterleavedTestEpochController. + # The FindBestController will dump on disk the validation scores for each and every network, as well as the + # structure of the neural network having the best validation score. These dumps can then used to plot the evolution + # of the validation and test scores (see below) or simply recover the resulting neural network for your + # application. + agent.attach(bc.FindBestController( + validationID=simple_maze_env.VALIDATION_MODE, + testID=None, + unique_fname=fname)) + + # All previous controllers control the agent during the epochs it goes through. However, we want to interleave a + # "validation epoch" between each training epoch ("one of two epochs", hence the periodicity=2). We do not want + # these validation epoch to interfere with the training of the agent, which is well established by the + # TrainerController, EpsilonController and alike. Therefore, we will disable these controllers for the whole + # duration of the validation epochs interleaved this way, using the controllersToDisable argument of the + # InterleavedTestEpochController. For each validation epoch, we want also to display the sum of all rewards + # obtained, hence the showScore=True. Finally, we want to call the summarizePerformance method of ALE_env every + # [parameters.period_btw_summary_perfs] *validation* epochs. + agent.attach(bc.InterleavedTestEpochController( + id=simple_maze_env.VALIDATION_MODE, + epoch_length=parameters.steps_per_test, + controllers_to_disable=[0, 1, 2, 3, 4], + periodicity=2, + show_score=True, + summarize_every=1)) + + # --- Run the experiment --- + try: + os.mkdir("params") + except Exception: + pass + dump(vars(parameters), "params/" + fname + ".jldump") + agent.gathering_data=False + + + agent.run(parameters.epochs, parameters.steps_per_epoch) + + # --- Show results --- + basename = "scores/" + fname + scores = joblib.load(basename + "_scores.jldump") + plt.plot(range(1, len(scores['vs'])+1), scores['vs'], label="VS", color='b') + plt.legend() + plt.xlabel("Number of epochs") + plt.ylabel("Score") + plt.savefig(basename + "_scores.pdf") + plt.show() \ No newline at end of file diff --git a/examples/test_CRAR/simple_maze_env_pytorch.py b/examples/test_CRAR/simple_maze_env_pytorch.py new file mode 100644 index 00000000..7602d31b --- /dev/null +++ b/examples/test_CRAR/simple_maze_env_pytorch.py @@ -0,0 +1,429 @@ +""" Simple maze environment + +""" +import numpy as np +import cv2 +import pdb +import torch + +from deer.base_classes import Environment + +import matplotlib +matplotlib.use('agg') +# matplotlib.use('qt5agg') +from mpl_toolkits.axes_grid1 import host_subplot +import mpl_toolkits.axisartist as AA +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import matplotlib.cm as cm +from matplotlib.patches import Circle, Rectangle +from matplotlib.offsetbox import AnchoredOffsetbox, TextArea, DrawingArea, HPacker +import copy + +class MyEnv(Environment): + VALIDATION_MODE = 0 + + def __init__(self, rng, **kwargs): + + self._mode = -1 + self._mode_score = 0.0 + self._mode_episode_count = 0 + self._size_maze=8 + self._higher_dim_obs=kwargs["higher_dim_obs"] + self.create_map() + self.intern_dim=2 + + def create_map(self): + self._map=np.zeros((self._size_maze,self._size_maze)) + self._map[-1,:]=1 + self._map[0,:]=1 + self._map[:,0]=1 + 
self._map[:,-1]=1 + self._map[:,self._size_maze//2]=1 + self._map[self._size_maze//2,self._size_maze//2]=0 + self._pos_agent=[2,2] + self._pos_goal=[self._size_maze-2,self._size_maze-2] + + + def reset(self, mode): + self.create_map() + + self._map[self._size_maze//2,self._size_maze//2]=0 + + if mode == MyEnv.VALIDATION_MODE: + if self._mode != MyEnv.VALIDATION_MODE: + self._mode = MyEnv.VALIDATION_MODE + self._mode_score = 0.0 + self._mode_episode_count = 0 + + else: + self._mode_episode_count += 1 + elif self._mode != -1: + self._mode = -1 + + # Setting the starting position of the agent + self._pos_agent=[self._size_maze//2,self._size_maze//2] + + #print ("new map:") + #print (self._map) + #print ("reset mode") + #print (mode) + + return [1 * [self._size_maze * [self._size_maze * [0]]]] + + + def act(self, action): + """Applies the agent action [action] on the environment. + + Parameters + ----------- + action : int + The action selected by the agent to operate on the environment. Should be an identifier + included between 0 included and nActions() excluded. + """ + + self._cur_action=action + if(action==0): + if(self._map[self._pos_agent[0]-1,self._pos_agent[1]]==0): + self._pos_agent[0]=self._pos_agent[0]-1 + elif(action==1): + if(self._map[self._pos_agent[0]+1,self._pos_agent[1]]==0): + self._pos_agent[0]=self._pos_agent[0]+1 + elif(action==2): + if(self._map[self._pos_agent[0],self._pos_agent[1]-1]==0): + self._pos_agent[1]=self._pos_agent[1]-1 + elif(action==3): + if(self._map[self._pos_agent[0],self._pos_agent[1]+1]==0): + self._pos_agent[1]=self._pos_agent[1]+1 + + # There is no reward in this simple environment + self.reward = 0 + + self._mode_score += self.reward + return self.reward + + def summarizePerformance(self, test_data_set, learning_algo, *args, **kwargs): + """ Plot of the low-dimensional representation of the environment built by the model + """ + + all_possib_inp=[] # Will store all possible inputs (=observation) for the CRAR agent + labels_maze=[] + self.create_map() + for y_a in range(self._size_maze): + for x_a in range(self._size_maze): + state=copy.deepcopy(self._map) + state[self._size_maze//2,self._size_maze//2]=0 + if(state[x_a,y_a]==0): + if(self._higher_dim_obs==True): + all_possib_inp.append(self.get_higher_dim_obs([[x_a,y_a]],[self._pos_goal])) + else: + state[x_a,y_a]=0.5 + all_possib_inp.append(state) + + ## labels + #if(y_a 'channels_first' + + n=1000 + historics=[] + for i,observ in enumerate(test_data_set.observations()[0][0:n]): + historics.append(np.expand_dims(observ,axis=0)) + historics=np.array(historics) + + historics = torch.from_numpy(historics).float() + abs_states=learning_algo.encoder.predict(historics) + # if(abs_states.ndim==4): + # abs_states=np.transpose(abs_states, (0, 3, 1, 2)) # data_format='channels_last' --> 'channels_first' + + actions=test_data_set.actions()[0:n] + + if self.inTerminalState() == False: + self._mode_episode_count += 1 + print("== Mean score per episode is {} over {} episodes ==".format(self._mode_score / (self._mode_episode_count+0.0001), self._mode_episode_count)) + + + m = cm.ScalarMappable(cmap=cm.jet) + + + + abs_states = abs_states.detach().numpy() + all_possib_abs_states = all_possib_abs_states.detach().numpy() + + x = np.array(abs_states)[:,0] + y = np.array(abs_states)[:,1] + if(self.intern_dim>2): + z = np.array(abs_states)[:,2] + + fig = plt.figure() + if(self.intern_dim==2): + ax = fig.add_subplot(111) + ax.set_xlabel(r'$X_1$') + ax.set_ylabel(r'$X_2$') + else: + ax = 
fig.add_subplot(111,projection='3d') + ax.set_xlabel(r'$X_1$') + ax.set_ylabel(r'$X_2$') + ax.set_zlabel(r'$X_3$') + + # Plot the estimated transitions + for i in range(n-1): + # pdb.set_trace() + predicted1=learning_algo.transition.predict(torch.cat((torch.from_numpy(abs_states[i:i+1]).float() ,torch.from_numpy(np.array([[1,0,0,0]])).float()),-1)).detach().numpy() + predicted2=learning_algo.transition.predict(torch.cat((torch.from_numpy(abs_states[i:i+1]).float() ,torch.from_numpy(np.array([[0,1,0,0]])).float()),-1)).detach().numpy() + predicted3=learning_algo.transition.predict(torch.cat((torch.from_numpy(abs_states[i:i+1]).float() ,torch.from_numpy(np.array([[0,0,1,0]])).float()),-1)).detach().numpy() + predicted4=learning_algo.transition.predict(torch.cat((torch.from_numpy(abs_states[i:i+1]).float() ,torch.from_numpy(np.array([[0,0,0,1]])).float()),-1)).detach().numpy() + # predicted1=learning_algo.transition.predict([abs_states[i:i+1],np.array([[1,0,0,0]])]) + # predicted2=learning_algo.transition.predict([abs_states[i:i+1],np.array([[0,1,0,0]])]) + # predicted3=learning_algo.transition.predict([abs_states[i:i+1],np.array([[0,0,1,0]])]) + # predicted4=learning_algo.transition.predict([abs_states[i:i+1],np.array([[0,0,0,1]])]) + if(self.intern_dim==2): + ax.plot(np.concatenate([x[i:i+1],predicted1[0,:1]]), np.concatenate([y[i:i+1],predicted1[0,1:2]]), color="0.9", alpha=0.75) + ax.plot(np.concatenate([x[i:i+1],predicted2[0,:1]]), np.concatenate([y[i:i+1],predicted2[0,1:2]]), color="0.65", alpha=0.75) + ax.plot(np.concatenate([x[i:i+1],predicted3[0,:1]]), np.concatenate([y[i:i+1],predicted3[0,1:2]]), color="0.4", alpha=0.75) + ax.plot(np.concatenate([x[i:i+1],predicted4[0,:1]]), np.concatenate([y[i:i+1],predicted4[0,1:2]]), color="0.15", alpha=0.75) + else: + ax.plot(np.concatenate([x[i:i+1],predicted1[0,:1]]), np.concatenate([y[i:i+1],predicted1[0,1:2]]), np.concatenate([z[i:i+1],predicted1[0,2:3]]), color="0.9", alpha=0.75) + ax.plot(np.concatenate([x[i:i+1],predicted2[0,:1]]), np.concatenate([y[i:i+1],predicted2[0,1:2]]), np.concatenate([z[i:i+1],predicted2[0,2:3]]), color="0.65", alpha=0.75) + ax.plot(np.concatenate([x[i:i+1],predicted3[0,:1]]), np.concatenate([y[i:i+1],predicted3[0,1:2]]), np.concatenate([z[i:i+1],predicted3[0,2:3]]), color="0.4", alpha=0.75) + ax.plot(np.concatenate([x[i:i+1],predicted4[0,:1]]), np.concatenate([y[i:i+1],predicted4[0,1:2]]), np.concatenate([z[i:i+1],predicted4[0,2:3]]), color="0.15", alpha=0.75) + + # Plot the dots at each time step depending on the action taken + length_block=[[0,18],[18,19],[19,31]] + for i in range(3): + colors=['blue','orange','green'] + if(self.intern_dim==2): + line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1], c=colors[i], marker='x', edgecolors='k', alpha=0.5, s=100) + else: + line3 = ax.scatter(all_possib_abs_states[length_block[i][0]:length_block[i][1],0], all_possib_abs_states[length_block[i][0]:length_block[i][1],1] ,all_possib_abs_states[length_block[i][0]:length_block[i][1],2], marker='x', depthshade=True, edgecolors='k', alpha=0.5, s=50) + + if(self.intern_dim==2): + axes_lims=[ax.get_xlim(),ax.get_ylim()] + else: + axes_lims=[ax.get_xlim(),ax.get_ylim(),ax.get_zlim()] + + # Plot the legend for transition estimates + box1b = TextArea(" Estimated transitions (action 0, 1, 2 and 3): ", textprops=dict(color="k")) + box2b = DrawingArea(90, 20, 0, 0) + el1b = Rectangle((5, 10), 15,2, fc="0.9", alpha=0.75) + el2b = 
Rectangle((25, 10), 15,2, fc="0.65", alpha=0.75) + el3b = Rectangle((45, 10), 15,2, fc="0.4", alpha=0.75) + el4b = Rectangle((65, 10), 15,2, fc="0.15", alpha=0.75) + box2b.add_artist(el1b) + box2b.add_artist(el2b) + box2b.add_artist(el3b) + box2b.add_artist(el4b) + + boxb = HPacker(children=[box1b, box2b], + align="center", + pad=0, sep=5) + + anchored_box = AnchoredOffsetbox(loc=3, + child=boxb, pad=0., + frameon=True, + bbox_to_anchor=(0., 0.98), + bbox_transform=ax.transAxes, + borderpad=0., + ) + ax.add_artist(anchored_box) + + + #plt.show() + plt.savefig('pytorch/fig_base'+str(learning_algo.update_counter)+'.pdf') + + +# # Plot the Q_vals +# c = learning_algo.Q.predict(np.concatenate((np.expand_dims(x,axis=1),np.expand_dims(y,axis=1),np.expand_dims(z,axis=1)),axis=1)) +# #print "actions,C" +# #print actions +# #print c +# #c=np.max(c,axis=1) +# m1=ax.scatter(x, y, z+zrange/20, c=c[:,0], vmin=-1., vmax=1., cmap=plt.cm.RdYlGn) +# m2=ax.scatter(x, y, z+3*zrange/40, c=c[:,1], vmin=-1., vmax=1., cmap=plt.cm.RdYlGn) +# +# #plt.colorbar(m3) +# ax2 = fig.add_axes([0.85, 0.15, 0.025, 0.7]) +# cmap = matplotlib.cm.RdYlGn +# norm = matplotlib.colors.Normalize(vmin=-1, vmax=1) +# +# # ColorbarBase derives from ScalarMappable and puts a colorbar +# # in a specified axes, so it has everything needed for a +# # standalone colorbar. There are many more kwargs, but the +# # following gives a basic continuous colorbar with ticks +# # and labels. +# cb1 = matplotlib.colorbar.ColorbarBase(ax2, cmap=cmap,norm=norm,orientation='vertical') +# cb1.set_label('Estimated expected return') +# +# #plt.show() +# plt.savefig('fig_w_V'+str(learning_algo.update_counter)+'.pdf') +# +# +# # fig_visuV +# fig = plt.figure() +# ax = fig.add_subplot(111, projection='3d') +# +# x = np.array([i for i in range(5) for jk in range(25)])/4.*(axes_lims[0][1]-axes_lims[0][0])+axes_lims[0][0] +# y = np.array([j for i in range(5) for j in range(5) for k in range(5)])/4.*(axes_lims[1][1]-axes_lims[1][0])+axes_lims[1][0] +# z = np.array([k for i in range(5) for j in range(5) for k in range(5)])/4.*(axes_lims[2][1]-axes_lims[2][0])+axes_lims[2][0] +# +# c = learning_algo.Q.predict(np.concatenate((np.expand_dims(x,axis=1),np.expand_dims(y,axis=1),np.expand_dims(z,axis=1)),axis=1)) +# c=np.max(c,axis=1) +# #print "c" +# #print c +# +# m=ax.scatter(x, y, z, c=c, vmin=-1., vmax=1., cmap=plt.hot()) +# #plt.colorbar(m) +# fig.subplots_adjust(right=0.8) +# ax2 = fig.add_axes([0.875, 0.15, 0.025, 0.7]) +# cmap = matplotlib.cm.hot +# norm = matplotlib.colors.Normalize(vmin=-1, vmax=1) +# +# # ColorbarBase derives from ScalarMappable and puts a colorbar +# # in a specified axes, so it has everything needed for a +# # standalone colorbar. There are many more kwargs, but the +# # following gives a basic continuous colorbar with ticks +# # and labels. 
+# cb1 = matplotlib.colorbar.ColorbarBase(ax2, cmap=cmap,norm=norm,orientation='vertical') +# cb1.set_label('Estimated expected return') +# +# #plt.show() +# plt.savefig('fig_visuV'+str(learning_algo.update_counter)+'.pdf') +# +# +# # fig_visuR +# fig = plt.figure() +# ax = fig.add_subplot(111, projection='3d') +# +# x = np.array([i for i in range(5) for jk in range(25)])/4.*(axes_lims[0][1]-axes_lims[0][0])+axes_lims[0][0] +# y = np.array([j for i in range(5) for j in range(5) for k in range(5)])/4.*(axes_lims[1][1]-axes_lims[1][0])+axes_lims[1][0] +# z = np.array([k for i in range(5) for j in range(5) for k in range(5)])/4.*(axes_lims[2][1]-axes_lims[2][0])+axes_lims[2][0] +# +# coords=np.concatenate((np.expand_dims(x,axis=1),np.expand_dims(y,axis=1),np.expand_dims(z,axis=1)),axis=1) +# repeat_nactions_coord=np.repeat(coords,self.nActions(),axis=0) +# identity_matrix = np.diag(np.ones(self.nActions())) +# tile_identity_matrix=np.tile(identity_matrix,(5*5*5,1)) +# +# c = learning_algo.R.predict([repeat_nactions_coord,tile_identity_matrix]) +# c=np.max(np.reshape(c,(125,self.nActions())),axis=1) +# #print "c" +# #print c +# #mini=np.min(c) +# #maxi=np.max(c) +# +# m=ax.scatter(x, y, z, c=c, vmin=-1., vmax=1., cmap=plt.hot()) +# #plt.colorbar(m) +# fig.subplots_adjust(right=0.8) +# ax2 = fig.add_axes([0.875, 0.15, 0.025, 0.7]) +# cmap = matplotlib.cm.hot +# norm = matplotlib.colors.Normalize(vmin=-1, vmax=1) +# +# # ColorbarBase derives from ScalarMappable and puts a colorbar +# # in a specified axes, so it has everything needed for a +# # standalone colorbar. There are many more kwargs, but the +# # following gives a basic continuous colorbar with ticks +# # and labels. +# cb1 = matplotlib.colorbar.ColorbarBase(ax2, cmap=cmap,norm=norm,orientation='vertical') +# cb1.set_label('Estimated expected return') +# +# #plt.show() +# plt.savefig('fig_visuR'+str(learning_algo.update_counter)+'.pdf') + + matplotlib.pyplot.close("all") # avoids memory leaks + + def inputDimensions(self): + if(self._higher_dim_obs==True): + return [(1,self._size_maze*6,self._size_maze*6)] + else: + return [(1,self._size_maze,self._size_maze)] + + def observationType(self, subject): + return np.float + + def nActions(self): + return 4 + + def observe(self): + obs=copy.deepcopy(self._map) + + obs[self._pos_agent[0],self._pos_agent[1]]=0.5 + if(self._higher_dim_obs==True): + "self._pos_agent" + self._pos_agent + obs=self.get_higher_dim_obs([self._pos_agent],[self._pos_goal]) + + return [obs] + + def get_higher_dim_obs(self,indices_agent,indices_reward): + """ Obtain the high-dimensional observation from indices of the agent position and the indices of the reward positions. + """ + obs=copy.deepcopy(self._map) + obs=obs/1. + obs=np.repeat(np.repeat(obs, 6, axis=0),6, axis=1) + # agent repr + agent_obs=np.zeros((6,6)) + agent_obs[0,2]=0.7 + agent_obs[1,0:5]=0.8 + agent_obs[2,1:4]=0.8 + agent_obs[3,1:4]=0.8 + agent_obs[4,1]=0.8 + agent_obs[4,3]=0.8 + agent_obs[5,0:2]=0.8 + agent_obs[5,3:5]=0.8 + + # reward repr + reward_obs=np.zeros((6,6)) + #reward_obs[:,1]=0.8 + #reward_obs[0,1:4]=0.7 + #reward_obs[1,3]=0.8 + #reward_obs[2,1:4]=0.7 + #reward_obs[4,2]=0.8 + #reward_obs[5,2:4]=0.8 + + for i in indices_reward: + obs[i[0]*6:(i[0]+1)*6:,i[1]*6:(i[1]+1)*6]=reward_obs + + for i in indices_agent: + obs[i[0]*6:(i[0]+1)*6:,i[1]*6:(i[1]+1)*6]=agent_obs + + #plt.imshow(obs, cmap='gray_r') + #plt.show() + return obs + + + def inTerminalState(self): + # Uncomment the following lines to add some cases where the episode terminates. 
+ # This is used to show how the environment representation interpret cases where + # part of the environment could not be explored. +# if((self._pos_agent[0]<=1 and self._cur_action==0) ): +# return True + return False + + # If there is a goal, then terminates the environment when the goas is reached. + #if (self._pos_agent==self._pos_goal): + # return True + #else: + # return False + + + +if __name__ == "__main__": + pass
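For quick manual checks outside the full DeeR agent loop, the maze environment above can also be driven directly. The snippet below is an illustrative sketch rather than part of the patch; it assumes the file is importable as `simple_maze_env_pytorch` (e.g. when run from examples/test_CRAR/) and that its dependencies (deer, torch, cv2, matplotlib) are installed.

```python
import numpy as np
from simple_maze_env_pytorch import MyEnv

rng = np.random.RandomState(0)
env = MyEnv(rng, higher_dim_obs=False)     # low-dimensional 8x8 observations

env.reset(-1)                              # -1 is the ordinary (non-validation) mode
for step in range(5):
    action = rng.randint(env.nActions())   # one of the 4 moves
    reward = env.act(action)               # always 0: this maze is reward-free
    obs = env.observe()[0]                 # single observation of shape (8, 8)
    print(step, action, reward, obs.shape, env.inTerminalState())
```

This roughly mirrors what NeuralAgent does during the initial data-gathering call agent.run(10, 500) in run_simple_maze_pytorch.py, except that the agent additionally stores the observed transitions in its replay memory.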