diff --git a/pyESN.py b/pyESN.py
index 149d9b5..311e5db 100644
--- a/pyESN.py
+++ b/pyESN.py
@@ -33,7 +33,7 @@ class ESN():
     def __init__(self, n_inputs, n_outputs, n_reservoir=200,
                  spectral_radius=0.95, sparsity=0, noise=0.001, input_shift=None,
                  input_scaling=None, teacher_forcing=True, feedback_scaling=None,
-                 teacher_scaling=None, teacher_shift=None,
+                 teacher_scaling=None, teacher_shift=None, keras_model=None,
                  out_activation=identity, inverse_out_activation=identity,
                  random_state=None, silent=True):
         """
@@ -51,6 +51,10 @@ def __init__(self, n_inputs, n_outputs, n_reservoir=200,
             teacher_forcing: if True, feed the target back into output units
             teacher_scaling: factor applied to the target signal
             teacher_shift: additive term applied to the target signal
+            keras_model: compiled keras model used in place of the linear
+                readout; `fit` trains it to map the extended reservoir states
+                (n_reservoir + n_inputs features) to the n_outputs outputs.
+                Call `compile` on the model before passing it in.
             out_activation: output activation function (applied to the readout)
             inverse_out_activation: inverse of the output activation function
             random_state: positive integer seed, np.rand.RandomState object,
@@ -70,6 +74,7 @@ def __init__(self, n_inputs, n_outputs, n_reservoir=200,
         self.teacher_scaling = teacher_scaling
         self.teacher_shift = teacher_shift
 
+        self.keras_model = keras_model
         self.out_activation = out_activation
         self.inverse_out_activation = inverse_out_activation
         self.random_state = random_state
@@ -151,7 +156,7 @@ def _unscale_teacher(self, teacher_scaled):
             teacher_scaled = teacher_scaled / self.teacher_scaling
         return teacher_scaled
 
-    def fit(self, inputs, outputs, inspect=False):
+    def fit(self, inputs, outputs, inspect=False, **kwargs):
         """
         Collect the network's reaction to training data, train readout weights.
@@ -159,6 +164,7 @@ def fit(self, inputs, outputs, inspect=False):
             inputs: array of dimensions (N_training_samples x n_inputs)
             outputs: array of dimension (N_training_samples x n_outputs)
             inspect: show a visualisation of the collected reservoir states
+            **kwargs: passed on to the keras model's `fit` method
 
         Returns:
             the network's output on the training data, using the trained weights
@@ -188,9 +194,21 @@ def fit(self, inputs, outputs, inspect=False):
         transient = min(int(inputs.shape[1] / 10), 100)
         # include the raw inputs:
         extended_states = np.hstack((states, inputs_scaled))
-        # Solve for W_out:
-        self.W_out = np.dot(np.linalg.pinv(extended_states[transient:, :]),
-                            self.inverse_out_activation(teachers_scaled[transient:, :])).T
+
+        if self.keras_model is None:
+            # Solve for W_out:
+            self.W_out = np.dot(np.linalg.pinv(extended_states[transient:, :]),
+                                self.inverse_out_activation(teachers_scaled[transient:, :])).T
+            # apply learned weights to the collected states:
+            pred_train = self._unscale_teacher(self.out_activation(
+                np.dot(extended_states, self.W_out.T)))
+        else:
+            # train the output network on the collected states
+            self.keras_model.fit(extended_states[transient:, :],
+                                 teachers_scaled[transient:, :],
+                                 **kwargs)
+            pred_train = self._unscale_teacher(
+                self.keras_model.predict(extended_states))
 
         # remember the last state for later:
         self.laststate = states[-1, :]
@@ -209,10 +227,6 @@
 
         if not self.silent:
             print("training error:")
-        # apply learned weights to the collected states:
-        pred_train = self._unscale_teacher(self.out_activation(
-            np.dot(extended_states, self.W_out.T)))
-
         if not self.silent:
             print(np.sqrt(np.mean((pred_train - outputs)**2)))
         return pred_train
@@ -249,7 +263,14 @@ def predict(self, inputs, continuation=True):
         for n in range(n_samples):
             states[
                 n + 1, :] = self._update(states[n, :], inputs[n + 1, :], outputs[n, :])
-            outputs[n + 1, :] = self.out_activation(np.dot(self.W_out,
-                                np.concatenate([states[n + 1, :], inputs[n + 1, :]])))
-
-        return self._unscale_teacher(self.out_activation(outputs[1:]))
+            if self.keras_model is None:
+                outputs[n + 1, :] = self.out_activation(np.dot(self.W_out,
+                                    np.concatenate([states[n + 1, :], inputs[n + 1, :]])))
+            else:
+                # keras models expect a batch dimension, so add one
+                outputs[n + 1, :] = self.keras_model.predict(
+                    np.expand_dims(np.concatenate([states[n + 1, :], inputs[n + 1, :]]), axis=0))
+
+        if self.keras_model is None:
+            outputs[1:] = self.out_activation(outputs[1:])
+        return self._unscale_teacher(outputs[1:])
diff --git a/testing.py b/testing.py
index 2e1e953..7a44dc2 100644
--- a/testing.py
+++ b/testing.py
@@ -1,5 +1,6 @@
 import unittest
 import numpy as np
+import keras
 
 from pyESN import ESN
 
@@ -139,6 +140,17 @@ def test_IODimensions(self):
         self.assertEqual(prediction_tr.shape, (N_samples, N_out))
         self.assertEqual(prediction_t.shape, (N_samples, N_out))
 
+    def test_keras_model(self):
+        """try the keras_model parameter"""
+        model = keras.models.Sequential()
+        model.add(keras.layers.Dense(units=200, activation='relu', input_dim=N + N_in))
+        model.add(keras.layers.Dense(units=N_out, activation='linear'))
+        model.compile(loss='mae', optimizer='adagrad')
+
+        esn = ESN(N_in, N_out, n_reservoir=N, keras_model=model)
+        esn.fit(self.X, self.y, epochs=20, verbose=0)
+        esn.predict(self.Xp)
+
 
 class Performance(unittest.TestCase):
     # Slighty bending the concept of a unit test, I want to catch performance changes during refactoring.
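For contrast, the `keras_model is None` branch keeps pyESN's original closed-form readout: `W_out` is the least-squares solution obtained from the Moore-Penrose pseudoinverse of the collected states. A standalone sketch of that computation (the function name and the identity default are illustrative, not part of the patch):

```python
import numpy as np

def linear_readout(extended_states, teachers_scaled, transient,
                   inverse_out_activation=lambda x: x):
    """Closed-form readout: least-squares fit of W_out, skipping the first
    `transient` washout steps, whose states still reflect the arbitrary
    initial reservoir state."""
    S = extended_states[transient:, :]                          # (T, n_reservoir + n_inputs)
    Y = inverse_out_activation(teachers_scaled[transient:, :])  # (T, n_outputs)
    # np.linalg.pinv gives the minimum-norm least-squares solution,
    # so W_out minimises ||S @ W_out.T - Y||.
    return np.dot(np.linalg.pinv(S), Y).T                       # (n_outputs, n_reservoir + n_inputs)
```

Swapping this single matrix for a trainable network is the point of the patch: the reservoir still does the temporal feature extraction, but the readout can now be nonlinear.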
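For reference, a minimal usage sketch of the new `keras_model` parameter, mirroring `test_keras_model` above. The layer sizes, optimizer, loss, and random data are illustrative assumptions; only the input width (`n_reservoir + n_inputs`) and output width (`n_outputs`) are required by the patch:

```python
import numpy as np
import keras
from pyESN import ESN

n_inputs, n_outputs, n_reservoir = 2, 2, 100

# The readout model consumes the extended state vector (reservoir state
# concatenated with the scaled input), so its input width must be
# n_reservoir + n_inputs and its output width must be n_outputs.
model = keras.models.Sequential()
model.add(keras.layers.Dense(units=64, activation='relu',
                             input_dim=n_reservoir + n_inputs))
model.add(keras.layers.Dense(units=n_outputs, activation='linear'))
model.compile(loss='mae', optimizer='adagrad')  # compile before passing it in

esn = ESN(n_inputs, n_outputs, n_reservoir=n_reservoir, keras_model=model)

X = np.random.randn(500, n_inputs)    # illustrative training data
y = np.random.randn(500, n_outputs)

# extra keyword arguments are forwarded to the keras model's `fit`:
pred_train = esn.fit(X, y, epochs=20, verbose=0)
pred_test = esn.predict(np.random.randn(100, n_inputs))
```

Note that `predict` calls the keras model once per time step on a batch of size one (hence the `np.expand_dims`), so free-running prediction will generally be much slower than with the vectorised linear readout.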