diff --git a/vbench/benchmark.py b/vbench/benchmark.py
index ed29f62..8cee053 100644
--- a/vbench/benchmark.py
+++ b/vbench/benchmark.py
@@ -18,7 +18,7 @@ class Benchmark(object):
 
     def __init__(self, code, setup, ncalls=None, repeat=3, cleanup=None,
                  name=None, description=None, start_date=None,
-                 logy=False):
+                 logy=False, memory=False):
         self.code = code
         self.setup = setup
         self.cleanup = cleanup or ''
@@ -36,6 +36,7 @@ def __init__(self, code, setup, ncalls=None, repeat=3, cleanup=None,
         self.description = description
         self.start_date = start_date
         self.logy = logy
+        self.memory = memory
 
     def __repr__(self):
         return "Benchmark('%s')" % self.name
@@ -84,6 +85,16 @@ def run(self):
             traceback.print_exc(file=buf)
             result = {'succeeded': False, 'traceback': buf.getvalue()}
 
+        if self.memory:
+            try:
+                mem_usage = magic_memit(ns, self.code, repeat=self.repeat)
+                result['memory'] = mem_usage
+                result['mem_succeeded'] = True
+            except Exception:  # keep the timing result; record the error
+                result['mem_succeeded'] = False
+                mem_tb = traceback.format_exc()
+                result['traceback'] = result.get('traceback', '') + mem_tb
+
         self._cleanup(ns)
         return result
 
@@ -104,8 +115,17 @@ def _run(self, ns, ncalls, disable_gc=False):
 
         return elapsed
 
-    def to_rst(self, image_path=None):
-        output = """**Benchmark setup**
+    def to_rst(self, image_paths=None):
+        """Generates rst file with a list of images
+
+        image_paths: list of tuples (title, rel_path)
+        """
+
+        if not image_paths:
+            image_paths = []
+
+        output = """\
+**Benchmark setup**
 
 .. code-block:: python
 
@@ -119,13 +139,14 @@ def to_rst(self, image_path=None):
 
 """ % (indent(self.setup), indent(self.code))
 
-        if image_path is not None:
-            output += ("**Performance graph**\n\n.. image:: %s"
-                       "\n   :width: 6in" % image_path)
+        for title, path in image_paths:
+            output += ("**%s**\n\n.. image:: %s"
+                       "\n   :width: 6in\n\n" % (title, path))
 
         return output
 
-    def plot(self, db_path, label='time', ax=None, title=True):
+    def plot(self, db_path, label='time', ax=None, title=True, y='timing',
+             ylabel='milliseconds'):
         import matplotlib.pyplot as plt
         from matplotlib.dates import MonthLocator, DateFormatter
 
@@ -135,13 +156,13 @@ def plot(self, db_path, label='time', ax=None, title=True):
             fig = plt.figure()
             ax = fig.add_subplot(111)
 
-        timing = results['timing']
+        timing = results[y]
         if self.start_date is not None:
             timing = timing.truncate(before=self.start_date)
 
         timing.plot(ax=ax, style='b-', label=label)
         ax.set_xlabel('Date')
-        ax.set_ylabel('milliseconds')
+        ax.set_ylabel(ylabel)
 
         if self.logy:
             ax2 = ax.twinx()
@@ -149,7 +170,7 @@ def plot(self, db_path, label='time', ax=None, title=True):
                 timing.plot(ax=ax2, label='%s (log scale)' % label,
                             style='r-',
                             logy=self.logy)
-                ax2.set_ylabel('milliseconds (log scale)')
+                ax2.set_ylabel(ylabel + ' (log scale)')
                 ax.legend(loc='best')
                 ax2.legend(loc='best')
             except ValueError:
@@ -376,5 +397,107 @@ def magic_timeit(ns, stmt, ncalls=None, repeat=3, force_ms=False):
             'units': units[order]}
 
 
+# Adapted from memory_profiler
+def magic_memit(ns, line='', repeat=1, timeout=None, run_in_place=False):
+    """Measure the memory usage of a Python statement.
+
+    Adapted from the ``%memit`` magic of memory_profiler.  `line` is run
+    `repeat` times and the largest memory increase observed is returned.
+
+    Parameters
+    ----------
+    ns : dict
+        Namespace in which `line` is executed.
+    line : str
+        Statement to measure.
+    repeat : int
+        Number of measurements to take; values below 1 are treated as 1.
+    timeout : number or None
+        Seconds to wait for each subprocess.  None or a non-positive value
+        means no limit.  Unused when running in place.
+    run_in_place : bool
+        Run the statement in the current process instead of starting a new
+        process per measurement.  Forced when `multiprocessing` cannot be
+        imported.  The upstream docstring notes this is required on some
+        MacOS versions of Accelerate if the statement calls `np.dot`.
+
+    Returns
+    -------
+    usage : float
+        Largest difference, over all runs, between the first value returned
+        by ``memory_profiler.memory_usage()`` after and before the statement
+        (MB according to the upstream docstring).  A subprocess that fails
+        or times out counts as ``-inf``.
+
+    Raises
+    ------
+    RuntimeError
+        If every subprocess exited unsuccessfully or timed out.  When
+        running in place, any exception raised by the statement propagates.
+    """
+    if repeat < 1:
+        repeat = 1
+    if timeout is not None and timeout <= 0:
+        timeout = None
+
+    # Don't depend on multiprocessing:
+    try:
+        import multiprocessing as pr
+        from multiprocessing.queues import SimpleQueue
+        q = SimpleQueue()
+    except ImportError:
+        class ListWithPut(list):
+            "A list exposing the FIFO `put`/`get` interface of a queue."
+            def put(self, x):
+                self.append(x)
+            def get(self):
+                return self.pop(0)
+        q = ListWithPut()
+        print ('WARNING: cannot import module `multiprocessing`. Forcing the'
+               ' `-i` option.')
+        run_in_place = True
+
+    def _get_usage(q, stmt, setup='pass', ns=None):
+        # Only successful runs put a value on `q`.  A failure propagates
+        # to the caller (in place) or shows up as a non-zero exit code that
+        # the parent records as -inf, so each run yields exactly one entry.
+        from memory_profiler import memory_usage as _mu
+        ns = {} if ns is None else ns
+        exec setup in ns
+        _mu0 = _mu()[0]
+        exec stmt in ns
+        _mu1 = _mu()[0]
+        q.put(_mu1 - _mu0)
+
+    if run_in_place:
+        for _ in xrange(repeat):
+            _get_usage(q, line, ns=ns)
+    else:
+        # run in consecutive subprocesses
+        at_least_one_worked = False
+        for _ in xrange(repeat):
+            p = pr.Process(target=_get_usage, args=(q, line, 'pass', ns))
+            p.start()
+            p.join(timeout=timeout)
+            # Read the exit code before terminate(): None here means the
+            # join timed out and the child is still running.
+            exitcode = p.exitcode
+            if exitcode == 0:
+                at_least_one_worked = True
+                continue
+            if exitcode is None:
+                p.terminate()
+                print 'Subprocess timed out.'
+            else:
+                print 'Subprocess exited with code %d.' % exitcode
+            q.put(float('-inf'))
+
+        if not at_least_one_worked:
+            raise RuntimeError('ERROR: all subprocesses exited unsuccessfully.'
+                               ' Try again with the `-i` option.')
+
+    return max(q.get() for _ in xrange(repeat))
+
+
 def gather_benchmarks(ns):
     return [v for v in ns.values() if isinstance(v, Benchmark)]
diff --git a/vbench/db.py b/vbench/db.py
index 5b7d522..0b2cf59 100644
--- a/vbench/db.py
+++ b/vbench/db.py
@@ -29,6 +29,7 @@ def __init__(self, dbpath):
             Column('timestamp', sqltypes.DateTime, nullable=False),
             Column('ncalls', sqltypes.String(50)),
             Column('timing', sqltypes.Float),
+            Column('memory', sqltypes.Float),
             Column('traceback', sqltypes.Text),
         )
 
@@ -97,14 +98,15 @@ def delete_benchmark(self, checksum):
         pass
 
     def write_result(self, checksum, revision, timestamp, ncalls,
-                     timing, traceback=None, overwrite=False):
+                     timing, memory=None, traceback=None, overwrite=False):
         """
 
         """
         ins = self._results.insert()
         ins = ins.values(checksum=checksum, revision=revision,
                          timestamp=timestamp,
-                         ncalls=ncalls, timing=timing, traceback=traceback)
+                         ncalls=ncalls, timing=timing, memory=memory,
+                         traceback=traceback)
         self.conn.execute(ins)  # XXX: return the result?
 
     def delete_result(self, checksum, revision):
@@ -157,7 +159,7 @@ def get_benchmark_results(self, checksum):
         """
         tab = self._results
         stmt = sql.select([tab.c.timestamp, tab.c.revision, tab.c.ncalls,
-                           tab.c.timing, tab.c.traceback],
+                           tab.c.timing, tab.c.memory, tab.c.traceback],
                           sql.and_(tab.c.checksum == checksum))
         results = self.conn.execute(stmt)
 
diff --git a/vbench/runner.py b/vbench/runner.py
index 82a85f1..bb13f29 100644
--- a/vbench/runner.py
+++ b/vbench/runner.py
@@ -100,6 +100,7 @@ def _run_and_write_results(self, rev):
             self.db.write_result(checksum, rev, timestamp,
                                  timing.get('loops'),
                                  timing.get('timing'),
+                                 timing.get('memory'),
                                  timing.get('traceback'))
 
         return any_succeeded, n_active_benchmarks