diff --git a/.travis.yml b/.travis.yml index 4dc7ed72d6c..92d740cd88b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,40 +1,50 @@ -# Use a build matrix to do two builds in parallel: -# one using CMake, and one using make. +dist: trusty +sudo: required + +language: cpp +compiler: gcc + env: + global: + - NUM_THREADS=4 matrix: - - WITH_CUDA=false WITH_CMAKE=false WITH_IO=true - - WITH_CUDA=false WITH_CMAKE=true WITH_IO=true PYTHON_VERSION=3 - - WITH_CUDA=true WITH_CMAKE=false WITH_IO=true - - WITH_CUDA=true WITH_CMAKE=true WITH_IO=true - - WITH_CUDA=false WITH_CMAKE=false WITH_IO=false - - WITH_CUDA=false WITH_CMAKE=true WITH_IO=false PYTHON_VERSION=3 + # Use a build matrix to test many builds in parallel + # envvar defaults: + # WITH_CMAKE: false + # WITH_PYTHON3: false + # WITH_IO: true + # WITH_CUDA: false + # WITH_CUDNN: false + - BUILD_NAME="default-make" +# - BUILD_NAME="python3-make" WITH_PYTHON3=true + - BUILD_NAME="no-io-make" WITH_IO=false + - BUILD_NAME="cuda-make" WITH_CUDA=true + - BUILD_NAME="cudnn-make" WITH_CUDA=true WITH_CUDNN=true -language: cpp + - BUILD_NAME="default-cmake" WITH_CMAKE=true + - BUILD_NAME="python3-cmake" WITH_CMAKE=true WITH_PYTHON3=true + - BUILD_NAME="no-io-cmake" WITH_CMAKE=true WITH_IO=false + - BUILD_NAME="cuda-cmake" WITH_CMAKE=true WITH_CUDA=true + - BUILD_NAME="cudnn-cmake" WITH_CMAKE=true WITH_CUDA=true WITH_CUDNN=true -# Cache Ubuntu apt packages. cache: apt: true - directories: - - /home/travis/miniconda - - /home/travis/miniconda2 - - /home/travis/miniconda3 - -compiler: gcc before_install: - - export NUM_THREADS=4 - - export SCRIPTS=./scripts/travis - - export CONDA_DIR="/home/travis/miniconda$PYTHON_VERSION" + - source ./scripts/travis/defaults.sh install: - - sudo -E $SCRIPTS/travis_install.sh + - sudo -E ./scripts/travis/install-deps.sh + - ./scripts/travis/setup-venv.sh ~/venv + - source ~/venv/bin/activate + - ./scripts/travis/install-python-deps.sh before_script: - - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/usr/local/cuda/lib64:$CONDA_DIR/lib - - export PATH=$CONDA_DIR/bin:$PATH - - if ! 
$WITH_CMAKE; then $SCRIPTS/travis_setup_makefile_config.sh; fi + - ./scripts/travis/configure.sh -script: $SCRIPTS/travis_build_and_test.sh +script: + - ./scripts/travis/build.sh + - ./scripts/travis/test.sh notifications: # Emails are sent to the committer's git-configured email address by default, diff --git a/CMakeLists.txt b/CMakeLists.txt index c5d99cef9dd..da7142c9b3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,8 @@ endif() project(Caffe C CXX) # ---[ Caffe version -set(CAFFE_TARGET_VERSION "1.0.0-rc3") -set(CAFFE_TARGET_SOVERSION "1.0.0-rc3") +set(CAFFE_TARGET_VERSION "1.0.0-rc3" CACHE STRING "Caffe logical version") +set(CAFFE_TARGET_SOVERSION "1.0.0-rc3" CACHE STRING "Caffe soname version") add_definitions(-DCAFFE_VERSION=${CAFFE_TARGET_VERSION}) # ---[ Using cmake scripts and modules diff --git a/Makefile b/Makefile index 2f81aca84e7..403e00a38a1 100644 --- a/Makefile +++ b/Makefile @@ -272,7 +272,7 @@ endif ifeq ($(OSX), 1) CXX := /usr/bin/clang++ ifneq ($(CPU_ONLY), 1) - CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release \d' | grep -o '\d') + CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | tr -d '[a-z ]') ifeq ($(shell echo | awk '{exit $(CUDA_VERSION) < 7.0;}'), 1) CXXFLAGS += -stdlib=libstdc++ LINKFLAGS += -stdlib=libstdc++ @@ -364,9 +364,9 @@ ifeq ($(BLAS), mkl) # MKL LIBRARIES += mkl_rt COMMON_FLAGS += -DUSE_MKL - MKL_DIR ?= /opt/intel/mkl - BLAS_INCLUDE ?= $(MKL_DIR)/include - BLAS_LIB ?= $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64 + MKLROOT ?= /opt/intel/mkl + BLAS_INCLUDE ?= $(MKLROOT)/include + BLAS_LIB ?= $(MKLROOT)/lib $(MKLROOT)/lib/intel64 else ifeq ($(BLAS), open) # OpenBLAS LIBRARIES += openblas diff --git a/Makefile.config.example b/Makefile.config.example index 8fd49c9c1a7..07bed63ae40 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -98,6 +98,7 @@ LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.) # USE_PKG_CONFIG := 1 +# N.B. both build and distribute dirs are cleared on `make clean` BUILD_DIR := build DISTRIBUTE_DIR := distribute diff --git a/docker/Makefile b/docker/Makefile index 725208c6b2b..3a6575b0c43 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -22,7 +22,7 @@ docker_files: standalone_files standalone_files: standalone/cpu/Dockerfile standalone/gpu/Dockerfile -FROM_GPU = "nvidia/cuda:cudnn" +FROM_GPU = "nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04" FROM_CPU = "ubuntu:14.04" GPU_CMAKE_ARGS = -DUSE_CUDNN=1 CPU_CMAKE_ARGS = -DCPU_ONLY=1 diff --git a/docker/standalone/gpu/Dockerfile b/docker/standalone/gpu/Dockerfile index 1ddc6560d16..daf6a7223ff 100644 --- a/docker/standalone/gpu/Dockerfile +++ b/docker/standalone/gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:cudnn +FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER caffe-maint@googlegroups.com RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/docs/installation.md b/docs/installation.md index 893164584d9..4aac7c42d27 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -5,13 +5,23 @@ title: Installation # Installation Prior to installing, have a glance through this guide and take note of the details for your platform. -We install and run Caffe on Ubuntu 14.04 and 12.04, OS X 10.10 / 10.9 / 10.8, and AWS. -The official Makefile and `Makefile.config` build are complemented by an automatic CMake build from the community. 
+We install and run Caffe on Ubuntu 16.04–12.04, OS X 10.11–10.8, and through Docker and AWS. +The official Makefile and `Makefile.config` build are complemented by a [community CMake build](#cmake-build). + +**Step-by-step Instructions**: + +- [Docker setup](https://github.com/BVLC/caffe/tree/master/docker) *out-of-the-box brewing* +- [Ubuntu installation](install_apt.html) *the standard platform* +- [OS X installation](install_osx.html) +- [RHEL / CentOS / Fedora installation](install_yum.html) +- [Windows](https://github.com/BVLC/caffe/tree/windows) *see the Windows branch led by Microsoft* +- [OpenCL](https://github.com/BVLC/caffe/tree/opencl) *see the OpenCL branch led by Fabian Tschopp* + +**Overview**: - [Prerequisites](#prerequisites) - [Compilation](#compilation) - [Hardware](#hardware) -- Platforms: [Ubuntu guide](install_apt.html), [OS X guide](install_osx.html), and [RHEL / CentOS / Fedora guide](install_yum.html) When updating Caffe, it's best to `make clean` before re-compiling. @@ -20,7 +30,7 @@ When updating Caffe, it's best to `make clean` before re-compiling. Caffe has several dependencies: * [CUDA](https://developer.nvidia.com/cuda-zone) is required for GPU mode. - * library version 7.0 and the latest driver version are recommended, but 6.* is fine too + * library version 7+ and the latest driver version are recommended, but 6.* is fine too * 5.5, and 5.0 are compatible but considered legacy * [BLAS](http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) via ATLAS, MKL, or OpenBLAS. * [Boost](http://www.boost.org/) >= 1.55 @@ -30,14 +40,14 @@ Optional dependencies: * [OpenCV](http://opencv.org/) >= 2.4 including 3.0 * IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`) -* cuDNN for GPU acceleration (v3) +* cuDNN for GPU acceleration (v5) Pycaffe and Matcaffe interfaces have their own natural needs. * For Python Caffe: `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python` * For MATLAB Caffe: MATLAB with the `mex` compiler. -**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v3; older versions are supported in older Caffe. +**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v5; older versions are supported in older Caffe. **CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment. @@ -82,10 +92,6 @@ Install MATLAB, and make sure that its `mex` is in your `$PATH`. *Caffe's MATLAB interface works with versions 2015a, 2014a/b, 2013a/b, and 2012b.* -#### Windows - -There is an unofficial Windows port of Caffe at [niuzhiheng/caffe:windows](https://github.com/niuzhiheng/caffe). Thanks [@niuzhiheng](https://github.com/niuzhiheng)! - ## Compilation Caffe can be compiled with either Make or CMake. Make is officially supported while CMake is supported by the community. @@ -113,7 +119,7 @@ Be sure to set your MATLAB and Python paths in `Makefile.config` first! 
Now that you have installed Caffe, check out the [MNIST tutorial](gathered/examples/mnist.html) and the [reference ImageNet model tutorial](gathered/examples/imagenet.html). -### Compilation with CMake +### CMake Build In lieu of manually editing `Makefile.config` to configure the build, Caffe offers an unofficial CMake build thanks to @Nerei, @akosiorek, and other members of the community. It requires CMake version >= 2.8.7. The basic steps are as follows: @@ -129,9 +135,9 @@ See [PR #1667](https://github.com/BVLC/caffe/pull/1667) for options and details. ## Hardware -**Laboratory Tested Hardware**: Berkeley Vision runs Caffe with K40s, K20s, and Titans including models at ImageNet/ILSVRC scale. We also run on GTX series cards (980s and 770s) and GPU-equipped MacBook Pros. We have not encountered any trouble in-house with devices with CUDA capability >= 3.0. All reported hardware issues thus-far have been due to GPU configuration, overheating, and the like. +**Laboratory Tested Hardware**: Berkeley Vision runs Caffe with Titan Xs, K80s, GTX 980s, K40s, K20s, Titans, and GTX 770s including models at ImageNet/ILSVRC scale. We have not encountered any trouble in-house with devices with CUDA capability >= 3.0. All reported hardware issues thus-far have been due to GPU configuration, overheating, and the like. -**CUDA compute capability**: devices with compute capability <= 2.0 may have to reduce CUDA thread numbers and batch sizes due to hardware constraints. Your mileage may vary. +**CUDA compute capability**: devices with compute capability <= 2.0 may have to reduce CUDA thread numbers and batch sizes due to hardware constraints. Brew with caution; we recommend compute capability >= 3.0. Once installed, check your times against our [reference performance numbers](performance_hardware.html) to make sure everything is configured properly. diff --git a/examples/cifar10/convert_cifar_data.cpp b/examples/cifar10/convert_cifar_data.cpp index e1b89f42fb6..7385a74a679 100644 --- a/examples/cifar10/convert_cifar_data.cpp +++ b/examples/cifar10/convert_cifar_data.cpp @@ -91,6 +91,8 @@ void convert_dataset(const string& input_folder, const string& output_folder, } int main(int argc, char** argv) { + FLAGS_alsologtostderr = 1; + if (argc != 4) { printf("This script converts the CIFAR dataset to the leveldb format used\n" "by caffe to perform classification.\n" diff --git a/examples/cpp_classification/readme.md b/examples/cpp_classification/readme.md index a086db1a035..0de2885b53c 100644 --- a/examples/cpp_classification/readme.md +++ b/examples/cpp_classification/readme.md @@ -42,7 +42,7 @@ script: The ImageNet labels file (also called the *synset file*) is also required in order to map a prediction to the name of the class: ``` -./data/ilsvrc12/get_ilsvrc_aux.sh. +./data/ilsvrc12/get_ilsvrc_aux.sh ``` Using the files that were downloaded, we can classify the provided cat image (`examples/images/cat.jpg`) using this command: diff --git a/examples/finetune_flickr_style/readme.md b/examples/finetune_flickr_style/readme.md index 9ba4c9217ff..188dedf1b9a 100644 --- a/examples/finetune_flickr_style/readme.md +++ b/examples/finetune_flickr_style/readme.md @@ -57,7 +57,11 @@ The prototxts in this example assume this, and also assume the presence of the I We'll also need the ImageNet-trained model, which you can obtain by running `./scripts/download_model_binary.py models/bvlc_reference_caffenet`. -Now we can train! (You can fine-tune in CPU mode by leaving out the `-gpu` flag.) +Now we can train! 
The key to fine-tuning is the `-weights` argument in the +command below, which tells Caffe that we want to load weights from a pre-trained +Caffe model. + +(You can fine-tune in CPU mode by leaving out the `-gpu` flag.) caffe % ./build/tools/caffe train -solver models/finetune_flickr_style/solver.prototxt -weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel -gpu 0 diff --git a/examples/images/cat gray.jpg b/examples/images/cat gray.jpg new file mode 100644 index 00000000000..43c5ce37716 Binary files /dev/null and b/examples/images/cat gray.jpg differ diff --git a/examples/mnist/convert_mnist_data.cpp b/examples/mnist/convert_mnist_data.cpp index 16d28093dd5..57ddef77074 100644 --- a/examples/mnist/convert_mnist_data.cpp +++ b/examples/mnist/convert_mnist_data.cpp @@ -22,12 +22,15 @@ #include // NOLINT(readability/streams) #include +#include "boost/scoped_ptr.hpp" #include "caffe/proto/caffe.pb.h" +#include "caffe/util/db.hpp" #include "caffe/util/format.hpp" #if defined(USE_LEVELDB) && defined(USE_LMDB) using namespace caffe; // NOLINT(build/namespaces) +using boost::scoped_ptr; using std::string; DEFINE_string(backend, "lmdb", "The backend for storing the result"); @@ -67,43 +70,10 @@ void convert_dataset(const char* image_filename, const char* label_filename, image_file.read(reinterpret_cast(&cols), 4); cols = swap_endian(cols); - // lmdb - MDB_env *mdb_env; - MDB_dbi mdb_dbi; - MDB_val mdb_key, mdb_data; - MDB_txn *mdb_txn; - // leveldb - leveldb::DB* db; - leveldb::Options options; - options.error_if_exists = true; - options.create_if_missing = true; - options.write_buffer_size = 268435456; - leveldb::WriteBatch* batch = NULL; - - // Open db - if (db_backend == "leveldb") { // leveldb - LOG(INFO) << "Opening leveldb " << db_path; - leveldb::Status status = leveldb::DB::Open( - options, db_path, &db); - CHECK(status.ok()) << "Failed to open leveldb " << db_path - << ". Is it already existing?"; - batch = new leveldb::WriteBatch(); - } else if (db_backend == "lmdb") { // lmdb - LOG(INFO) << "Opening lmdb " << db_path; - CHECK_EQ(mkdir(db_path, 0744), 0) - << "mkdir " << db_path << "failed"; - CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed"; - CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS) // 1TB - << "mdb_env_set_mapsize failed"; - CHECK_EQ(mdb_env_open(mdb_env, db_path, 0, 0664), MDB_SUCCESS) - << "mdb_env_open failed"; - CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS) - << "mdb_txn_begin failed"; - CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS) - << "mdb_open failed. Does the lmdb already exist? 
"; - } else { - LOG(FATAL) << "Unknown db backend " << db_backend; - } + + scoped_ptr db(db::GetDB(db_backend)); + db->Open(db_path, db::NEW); + scoped_ptr txn(db->NewTransaction()); // Storing to db char label; @@ -125,52 +95,19 @@ void convert_dataset(const char* image_filename, const char* label_filename, string key_str = caffe::format_int(item_id, 8); datum.SerializeToString(&value); - // Put in db - if (db_backend == "leveldb") { // leveldb - batch->Put(key_str, value); - } else if (db_backend == "lmdb") { // lmdb - mdb_data.mv_size = value.size(); - mdb_data.mv_data = reinterpret_cast(&value[0]); - mdb_key.mv_size = key_str.size(); - mdb_key.mv_data = reinterpret_cast(&key_str[0]); - CHECK_EQ(mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0), MDB_SUCCESS) - << "mdb_put failed"; - } else { - LOG(FATAL) << "Unknown db backend " << db_backend; - } + txn->Put(key_str, value); if (++count % 1000 == 0) { - // Commit txn - if (db_backend == "leveldb") { // leveldb - db->Write(leveldb::WriteOptions(), batch); - delete batch; - batch = new leveldb::WriteBatch(); - } else if (db_backend == "lmdb") { // lmdb - CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS) - << "mdb_txn_commit failed"; - CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS) - << "mdb_txn_begin failed"; - } else { - LOG(FATAL) << "Unknown db backend " << db_backend; - } + txn->Commit(); } } // write the last batch if (count % 1000 != 0) { - if (db_backend == "leveldb") { // leveldb - db->Write(leveldb::WriteOptions(), batch); - delete batch; - delete db; - } else if (db_backend == "lmdb") { // lmdb - CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS) << "mdb_txn_commit failed"; - mdb_close(mdb_env, mdb_dbi); - mdb_env_close(mdb_env); - } else { - LOG(FATAL) << "Unknown db backend " << db_backend; - } - LOG(ERROR) << "Processed " << count << " files."; + txn->Commit(); } + LOG(INFO) << "Processed " << count << " files."; delete[] pixels; + db->Close(); } int main(int argc, char** argv) { @@ -178,6 +115,8 @@ int main(int argc, char** argv) { namespace gflags = google; #endif + FLAGS_alsologtostderr = 1; + gflags::SetUsageMessage("This script converts the MNIST dataset to\n" "the lmdb/leveldb format used by Caffe to load data.\n" "Usage:\n" diff --git a/examples/mnist/readme.md b/examples/mnist/readme.md index b87a0f53c7a..35952155a30 100644 --- a/examples/mnist/readme.md +++ b/examples/mnist/readme.md @@ -248,7 +248,7 @@ These messages tell you the details about each layer, its connections and its ou I1203 solver.cpp:36] Solver scaffolding done. I1203 solver.cpp:44] Solving LeNet -Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 1000 iterations. You will see messages like this: +Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 500 iterations. 
You will see messages like this: I1203 solver.cpp:204] Iteration 100, lr = 0.00992565 I1203 solver.cpp:66] Iteration 100, loss = 0.26044 diff --git a/examples/net_surgery.ipynb b/examples/net_surgery.ipynb index a6092db0c40..d50d503bfe0 100644 --- a/examples/net_surgery.ipynb +++ b/examples/net_surgery.ipynb @@ -22,7 +22,6 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", - "import Image\n", "\n", "# Make sure that caffe is on the python path:\n", "caffe_root = '../' # this file is expected to be in {caffe_root}/examples\n", @@ -3511,7 +3510,7 @@ "print(\"blobs {}\\nparams {}\".format(net.blobs.keys(), net.params.keys()))\n", "\n", "# load image and prepare as a single input batch for Caffe\n", - "im = np.array(Image.open('images/cat_gray.jpg'))\n", + "im = np.array(caffe.io.load_image('images/cat_gray.jpg', color=False)).squeeze()\n", "plt.title(\"original image\")\n", "plt.imshow(im)\n", "plt.axis('off')\n", @@ -4480,8 +4479,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "pre-surgery output mean -12.93\n", - "post-surgery output mean -11.93\n" + "pre-surgery output mean -0.02\n", + "post-surgery output mean 0.98\n" ] } ], @@ -4489,7 +4488,7 @@ "# pick first filter output\n", "conv0 = net.blobs['conv'].data[0, 0]\n", "print(\"pre-surgery output mean {:.2f}\".format(conv0.mean()))\n", - "# set first filter bias to 10\n", + "# set first filter bias to 1\n", "net.params['conv'][1].data[0] = 1.\n", "net.forward()\n", "print(\"post-surgery output mean {:.2f}\".format(conv0.mean()))" @@ -5494,13 +5493,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "1,2c1,2\r\n", + "1,2c1\r\n", "< # Fully convolutional network version of CaffeNet.\r\n", "< name: \"CaffeNetConv\"\r\n", "---\r\n", "> name: \"CaffeNet\"\r\n", - "> input: \"data\"\r\n", - "7,11c7\r\n", + "7,11c6\r\n", "< input_param {\r\n", "< # initial shape for a fully convolutional network:\r\n", "< # the shape can be set for each input by reshape.\r\n", @@ -5508,33 +5506,33 @@ "< }\r\n", "---\r\n", "> input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } }\r\n", - "157,158c153,154\r\n", + "157,158c152,153\r\n", "< name: \"fc6-conv\"\r\n", "< type: \"Convolution\"\r\n", "---\r\n", "> name: \"fc6\"\r\n", "> type: \"InnerProduct\"\r\n", - "160,161c156,157\r\n", + "160,161c155,156\r\n", "< top: \"fc6-conv\"\r\n", "< convolution_param {\r\n", "---\r\n", "> top: \"fc6\"\r\n", "> inner_product_param {\r\n", - "163d158\r\n", + "163d157\r\n", "< kernel_size: 6\r\n", - "169,170c164,165\r\n", + "169,170c163,164\r\n", "< bottom: \"fc6-conv\"\r\n", "< top: \"fc6-conv\"\r\n", "---\r\n", "> bottom: \"fc6\"\r\n", "> top: \"fc6\"\r\n", - "175,176c170,171\r\n", + "175,176c169,170\r\n", "< bottom: \"fc6-conv\"\r\n", "< top: \"fc6-conv\"\r\n", "---\r\n", "> bottom: \"fc6\"\r\n", "> top: \"fc6\"\r\n", - "182,186c177,181\r\n", + "182,186c176,180\r\n", "< name: \"fc7-conv\"\r\n", "< type: \"Convolution\"\r\n", "< bottom: \"fc6-conv\"\r\n", @@ -5546,21 +5544,21 @@ "> bottom: \"fc6\"\r\n", "> top: \"fc7\"\r\n", "> inner_product_param {\r\n", - "188d182\r\n", + "188d181\r\n", "< kernel_size: 1\r\n", - "194,195c188,189\r\n", + "194,195c187,188\r\n", "< bottom: \"fc7-conv\"\r\n", "< top: \"fc7-conv\"\r\n", "---\r\n", "> bottom: \"fc7\"\r\n", "> top: \"fc7\"\r\n", - "200,201c194,195\r\n", + "200,201c193,194\r\n", "< bottom: \"fc7-conv\"\r\n", "< top: \"fc7-conv\"\r\n", "---\r\n", "> bottom: \"fc7\"\r\n", "> top: \"fc7\"\r\n", - "207,211c201,205\r\n", + "207,211c200,204\r\n", "< name: \"fc8-conv\"\r\n", "< 
type: \"Convolution\"\r\n", "< bottom: \"fc7-conv\"\r\n", @@ -5572,9 +5570,9 @@ "> bottom: \"fc7\"\r\n", "> top: \"fc8\"\r\n", "> inner_product_param {\r\n", - "213d206\r\n", + "213d205\r\n", "< kernel_size: 1\r\n", - "219c212\r\n", + "219c211\r\n", "< bottom: \"fc8-conv\"\r\n", "---\r\n", "> bottom: \"fc8\"\r\n" @@ -5610,13 +5608,6 @@ } ], "source": [ - "# Make sure that caffe is on the python path:\n", - "caffe_root = '../' # this file is expected to be in {caffe_root}/examples\n", - "import sys\n", - "sys.path.insert(0, caffe_root + 'python')\n", - "\n", - "import caffe\n", - "\n", "# Load the original network and extract the fully connected layers' parameters.\n", "net = caffe.Net('../models/bvlc_reference_caffenet/deploy.prototxt', \n", " '../models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel', \n", diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 3c6a076ec2f..215c6b77e7c 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -61,10 +61,25 @@ private:\ const std::vector& propagate_down, \ const std::vector*>& bottom) +#define INSTANTIATE_LAYER_GPU_DECONV(classname) \ + template void classname::Deconv_gpu( \ + const std::vector*>& top, \ + const std::vector& propagate_down, \ + const std::vector*>& bottom); \ + template void classname::Deconv_gpu( \ + const std::vector*>& top, \ + const std::vector& propagate_down, \ + const std::vector*>& bottom) + #define INSTANTIATE_LAYER_GPU_FUNCS(classname) \ INSTANTIATE_LAYER_GPU_FORWARD(classname); \ INSTANTIATE_LAYER_GPU_BACKWARD(classname) +#define INSTANTIATE_LAYER_GPU_FUNCS_WITH_DECONV(classname) \ + INSTANTIATE_LAYER_GPU_FORWARD(classname); \ + INSTANTIATE_LAYER_GPU_BACKWARD(classname); \ + INSTANTIATE_LAYER_GPU_DECONV(classname) + // A simple macro to mark codes that are not implemented, so that when the code // is executed we will see a fatal log. #define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet" diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index 10f353f94f9..703d92cf157 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -176,6 +176,27 @@ class Layer { const vector& propagate_down, const vector*>& bottom); + /** + * @brief Given the top blob deconv info, compute the bottom blob deconv. Similar to Backward. + * + * The Deconv wrapper calls the relevant device wrapper function + * (Deconv_cpu or Deconv_gpu) to compute the bottom blob diffs given the + * top blob diffs. + * + * Your layer should implement Deconv_cpu and Deconv_gpu. + * + * Note: By default, Deconv_gpu will just call Backward_gpu, and + * Deconv_cpu will just call Backward_cpu. In many cases this + * behavior is desired, e.g. for convolution or innerproduct or + * pooling layers. If this is not the desired behavior, override + * Deconv_cpu AND Deconv_gpu. If only one of Deconv_{cpu,gpu} is + * overridden, the other will still defer to Backward_{cpu,gpu}, + * which will lead to confusing and inconsistent behavior! + */ + inline void Deconv(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom); + /** * @brief Returns the vector of learnable parameter blobs. */ @@ -363,6 +384,26 @@ class Layer { Backward_cpu(top, propagate_down, bottom); } + /** + * @brief Using the CPU device, compute the deconv (Zeiler et al, 2013) for the bottom blobs. 
+ */ + virtual void Deconv_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + // LOG(WARNING) << "Explicit Deconv_cpu not implemented for " << type() << " yet; falling back to backward_cpu."; + Backward_cpu(top, propagate_down, bottom); + } + /** + * @brief Using the GPU device, compute the deconv (Zeiler et al, 2013) for the bottom blobs. + * Fall back to Deconv_cpu() if unavailable. + */ + virtual void Deconv_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + // LOG(WARNING) << "Explicit Deconv_gpu not implemented for " << type() << " yet; falling back to backward_gpu."; + Backward_gpu(top, propagate_down, bottom); + } + /** * Called by the parent Layer's SetUp to check that the number of bottom * and top Blobs provided as input match the expected numbers specified by @@ -502,6 +543,22 @@ inline void Layer::Backward(const vector*>& top, } } +template +inline void Layer::Deconv(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + switch (Caffe::mode()) { + case Caffe::CPU: + Deconv_cpu(top, propagate_down, bottom); + break; + case Caffe::GPU: + Deconv_gpu(top, propagate_down, bottom); + break; + default: + LOG(FATAL) << "Unknown caffe mode."; + } +} + // Serialize LayerParameter to protocol buffer template void Layer::ToProto(LayerParameter* param, bool write_diff) { diff --git a/include/caffe/layers/crop_layer.hpp b/include/caffe/layers/crop_layer.hpp index 5c605b2ae9e..c4fda1220c3 100644 --- a/include/caffe/layers/crop_layer.hpp +++ b/include/caffe/layers/crop_layer.hpp @@ -44,6 +44,7 @@ class CropLayer : public Layer { vector offsets; private: + // Recursive copy function. void crop_copy(const vector*>& bottom, const vector*>& top, const vector& offsets, @@ -53,6 +54,14 @@ class CropLayer : public Layer { Dtype* dest_data, bool is_forward); + // Recursive copy function: this is similar to crop_copy() but loops over all + // but the last two dimensions to allow for ND cropping while still relying on + // a CUDA kernel for the innermost two dimensions for performance reasons. An + // alternative implementation could rely on the kernel more by passing + // offsets, but this is problematic because of its variable length. + // Since in the standard (N,C,W,H) case N,C are usually not cropped, a speedup + // could be achieved by not looping the application of the copy_kernel around + // these dimensions.
void crop_copy_gpu(const vector*>& bottom, const vector*>& top, const vector& offsets, diff --git a/include/caffe/layers/cudnn_relu_layer.hpp b/include/caffe/layers/cudnn_relu_layer.hpp index e01f568abc9..a1cb29e7c5f 100644 --- a/include/caffe/layers/cudnn_relu_layer.hpp +++ b/include/caffe/layers/cudnn_relu_layer.hpp @@ -37,6 +37,7 @@ class CuDNNReLULayer : public ReLULayer { cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; + cudnnActivationDescriptor_t activ_desc_; }; #endif diff --git a/include/caffe/layers/cudnn_sigmoid_layer.hpp b/include/caffe/layers/cudnn_sigmoid_layer.hpp index 9c597958b0b..7b3486f8a7e 100644 --- a/include/caffe/layers/cudnn_sigmoid_layer.hpp +++ b/include/caffe/layers/cudnn_sigmoid_layer.hpp @@ -37,6 +37,7 @@ class CuDNNSigmoidLayer : public SigmoidLayer { cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; + cudnnActivationDescriptor_t activ_desc_; }; #endif diff --git a/include/caffe/layers/cudnn_tanh_layer.hpp b/include/caffe/layers/cudnn_tanh_layer.hpp index c0f0053f71e..59e758d7031 100644 --- a/include/caffe/layers/cudnn_tanh_layer.hpp +++ b/include/caffe/layers/cudnn_tanh_layer.hpp @@ -37,6 +37,7 @@ class CuDNNTanHLayer : public TanHLayer { cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; + cudnnActivationDescriptor_t activ_desc_; }; #endif diff --git a/include/caffe/layers/lrn_layer.hpp b/include/caffe/layers/lrn_layer.hpp index 06cf71a94cb..767f297c7b2 100644 --- a/include/caffe/layers/lrn_layer.hpp +++ b/include/caffe/layers/lrn_layer.hpp @@ -43,6 +43,15 @@ class LRNLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + virtual void Deconv_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Deconv_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Deconv_passthrough_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Deconv_passthrough_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void CrossChannelForward_cpu(const vector*>& bottom, const vector*>& top); virtual void CrossChannelForward_gpu(const vector*>& bottom, @@ -87,6 +96,9 @@ class LRNLayer : public Layer { shared_ptr > product_layer_; Blob product_input_; vector*> product_bottom_vec_; + + // Fields used for deconv + bool deconv_ignore_; }; } // namespace caffe diff --git a/include/caffe/layers/parameter_layer.hpp b/include/caffe/layers/parameter_layer.hpp new file mode 100644 index 00000000000..188b92acbe2 --- /dev/null +++ b/include/caffe/layers/parameter_layer.hpp @@ -0,0 +1,45 @@ +#ifndef CAFFE_PARAMETER_LAYER_HPP_ +#define CAFFE_PARAMETER_LAYER_HPP_ + +#include + +#include "caffe/layer.hpp" + +namespace caffe { + +template +class ParameterLayer : public Layer { + public: + explicit ParameterLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top) { + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + this->blobs_.resize(1); + this->blobs_[0].reset(new Blob()); + this->blobs_[0]->Reshape(this->layer_param_.parameter_param().shape()); + } + top[0]->Reshape(this->layer_param_.parameter_param().shape()); + } + virtual void Reshape(const vector*>& bottom, + const vector*>& top) { } + virtual inline 
const char* type() const { return "Parameter"; } + virtual inline int ExactNumBottomBlobs() const { return 0; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top) { + top[0]->ShareData(*(this->blobs_[0])); + top[0]->ShareDiff(*(this->blobs_[0])); + } + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) + { } +}; + +} // namespace caffe + +#endif diff --git a/include/caffe/layers/python_layer.hpp b/include/caffe/layers/python_layer.hpp index b839d52684e..66dbbdf13b8 100644 --- a/include/caffe/layers/python_layer.hpp +++ b/include/caffe/layers/python_layer.hpp @@ -26,6 +26,7 @@ class PythonLayer : public Layer { } self_.attr("param_str") = bp::str( this->layer_param_.python_param().param_str()); + self_.attr("phase") = static_cast(this->phase_); self_.attr("setup")(bottom, top); } virtual void Reshape(const vector*>& bottom, diff --git a/include/caffe/layers/relu_layer.hpp b/include/caffe/layers/relu_layer.hpp index d7a73f7a8d1..b79dec17671 100644 --- a/include/caffe/layers/relu_layer.hpp +++ b/include/caffe/layers/relu_layer.hpp @@ -78,6 +78,11 @@ class ReLULayer : public NeuronLayer { const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + + virtual void Deconv_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Deconv_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); }; } // namespace caffe diff --git a/include/caffe/layers/softmax_layer.hpp b/include/caffe/layers/softmax_layer.hpp index c65b8703e43..46f57de033b 100644 --- a/include/caffe/layers/softmax_layer.hpp +++ b/include/caffe/layers/softmax_layer.hpp @@ -36,6 +36,7 @@ class SoftmaxLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + int outer_num_; int inner_num_; int softmax_axis_; diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 0addb3c2a6d..ba5ffef711e 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -74,6 +74,14 @@ class Net { void BackwardFrom(int start); void BackwardTo(int end); + /** + * The network deconv works similarly to backward and also takes no input and output. + */ + void Deconv(); + void DeconvFromTo(int start, int end); + void DeconvFrom(int start); + void DeconvTo(int end); + /** * @brief Reshape all layers from bottom to top. * @@ -245,6 +253,8 @@ class Net { void ForwardDebugInfo(const int layer_id); /// @brief Helper for displaying debug info in Backward. void BackwardDebugInfo(const int layer_id); + /// @brief Helper for displaying debug info in Deconv. + void DeconvDebugInfo(const int layer_id); /// @brief Helper for displaying debug info in Update. 
void UpdateDebugInfo(const int param_id); diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp index 8a7e17c6cd4..a7d8dbbad4c 100644 --- a/include/caffe/util/cudnn.hpp +++ b/include/caffe/util/cudnn.hpp @@ -91,8 +91,13 @@ template inline void createFilterDesc(cudnnFilterDescriptor_t* desc, int n, int c, int h, int w) { CUDNN_CHECK(cudnnCreateFilterDescriptor(desc)); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType::type, - n, c, h, w)); + CUDNN_TENSOR_NCHW, n, c, h, w)); +#else + CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType::type, + CUDNN_TENSOR_NCHW, n, c, h, w)); +#endif } template @@ -123,8 +128,21 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc, LOG(FATAL) << "Unknown pooling method."; } CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc)); - CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, h, w, - pad_h, pad_w, stride_h, stride_w)); +#if CUDNN_VERSION_MIN(5, 0, 0) + CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, + CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w)); +#else + CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(*pool_desc, *mode, + CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w)); +#endif +} + +template +inline void createActivationDescriptor(cudnnActivationDescriptor_t* activ_desc, + cudnnActivationMode_t mode) { + CUDNN_CHECK(cudnnCreateActivationDescriptor(activ_desc)); + CUDNN_CHECK(cudnnSetActivationDescriptor(*activ_desc, mode, + CUDNN_PROPAGATE_NAN, Dtype(0))); } } // namespace cudnn diff --git a/include/caffe/util/db_lmdb.hpp b/include/caffe/util/db_lmdb.hpp index 4e1568ace50..ee370322383 100644 --- a/include/caffe/util/db_lmdb.hpp +++ b/include/caffe/util/db_lmdb.hpp @@ -3,6 +3,7 @@ #define CAFFE_UTIL_DB_LMDB_HPP #include +#include #include "lmdb.h" @@ -54,14 +55,16 @@ class LMDBCursor : public Cursor { class LMDBTransaction : public Transaction { public: - explicit LMDBTransaction(MDB_dbi* mdb_dbi, MDB_txn* mdb_txn) - : mdb_dbi_(mdb_dbi), mdb_txn_(mdb_txn) { } + explicit LMDBTransaction(MDB_env* mdb_env) + : mdb_env_(mdb_env) { } virtual void Put(const string& key, const string& value); - virtual void Commit() { MDB_CHECK(mdb_txn_commit(mdb_txn_)); } + virtual void Commit(); private: - MDB_dbi* mdb_dbi_; - MDB_txn* mdb_txn_; + MDB_env* mdb_env_; + vector keys, values; + + void DoubleMapSize(); DISABLE_COPY_AND_ASSIGN(LMDBTransaction); }; diff --git a/include/caffe/util/device_alternate.hpp b/include/caffe/util/device_alternate.hpp index e3fe4fe29fd..496d4a073ae 100644 --- a/include/caffe/util/device_alternate.hpp +++ b/include/caffe/util/device_alternate.hpp @@ -18,6 +18,19 @@ void classname::Backward_gpu(const vector*>& top, \ const vector& propagate_down, \ const vector*>& bottom) { NO_GPU; } \ +#define STUB_GPU_WITH_DECONV(classname) \ +template \ +void classname::Forward_gpu(const vector*>& bottom, \ + const vector*>& top) { NO_GPU; } \ +template \ +void classname::Backward_gpu(const vector*>& top, \ + const vector& propagate_down, \ + const vector*>& bottom) { NO_GPU; } \ +template \ +void classname::Deconv_gpu(const vector*>& top, \ + const vector& propagate_down, \ + const vector*>& bottom) { NO_GPU; } \ + #define STUB_GPU_FORWARD(classname, funcname) \ template \ void classname::funcname##_##gpu(const vector*>& bottom, \ @@ -29,6 +42,12 @@ void classname::funcname##_##gpu(const vector*>& top, \ const vector& propagate_down, \ const vector*>& bottom) { NO_GPU; } \ +#define STUB_GPU_DECONV(classname, funcname) \ +template \ 
+void classname::funcname##_##gpu(const vector*>& top, \ + const vector& propagate_down, \ + const vector*>& bottom) { NO_GPU; } \ + #else // Normal GPU + CPU Caffe. #include diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index a2c46a123aa..1dbde5da31e 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -26,6 +26,19 @@ #define PyArray_SetBaseObject(arr, x) (PyArray_BASE(arr) = (x)) #endif +/* Fix to avoid registration warnings in pycaffe (#3960) */ +#define BP_REGISTER_SHARED_PTR_TO_PYTHON(PTR) do { \ + const boost::python::type_info info = \ + boost::python::type_id >(); \ + const boost::python::converter::registration* reg = \ + boost::python::converter::registry::query(info); \ + if (reg == NULL) { \ + bp::register_ptr_to_python >(); \ + } else if ((*reg).m_to_python == NULL) { \ + bp::register_ptr_to_python >(); \ + } \ +} while (0) + namespace bp = boost::python; namespace caffe { @@ -228,6 +241,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("__init__", bp::make_constructor(&Net_Init_Load)) .def("_forward", &Net::ForwardFromTo) .def("_backward", &Net::BackwardFromTo) + .def("_deconv", &Net::DeconvFromTo) .def("reshape", &Net::Reshape) // The cast is to select a particular overload. .def("copy_from", static_cast::*)(const string)>( @@ -255,7 +269,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("_set_input_arrays", &Net_SetInputArrays, bp::with_custodian_and_ward<1, 2, bp::with_custodian_and_ward<1, 3> >()) .def("save", &Net_Save); - bp::register_ptr_to_python > >(); + BP_REGISTER_SHARED_PTR_TO_PYTHON(Net); bp::class_, shared_ptr >, boost::noncopyable>( "Blob", bp::no_init) @@ -275,7 +289,7 @@ BOOST_PYTHON_MODULE(_caffe) { NdarrayCallPolicies())) .add_property("diff", bp::make_function(&Blob::mutable_cpu_diff, NdarrayCallPolicies())); - bp::register_ptr_to_python > >(); + BP_REGISTER_SHARED_PTR_TO_PYTHON(Blob); bp::class_, shared_ptr >, boost::noncopyable>("Layer", bp::init()) @@ -284,7 +298,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("setup", &Layer::LayerSetUp) .def("reshape", &Layer::Reshape) .add_property("type", bp::make_function(&Layer::type)); - bp::register_ptr_to_python > >(); + BP_REGISTER_SHARED_PTR_TO_PYTHON(Layer); bp::class_("LayerParameter", bp::no_init); @@ -299,7 +313,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("step", &Solver::Step) .def("restore", &Solver::Restore) .def("snapshot", &Solver::Snapshot); - bp::register_ptr_to_python > >(); + BP_REGISTER_SHARED_PTR_TO_PYTHON(Solver); bp::class_, bp::bases >, shared_ptr >, boost::noncopyable>( diff --git a/python/caffe/classifier.py b/python/caffe/classifier.py index 537193db8f8..ea29fed86f9 100644 --- a/python/caffe/classifier.py +++ b/python/caffe/classifier.py @@ -79,6 +79,7 @@ def predict(self, inputs, oversample=True): -self.crop_dims / 2.0, self.crop_dims / 2.0 ]) + crop = crop.astype(int) input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :] # Classify diff --git a/python/caffe/draw.py b/python/caffe/draw.py index cfa3fc5b1fb..61205ca9f37 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -142,7 +142,7 @@ def get_pydot_graph(caffe_net, rankdir, label_edges=True): ------- pydot graph object """ - pydot_graph = pydot.Dot(caffe_net.name, + pydot_graph = pydot.Dot(caffe_net.name if caffe_net.name else 'Net', graph_type='digraph', rankdir=rankdir) pydot_nodes = {} diff --git a/python/caffe/io.py b/python/caffe/io.py index 75310589cec..e1759beb587 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -46,7 +46,7 @@ def array_to_blobproto(arr, diff=None): return blob -def 
arraylist_to_blobprotovecor_str(arraylist): +def arraylist_to_blobprotovector_str(arraylist): """Converts a list of arrays to a serialized blobprotovec, which could be then passed to a network for processing. """ @@ -63,7 +63,7 @@ def blobprotovector_str_to_arraylist(str): return [blobproto_to_array(blob) for blob in vec.blobs] -def array_to_datum(arr, label=0): +def array_to_datum(arr, label=None): """Converts a 3-dimensional array to datum. If the array has dtype uint8, the output data will be encoded as a string. Otherwise, the output data will be stored in float format. """ @@ -76,7 +76,8 @@ def array_to_datum(arr, label=None): datum.data = arr.tostring() else: datum.float_data.extend(arr.flat) - datum.label = label + if label is not None: + datum.label = label return datum diff --git a/python/caffe/net_spec.py b/python/caffe/net_spec.py index 63de4cce4b2..5fb1f0b3fb1 100644 --- a/python/caffe/net_spec.py +++ b/python/caffe/net_spec.py @@ -32,7 +32,7 @@ def param_name_dict(): # get all parameter names (typically underscore case) and corresponding # type names (typically camel case), which contain the layer names # (note that not all parameters correspond to layers, but we'll ignore that) - param_names = [s for s in dir(layer) if s.endswith('_param')] + param_names = [f.name for f in layer.DESCRIPTOR.fields if f.name.endswith('_param')] param_type_names = [type(getattr(layer, s)).__name__ for s in param_names] # strip the final '_param' or 'Parameter' param_names = [s[:-len('_param')] for s in param_names] diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index c5c0b824a77..17c4ee577b5 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -27,7 +27,9 @@ def _Net_blobs(self): An OrderedDict (bottom to top, i.e., input to output) of network blobs indexed by name """ - return OrderedDict(zip(self._blob_names, self._blobs)) + if not hasattr(self, '_blobs_dict'): + self._blobs_dict = OrderedDict(zip(self._blob_names, self._blobs)) + return self._blobs_dict @property @@ -36,7 +38,10 @@ def _Net_blob_loss_weights(self): An OrderedDict (bottom to top, i.e., input to output) of network blob loss weights indexed by name """ - return OrderedDict(zip(self._blob_names, self._blob_loss_weights)) + if not hasattr(self, '_blob_loss_weights_dict'): + self._blob_loss_weights_dict = OrderedDict(zip(self._blob_names, + self._blob_loss_weights)) + return self._blob_loss_weights_dict @property @@ -46,19 +51,155 @@ def _Net_params(self): parameters indexed by name; each is a list of multiple blobs (e.g., weights and biases) """ - return OrderedDict([(name, lr.blobs) - for name, lr in zip(self._layer_names, self.layers) - if len(lr.blobs) > 0]) + if not hasattr(self, '_params_dict'): + self._params_dict = OrderedDict([(name, lr.blobs) + for name, lr in zip( + self._layer_names, self.layers) + if len(lr.blobs) > 0]) + return self._params_dict + + +def _Net_zero(self, zero_param_diffs = True): + """ + Set all activations (data and diffs) in the net to zero. + + Take + zero_param_diffs: If True, also zero the parameter blob diffs, + else skip parameter blobs. + """ + + for blob_name, blob in self.blobs.items(): + blob.data[...] = 0 + blob.diff[...] = 0 + if zero_param_diffs: + for param_name, blob_vec in self.params.items(): + for blob in blob_vec: + blob.diff[...] = 0 + + +def _Net_backward_from_layer(self, start_name, start_diff, diffs=None, zero_higher=False): + """ + Backward pass starting from somewhere in the middle of the + network, starting with the provided diffs.
+ + Take + start_name: layer at which to begin the backward pass + start_diff: diff to set at start_name layer + diffs: list of diffs to return in addition to bottom diffs. + zero_higher: whether or not to zero out higher layers to reflect the true 0 derivative or leave them alone to save time. + + Give + outs: {blob name: diff ndarray} dict. + """ + + if start_diff.shape != self.blobs[start_name].diff.shape: + raise Exception('Expected start_diff of shape %s but got %s' % (self.blobs[start_name].diff.shape, start_diff.shape)) + + self.blobs[start_name].diff[...] = start_diff + + if zero_higher: + past_start = False + for blob_name, blob in self.blobs.items(): + if past_start: + blob.diff[...] = 0 + if blob_name == start_name: + past_start = True + + return self.backward(start=start_name, diffs=diffs) + + +def _Net_deconv_from_layer(self, start_name, start_diff, diffs=None, zero_higher=False): + """ + Deconv pass starting from somewhere in the middle of the + network, starting with the provided diffs. + + Take + start_name: layer at which to begin the deconv pass + start_diff: diff to set at start_name layer + diffs: list of diffs to return in addition to bottom diffs. + zero_higher: whether or not to zero out higher layers to reflect the true 0 derivative or leave them alone to save time. + + Give + outs: {blob name: diff ndarray} dict. + """ + + if start_diff.shape != self.blobs[start_name].diff.shape: + raise Exception('Expected start_diff of shape %s but got %s' % (self.blobs[start_name].diff.shape, start_diff.shape)) + + self.blobs[start_name].diff[...] = start_diff + + if zero_higher: + past_start = False + for blob_name, blob in self.blobs.items(): + if past_start: + blob.diff[...] = 0 + if blob_name == start_name: + past_start = True + + return self.deconv(start=start_name, diffs=diffs) + + +def _Net_deconv(self, diffs=None, start=None, end=None, **kwargs): + """ + Deconv pass: prepare diffs and run the net backward in deconv mode. Just like _Net_Backward but calls Deconv instead. + + Take + diffs: list of diffs to return in addition to bottom diffs. + kwargs: Keys are output blob names and values are diff ndarrays. + If None, top diffs are taken from forward loss. + start: optional name of layer at which to begin the backward pass + end: optional name of layer at which to finish the backward pass (inclusive) + + Give + outs: {blob name: diff ndarray} dict. + """ + if diffs is None: + diffs = [] + + if start is not None: + start_ind = list(self._layer_names).index(start) + else: + start_ind = len(self.layers) - 1 + + if end is not None: + end_ind = list(self._layer_names).index(end) + outputs = set([end] + diffs) + else: + end_ind = 0 + outputs = set(self.inputs + diffs) + + if kwargs: + if set(kwargs.keys()) != set(self.outputs): + raise Exception('Top diff arguments do not match net outputs.') + # Set top diffs according to defined shapes and make arrays single and + # C-contiguous as Caffe expects. + for top, diff in kwargs.iteritems(): + if diff.ndim != 4: + raise Exception('{} diff is not 4-d'.format(top)) + if diff.shape[0] != self.blobs[top].num: + raise Exception('Diff is not batch sized') + self.blobs[top].diff[...] 
= diff + + self._deconv(start_ind, end_ind) + + # Unpack diffs to extract + return {out: self.blobs[out].diff for out in outputs} @property def _Net_inputs(self): - return [list(self.blobs.keys())[i] for i in self._inputs] + if not hasattr(self, '_input_list'): + keys = list(self.blobs.keys()) + self._input_list = [keys[i] for i in self._inputs] + return self._input_list @property def _Net_outputs(self): - return [list(self.blobs.keys())[i] for i in self._outputs] + if not hasattr(self, '_output_list'): + keys = list(self.blobs.keys()) + self._output_list = [keys[i] for i in self._outputs] + return self._output_list def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): @@ -298,8 +439,12 @@ def __getitem__(self, name): Net.blobs = _Net_blobs Net.blob_loss_weights = _Net_blob_loss_weights Net.params = _Net_params +Net.zero = _Net_zero +Net.backward_from_layer = _Net_backward_from_layer +Net.deconv_from_layer = _Net_deconv_from_layer Net.forward = _Net_forward Net.backward = _Net_backward +Net.deconv = _Net_deconv Net.forward_all = _Net_forward_all Net.forward_backward_all = _Net_forward_backward_all Net.set_input_arrays = _Net_set_input_arrays diff --git a/python/caffe/test/test_io.py b/python/caffe/test/test_io.py index 8c86ef75fb2..4a16b5b9128 100644 --- a/python/caffe/test/test_io.py +++ b/python/caffe/test/test_io.py @@ -39,3 +39,18 @@ def test_scalar(self): arr = caffe.io.blobproto_to_array(blob) self.assertEqual(arr, 123) + + +class TestArrayToDatum(unittest.TestCase): + + def test_label_none_size(self): + # Set label + d1 = caffe.io.array_to_datum( + np.ones((10,10,3)), label=1) + # Don't set label + d2 = caffe.io.array_to_datum( + np.ones((10,10,3))) + # Not setting the label should result in a smaller object + self.assertGreater( + len(d1.SerializeToString()), + len(d2.SerializeToString())) diff --git a/python/caffe/test/test_python_layer.py b/python/caffe/test/test_python_layer.py index e46b7118014..899514e90f1 100644 --- a/python/caffe/test/test_python_layer.py +++ b/python/caffe/test/test_python_layer.py @@ -44,6 +44,18 @@ def forward(self, bottom, top): def backward(self, top, propagate_down, bottom): self.blobs[0].diff[0] = 1 +class PhaseLayer(caffe.Layer): + """A layer for checking attribute `phase`""" + + def setup(self, bottom, top): + pass + + def reshape(self, bootom, top): + top[0].reshape() + + def forward(self, bottom, top): + top[0].data[()] = self.phase + def python_net_file(): with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f: f.write("""name: 'pythonnet' force_backward: true @@ -76,6 +88,14 @@ def parameter_net_file(): """) return f.name +def phase_net_file(): + with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f: + f.write("""name: 'pythonnet' force_backward: true + layer { type: 'Python' name: 'layer' top: 'phase' + python_param { module: 'test_python_layer' layer: 'PhaseLayer' } } + """) + return f.name + @unittest.skipIf('Python' not in caffe.layer_type_list(), 'Caffe built without Python layer support') @@ -140,3 +160,9 @@ def test_parameter(self): self.assertEqual(layer.blobs[0].data[0], 1) os.remove(net_file) + + def test_phase(self): + net_file = phase_net_file() + for phase in caffe.TRAIN, caffe.TEST: + net = caffe.Net(net_file, phase) + self.assertEqual(net.forward()['phase'], phase) diff --git a/scripts/download_model_binary.py b/scripts/download_model_binary.py index 66f72f2477e..fcdbb5a91a2 100755 --- a/scripts/download_model_binary.py +++ b/scripts/download_model_binary.py @@ -60,7 +60,7 @@ def 
valid_dirname(dirname): # Closure-d function for checking SHA1. def model_checks_out(filename=model_filename, sha1=frontmatter['sha1']): - with open(filename, 'r') as f: + with open(filename, 'rb') as f: return hashlib.sha1(f.read()).hexdigest() == sha1 # Check if model exists. diff --git a/scripts/travis/build.sh b/scripts/travis/build.sh new file mode 100755 index 00000000000..bb9406f046c --- /dev/null +++ b/scripts/travis/build.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# build the project + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +if ! $WITH_CMAKE ; then + make --jobs $NUM_THREADS all test pycaffe warn +else + cd build + make --jobs $NUM_THREADS all test.testbin +fi +make lint diff --git a/scripts/travis/configure-cmake.sh b/scripts/travis/configure-cmake.sh new file mode 100644 index 00000000000..772f1e2ce8d --- /dev/null +++ b/scripts/travis/configure-cmake.sh @@ -0,0 +1,32 @@ +# CMake configuration + +mkdir -p build +cd build + +ARGS="-DCMAKE_BUILD_TYPE=Release -DBLAS=Open" + +if $WITH_PYTHON3 ; then + ARGS="$ARGS -Dpython_version=3" +fi + +if $WITH_IO ; then + ARGS="$ARGS -DUSE_OPENCV=On -DUSE_LMDB=On -DUSE_LEVELDB=On" +else + ARGS="$ARGS -DUSE_OPENCV=Off -DUSE_LMDB=Off -DUSE_LEVELDB=Off" +fi + +if $WITH_CUDA ; then + # Only build SM50 + ARGS="$ARGS -DCPU_ONLY=Off -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN=\"50\" -DCUDA_ARCH_PTX=\"\"" +else + ARGS="$ARGS -DCPU_ONLY=On" +fi + +if $WITH_CUDNN ; then + ARGS="$ARGS -DUSE_CUDNN=On" +else + ARGS="$ARGS -DUSE_CUDNN=Off" +fi + +cmake .. $ARGS + diff --git a/scripts/travis/configure-make.sh b/scripts/travis/configure-make.sh new file mode 100644 index 00000000000..ddc40fffa9d --- /dev/null +++ b/scripts/travis/configure-make.sh @@ -0,0 +1,36 @@ +# raw Makefile configuration + +LINE () { + echo "$@" >> Makefile.config +} + +cp Makefile.config.example Makefile.config + +LINE "BLAS := open" +LINE "WITH_PYTHON_LAYER := 1" + +if $WITH_PYTHON3 ; then + # TODO(lukeyeager) this path is currently disabled because of test errors like: + # ImportError: dynamic module does not define init function (PyInit__caffe) + LINE "PYTHON_LIBRARIES := python3.4m boost_python-py34" + LINE "PYTHON_INCLUDE := /usr/include/python3.4 /usr/lib/python3/dist-packages/numpy/core/include" + LINE "INCLUDE_DIRS := \$(INCLUDE_DIRS) \$(PYTHON_INCLUDE)" +fi + +if ! $WITH_IO ; then + LINE "USE_OPENCV := 0" + LINE "USE_LEVELDB := 0" + LINE "USE_LMDB := 0" +fi + +if $WITH_CUDA ; then + # Only build SM50 + LINE "CUDA_ARCH := -gencode arch=compute_50,code=sm_50" +else + LINE "CPU_ONLY := 1" +fi + +if $WITH_CUDNN ; then + LINE "USE_CUDNN := 1" +fi + diff --git a/scripts/travis/configure.sh b/scripts/travis/configure.sh new file mode 100755 index 00000000000..ef740c8982e --- /dev/null +++ b/scripts/travis/configure.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# configure the project + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +if ! 
$WITH_CMAKE ; then + source $BASEDIR/configure-make.sh +else + source $BASEDIR/configure-cmake.sh +fi diff --git a/scripts/travis/defaults.sh b/scripts/travis/defaults.sh new file mode 100755 index 00000000000..d69c0a7d964 --- /dev/null +++ b/scripts/travis/defaults.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# set default environment variables + +set -e + +WITH_CMAKE=${WITH_CMAKE:-false} +WITH_PYTHON3=${WITH_PYTHON3:-false} +WITH_IO=${WITH_IO:-true} +WITH_CUDA=${WITH_CUDA:-false} +WITH_CUDNN=${WITH_CUDNN:-false} diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh new file mode 100755 index 00000000000..f7bfe4c4df9 --- /dev/null +++ b/scripts/travis/install-deps.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# install dependencies +# (this script must be run as root) + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +apt-get -y update +apt-get install -y --no-install-recommends \ + build-essential \ + libboost-filesystem-dev \ + libboost-python-dev \ + libboost-system-dev \ + libboost-thread-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libhdf5-serial-dev \ + libopenblas-dev \ + python-virtualenv \ + wget + +if $WITH_CMAKE ; then + apt-get install -y --no-install-recommends cmake +fi + +if ! $WITH_PYTHON3 ; then + # Python2 + apt-get install -y --no-install-recommends \ + libprotobuf-dev \ + protobuf-compiler \ + python-dev \ + python-numpy \ + python-protobuf \ + python-skimage +else + # Python3 + apt-get install -y --no-install-recommends \ + python3-dev \ + python3-numpy \ + python3-skimage + + # build Protobuf3 since it's needed for Python3 + echo "Building protobuf3 from source ..." + pushd . + PROTOBUF3_DIR=~/protobuf3-build + rm -rf $PROTOBUF3_DIR + mkdir $PROTOBUF3_DIR + + # install some more dependencies required to build protobuf3 + apt-get install -y --no-install-recommends \ + curl \ + dh-autoreconf \ + unzip + + wget https://github.com/google/protobuf/archive/v3.0.0-beta-3.tar.gz -O protobuf3.tar.gz + tar -xzf protobuf3.tar.gz -C $PROTOBUF3_DIR --strip 1 + rm protobuf3.tar.gz + cd $PROTOBUF3_DIR + ./autogen.sh + ./configure --prefix=/usr + make --jobs=$NUM_THREADS + make install + popd +fi + +if $WITH_IO ; then + apt-get install -y --no-install-recommends \ + libleveldb-dev \ + liblmdb-dev \ + libopencv-dev \ + libsnappy-dev +fi + +if $WITH_CUDA ; then + # install repo packages + CUDA_REPO_PKG=cuda-repo-ubuntu1404_7.5-18_amd64.deb + wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/$CUDA_REPO_PKG + dpkg -i $CUDA_REPO_PKG + rm $CUDA_REPO_PKG + + if $WITH_CUDNN ; then + ML_REPO_PKG=nvidia-machine-learning-repo_4.0-2_amd64.deb + wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/$ML_REPO_PKG + dpkg -i $ML_REPO_PKG + fi + + # update package lists + apt-get -y update + + # install packages + CUDA_PKG_VERSION="7-5" + CUDA_VERSION="7.5" + apt-get install -y --no-install-recommends \ + cuda-core-$CUDA_PKG_VERSION \ + cuda-cudart-dev-$CUDA_PKG_VERSION \ + cuda-cublas-dev-$CUDA_PKG_VERSION \ + cuda-curand-dev-$CUDA_PKG_VERSION + # manually create CUDA symlink + ln -s /usr/local/cuda-$CUDA_VERSION /usr/local/cuda + + if $WITH_CUDNN ; then + apt-get install -y --no-install-recommends libcudnn5-dev + fi +fi + diff --git a/scripts/travis/install-python-deps.sh b/scripts/travis/install-python-deps.sh new file mode 100755 index 00000000000..eeec302791f --- /dev/null +++ b/scripts/travis/install-python-deps.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# install extra Python dependencies +# (must come after setup-venv) + 
+BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +if ! $WITH_PYTHON3 ; then + # Python2 + : +else + # Python3 + pip install --pre protobuf==3.0.0b3 +fi diff --git a/scripts/travis/setup-venv.sh b/scripts/travis/setup-venv.sh new file mode 100755 index 00000000000..81245f146da --- /dev/null +++ b/scripts/travis/setup-venv.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# setup a Python virtualenv +# (must come after install-deps) + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +VENV_DIR=${1:-~/venv} + +# setup our own virtualenv +if $WITH_PYTHON3; then + PYTHON_EXE='/usr/bin/python3' +else + PYTHON_EXE='/usr/bin/python2' +fi + +# use --system-site-packages so that Python will use deb packages +virtualenv $VENV_DIR -p $PYTHON_EXE --system-site-packages diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh new file mode 100755 index 00000000000..fedd7e6b56e --- /dev/null +++ b/scripts/travis/test.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# test the project + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +if $WITH_CUDA ; then + echo "Skipping tests for CUDA build" + exit 0 +fi + +if ! $WITH_CMAKE ; then + make runtest + make pytest +else + cd build + make runtest + make pytest +fi diff --git a/scripts/travis/travis_build_and_test.sh b/scripts/travis/travis_build_and_test.sh deleted file mode 100755 index 174f1ee5a0a..00000000000 --- a/scripts/travis/travis_build_and_test.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -# Script called by Travis to build and test Caffe. -# Travis CI tests are CPU-only for lack of compatible hardware. - -set -e -MAKE="make --jobs=$NUM_THREADS --keep-going" - -if $WITH_CMAKE; then - mkdir build - cd build - CPU_ONLY=" -DCPU_ONLY=ON" - if ! $WITH_CUDA; then - CPU_ONLY=" -DCPU_ONLY=OFF" - fi - PYTHON_ARGS="" - if [ "$PYTHON_VERSION" = "3" ]; then - PYTHON_ARGS="$PYTHON_ARGS -Dpython_version=3 -DBOOST_LIBRARYDIR=$CONDA_DIR/lib/" - fi - if $WITH_IO; then - IO_ARGS="-DUSE_OPENCV=ON -DUSE_LMDB=ON -DUSE_LEVELDB=ON" - else - IO_ARGS="-DUSE_OPENCV=OFF -DUSE_LMDB=OFF -DUSE_LEVELDB=OFF" - fi - cmake -DBUILD_python=ON -DCMAKE_BUILD_TYPE=Release $CPU_ONLY $PYTHON_ARGS -DCMAKE_INCLUDE_PATH="$CONDA_DIR/include/" -DCMAKE_LIBRARY_PATH="$CONDA_DIR/lib/" $IO_ARGS .. - $MAKE - $MAKE pytest - if ! $WITH_CUDA; then - $MAKE runtest - $MAKE lint - fi - $MAKE clean - cd - -else - if ! $WITH_CUDA; then - export CPU_ONLY=1 - fi - if $WITH_IO; then - export USE_LMDB=1 - export USE_LEVELDB=1 - export USE_OPENCV=1 - fi - $MAKE all test pycaffe warn lint || true - if ! $WITH_CUDA; then - $MAKE runtest - fi - $MAKE all - $MAKE test - $MAKE pycaffe - $MAKE pytest - $MAKE warn - if ! $WITH_CUDA; then - $MAKE lint - fi -fi diff --git a/scripts/travis/travis_install.sh b/scripts/travis/travis_install.sh deleted file mode 100755 index 091e92431f0..00000000000 --- a/scripts/travis/travis_install.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash -# This script must be run with sudo. - -set -e - -MAKE="make --jobs=$NUM_THREADS" -# Install apt packages where the Ubuntu 12.04 default and ppa works for Caffe - -# This ppa is for gflags and glog -add-apt-repository -y ppa:tuleu/precise-backports -apt-get -y update -apt-get install \ - wget git curl \ - python-dev python-numpy python3-dev\ - libleveldb-dev libsnappy-dev libopencv-dev \ - libprotobuf-dev protobuf-compiler \ - libatlas-dev libatlas-base-dev \ - libhdf5-serial-dev libgflags-dev libgoogle-glog-dev \ - bc - -# Add a special apt-repository to install CMake 2.8.9 for CMake Caffe build, -# if needed. 
By default, Aptitude in Ubuntu 12.04 installs CMake 2.8.7, but -# Caffe requires a minimum CMake version of 2.8.8. -if $WITH_CMAKE; then - # cmake 3 will make sure that the python interpreter and libraries match - wget --no-check-certificate http://www.cmake.org/files/v3.2/cmake-3.2.3-Linux-x86_64.sh -O cmake3.sh - chmod +x cmake3.sh - ./cmake3.sh --prefix=/usr/ --skip-license --exclude-subdir -fi - -# Install CUDA, if needed -if $WITH_CUDA; then - CUDA_URL=http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1204/x86_64/cuda-repo-ubuntu1204_6.5-14_amd64.deb - CUDA_FILE=/tmp/cuda_install.deb - curl $CUDA_URL -o $CUDA_FILE - dpkg -i $CUDA_FILE - rm -f $CUDA_FILE - apt-get -y update - # Install the minimal CUDA subpackages required to test Caffe build. - # For a full CUDA installation, add 'cuda' to the list of packages. - apt-get -y install cuda-core-6-5 cuda-cublas-6-5 cuda-cublas-dev-6-5 cuda-cudart-6-5 cuda-cudart-dev-6-5 cuda-curand-6-5 cuda-curand-dev-6-5 - # Create CUDA symlink at /usr/local/cuda - # (This would normally be created by the CUDA installer, but we create it - # manually since we did a partial installation.) - ln -s /usr/local/cuda-6.5 /usr/local/cuda -fi - -# Install LMDB -LMDB_URL=https://github.com/LMDB/lmdb/archive/LMDB_0.9.14.tar.gz -LMDB_FILE=/tmp/lmdb.tar.gz -pushd . -wget $LMDB_URL -O $LMDB_FILE -tar -C /tmp -xzvf $LMDB_FILE -cd /tmp/lmdb*/libraries/liblmdb/ -$MAKE -$MAKE install -popd -rm -f $LMDB_FILE - -# Install the Python runtime dependencies via miniconda (this is much faster -# than using pip for everything). -export PATH=$CONDA_DIR/bin:$PATH -# clear any cached conda (see #3786) -rm -rf $CONDA_DIR -if [ ! -d $CONDA_DIR ]; then - if [ "$PYTHON_VERSION" -eq "3" ]; then - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - else - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh - fi - chmod +x miniconda.sh - ./miniconda.sh -b -p $CONDA_DIR - - conda update --yes conda - # The version of boost we're using for Python 3 depends on 3.4 for now. - if [ "$PYTHON_VERSION" -eq "3" ]; then - conda install --yes python=3.4 - fi - conda install --yes numpy scipy matplotlib scikit-image pip - # Let conda install boost (so that boost_python matches) - conda install --yes -c https://conda.binstar.org/menpo boost=1.56.0 -fi - -# install protobuf 3 (just use the miniconda3 directory to avoid having to setup the path again) -if [ "$PYTHON_VERSION" -eq "3" ] && [ ! -e "$CONDA_DIR/bin/protoc" ]; then - pushd . - wget https://github.com/google/protobuf/archive/v3.0.0-alpha-3.1.tar.gz -O protobuf-3.tar.gz - tar -C /tmp -xzvf protobuf-3.tar.gz - cd /tmp/protobuf-3*/ - ./autogen.sh - ./configure --prefix=$CONDA_DIR - $MAKE - $MAKE install - popd -fi - -if [ "$PYTHON_VERSION" -eq "3" ]; then - pip install --pre protobuf==3.0.0b2 -else - pip install protobuf -fi diff --git a/scripts/travis/travis_setup_makefile_config.sh b/scripts/travis/travis_setup_makefile_config.sh deleted file mode 100755 index 83aacf11fb0..00000000000 --- a/scripts/travis/travis_setup_makefile_config.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -set -e - -mv Makefile.config.example Makefile.config - -if $WITH_CUDA; then - # Only generate compute_50. 
- GENCODE="-gencode arch=compute_50,code=sm_50" - GENCODE="$GENCODE -gencode arch=compute_50,code=compute_50" - echo "CUDA_ARCH := $GENCODE" >> Makefile.config -fi - -# Remove IO library settings from Makefile.config -# to avoid conflicts with CI configuration -sed -i -e '/USE_LMDB/d' Makefile.config -sed -i -e '/USE_LEVELDB/d' Makefile.config -sed -i -e '/USE_OPENCV/d' Makefile.config - -cat << 'EOF' >> Makefile.config -# Travis' nvcc doesn't like newer boost versions -NVCCFLAGS := -Xcudafe --diag_suppress=cc_clobber_ignored -Xcudafe --diag_suppress=useless_using_declaration -Xcudafe --diag_suppress=set_but_not_used -ANACONDA_HOME := $(CONDA_DIR) -PYTHON_INCLUDE := $(ANACONDA_HOME)/include \ - $(ANACONDA_HOME)/include/python2.7 \ - $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include -PYTHON_LIB := $(ANACONDA_HOME)/lib -INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include -LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib -WITH_PYTHON_LAYER := 1 -EOF diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index c86fd5d1d94..4a34e4c5856 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -30,7 +30,9 @@ void Blob::Reshape(const vector& shape) { int* shape_data = static_cast(shape_data_->mutable_cpu_data()); for (int i = 0; i < shape.size(); ++i) { CHECK_GE(shape[i], 0); - CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX"; + if (count_ != 0) { + CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX"; + } count_ *= shape[i]; shape_[i] = shape[i]; shape_data[i] = shape[i]; diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp index e81bdd732f3..aecdcd63194 100644 --- a/src/caffe/layers/crop_layer.cpp +++ b/src/caffe/layers/crop_layer.cpp @@ -15,8 +15,7 @@ namespace caffe { template void CropLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { - // All logic that depends only on the number of dimensions is here, - // the rest is in Reshape because it depends on Blob size. + // LayerSetup() handles the number of dimensions; Reshape() handles the sizes. // bottom[0] supplies the data // bottom[1] supplies the size const CropParameter& param = this->layer_param_.crop_param(); @@ -40,41 +39,35 @@ void CropLayer::Reshape(const vector*>& bottom, int input_dim = bottom[0]->num_axes(); const int start_axis = bottom[0]->CanonicalAxisIndex(param.axis()); - // initialize all offsets to 0 + // Initialize offsets to 0 and the new shape to the current shape of the data. offsets = vector(input_dim, 0); - // initialize new shape to bottom[0] vector new_shape(bottom[0]->shape()); - // apply crops + // Determine crop offsets and the new shape post-crop. for (int i = 0; i < input_dim; ++i) { int crop_offset = 0; - int new_size = bottom[0]->shape(i); + int new_size = bottom[0]->shape(i); if (i >= start_axis) { new_size = bottom[1]->shape(i); - if (param.offset_size() == 1) { - // if only one crop value is supplied, crop all dimensions after axis - // by this crop value + // If only one offset is given, all crops have the same offset. crop_offset = param.offset(0); } else if (param.offset_size() > 1) { - // crop values specified must be equal to the number of dimensions - // following axis + // For several offsets, the number of offsets must be equal to the + // number of dimensions to crop, that is dimensions after the axis. crop_offset = param.offset(i - start_axis); } + // Check that the crop and offset are within the dimension's bounds. 
+ CHECK_GE(bottom[0]->shape(i) - crop_offset, bottom[1]->shape(i)) + << "the crop for dimension " << i << " is out-of-bounds with " + << "size " << bottom[1]->shape(i) << " and offset " << crop_offset; } - // Check that the image we are cropping minus the margin is bigger - // than the destination image. - CHECK_GE(bottom[0]->shape(i) - crop_offset, - bottom[1]->shape(i)) - << "invalid crop parameters in dimension: " << i; - // Now set new size and offsets new_shape[i] = new_size; offsets[i] = crop_offset; } top[0]->Reshape(new_shape); } -// recursive copy function template void CropLayer::crop_copy(const vector*>& bottom, const vector*>& top, diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index 9ed8f7cce57..f78cecbbeee 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -22,15 +22,6 @@ __global__ void copy_kernel(const int n, const int height, const int width, } } -// recursive copy function, this function is similar to crop_copy but loops -// over all but the last two dimensions. It is implemented this way to allow -// for ND cropping while still relying on a CUDA kernel for the innermost -// two dimensions for performance reasons. -// An alternative way to implement ND cropping relying more on the kernel -// would require passing offsets to the kernel, which is a bit problematic -// because it is of variable length. Since in the standard (N,C,W,H) case -// N,C are usually not cropped a speedup could be achieved by not looping -// the application of the copy_kernel around these dimensions. template void CropLayer::crop_copy_gpu(const vector*>& bottom, const vector*>& top, diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 42c4fd0260c..8bc5346248c 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -30,19 +30,11 @@ void CuDNNConvolutionLayer::Forward_gpu( // Bias. if (this->bias_term_) { const Dtype* bias_data = this->blobs_[1]->gpu_data(); -#if CUDNN_VERSION_MIN(4, 0, 0) CUDNN_CHECK(cudnnAddTensor(handle_[g], cudnn::dataType::one, bias_desc_, bias_data + bias_offset_ * g, cudnn::dataType::one, top_descs_[i], top_data + top_offset_ * g)); -#else - CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C, - cudnn::dataType::one, - bias_desc_, bias_data + bias_offset_ * g, - cudnn::dataType::one, - top_descs_[i], top_data + top_offset_ * g)); -#endif } } @@ -82,7 +74,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, // Gradient w.r.t. weights. 
if (this->param_propagate_down_[0]) { const Dtype* bottom_data = bottom[i]->gpu_data(); - CUDNN_CHECK(cudnnConvolutionBackwardFilter_v3( + CUDNN_CHECK(cudnnConvolutionBackwardFilter( handle_[1*this->group_ + g], cudnn::dataType::one, bottom_descs_[i], bottom_data + bottom_offset_ * g, @@ -100,7 +92,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, weight = this->blobs_[0]->gpu_data(); } Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); - CUDNN_CHECK(cudnnConvolutionBackwardData_v3( + CUDNN_CHECK(cudnnConvolutionBackwardData( handle_[2*this->group_ + g], cudnn::dataType::one, filter_desc_, weight + this->weight_offset_ * g, diff --git a/src/caffe/layers/cudnn_relu_layer.cpp b/src/caffe/layers/cudnn_relu_layer.cpp index c86c6907113..795e0a9efb0 100644 --- a/src/caffe/layers/cudnn_relu_layer.cpp +++ b/src/caffe/layers/cudnn_relu_layer.cpp @@ -13,6 +13,7 @@ void CuDNNReLULayer::LayerSetUp(const vector*>& bottom, CUDNN_CHECK(cudnnCreate(&handle_)); cudnn::createTensor4dDesc(&bottom_desc_); cudnn::createTensor4dDesc(&top_desc_); + cudnn::createActivationDescriptor(&activ_desc_, CUDNN_ACTIVATION_RELU); handles_setup_ = true; } diff --git a/src/caffe/layers/cudnn_relu_layer.cu b/src/caffe/layers/cudnn_relu_layer.cu index 9f617183baa..e7928bbd6e0 100644 --- a/src/caffe/layers/cudnn_relu_layer.cu +++ b/src/caffe/layers/cudnn_relu_layer.cu @@ -15,12 +15,21 @@ void CuDNNReLULayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_RELU, + activ_desc_, cudnn::dataType::one, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->top_desc_, top_data)); +#else + CUDNN_CHECK(cudnnActivationForward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->top_desc_, top_data)); +#endif } template @@ -40,13 +49,23 @@ void CuDNNReLULayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_RELU, + activ_desc_, cudnn::dataType::one, this->top_desc_, top_data, this->top_desc_, top_diff, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->bottom_desc_, bottom_diff)); +#else + CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->bottom_desc_, bottom_diff)); +#endif } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNReLULayer); diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cpp b/src/caffe/layers/cudnn_sigmoid_layer.cpp index ccb955cdaff..3ce6aef1764 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cpp +++ b/src/caffe/layers/cudnn_sigmoid_layer.cpp @@ -13,6 +13,8 @@ void CuDNNSigmoidLayer::LayerSetUp(const vector*>& bottom, CUDNN_CHECK(cudnnCreate(&handle_)); cudnn::createTensor4dDesc(&bottom_desc_); cudnn::createTensor4dDesc(&top_desc_); + cudnn::createActivationDescriptor(&activ_desc_, + CUDNN_ACTIVATION_SIGMOID); handles_setup_ = true; } diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cu b/src/caffe/layers/cudnn_sigmoid_layer.cu index e2a4b460c6c..48d6cbab6de 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cu +++ 
b/src/caffe/layers/cudnn_sigmoid_layer.cu @@ -10,12 +10,21 @@ void CuDNNSigmoidLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_SIGMOID, + activ_desc_, cudnn::dataType::one, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->top_desc_, top_data)); +#else + CUDNN_CHECK(cudnnActivationForward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->top_desc_, top_data)); +#endif } template @@ -30,13 +39,23 @@ void CuDNNSigmoidLayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_SIGMOID, + activ_desc_, cudnn::dataType::one, this->top_desc_, top_data, this->top_desc_, top_diff, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->bottom_desc_, bottom_diff)); +#else + CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->bottom_desc_, bottom_diff)); +#endif } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNSigmoidLayer); diff --git a/src/caffe/layers/cudnn_tanh_layer.cpp b/src/caffe/layers/cudnn_tanh_layer.cpp index 1a56418227c..e87dd9de0ab 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cpp +++ b/src/caffe/layers/cudnn_tanh_layer.cpp @@ -13,6 +13,7 @@ void CuDNNTanHLayer::LayerSetUp(const vector*>& bottom, CUDNN_CHECK(cudnnCreate(&handle_)); cudnn::createTensor4dDesc(&bottom_desc_); cudnn::createTensor4dDesc(&top_desc_); + cudnn::createActivationDescriptor(&activ_desc_, CUDNN_ACTIVATION_TANH); handles_setup_ = true; } diff --git a/src/caffe/layers/cudnn_tanh_layer.cu b/src/caffe/layers/cudnn_tanh_layer.cu index 89df28a3e8b..6b5d7ae7ea7 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cu +++ b/src/caffe/layers/cudnn_tanh_layer.cu @@ -10,12 +10,21 @@ void CuDNNTanHLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_TANH, + activ_desc_, cudnn::dataType::one, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->top_desc_, top_data)); +#else + CUDNN_CHECK(cudnnActivationForward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->top_desc_, top_data)); +#endif } template @@ -31,13 +40,23 @@ void CuDNNTanHLayer::Backward_gpu(const vector*>& top, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_TANH, + activ_desc_, cudnn::dataType::one, this->top_desc_, top_data, this->top_desc_, top_diff, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->bottom_desc_, bottom_diff)); +#else + CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, 
+ this->bottom_desc_, bottom_diff)); +#endif } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNTanHLayer); diff --git a/src/caffe/layers/exp_layer.cpp b/src/caffe/layers/exp_layer.cpp index 1f4a309fe25..0c1b463ae12 100644 --- a/src/caffe/layers/exp_layer.cpp +++ b/src/caffe/layers/exp_layer.cpp @@ -23,7 +23,8 @@ void ExpLayer::LayerSetUp(const vector*>& bottom, const Dtype input_scale = this->layer_param_.exp_param().scale(); const Dtype input_shift = this->layer_param_.exp_param().shift(); inner_scale_ = log_base * input_scale; - outer_scale_ = (input_shift == Dtype(0)) ? Dtype(1) : pow(base, input_shift); + outer_scale_ = (input_shift == Dtype(0)) ? Dtype(1) : + ( (base != Dtype(-1)) ? pow(base, input_shift) : exp(input_shift) ); } template diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index 62fda4accce..56d354655dc 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -37,10 +37,13 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, const string& source = this->layer_param_.image_data_param().source(); LOG(INFO) << "Opening file " << source; std::ifstream infile(source.c_str()); - string filename; + string line; + size_t pos; int label; - while (infile >> filename >> label) { - lines_.push_back(std::make_pair(filename, label)); + while (std::getline(infile, line)) { + pos = line.find_last_of(' '); + label = atoi(line.substr(pos + 1).c_str()); + lines_.push_back(std::make_pair(line.substr(0, pos), label)); } if (this->layer_param_.image_data_param().shuffle()) { diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index 210525e20f3..0be7911a3be 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -14,6 +14,7 @@ void LRNLayer::LayerSetUp(const vector*>& bottom, alpha_ = this->layer_param_.lrn_param().alpha(); beta_ = this->layer_param_.lrn_param().beta(); k_ = this->layer_param_.lrn_param().k(); + deconv_ignore_ = this->layer_param_.lrn_param().deconv_ignore(); if (this->layer_param_.lrn_param().norm_region() == LRNParameter_NormRegion_WITHIN_CHANNEL) { // Set up split_layer_ to use inputs in the numerator and denominator. 
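The LRN hunks that follow add the matching Deconv pass: with deconv_ignore left at its default the layer reuses its ordinary Backward derivative, and with it set the diffs pass straight through. In a prototxt this is simply deconv_ignore: true inside lrn_param; a minimal C++ sketch of the same thing, assuming the accessors protoc generates from the caffe.proto change further down, would be:

#include "caffe/proto/caffe.pb.h"

// Build an LRN layer definition with the new pass-through behaviour enabled.
caffe::LayerParameter MakeLrnForDeconv() {
  caffe::LayerParameter param;
  param.set_name("norm1");
  param.set_type("LRN");
  param.mutable_lrn_param()->set_local_size(5);
  param.mutable_lrn_param()->set_deconv_ignore(true);  // skip LRN in Deconv()
  return param;
}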
@@ -246,10 +247,36 @@ void LRNLayer::WithinChannelBackward( } } +template +void LRNLayer::Deconv_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (deconv_ignore_) { + // Deconv Option 1: pass through (ignore LRN layer): + Deconv_passthrough_cpu(top, propagate_down, bottom); + } else { + // Deconv Option 2: compute derivatives via backprop: + Backward_cpu(top, propagate_down, bottom); + } +} + +template +void LRNLayer::Deconv_passthrough_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (propagate_down[0]) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + const int count = bottom[0]->count(); + for (int i = 0; i < count; ++i) { + bottom_diff[i] = top_diff[i]; + } + } +} + #ifdef CPU_ONLY -STUB_GPU(LRNLayer); +STUB_GPU_WITH_DECONV(LRNLayer); STUB_GPU_FORWARD(LRNLayer, CrossChannelForward); STUB_GPU_BACKWARD(LRNLayer, CrossChannelBackward); +STUB_GPU_DECONV(LRNLayer, Deconv_passthrough); #endif INSTANTIATE_CLASS(LRNLayer); diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu index 26e619c7569..a5ee2387df7 100644 --- a/src/caffe/layers/lrn_layer.cu +++ b/src/caffe/layers/lrn_layer.cu @@ -195,8 +195,50 @@ template void LRNLayer::CrossChannelBackward_gpu( const vector*>& top, const vector& propagate_down, const vector*>& bottom); +template +void LRNLayer::Deconv_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (deconv_ignore_) { + // Deconv Option 1: pass through (ignore LRN layer): + Deconv_passthrough_gpu(top, propagate_down, bottom); + } else { + // Deconv Option 2: compute derivatives via backprop: + Backward_gpu(top, propagate_down, bottom); + } +} + +template +__global__ void LRNDeconv_passthrough(const int n, const Dtype* in_diff, + Dtype* out_diff) { + CUDA_KERNEL_LOOP(index, n) { + out_diff[index] = in_diff[index]; + } +} + +template +void LRNLayer::Deconv_passthrough_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + // Option 2: pass through (ignore LRN layer) + if (propagate_down[0]) { + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const int count = bottom[0]->count(); + // NOLINT_NEXT_LINE(whitespace/operators) + LRNDeconv_passthrough<<>>( + count, top_diff, bottom_diff); + CUDA_POST_KERNEL_CHECK; + } +} +template void LRNLayer::Deconv_passthrough_gpu( + const vector*>& top, const vector& propagate_down, + const vector*>& bottom); +template void LRNLayer::Deconv_passthrough_gpu( + const vector*>& top, const vector& propagate_down, + const vector*>& bottom); + -INSTANTIATE_LAYER_GPU_FUNCS(LRNLayer); +//INSTANTIATE_LAYER_GPU_FUNCS(LRNLayer); +INSTANTIATE_LAYER_GPU_FUNCS_WITH_DECONV(LRNLayer); } // namespace caffe diff --git a/src/caffe/layers/parameter_layer.cpp b/src/caffe/layers/parameter_layer.cpp new file mode 100644 index 00000000000..fbd326f8469 --- /dev/null +++ b/src/caffe/layers/parameter_layer.cpp @@ -0,0 +1,8 @@ +#include "caffe/layers/parameter_layer.hpp" + +namespace caffe { + +INSTANTIATE_CLASS(ParameterLayer); +REGISTER_LAYER_CLASS(Parameter); + +} // namespace caffe diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp index 92a729c81bd..2f0e52b33f0 100644 --- a/src/caffe/layers/relu_layer.cpp +++ b/src/caffe/layers/relu_layer.cpp @@ -35,9 +35,26 @@ void ReLULayer::Backward_cpu(const vector*>& top, } } +template +void ReLULayer::Deconv_cpu(const vector*>& top, + const 
vector& propagate_down, + const vector*>& bottom) { + if (propagate_down[0]) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + const int count = bottom[0]->count(); + Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); + if (negative_slope != Dtype(0)) + LOG(WARNING) << "negative_slope parameter = " << negative_slope << " but nonzero negative_slope params are not supported for Deconv through RELU."; + for (int i = 0; i < count; ++i) { + bottom_diff[i] = std::max(top_diff[i], Dtype(0)); + } + } +} + #ifdef CPU_ONLY -STUB_GPU(ReLULayer); +STUB_GPU_WITH_DECONV(ReLULayer); #endif INSTANTIATE_CLASS(ReLULayer); diff --git a/src/caffe/layers/relu_layer.cu b/src/caffe/layers/relu_layer.cu index 4bf15b3aad3..3905448727b 100644 --- a/src/caffe/layers/relu_layer.cu +++ b/src/caffe/layers/relu_layer.cu @@ -57,8 +57,36 @@ void ReLULayer::Backward_gpu(const vector*>& top, } } +template +__global__ void ReLUDeconv(const int n, const Dtype* in_diff, + Dtype* out_diff) { + CUDA_KERNEL_LOOP(index, n) { + out_diff[index] = in_diff[index] > 0 ? in_diff[index] : 0; + } +} + +template +void ReLULayer::Deconv_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + if (propagate_down[0]) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const int count = bottom[0]->count(); + Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); + if (negative_slope != Dtype(0)) + LOG(WARNING) << "negative_slope parameter = " << negative_slope << " but nonzero negative_slope params are not supported for Deconv through RELU."; + // NOLINT_NEXT_LINE(whitespace/operators) + ReLUDeconv<<>>( + count, top_diff, bottom_diff); + CUDA_POST_KERNEL_CHECK; + } +} + -INSTANTIATE_LAYER_GPU_FUNCS(ReLULayer); +//INSTANTIATE_LAYER_GPU_FUNCS(ReLULayer); +INSTANTIATE_LAYER_GPU_FUNCS_WITH_DECONV(ReLULayer); } // namespace caffe diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 4ca8315d791..103dd4b6af8 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -265,6 +265,9 @@ void WindowDataLayer::load_batch(Batch* batch) { const int num_samples[2] = { batch_size - num_fg, num_fg }; int item_id = 0; + CHECK_GT(fg_windows_.size(), 0); + CHECK_GT(bg_windows_.size(), 0); + // sample from bg set then fg set for (int is_fg = 0; is_fg < 2; ++is_fg) { for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) { diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 23d94c97c07..7fb20266b5f 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -427,12 +427,11 @@ int Net::AppendBottom(const NetParameter& param, const int layer_id, bottom_vecs_[layer_id].push_back(blobs_[blob_id].get()); bottom_id_vecs_[layer_id].push_back(blob_id); available_blobs->erase(blob_name); - bool propagate_down = true; + bool need_backward = blob_need_backward_[blob_id]; // Check if the backpropagation on bottom_id should be skipped - if (layer_param.propagate_down_size() > 0) - propagate_down = layer_param.propagate_down(bottom_id); - const bool need_backward = blob_need_backward_[blob_id] && - propagate_down; + if (layer_param.propagate_down_size() > 0) { + need_backward = layer_param.propagate_down(bottom_id); + } bottom_need_backward_[layer_id].push_back(need_backward); return blob_id; } @@ -591,6 +590,19 @@ void Net::BackwardFromTo(int start, int end) { } } 
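The net.cpp additions that follow wire a deconvnet-style pass (DeconvFromTo, DeconvFrom, DeconvTo, Deconv) alongside the existing backward pass, plus a matching debug-info printer. A rough usage sketch, with a hypothetical model file and illustrative blob names, might look like this:

#include "caffe/caffe.hpp"

using namespace caffe;  // NOLINT(build/namespaces)

// Reconstruct the input pattern that excites one unit of "conv5".
void deconv_example() {
  Net<float> net("deploy.prototxt", TEST);  // hypothetical model file
  net.Forward();                            // ordinary forward pass
  // Seed the diff of the blob of interest with a one-hot pattern ...
  Blob<float>* conv5 = net.blob_by_name("conv5").get();
  float* diff = conv5->mutable_cpu_diff();
  for (int i = 0; i < conv5->count(); ++i) { diff[i] = 0.0f; }
  diff[0] = 1.0f;
  // ... and run the new pass back to the input; ReLU and LRN layers use their
  // Deconv_* implementations here instead of Backward_*.
  net.Deconv();
  const float* recon = net.blob_by_name("data")->cpu_diff();
  (void) recon;  // recon now holds the deconvnet-style reconstruction
}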
+template +void Net::DeconvFromTo(int start, int end) { + CHECK_GE(end, 0); + CHECK_LT(start, layers_.size()); + for (int i = start; i >= end; --i) { + if (layer_need_backward_[i]) { + layers_[i]->Deconv( + top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]); + if (debug_info_) { DeconvDebugInfo(i); } + } + } +} + template void Net::ForwardDebugInfo(const int layer_id) { for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) { @@ -644,6 +656,29 @@ void Net::BackwardDebugInfo(const int layer_id) { } } +template +void Net::DeconvDebugInfo(const int layer_id) { + const vector*>& bottom_vec = bottom_vecs_[layer_id]; + for (int bottom_id = 0; bottom_id < bottom_vec.size(); ++bottom_id) { + if (!bottom_need_backward_[layer_id][bottom_id]) { continue; } + const Blob& blob = *bottom_vec[bottom_id]; + const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]]; + const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); + LOG(INFO) << " [Deconv] " + << "Layer " << layer_names_[layer_id] << ", bottom blob " << blob_name + << " diff: " << diff_abs_val_mean; + } + for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); + ++param_id) { + if (!layers_[layer_id]->param_propagate_down(param_id)) { continue; } + const Blob& blob = *layers_[layer_id]->blobs()[param_id]; + const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); + LOG(INFO) << " [Deconv] " + << "Layer " << layer_names_[layer_id] << ", param blob " << param_id + << " diff: " << diff_abs_val_mean; + } +} + template void Net::UpdateDebugInfo(const int param_id) { const Blob& blob = *params_[param_id]; @@ -731,6 +766,21 @@ void Net::Backward() { } } +template +void Net::DeconvFrom(int start) { + DeconvFromTo(start, 0); +} + +template +void Net::DeconvTo(int end) { + DeconvFromTo(layers_.size() - 1, end); +} + +template +void Net::Deconv() { + DeconvFromTo(layers_.size() - 1, 0); +} + template void Net::Reshape() { for (int i = 0; i < layers_.size(); ++i) { diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 6900bb71482..aa5bed461b8 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -306,7 +306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 145 (last added: crop_param) +// LayerParameter next available layer-specific ID: 146 (last added: parameter_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -328,7 +328,12 @@ message LayerParameter { // The blobs containing the numeric parameters of the layer. repeated BlobProto blobs = 7; - // Specifies on which bottoms the backpropagation should be skipped. + // Specifies whether to backpropagate to each bottom. If unspecified, + // Caffe will automatically infer whether each input needs backpropagation + // to compute parameter gradients. If set to true for some inputs, + // backpropagation to those inputs is forced; if set false for some inputs, + // backpropagation to those inputs is skipped. + // // The size must be either 0 or equal to the number of bottoms. 
repeated bool propagate_down = 11; @@ -380,6 +385,7 @@ message LayerParameter { optional LRNParameter lrn_param = 118; optional MemoryDataParameter memory_data_param = 119; optional MVNParameter mvn_param = 120; + optional ParameterParameter parameter_param = 145; optional PoolingParameter pooling_param = 121; optional PowerParameter power_param = 122; optional PReLUParameter prelu_param = 131; @@ -633,6 +639,7 @@ message DataParameter { // DEPRECATED. Each solver accesses a different subset of the database. optional uint32 rand_skip = 7 [default = 0]; optional DB backend = 8 [default = LEVELDB]; + // DEPRECATED. See TransformationParameter. For data pre-processing, we can do // simple scaling and subtracting the data mean, if provided. Note that the // mean subtraction is always carried out before scaling. @@ -848,6 +855,13 @@ message LRNParameter { CUDNN = 2; } optional Engine engine = 6 [default = DEFAULT]; + + // Whether or not to skip the LRN layer during a deconv pass. If + // this is true, activations in a deconv will pass through the LRN + // layer unaffected. If it is false, deconv activations will be + // affected by LRN layers the same as backprop diffs are (will pass + // through the derivative of the layer). + optional bool deconv_ignore = 7 [default = false]; } message MemoryDataParameter { @@ -868,6 +882,10 @@ message MVNParameter { optional float eps = 3 [default = 1e-9]; } +message ParameterParameter { + optional BlobShape shape = 1; +} + message PoolingParameter { enum PoolMethod { MAX = 0; @@ -982,7 +1000,7 @@ message ReshapeParameter { // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } - // reshape_param { shape { dim: -1 dim: 0 dim: 2 } } + // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } // optional BlobShape shape = 1; diff --git a/src/caffe/test/test_blob.cpp b/src/caffe/test/test_blob.cpp index a9d7d519e45..b88562223d0 100644 --- a/src/caffe/test/test_blob.cpp +++ b/src/caffe/test/test_blob.cpp @@ -51,6 +51,14 @@ TYPED_TEST(BlobSimpleTest, TestReshape) { EXPECT_EQ(this->blob_->count(), 120); } +TYPED_TEST(BlobSimpleTest, TestReshapeZero) { + vector shape(2); + shape[0] = 0; + shape[1] = 5; + this->blob_->Reshape(shape); + EXPECT_EQ(this->blob_->count(), 0); +} + TYPED_TEST(BlobSimpleTest, TestLegacyBlobProtoShapeEquals) { BlobProto blob_proto; diff --git a/src/caffe/test/test_caffe_main.cpp b/src/caffe/test/test_caffe_main.cpp index fccf6f1613b..6473b74d0a6 100644 --- a/src/caffe/test/test_caffe_main.cpp +++ b/src/caffe/test/test_caffe_main.cpp @@ -1,6 +1,3 @@ -// The main caffe test code. Your test cpp code should include this hpp -// to allow a main function to be compiled into the binary. - #include "caffe/caffe.hpp" #include "caffe/test/test_caffe_main.hpp" diff --git a/src/caffe/test/test_crop_layer.cpp b/src/caffe/test/test_crop_layer.cpp index 45f24e2ee8d..ce2c736f644 100644 --- a/src/caffe/test/test_crop_layer.cpp +++ b/src/caffe/test/test_crop_layer.cpp @@ -91,6 +91,24 @@ TYPED_TEST(CropLayerTest, TestSetupShapeNegativeIndexing) { } } +TYPED_TEST(CropLayerTest, TestDimensionsCheck) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + // Reshape size blob to have incompatible sizes for uncropped dimensions: + // the size blob has more channels than the data blob, but this is fine + // since the channels dimension is not cropped in this configuration. 
+ this->blob_bottom_1_->Reshape(2, 5, 4, 2); + CropLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < this->blob_top_->num_axes(); ++i) { + if (i < 2) { + EXPECT_EQ(this->blob_bottom_0_->shape(i), this->blob_top_->shape(i)); + } else { + EXPECT_EQ(this->blob_bottom_1_->shape(i), this->blob_top_->shape(i)); + } + } +} + TYPED_TEST(CropLayerTest, TestCropAll) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; diff --git a/src/caffe/test/test_image_data_layer.cpp b/src/caffe/test/test_image_data_layer.cpp index a4080ccd145..ce5e0bc62d6 100644 --- a/src/caffe/test/test_image_data_layer.cpp +++ b/src/caffe/test/test_image_data_layer.cpp @@ -34,16 +34,24 @@ class ImageDataLayerTest : public MultiDeviceTest { std::ofstream outfile(filename_.c_str(), std::ofstream::out); LOG(INFO) << "Using temporary file " << filename_; for (int i = 0; i < 5; ++i) { - outfile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << i; + outfile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << i << std::endl; } outfile.close(); // Create test input file for images of distinct sizes. MakeTempFilename(&filename_reshape_); std::ofstream reshapefile(filename_reshape_.c_str(), std::ofstream::out); LOG(INFO) << "Using temporary file " << filename_reshape_; - reshapefile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << 0; - reshapefile << EXAMPLES_SOURCE_DIR "images/fish-bike.jpg " << 1; + reshapefile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << 0 << std::endl; + reshapefile << EXAMPLES_SOURCE_DIR "images/fish-bike.jpg " << 1 + << std::endl; reshapefile.close(); + // Create test input file for images with space in names + MakeTempFilename(&filename_space_); + std::ofstream spacefile(filename_space_.c_str(), std::ofstream::out); + LOG(INFO) << "Using temporary file " << filename_space_; + spacefile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << 0 << std::endl; + spacefile << EXAMPLES_SOURCE_DIR "images/cat gray.jpg " << 1 << std::endl; + spacefile.close(); } virtual ~ImageDataLayerTest() { @@ -54,6 +62,7 @@ class ImageDataLayerTest : public MultiDeviceTest { int seed_; string filename_; string filename_reshape_; + string filename_space_; Blob* const blob_top_data_; Blob* const blob_top_label_; vector*> blob_bottom_vec_; @@ -177,5 +186,34 @@ TYPED_TEST(ImageDataLayerTest, TestShuffle) { } } +TYPED_TEST(ImageDataLayerTest, TestSpace) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter param; + ImageDataParameter* image_data_param = param.mutable_image_data_param(); + image_data_param->set_batch_size(1); + image_data_param->set_source(this->filename_space_.c_str()); + image_data_param->set_shuffle(false); + ImageDataLayer layer(param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_label_->num(), 1); + EXPECT_EQ(this->blob_top_label_->channels(), 1); + EXPECT_EQ(this->blob_top_label_->height(), 1); + EXPECT_EQ(this->blob_top_label_->width(), 1); + // cat.jpg + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_data_->num(), 1); + EXPECT_EQ(this->blob_top_data_->channels(), 3); + EXPECT_EQ(this->blob_top_data_->height(), 360); + EXPECT_EQ(this->blob_top_data_->width(), 480); + EXPECT_EQ(this->blob_top_label_->cpu_data()[0], 0); + // cat gray.jpg + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_data_->num(), 1); + EXPECT_EQ(this->blob_top_data_->channels(), 3); + EXPECT_EQ(this->blob_top_data_->height(), 360); + EXPECT_EQ(this->blob_top_data_->width(), 
480); + EXPECT_EQ(this->blob_top_label_->cpu_data()[0], 1); +} + } // namespace caffe #endif // USE_OPENCV diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 1e0788ec127..92fd317fee8 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -716,6 +716,61 @@ class NetTest : public MultiDeviceTest { InitNetFromProtoString(proto); } + virtual void InitForcePropNet(bool test_force_true) { + string proto = + "name: 'ForcePropTestNetwork' " + "layer { " + " name: 'data' " + " type: 'DummyData' " + " dummy_data_param { " + " shape { " + " dim: 5 " + " dim: 2 " + " dim: 3 " + " dim: 4 " + " } " + " data_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " shape { " + " dim: 5 " + " } " + " data_filler { " + " type: 'constant' " + " value: 0 " + " } " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " name: 'innerproduct' " + " type: 'InnerProduct' " + " inner_product_param { " + " num_output: 1 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " } " + " bottom: 'data' " + " top: 'innerproduct' "; + if (test_force_true) { + proto += " propagate_down: true "; + } + proto += + "} " + "layer { " + " name: 'loss' " + " bottom: 'innerproduct' " + " bottom: 'label' " + " top: 'cross_entropy_loss' " + " type: 'SigmoidCrossEntropyLoss' " + "} "; + InitNetFromProtoString(proto); + } + int seed_; shared_ptr > net_; }; @@ -2371,4 +2426,51 @@ TYPED_TEST(NetTest, TestSkipPropagateDown) { } } +TYPED_TEST(NetTest, TestForcePropagateDown) { + this->InitForcePropNet(false); + vector layer_need_backward = this->net_->layer_need_backward(); + for (int layer_id = 0; layer_id < this->net_->layers().size(); ++layer_id) { + const string& layer_name = this->net_->layer_names()[layer_id]; + const vector need_backward = + this->net_->bottom_need_backward()[layer_id]; + if (layer_name == "data") { + ASSERT_EQ(need_backward.size(), 0); + EXPECT_FALSE(layer_need_backward[layer_id]); + } else if (layer_name == "innerproduct") { + ASSERT_EQ(need_backward.size(), 1); + EXPECT_FALSE(need_backward[0]); // data + EXPECT_TRUE(layer_need_backward[layer_id]); + } else if (layer_name == "loss") { + ASSERT_EQ(need_backward.size(), 2); + EXPECT_TRUE(need_backward[0]); // innerproduct + EXPECT_FALSE(need_backward[1]); // label + EXPECT_TRUE(layer_need_backward[layer_id]); + } else { + LOG(FATAL) << "Unknown layer: " << layer_name; + } + } + this->InitForcePropNet(true); + layer_need_backward = this->net_->layer_need_backward(); + for (int layer_id = 0; layer_id < this->net_->layers().size(); ++layer_id) { + const string& layer_name = this->net_->layer_names()[layer_id]; + const vector need_backward = + this->net_->bottom_need_backward()[layer_id]; + if (layer_name == "data") { + ASSERT_EQ(need_backward.size(), 0); + EXPECT_FALSE(layer_need_backward[layer_id]); + } else if (layer_name == "innerproduct") { + ASSERT_EQ(need_backward.size(), 1); + EXPECT_TRUE(need_backward[0]); // data + EXPECT_TRUE(layer_need_backward[layer_id]); + } else if (layer_name == "loss") { + ASSERT_EQ(need_backward.size(), 2); + EXPECT_TRUE(need_backward[0]); // innerproduct + EXPECT_FALSE(need_backward[1]); // label + EXPECT_TRUE(layer_need_backward[layer_id]); + } else { + LOG(FATAL) << "Unknown layer: " << layer_name; + } + } +} + } // namespace caffe diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index dd591f7d204..342f825cec3 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ 
-394,6 +394,26 @@ TYPED_TEST(NeuronLayerTest, TestExpGradient) { this->TestExpGradient(kBase, kScale, kShift); } +TYPED_TEST(NeuronLayerTest, TestExpLayerWithShift) { + typedef typename TypeParam::Dtype Dtype; + // Test default base of "-1" -- should actually set base := e, + // with a non-zero shift + const Dtype kBase = -1; + const Dtype kScale = 1; + const Dtype kShift = 1; + this->TestExpForward(kBase, kScale, kShift); +} + +TYPED_TEST(NeuronLayerTest, TestExpGradientWithShift) { + typedef typename TypeParam::Dtype Dtype; + // Test default base of "-1" -- should actually set base := e, + // with a non-zero shift + const Dtype kBase = -1; + const Dtype kScale = 1; + const Dtype kShift = 1; + this->TestExpGradient(kBase, kScale, kShift); +} + TYPED_TEST(NeuronLayerTest, TestExpLayerBase2) { typedef typename TypeParam::Dtype Dtype; const Dtype kBase = 2; diff --git a/src/caffe/util/db_lmdb.cpp b/src/caffe/util/db_lmdb.cpp index 0bc82b53e2b..fb1d4956aa1 100644 --- a/src/caffe/util/db_lmdb.cpp +++ b/src/caffe/util/db_lmdb.cpp @@ -7,13 +7,10 @@ namespace caffe { namespace db { -const size_t LMDB_MAP_SIZE = 1099511627776; // 1 TB - void LMDB::Open(const string& source, Mode mode) { MDB_CHECK(mdb_env_create(&mdb_env_)); - MDB_CHECK(mdb_env_set_mapsize(mdb_env_, LMDB_MAP_SIZE)); if (mode == NEW) { - CHECK_EQ(mkdir(source.c_str(), 0744), 0) << "mkdir " << source << "failed"; + CHECK_EQ(mkdir(source.c_str(), 0744), 0) << "mkdir " << source << " failed"; } int flags = 0; if (mode == READ) { @@ -48,19 +45,67 @@ LMDBCursor* LMDB::NewCursor() { } LMDBTransaction* LMDB::NewTransaction() { - MDB_txn* mdb_txn; - MDB_CHECK(mdb_txn_begin(mdb_env_, NULL, 0, &mdb_txn)); - MDB_CHECK(mdb_dbi_open(mdb_txn, NULL, 0, &mdb_dbi_)); - return new LMDBTransaction(&mdb_dbi_, mdb_txn); + return new LMDBTransaction(mdb_env_); } void LMDBTransaction::Put(const string& key, const string& value) { - MDB_val mdb_key, mdb_value; - mdb_key.mv_data = const_cast(key.data()); - mdb_key.mv_size = key.size(); - mdb_value.mv_data = const_cast(value.data()); - mdb_value.mv_size = value.size(); - MDB_CHECK(mdb_put(mdb_txn_, *mdb_dbi_, &mdb_key, &mdb_value, 0)); + keys.push_back(key); + values.push_back(value); +} + +void LMDBTransaction::Commit() { + MDB_dbi mdb_dbi; + MDB_val mdb_key, mdb_data; + MDB_txn *mdb_txn; + + // Initialize MDB variables + MDB_CHECK(mdb_txn_begin(mdb_env_, NULL, 0, &mdb_txn)); + MDB_CHECK(mdb_dbi_open(mdb_txn, NULL, 0, &mdb_dbi)); + + for (int i = 0; i < keys.size(); i++) { + mdb_key.mv_size = keys[i].size(); + mdb_key.mv_data = const_cast(keys[i].data()); + mdb_data.mv_size = values[i].size(); + mdb_data.mv_data = const_cast(values[i].data()); + + // Add data to the transaction + int put_rc = mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0); + if (put_rc == MDB_MAP_FULL) { + // Out of memory - double the map size and retry + mdb_txn_abort(mdb_txn); + mdb_dbi_close(mdb_env_, mdb_dbi); + DoubleMapSize(); + Commit(); + return; + } + // May have failed for some other reason + MDB_CHECK(put_rc); + } + + // Commit the transaction + int commit_rc = mdb_txn_commit(mdb_txn); + if (commit_rc == MDB_MAP_FULL) { + // Out of memory - double the map size and retry + mdb_dbi_close(mdb_env_, mdb_dbi); + DoubleMapSize(); + Commit(); + return; + } + // May have failed for some other reason + MDB_CHECK(commit_rc); + + // Cleanup after successful commit + mdb_dbi_close(mdb_env_, mdb_dbi); + keys.clear(); + values.clear(); +} + +void LMDBTransaction::DoubleMapSize() { + struct MDB_envinfo current_info; + 
MDB_CHECK(mdb_env_info(mdb_env_, &current_info)); + size_t new_size = current_info.me_mapsize * 2; + DLOG(INFO) << "Doubling LMDB map size to " << (new_size>>20) << "MB ..."; + MDB_CHECK(mdb_env_set_mapsize(mdb_env_, new_size)); } } // namespace db diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 5d9331f0c22..5bb60eb161d 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -33,7 +33,7 @@ DEFINE_string(gpu, "", DEFINE_string(solver, "", "The solver definition protocol buffer text file."); DEFINE_string(model, "", - "The model definition protocol buffer text file.."); + "The model definition protocol buffer text file."); DEFINE_string(snapshot, "", "Optional; the snapshot solver state to resume training."); DEFINE_string(weights, "", diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp index 9c52bfa0ef8..90cdb15d427 100644 --- a/tools/convert_imageset.cpp +++ b/tools/convert_imageset.cpp @@ -73,10 +73,13 @@ int main(int argc, char** argv) { std::ifstream infile(argv[2]); std::vector > lines; - std::string filename; + std::string line; + size_t pos; int label; - while (infile >> filename >> label) { - lines.push_back(std::make_pair(filename, label)); + while (std::getline(infile, line)) { + pos = line.find_last_of(' '); + label = atoi(line.substr(pos + 1).c_str()); + lines.push_back(std::make_pair(line.substr(0, pos), label)); } if (FLAGS_shuffle) { // randomly shuffle data diff --git a/tools/extra/plot_training_log.py.example b/tools/extra/plot_training_log.py.example index 4d3ed0d15a9..79924ae5a5a 100755 --- a/tools/extra/plot_training_log.py.example +++ b/tools/extra/plot_training_log.py.example @@ -10,7 +10,8 @@ import matplotlib.legend as lgd import matplotlib.markers as mks def get_log_parsing_script(): - dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) + dirname = os.path.dirname(os.path.abspath(inspect.getfile( + inspect.currentframe()))) return dirname + '/parse_log.sh' def get_log_file_suffix(): @@ -61,16 +62,17 @@ def get_data_file_type(chart_type): return data_file_type def get_data_file(chart_type, path_to_log): - return os.path.basename(path_to_log) + '.' + get_data_file_type(chart_type).lower() + return (os.path.basename(path_to_log) + '.'
+ + get_data_file_type(chart_type).lower()) def get_field_descriptions(chart_type): description = get_chart_type_description(chart_type).split( get_chart_type_description_separator()) y_axis_field = description[0] x_axis_field = description[1] - return x_axis_field, y_axis_field + return x_axis_field, y_axis_field -def get_field_indecies(x_axis_field, y_axis_field): +def get_field_indices(x_axis_field, y_axis_field): data_file_type = get_data_file_type(chart_type) fields = create_field_index()[0][data_file_type] return fields[x_axis_field], fields[y_axis_field] @@ -111,7 +113,7 @@ def plot_chart(chart_type, path_to_png, path_to_log_list): os.system('%s %s' % (get_log_parsing_script(), path_to_log)) data_file = get_data_file(chart_type, path_to_log) x_axis_field, y_axis_field = get_field_descriptions(chart_type) - x, y = get_field_indecies(x_axis_field, y_axis_field) + x, y = get_field_indices(x_axis_field, y_axis_field) data = load_data(data_file, x, y) ## TODO: more systematic color cycle for lines color = [random.random(), random.random(), random.random()] @@ -138,8 +140,8 @@ def plot_chart(chart_type, path_to_png, path_to_log_list): plt.legend(loc = legend_loc, ncol = 1) # ajust ncol to fit the space plt.title(get_chart_type_description(chart_type)) plt.xlabel(x_axis_field) - plt.ylabel(y_axis_field) - plt.savefig(path_to_png) + plt.ylabel(y_axis_field) + plt.savefig(path_to_png) plt.show() def print_help(): @@ -160,28 +162,30 @@ Supported chart types:""" % (len(get_supported_chart_types()) - 1, num = len(supported_chart_types) for i in xrange(num): print ' %d: %s' % (i, supported_chart_types[i]) - exit + sys.exit() def is_valid_chart_type(chart_type): return chart_type >= 0 and chart_type < len(get_supported_chart_types()) - + if __name__ == '__main__': if len(sys.argv) < 4: print_help() else: chart_type = int(sys.argv[1]) if not is_valid_chart_type(chart_type): + print '%s is not a valid chart type.' % chart_type print_help() path_to_png = sys.argv[2] if not path_to_png.endswith('.png'): print 'Path must ends with png' % path_to_png - exit + sys.exit() path_to_logs = sys.argv[3:] for path_to_log in path_to_logs: if not os.path.exists(path_to_log): print 'Path does not exist: %s' % path_to_log - exit + sys.exit() if not path_to_log.endswith(get_log_file_suffix()): + print 'Log file must end in %s.' % get_log_file_suffix() print_help() ## plot_chart accpets multiple path_to_logs plot_chart(chart_type, path_to_png, path_to_logs)
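One behavioral change above is worth spelling out: after the db_lmdb.cpp rework, LMDBTransaction::Put only buffers key/value pairs in memory, and Commit opens a single write transaction, replays the buffered puts, and doubles the memory map whenever MDB_MAP_FULL is hit, instead of reserving a fixed 1 TB map up front. A usage sketch against Caffe's db wrapper (the path and keys here are illustrative):

#include "caffe/util/db.hpp"

#include <cstdio>
#include <string>

#include <boost/scoped_ptr.hpp>

using boost::scoped_ptr;
using namespace caffe;  // NOLINT(build/namespaces)

int main() {
  scoped_ptr<db::DB> db(db::GetDB("lmdb"));
  db->Open("/tmp/example_lmdb", db::NEW);                // hypothetical path
  scoped_ptr<db::Transaction> txn(db->NewTransaction());
  for (int i = 0; i < 1000; ++i) {
    char key[16];
    snprintf(key, sizeof(key), "%08d", i);
    txn->Put(std::string(key), "serialized datum");      // buffered, no I/O yet
  }
  txn->Commit();  // one write txn; the map is grown and retried if it fills
  db->Close();
  return 0;
}

Writers such as convert_imageset keep the same call pattern; only the point at which data actually reaches disk changes.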