From 7f457f0e0f11f4cfa0e06d5b9819a17164ed763d Mon Sep 17 00:00:00 2001
From: Richard Postelnik 
Date: Wed, 3 May 2017 12:22:50 -0400
Subject: [PATCH 01/17] create regularizer base class, elastic net regularization, update readme with dev setup.

---
 README.rst                                    |  14 ++
 dask_glm/algorithms.py                        |   4 +-
 dask_glm/regularizers.py                      | 135 +++++++++++-------
 dask_glm/tests/test_admm.py                   |   2 +-
 dask_glm/tests/test_algos_families.py         |   2 +-
 ...ElasticNetProximalOperatorDerivation.ipynb |  94 ++++++++++++
 6 files changed, 193 insertions(+), 58 deletions(-)
 create mode 100644 notebooks/ElasticNetProximalOperatorDerivation.ipynb

diff --git a/README.rst b/README.rst
index 9eeddd7..2b4b61d 100644
--- a/README.rst
+++ b/README.rst
@@ -5,5 +5,19 @@ Generalized Linear Models in Dask
 
 *This library is not ready for use.*
 
+Developer Setup
+---------------
+Setup environment (from repo directory)::
+
+    conda env create
+    source activate dask_glm
+    pip install -e .
+
+Run tests::
+
+    py.test
+
+
+
 .. |Build Status| image:: https://travis-ci.org/dask/dask-glm.svg?branch=master
    :target: https://travis-ci.org/dask/dask-glm
diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py
index 981bdae..520c452 100644
--- a/dask_glm/algorithms.py
+++ b/dask_glm/algorithms.py
@@ -150,7 +150,7 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic):
     return beta
 
 
-def admm(X, y, regularizer=L1, lamduh=0.1, rho=1, over_relax=1,
+def admm(X, y, regularizer=L1(), lamduh=0.1, rho=1, over_relax=1,
          max_iter=250, abstol=1e-4, reltol=1e-2, family=Logistic):
 
     pointwise_loss = family.pointwise_loss
@@ -319,7 +319,7 @@ def bfgs(X, y, max_iter=500, tol=1e-14, family=Logistic):
     return beta
 
 
-def proximal_grad(X, y, regularizer=L1, lamduh=0.1, family=Logistic,
+def proximal_grad(X, y, regularizer=L1(), lamduh=0.1, family=Logistic,
                   max_iter=100, tol=1e-8, verbose=False):
 
     n, p = X.shape
diff --git a/dask_glm/regularizers.py b/dask_glm/regularizers.py
index 1398e6b..d866420 100644
--- a/dask_glm/regularizers.py
+++ b/dask_glm/regularizers.py
@@ -3,85 +3,112 @@
 import numpy as np
 
 
-class L2(object):
+class Regularizer(object):
+    """Abstract base class for regularization object.
 
-    @staticmethod
-    def proximal_operator(beta, t):
-        return 1 / (1 + t) * beta
+    Defines the set of methods required to create a new regularization object. This includes
+    the regularization functions itself and it's gradient, hessian, and proximal operator. 
+ """ - @staticmethod - def hessian(beta): - return 2 * np.eye(len(beta)) + def f(self, beta): + """Regularization function.""" + raise NotImplementedError - @staticmethod - def add_reg_hessian(hess, lam): - def wrapped(beta, *args): - return hess(beta, *args) + lam * L2.hessian(beta) - return wrapped + def gradient(self, beta): + """Gradient of regularization function.""" + raise NotImplementedError - @staticmethod - def f(beta): - return (beta**2).sum() + def hessian(self, beta): + """Hessian of regularization function.""" + raise NotImplementedError - @staticmethod - def add_reg_f(f, lam): + def proximal_operator(self, beta, t): + """Proximal operator function for non-differentiable regularization function.""" + raise NotImplementedError + + def add_reg_f(self, f, lam): + """Add regularization function to other function.""" def wrapped(beta, *args): - return f(beta, *args) + lam * L2.f(beta) + return f(beta, *args) + lam * self.f(beta) return wrapped - @staticmethod - def gradient(beta): - return 2 * beta + def add_reg_grad(self, grad, lam): + """Add regularization gradient to other gradient function.""" + def wrapped(beta, *args): + return grad(beta, *args) + lam * self.gradient(beta) + return wrapped - @staticmethod - def add_reg_grad(grad, lam): + def add_reg_hessian(self, hess, lam): + """Add regularization hessian to other hessian function.""" def wrapped(beta, *args): - return grad(beta, *args) + lam * L2.gradient(beta) + return hess(beta, *args) + lam * self.hessian(beta) return wrapped -class L1(object): +class L2(Regularizer): + """L2 regularization.""" - @staticmethod - def proximal_operator(beta, t): - z = np.maximum(0, beta - t) - np.maximum(0, -beta - t) - return z + def f(self, beta): + return (beta**2).sum() - @staticmethod - def hessian(beta): - raise ValueError('l1 norm is not twice differentiable!') + def gradient(self, beta): + return 2 * beta - @staticmethod - def add_reg_hessian(hess, lam): - def wrapped(beta, *args): - return hess(beta, *args) + lam * L1.hessian(beta) - return wrapped + def proximal_operator(self, beta, t): + return 1 / (1 + t) * beta - @staticmethod - def f(beta): - return (np.abs(beta)).sum() + def hessian(self, beta): + return 2 * np.eye(len(beta)) - @staticmethod - def add_reg_f(f, lam): - def wrapped(beta, *args): - return f(beta, *args) + lam * L1.f(beta) - return wrapped - @staticmethod - def gradient(beta): +class L1(Regularizer): + """L1 regularization.""" + + def f(self, beta): + return (np.abs(beta)).sum() + + def gradient(self, beta): if np.any(np.isclose(beta, 0)): raise ValueError('l1 norm is not differentiable at 0!') else: return np.sign(beta) - @staticmethod - def add_reg_grad(grad, lam): - def wrapped(beta, *args): - return grad(beta, *args) + lam * L1.gradient(beta) - return wrapped + def hessian(self, beta): + raise ValueError('l1 norm is not twice differentiable!') + + def proximal_operator(self, beta, t): + z = np.maximum(0, beta - t) - np.maximum(0, -beta - t) + return z + + +class ElasticNet(Regularizer): + """Elastic net regularization.""" + + def __init__(self, weight): + self.weight = weight + self.l1 = L1() + self.l2 = L2() + + def _weighted(self, left, right): + return self.weight * left + (1 - self.weight) * right + + def f(self, beta): + return self._weighted(self.l1.f(beta), self.l2.f(beta)) + + def gradient(self, beta): + return self._weighted(self.l1.gradient(beta), self.l2.gradient(beta)) + + def hessian(self, beta): + return (1 - weight) * self.l2.hessian(beta) + + def proximal_operator(self, beta, t): + g = 
self.weight * t
+        if beta <= g:
+            return 0
+        return (beta - g * np.sign(beta)) / (t - g + 1)
 
 
 _regularizers = {
-    'l1': L1,
-    'l2': L2,
+    'l1': L1(),
+    'l2': L2(),
 }
diff --git a/dask_glm/tests/test_admm.py b/dask_glm/tests/test_admm.py
index 32a7a2e..1d97cde 100644
--- a/dask_glm/tests/test_admm.py
+++ b/dask_glm/tests/test_admm.py
@@ -52,6 +52,6 @@ def test_admm_with_large_lamduh(N, p, nchunks):
     y = make_y(X, beta=np.array(beta), chunks=(N // nchunks,))
 
     X, y = persist(X, y)
-    z = admm(X, y, regularizer=L1, lamduh=1e4, rho=20, max_iter=500)
+    z = admm(X, y, regularizer=L1(), lamduh=1e4, rho=20, max_iter=500)
 
     assert np.allclose(z, np.zeros(p), atol=1e-4)
diff --git a/dask_glm/tests/test_algos_families.py b/dask_glm/tests/test_algos_families.py
index 5c75f83..f58ec43 100644
--- a/dask_glm/tests/test_algos_families.py
+++ b/dask_glm/tests/test_algos_families.py
@@ -89,7 +89,7 @@ def test_basic_unreg_descent(func, kwargs, N, nchunks, family):
 @pytest.mark.parametrize('nchunks', [1, 10])
 @pytest.mark.parametrize('family', [Logistic, Normal])
 @pytest.mark.parametrize('lam', [0.01, 1.2, 4.05])
-@pytest.mark.parametrize('reg', [L1, L2])
+@pytest.mark.parametrize('reg', [L1(), L2()])
 def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg):
     beta = np.random.normal(size=2)
     M = len(beta)
diff --git a/notebooks/ElasticNetProximalOperatorDerivation.ipynb b/notebooks/ElasticNetProximalOperatorDerivation.ipynb
new file mode 100644
index 0000000..20387ca
--- /dev/null
+++ b/notebooks/ElasticNetProximalOperatorDerivation.ipynb
@@ -0,0 +1,94 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Title: Proximal Operator for Elastic Net Regularization  \n",
+    "Author: Christopher White, Richard Postelnik  \n",
+    "Date: May 3, 2017  \n",
+    "\n",
+    "# Derivation of the Proximal Operator for Elastic Net Regularization\n",
+    "\n",
+    "The proximal operator for a function $f$ is defined as:\n",
+    "\n",
+    "$$prox_f(v, \\lambda)= \\arg\\min_x \\big(f(x) + \\frac{1}{2\\lambda}\\| x-v\\|^2\\big)$$\n",
+    "\n",
+    "Elastic net regularization is defined as a convex combination of the $\\ell_1$ and $\\ell_2$ norms:\n",
+    "\n",
+    "$$\\alpha\\| x\\|_1 + \\frac{(1 - \\alpha)}{2}\\| x\\|^2_2$$\n",
+    "\n",
+    "where $\\alpha\\in[0, 1]$ and the half before the $\\ell_2$ norm is added purely for convenience in the derivation.\n",
+    "\n",
+    "Plugging this into the proximal operator definition:\n",
+    "\n",
+    "$$prox_f(v, \\lambda)=\\arg\\min_x\\big(\\alpha\\| x\\|_1 + \\frac{(1 - \\alpha)}{2}\\| x\\|^2_2 + \\frac{1}{2\\lambda}\\| x-v\\|^2\\big)$$\n",
+    "\n",
+    "Working one component at a time, the first order optimality condition states that $0$ is in the subgradient:\n",
+    "\n",
+    "$$\\alpha\\partial\\vert x\\vert + (1-\\alpha)x + \\frac{1}{\\lambda}(x - v) \\ni 0$$\n",
+    "\n",
+    "where:\n",
+    "\n",
+    "$$\\alpha\\partial\\vert x\\vert = \\left\\{\n",
+    "\\begin{array}{ll}\n",
+    "      sign(x)\\alpha & x \\neq 0 \\\\\n",
+    "      [-\\alpha, \\alpha] & x=0 \\\\\n",
+    "\\end{array} \n",
+    "\\right.$$\n",
+    "\n",
+    "When $x$ is not $0$ we have:\n",
+    "\n",
+    "$$x = \\frac{v-\\lambda\\alpha\\, sign(v)}{\\lambda - \\lambda\\alpha + 1}$$\n",
+    "\n",
+    "Plugging in values for positive and negative $x$ we find the above holds when:\n",
+    "\n",
+    "$$|v| > \\lambda\\alpha$$\n",
+    "\n",
+    "Likewise, when $x = 0$ the condition is:\n",
+    "\n",
+    "$$|v| \\leq \\lambda\\alpha$$\n",
+    "\n",
+    "And we find that the proximal operator for elastic net is:\n",
+    "\n",
+    "$$prox_f(v, \\lambda) = \\left\\{\n",
+    "\\begin{array}{ll}\n",
+    "      \\frac{v-\\lambda\\alpha\\,
sign(v)}{\\lambda - \\lambda\\alpha + 1} & |v| > \\lambda\\alpha \\\\\n", + " 0 & |v|\\leq \\lambda\\alpha \\\\\n", + "\\end{array} \n", + "\\right.$$\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From e38cefbc39f2b06d77e2c6266a961088af84562a Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Wed, 3 May 2017 12:37:39 -0400 Subject: [PATCH 02/17] vectorize proximal operator for elastic net --- dask_glm/regularizers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dask_glm/regularizers.py b/dask_glm/regularizers.py index d866420..7257bda 100644 --- a/dask_glm/regularizers.py +++ b/dask_glm/regularizers.py @@ -102,10 +102,13 @@ def hessian(self, beta): return (1 - weight) * self.l2.hessian(beta) def proximal_operator(self, beta, t): + """See notebooks/ElasticNetProximalOperatorDerivation.ipynb for derivation.""" g = self.weight * t - if beta <= g: - return 0 - return (beta - g * np.sign(beta)) / (t - g + 1) + def func(b): + if b <= g: + return 0 + return (b - g * np.sign(b)) / (t - g + 1) + return func(beta) _regularizers = { From aba2bca7dd04108d08c2d5f46cb040d60beb9fd0 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Wed, 3 May 2017 15:46:15 -0400 Subject: [PATCH 03/17] fix l2, write tests for regularizers --- dask_glm/regularizers.py | 15 ++-- dask_glm/tests/test_regularizers.py | 115 ++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 7 deletions(-) create mode 100644 dask_glm/tests/test_regularizers.py diff --git a/dask_glm/regularizers.py b/dask_glm/regularizers.py index 7257bda..40c712b 100644 --- a/dask_glm/regularizers.py +++ b/dask_glm/regularizers.py @@ -49,17 +49,17 @@ class L2(Regularizer): """L2 regularization.""" def f(self, beta): - return (beta**2).sum() + return (beta**2).sum() / 2 def gradient(self, beta): - return 2 * beta + return beta + + def hessian(self, beta): + return np.eye(len(beta)) def proximal_operator(self, beta, t): return 1 / (1 + t) * beta - def hessian(self, beta): - return 2 * np.eye(len(beta)) - class L1(Regularizer): """L1 regularization.""" @@ -84,8 +84,8 @@ def proximal_operator(self, beta, t): class ElasticNet(Regularizer): """Elastic net regularization.""" - def __init__(self, weight): - self.weight = weight + def __init__(self, weight=None): + self.weight = weight or 0.5 self.l1 = L1() self.l2 = L2() @@ -114,4 +114,5 @@ def func(b): _regularizers = { 'l1': L1(), 'l2': L2(), + 'elastic_net': ElasticNet() } diff --git a/dask_glm/tests/test_regularizers.py b/dask_glm/tests/test_regularizers.py new file mode 100644 index 0000000..da5206e --- /dev/null +++ b/dask_glm/tests/test_regularizers.py @@ -0,0 +1,115 @@ +import numpy as np +import numpy.testing as npt +import pytest +from dask_glm import regularizers as regs + + +@pytest.mark.parametrize('func,args', [ + ('f', [0]), + ('gradient', [0]), + ('hessian', [0]), + ('proximal_operator', [0, 1]) +]) +def test_base_class_raises_notimplementederror(func, args): + with pytest.raises(NotImplementedError): + getattr(regs.Regularizer(), 
func)(*args) + + +class FooRegularizer(regs.Regularizer): + + def f(self, beta): + return beta + 1 + + def gradient(self, beta): + return beta + 1 + + def hessian(self, beta): + return beta + 1 + + +@pytest.mark.parametrize('func', [ + 'add_reg_f', + 'add_reg_grad', + 'add_reg_hessian' +]) +def test_add_reg_funcs(func): + def foo(x): + return x**2 + new_func = getattr(FooRegularizer(), func)(foo, 1) + assert callable(new_func) + assert new_func(2) == 7 + + +@pytest.mark.parametrize('beta,expected', [ + (np.array([0, 0, 0]), 0), + (np.array([1, 2, 3]), 7) +]) +def test_l2_function(beta, expected): + assert regs.L2().f(beta) == expected + + +@pytest.mark.parametrize('beta', [ + np.array([0, 0, 0]), + np.array([1, 2, 3]) +]) +def test_l2_gradient(beta): + npt.assert_array_equal(regs.L2().gradient(beta), beta) + + +@pytest.mark.parametrize('beta', [ + np.array([0, 0, 0]), + np.array([1, 2, 3]) +]) +def test_l2_hessian(beta): + npt.assert_array_equal(regs.L2().hessian(beta), np.eye(len(beta))) + + +@pytest.mark.parametrize('beta,expected', [ + (np.array([0, 0, 0]), np.array([0, 0, 0])), + (np.array([1, 2, 3]), np.array([0.5, 1, 1.5])) +]) +def test_l2_proximal_operator(beta, expected): + npt.assert_array_equal(regs.L2().proximal_operator(beta, 1), expected) + + +@pytest.mark.parametrize('beta,expected', [ + (np.array([0, 0, 0]), 0), + (np.array([-1, 2, 3]), 6) +]) +def test_l1_function(beta, expected): + assert regs.L1().f(beta) == expected + + +@pytest.mark.parametrize('beta,expected', [ + (np.array([1, 2, 3]), np.array([1, 1, 1])), + (np.array([-1, 2, 3]), np.array([-1, 1, 1])) +]) +def test_l1_gradient(beta, expected): + npt.assert_array_equal(regs.L1().gradient(beta), expected) + + +@pytest.mark.parametrize('beta', [ + np.array([0.00000001, 1, 2]), + np.array([-0.00000001, 1, 2]), + np.array([0, 0, 0]) +]) +def test_l1_gradient_raises_near_zero(beta): + with pytest.raises(ValueError): + regs.L1().gradient(beta) + + +@pytest.mark.parametrize('beta', [ + np.array([0, 0, 0]), + np.array([1, 2, 3]) +]) +def test_l1_hessian_raises(beta): + with pytest.raises(ValueError): + regs.L1().hessian(beta) + + +@pytest.mark.parametrize('beta,expected', [ + (np.array([0, 0, 0]), np.array([0, 0, 0])), + (np.array([1, 2, 3]), np.array([0, 1, 2])) +]) +def test_l1_proximal_operator(beta, expected): + npt.assert_array_equal(regs.L1().proximal_operator(beta, 1), expected) From 0fe6a30492c9f18df978cd4e734289d5546e2a91 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Wed, 3 May 2017 16:45:47 -0400 Subject: [PATCH 04/17] add tests for elastic net --- dask_glm/regularizers.py | 9 +++--- dask_glm/tests/test_algos_families.py | 4 +-- dask_glm/tests/test_regularizers.py | 44 +++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/dask_glm/regularizers.py b/dask_glm/regularizers.py index 40c712b..71680ba 100644 --- a/dask_glm/regularizers.py +++ b/dask_glm/regularizers.py @@ -84,8 +84,8 @@ def proximal_operator(self, beta, t): class ElasticNet(Regularizer): """Elastic net regularization.""" - def __init__(self, weight=None): - self.weight = weight or 0.5 + def __init__(self, weight=0.5): + self.weight = weight self.l1 = L1() self.l2 = L2() @@ -99,16 +99,17 @@ def gradient(self, beta): return self._weighted(self.l1.gradient(beta), self.l2.gradient(beta)) def hessian(self, beta): - return (1 - weight) * self.l2.hessian(beta) + return (1 - self.weight) * self.l2.hessian(beta) def proximal_operator(self, beta, t): """See notebooks/ElasticNetProximalOperatorDerivation.ipynb for 
derivation."""
         g = self.weight * t
+        @np.vectorize
         def func(b):
             if b <= g:
                 return 0
             return (b - g * np.sign(b)) / (t - g + 1)
-        return func(beta)
+        return beta
 
 
 _regularizers = {
diff --git a/dask_glm/tests/test_algos_families.py b/dask_glm/tests/test_algos_families.py
index f58ec43..f8c6d4b 100644
--- a/dask_glm/tests/test_algos_families.py
+++ b/dask_glm/tests/test_algos_families.py
@@ -9,7 +9,7 @@
 from dask_glm.algorithms import (newton, bfgs, proximal_grad,
                                  gradient_descent, admm)
 from dask_glm.families import Logistic, Normal
-from dask_glm.regularizers import L1, L2
+from dask_glm.regularizers import _regularizers
 from dask_glm.utils import sigmoid, make_y
 
 
@@ -89,7 +89,7 @@ def test_basic_unreg_descent(func, kwargs, N, nchunks, family):
 @pytest.mark.parametrize('nchunks', [1, 10])
 @pytest.mark.parametrize('family', [Logistic, Normal])
 @pytest.mark.parametrize('lam', [0.01, 1.2, 4.05])
-@pytest.mark.parametrize('reg', [L1(), L2()])
+@pytest.mark.parametrize('reg', list(_regularizers.values()))
 def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg):
     beta = np.random.normal(size=2)
     M = len(beta)
diff --git a/dask_glm/tests/test_regularizers.py b/dask_glm/tests/test_regularizers.py
index da5206e..3fd5d17 100644
--- a/dask_glm/tests/test_regularizers.py
+++ b/dask_glm/tests/test_regularizers.py
@@ -113,3 +113,47 @@ def test_l1_hessian_raises(beta):
 ])
 def test_l1_proximal_operator(beta, expected):
     npt.assert_array_equal(regs.L1().proximal_operator(beta, 1), expected)
+
+
+@pytest.mark.parametrize('beta,expected', [
+    (np.array([0, 0, 0]), 0),
+    (np.array([1, 2, 3]), 6.5)
+])
+def test_elastic_net_function(beta, expected):
+    assert regs.ElasticNet().f(beta) == expected
+
+
+def test_elastic_net_function_zero_weight_is_l2():
+    beta = np.array([1, 2, 3])
+    assert regs.ElasticNet(weight=0).f(beta) == regs.L2().f(beta)
+
+
+def test_elastic_net_function_unit_weight_is_l1():
+    beta = np.array([1, 2, 3])
+    assert regs.ElasticNet(weight=1).f(beta) == regs.L1().f(beta)
+
+
+def test_elastic_net_gradient():
+    beta = np.array([1, 2, 3])
+    npt.assert_array_equal(regs.ElasticNet(weight=0.5).gradient(beta), np.array([1, 1.5, 2]))
+
+
+def test_elastic_net_gradient_zero_weight_is_l2():
+    beta = np.array([1, 2, 3])
+    npt.assert_array_equal(regs.ElasticNet(weight=0).gradient(beta), regs.L2().gradient(beta))
+
+
+def test_elastic_net_gradient_unit_weight_is_l1():
+    beta = np.array([1, 2, 3])
+    npt.assert_array_equal(regs.ElasticNet(weight=1).gradient(beta), regs.L1().gradient(beta))
+
+
+def test_elastic_net_hessian():
+    beta = np.array([1, 2, 3])
+    npt.assert_array_equal(regs.ElasticNet(weight=0.5).hessian(beta),
+                           np.eye(len(beta)) * regs.ElasticNet().weight)
+
+
+def test_elastic_net_proximal_operator():
+    beta = np.array([1, 2, 3])
+    npt.assert_array_equal(regs.ElasticNet(weight=0.5).proximal_operator(beta, 1), beta)
From 3e2803180290849b44b7cd65c11022b73ec3bcd2 Mon Sep 17 00:00:00 2001
From: Richard Postelnik 
Date: Wed, 3 May 2017 18:33:00 -0400
Subject: [PATCH 05/17] fix flake, change to string

---
 dask_glm/algorithms.py   | 6 +++---
 dask_glm/regularizers.py | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py
index 520c452..a05855d 100644
--- a/dask_glm/algorithms.py
+++ b/dask_glm/algorithms.py
@@ -24,7 +24,7 @@
 
 from dask_glm.utils import dot, exp, log1p
 from dask_glm.families import Logistic
-from dask_glm.regularizers import L1, _regularizers
+from dask_glm.regularizers import _regularizers
 
 
 def compute_stepsize_dask(beta, 
step, Xbeta, Xstep, y, curr_val, @@ -150,7 +150,7 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic): return beta -def admm(X, y, regularizer=L1(), lamduh=0.1, rho=1, over_relax=1, +def admm(X, y, regularizer='l1', lamduh=0.1, rho=1, over_relax=1, max_iter=250, abstol=1e-4, reltol=1e-2, family=Logistic): pointwise_loss = family.pointwise_loss @@ -319,7 +319,7 @@ def bfgs(X, y, max_iter=500, tol=1e-14, family=Logistic): return beta -def proximal_grad(X, y, regularizer=L1(), lamduh=0.1, family=Logistic, +def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic, max_iter=100, tol=1e-8, verbose=False): n, p = X.shape diff --git a/dask_glm/regularizers.py b/dask_glm/regularizers.py index 71680ba..a540bbe 100644 --- a/dask_glm/regularizers.py +++ b/dask_glm/regularizers.py @@ -104,6 +104,7 @@ def hessian(self, beta): def proximal_operator(self, beta, t): """See notebooks/ElasticNetProximalOperatorDerivation.ipynb for derivation.""" g = self.weight * t + @np.vectorize def func(b): if b <= g: From 995a4dc0b3ebede0c826817abc5b98befbbb13a0 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Wed, 3 May 2017 18:55:44 -0400 Subject: [PATCH 06/17] use base regularizer class to retrieve subclasses via string. --- dask_glm/algorithms.py | 6 +++--- dask_glm/regularizers.py | 19 ++++++++++++------- dask_glm/tests/test_algos_families.py | 4 ++-- dask_glm/tests/test_estimators.py | 6 +++--- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py index a05855d..e7bfb4d 100644 --- a/dask_glm/algorithms.py +++ b/dask_glm/algorithms.py @@ -24,7 +24,7 @@ from dask_glm.utils import dot, exp, log1p from dask_glm.families import Logistic -from dask_glm.regularizers import _regularizers +from dask_glm.regularizers import Regularizer def compute_stepsize_dask(beta, step, Xbeta, Xstep, y, curr_val, @@ -155,7 +155,7 @@ def admm(X, y, regularizer='l1', lamduh=0.1, rho=1, over_relax=1, pointwise_loss = family.pointwise_loss pointwise_gradient = family.pointwise_gradient - regularizer = _regularizers.get(regularizer, regularizer) # string + regularizer = Regularizer.get(regularizer) def create_local_gradient(func): @functools.wraps(func) @@ -331,7 +331,7 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic, recalcRate = 10 backtrackMult = firstBacktrackMult beta = np.zeros(p) - regularizer = _regularizers.get(regularizer, regularizer) # string + regularizer = Regularizer.get(regularizer) if verbose: print('# -f |df/f| |dx/x| step') diff --git a/dask_glm/regularizers.py b/dask_glm/regularizers.py index a540bbe..f468e04 100644 --- a/dask_glm/regularizers.py +++ b/dask_glm/regularizers.py @@ -9,6 +9,7 @@ class Regularizer(object): Defines the set of methods required to create a new regularization object. This includes the regularization functions itself and it's gradient, hessian, and proximal operator. 
""" + _name = '_base' def f(self, beta): """Regularization function.""" @@ -44,9 +45,18 @@ def wrapped(beta, *args): return hess(beta, *args) + lam * self.hessian(beta) return wrapped + @classmethod + def get(cls, obj): + if isinstance(obj, cls): + return obj + elif isinstance(obj, str): + return {o._name: o for o in cls.__subclasses__()}[obj]() + raise TypeError('Not a valid regularizer object.') + class L2(Regularizer): """L2 regularization.""" + _name = 'l2' def f(self, beta): return (beta**2).sum() / 2 @@ -63,6 +73,7 @@ def proximal_operator(self, beta, t): class L1(Regularizer): """L1 regularization.""" + _name = 'l1' def f(self, beta): return (np.abs(beta)).sum() @@ -83,6 +94,7 @@ def proximal_operator(self, beta, t): class ElasticNet(Regularizer): """Elastic net regularization.""" + _name = 'elastic_net' def __init__(self, weight=0.5): self.weight = weight @@ -111,10 +123,3 @@ def func(b): return 0 return (b - g * np.sign(b)) / (t - g + 1) return beta - - -_regularizers = { - 'l1': L1(), - 'l2': L2(), - 'elastic_net': ElasticNet() -} diff --git a/dask_glm/tests/test_algos_families.py b/dask_glm/tests/test_algos_families.py index 7fc7895..773f0c8 100644 --- a/dask_glm/tests/test_algos_families.py +++ b/dask_glm/tests/test_algos_families.py @@ -9,7 +9,7 @@ from dask_glm.algorithms import (newton, bfgs, proximal_grad, gradient_descent, admm) from dask_glm.families import Logistic, Normal, Poisson -from dask_glm.regularizers import _regularizers +from dask_glm.regularizers import Regularizer from dask_glm.utils import sigmoid, make_y @@ -89,7 +89,7 @@ def test_basic_unreg_descent(func, kwargs, N, nchunks, family): @pytest.mark.parametrize('nchunks', [1, 10]) @pytest.mark.parametrize('family', [Logistic, Normal, Poisson]) @pytest.mark.parametrize('lam', [0.01, 1.2, 4.05]) -@pytest.mark.parametrize('reg', list(_regularizers.values())) +@pytest.mark.parametrize('reg', [r() for r in Regularizer.__subclasses__()]) def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg): beta = np.random.normal(size=2) M = len(beta) diff --git a/dask_glm/tests/test_estimators.py b/dask_glm/tests/test_estimators.py index a0b75d2..73820ec 100644 --- a/dask_glm/tests/test_estimators.py +++ b/dask_glm/tests/test_estimators.py @@ -3,16 +3,16 @@ from dask_glm.estimators import LogisticRegression, LinearRegression, PoissonRegression from dask_glm.datasets import make_classification, make_regression, make_poisson from dask_glm.algorithms import _solvers -from dask_glm.regularizers import _regularizers +from dask_glm.regularizers import Regularizer -@pytest.fixture(params=_solvers.keys()) +@pytest.fixture(params=[r() for r in Regularizer.__subclasses__()]) def solver(request): """Parametrized fixture for all the solver names""" return request.param -@pytest.fixture(params=_regularizers.keys()) +@pytest.fixture(params=[r() for r in Regularizer.__subclasses__()]) def regularizer(request): """Parametrized fixture for all the regularizer names""" return request.param From a561e3c8f02a4b48520eca3c6070b1c03b516c32 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Thu, 4 May 2017 16:12:14 -0400 Subject: [PATCH 07/17] add tests for get --- dask_glm/regularizers.py | 10 +++++----- dask_glm/tests/test_estimators.py | 1 - dask_glm/tests/test_regularizers.py | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/dask_glm/regularizers.py b/dask_glm/regularizers.py index f468e04..b87a74f 100644 --- a/dask_glm/regularizers.py +++ b/dask_glm/regularizers.py @@ -9,7 +9,7 @@ class 
Regularizer(object): Defines the set of methods required to create a new regularization object. This includes the regularization functions itself and it's gradient, hessian, and proximal operator. """ - _name = '_base' + name = '_base' def f(self, beta): """Regularization function.""" @@ -50,13 +50,13 @@ def get(cls, obj): if isinstance(obj, cls): return obj elif isinstance(obj, str): - return {o._name: o for o in cls.__subclasses__()}[obj]() + return {o.name: o for o in cls.__subclasses__()}[obj]() raise TypeError('Not a valid regularizer object.') class L2(Regularizer): """L2 regularization.""" - _name = 'l2' + name = 'l2' def f(self, beta): return (beta**2).sum() / 2 @@ -73,7 +73,7 @@ def proximal_operator(self, beta, t): class L1(Regularizer): """L1 regularization.""" - _name = 'l1' + name = 'l1' def f(self, beta): return (np.abs(beta)).sum() @@ -94,7 +94,7 @@ def proximal_operator(self, beta, t): class ElasticNet(Regularizer): """Elastic net regularization.""" - _name = 'elastic_net' + name = 'elastic_net' def __init__(self, weight=0.5): self.weight = weight diff --git a/dask_glm/tests/test_estimators.py b/dask_glm/tests/test_estimators.py index 73820ec..fc913f5 100644 --- a/dask_glm/tests/test_estimators.py +++ b/dask_glm/tests/test_estimators.py @@ -2,7 +2,6 @@ from dask_glm.estimators import LogisticRegression, LinearRegression, PoissonRegression from dask_glm.datasets import make_classification, make_regression, make_poisson -from dask_glm.algorithms import _solvers from dask_glm.regularizers import Regularizer diff --git a/dask_glm/tests/test_regularizers.py b/dask_glm/tests/test_regularizers.py index 3fd5d17..dc2988e 100644 --- a/dask_glm/tests/test_regularizers.py +++ b/dask_glm/tests/test_regularizers.py @@ -40,6 +40,22 @@ def foo(x): assert new_func(2) == 7 +def test_regularizer_get_passes_through_instance(): + x = FooRegularizer() + assert regs.Regularizer.get(x) == x + + +def test_regularizer_get_unnamed_raises(): + with pytest.raises(KeyError): + regs.Regularizer.get('foo') + + +def test_regularizer_gets_from_name(): + class Foo(regs.Regularizer): + name = 'foo' + assert isinstance(regs.Regularizer.get('foo'), Foo) + + @pytest.mark.parametrize('beta,expected', [ (np.array([0, 0, 0]), 0), (np.array([1, 2, 3]), 7) From b58bfc01104da72dfa58313752eb4e89a2057f39 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Thu, 4 May 2017 16:42:49 -0400 Subject: [PATCH 08/17] fix docstrings, add hessian for l1. --- dask_glm/regularizers.py | 10 ++++++---- dask_glm/tests/test_regularizers.py | 18 ++++++++++++------ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/dask_glm/regularizers.py b/dask_glm/regularizers.py index b87a74f..4587f3d 100644 --- a/dask_glm/regularizers.py +++ b/dask_glm/regularizers.py @@ -7,7 +7,7 @@ class Regularizer(object): """Abstract base class for regularization object. Defines the set of methods required to create a new regularization object. This includes - the regularization functions itself and it's gradient, hessian, and proximal operator. + the regularization functions itself and its gradient, hessian, and proximal operator. 
""" name = '_base' @@ -24,7 +24,7 @@ def hessian(self, beta): raise NotImplementedError def proximal_operator(self, beta, t): - """Proximal operator function for non-differentiable regularization function.""" + """Proximal operator for regularization function.""" raise NotImplementedError def add_reg_f(self, f, lam): @@ -85,7 +85,9 @@ def gradient(self, beta): return np.sign(beta) def hessian(self, beta): - raise ValueError('l1 norm is not twice differentiable!') + if np.any(np.isclose(beta, 0)): + raise ValueError('l1 norm is not twice differentiable at 0!') + return np.zeros((beta.shape[0], beta.shape[0])) def proximal_operator(self, beta, t): z = np.maximum(0, beta - t) - np.maximum(0, -beta - t) @@ -111,7 +113,7 @@ def gradient(self, beta): return self._weighted(self.l1.gradient(beta), self.l2.gradient(beta)) def hessian(self, beta): - return (1 - self.weight) * self.l2.hessian(beta) + return self._weighted(self.l1.hessian(beta), self.l2.hessian(beta)) def proximal_operator(self, beta, t): """See notebooks/ElasticNetProximalOperatorDerivation.ipynb for derivation.""" diff --git a/dask_glm/tests/test_regularizers.py b/dask_glm/tests/test_regularizers.py index dc2988e..6c875af 100644 --- a/dask_glm/tests/test_regularizers.py +++ b/dask_glm/tests/test_regularizers.py @@ -114,13 +114,14 @@ def test_l1_gradient_raises_near_zero(beta): regs.L1().gradient(beta) -@pytest.mark.parametrize('beta', [ - np.array([0, 0, 0]), - np.array([1, 2, 3]) -]) -def test_l1_hessian_raises(beta): +def test_l1_hessian(): + npt.assert_array_equal(regs.L1().hessian(np.array([1, 2])), + np.array([[0, 0], [0, 0]])) + + +def test_l1_hessian_raises(): with pytest.raises(ValueError): - regs.L1().hessian(beta) + regs.L1().hessian(np.array([0, 0, 0])) @pytest.mark.parametrize('beta,expected', [ @@ -170,6 +171,11 @@ def test_elastic_net_hessian(): np.eye(len(beta)) * regs.ElasticNet().weight) +def test_elastic_net_hessian_raises(): + with pytest.raises(ValueError): + regs.ElasticNet(weight=0.5).hessian(np.array([0, 1, 2])) + + def test_elastic_net_proximal_operator(): beta = np.array([1, 2, 3]) npt.assert_array_equal(regs.ElasticNet(weight=0.5).proximal_operator(beta, 1), beta) From ef82c2a513b9de4241698089c4ac842845cd6633 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Tue, 16 May 2017 20:52:56 -0400 Subject: [PATCH 09/17] create abc for families --- dask_glm/families.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/dask_glm/families.py b/dask_glm/families.py index 980b702..02e9bdc 100644 --- a/dask_glm/families.py +++ b/dask_glm/families.py @@ -3,6 +3,29 @@ from dask_glm.utils import dot, exp, log1p, sigmoid +class Family(object): + """Base class methods for distribution. + + This class represents the required methods to add a new distribution to work + with the algorithms. + """ + + def loglike(self, Xbeta, y): + raise NotImplementedError + + def pointwise_loss(self, beta, X, y): + raise NotImplementedError + + def pointwise_gradient(self, beta, X, y): + raise NotImplementedError + + def gradient(self, Xbeta, x, y): + raise NotImplementedError + + def hessian(self, Xbeta, x): + raise NotImplementedError + + class Logistic(object): """Implements methods for `Logistic regression`_, useful for classifying binary outcomes. From 67c591d443360850119d26109ace0f4bb0b5b793 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Tue, 16 May 2017 21:03:07 -0400 Subject: [PATCH 10/17] move pointwise functions to base. 
---
 dask_glm/families.py | 85 ++++++++++++--------------------------------
 1 file changed, 23 insertions(+), 62 deletions(-)

diff --git a/dask_glm/families.py b/dask_glm/families.py
index 02e9bdc..5521939 100644
--- a/dask_glm/families.py
+++ b/dask_glm/families.py
@@ -10,14 +10,20 @@ class Family(object):
     with the algorithms.
     """
 
-    def loglike(self, Xbeta, y):
+    def loglikelihood(self, Xbeta, y):
         raise NotImplementedError
 
     def pointwise_loss(self, beta, X, y):
-        raise NotImplementedError
+        """Loss, evaluated point-wise."""
+        beta, y = beta.ravel(), y.ravel()
+        Xbeta = X.dot(beta)
+        return self.loglikelihood(Xbeta, y)
 
     def pointwise_gradient(self, beta, X, y):
-        raise NotImplementedError
+        """Gradient, evaluated point-wise."""
+        beta, y = beta.ravel(), y.ravel()
+        Xbeta = X.dot(beta)
+        return self.gradient(Xbeta, X, y)
 
     def gradient(self, Xbeta, x, y):
         raise NotImplementedError
@@ -26,16 +32,16 @@ def hessian(self, Xbeta, x):
         raise NotImplementedError
 
 
-class Logistic(object):
+class Logistic(Family):
     """Implements methods for `Logistic regression`_,
     useful for classifying binary outcomes.
 
     .. _Logistic regression: https://en.wikipedia.org/wiki/Logistic_regression
     """
+    name = 'logistic'
 
-    @staticmethod
-    def loglike(Xbeta, y):
+    def loglikelihood(self, Xbeta, y):
         """
-        Evaluate the logistic loglikeliehood
+        Evaluate the logistic loglikelihoodlihood
 
         Parameters
         ----------
@@ -45,61 +51,31 @@ def loglike(Xbeta, y):
         enXbeta = exp(-Xbeta)
         return (Xbeta + log1p(enXbeta)).sum() - dot(y, Xbeta)
 
-    @staticmethod
-    def pointwise_loss(beta, X, y):
-        """Logistic Loss, evaluated point-wise."""
-        beta, y = beta.ravel(), y.ravel()
-        Xbeta = X.dot(beta)
-        return Logistic.loglike(Xbeta, y)
-
-    @staticmethod
-    def pointwise_gradient(beta, X, y):
-        """Logistic gradient, evaluated point-wise."""
-        beta, y = beta.ravel(), y.ravel()
-        Xbeta = X.dot(beta)
-        return Logistic.gradient(Xbeta, X, y)
-
-    @staticmethod
-    def gradient(Xbeta, X, y):
+    def gradient(self, Xbeta, X, y):
         """Logistic gradient"""
         p = sigmoid(Xbeta)
         return dot(X.T, p - y)
 
-    @staticmethod
-    def hessian(Xbeta, X):
+    def hessian(self, Xbeta, X):
         """Logistic hessian"""
         p = sigmoid(Xbeta)
         return dot(p * (1 - p) * X.T, X)
 
 
-class Normal(object):
+class Normal(Family):
     """Implements methods for `Linear regression`_,
     useful for modeling continuous outcomes.
 
     .. _Linear regression: https://en.wikipedia.org/wiki/Linear_regression
     """
+    name = 'normal'
 
-    @staticmethod
-    def loglike(Xbeta, y):
-        return ((y - Xbeta) ** 2).sum()
-
-    @staticmethod
-    def pointwise_loss(beta, X, y):
-        beta, y = beta.ravel(), y.ravel()
-        Xbeta = X.dot(beta)
-        return Normal.loglike(Xbeta, y)
+    def loglikelihood(self, Xbeta, y):
+        return ((y - Xbeta) ** 2).sum()
 
-    @staticmethod
-    def pointwise_gradient(beta, X, y):
-        beta, y = beta.ravel(), y.ravel()
-        Xbeta = X.dot(beta)
-        return Normal.gradient(Xbeta, X, y)
-
-    @staticmethod
-    def gradient(Xbeta, X, y):
+    def gradient(self, Xbeta, X, y):
         return 2 * dot(X.T, Xbeta) - 2 * dot(X.T, y)
 
-    @staticmethod
-    def hessian(Xbeta, X):
+    def hessian(self, Xbeta, X):
         return 2 * dot(X.T, X)
 
 
-class Poisson(object):
+class Poisson(Family):
     """
     This implements `Poisson regression`_, useful for modelling count data.
 
     .. 
_Poisson regression: https://en.wikipedia.org/wiki/Poisson_regression """ - @staticmethod - def loglike(Xbeta, y): + def loglikelihood(self, Xbeta, y): eXbeta = exp(Xbeta) yXbeta = y * Xbeta return (eXbeta - yXbeta).sum() - @staticmethod - def pointwise_loss(beta, X, y): - beta, y = beta.ravel(), y.ravel() - Xbeta = X.dot(beta) - return Poisson.loglike(Xbeta, y) - - @staticmethod - def pointwise_gradient(beta, X, y): - beta, y = beta.ravel(), y.ravel() - Xbeta = X.dot(beta) - return Poisson.gradient(Xbeta, X, y) - - @staticmethod - def gradient(Xbeta, X, y): + def gradient(self, Xbeta, X, y): eXbeta = exp(Xbeta) return dot(X.T, eXbeta - y) - @staticmethod - def hessian(Xbeta, X): + def hessian(self, Xbeta, X): eXbeta = exp(Xbeta) x_diag_eXbeta = eXbeta[:, None] * X return dot(X.T, x_diag_eXbeta) From bd808d1b03b07fb9e4e95ea616de0f0ad54bc9b5 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Tue, 16 May 2017 22:15:36 -0400 Subject: [PATCH 11/17] added a base registry class for families and regularizers, test passes. --- dask_glm/algorithms.py | 33 ++++++++++++++------------- dask_glm/estimators.py | 6 ++--- dask_glm/families.py | 11 +++++---- dask_glm/regularizers.py | 24 ++----------------- dask_glm/tests/test_admm.py | 4 ++-- dask_glm/tests/test_algos_families.py | 8 +++---- dask_glm/utils.py | 30 ++++++++++++++++++++++++ 7 files changed, 65 insertions(+), 51 deletions(-) diff --git a/dask_glm/algorithms.py b/dask_glm/algorithms.py index b7bdbd0..e3d0bf6 100644 --- a/dask_glm/algorithms.py +++ b/dask_glm/algorithms.py @@ -11,12 +11,12 @@ from dask_glm.utils import dot, normalize -from dask_glm.families import Logistic +from dask_glm.families import Family from dask_glm.regularizers import Regularizer def compute_stepsize_dask(beta, step, Xbeta, Xstep, y, curr_val, - family=Logistic, stepSize=1.0, + family='logistic', stepSize=1.0, armijoMult=0.1, backtrackMult=0.1): """Compute the optimal stepsize @@ -38,8 +38,8 @@ def compute_stepsize_dask(beta, step, Xbeta, Xstep, y, curr_val, xBeta : array-like func : callable """ - - loglike = family.loglike + family = Family.get(family) + loglike = family.loglikelihood beta, step, Xbeta, Xstep, y, curr_val = persist(beta, step, Xbeta, Xstep, y, curr_val) obeta, oXbeta = beta, Xbeta (step,) = compute(step) @@ -65,7 +65,7 @@ def compute_stepsize_dask(beta, step, Xbeta, Xstep, y, curr_val, @normalize -def gradient_descent(X, y, max_iter=100, tol=1e-14, family=Logistic, **kwargs): +def gradient_descent(X, y, max_iter=100, tol=1e-14, family='logistic', **kwargs): """ Michael Grant's implementation of Gradient Descent. @@ -85,8 +85,8 @@ def gradient_descent(X, y, max_iter=100, tol=1e-14, family=Logistic, **kwargs): ------- beta : array-like, shape (n_features,) """ - - loglike, gradient = family.loglike, family.gradient + family = Family.get(family) + loglike, gradient = family.loglikelihood, family.gradient n, p = X.shape firstBacktrackMult = 0.1 nextBacktrackMult = 0.5 @@ -138,7 +138,7 @@ def gradient_descent(X, y, max_iter=100, tol=1e-14, family=Logistic, **kwargs): @normalize -def newton(X, y, max_iter=50, tol=1e-8, family=Logistic, **kwargs): +def newton(X, y, max_iter=50, tol=1e-8, family='logistic', **kwargs): """Newtons Method for Logistic Regression. Parameters @@ -157,6 +157,7 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic, **kwargs): ------- beta : array-like, shape (n_features,) """ + family = Family.get(family) gradient, hessian = family.gradient, family.hessian n, p = X.shape beta = np.zeros(p) # always init to zeros? 
@@ -194,7 +195,7 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic, **kwargs): @normalize def admm(X, y, regularizer='l1', lamduh=0.1, rho=1, over_relax=1, - max_iter=250, abstol=1e-4, reltol=1e-2, family=Logistic, **kwargs): + max_iter=250, abstol=1e-4, reltol=1e-2, family='logistic', **kwargs): """ Alternating Direction Method of Multipliers @@ -216,7 +217,7 @@ def admm(X, y, regularizer='l1', lamduh=0.1, rho=1, over_relax=1, ------- beta : array-like, shape (n_features,) """ - + family = Family.get(family) pointwise_loss = family.pointwise_loss pointwise_gradient = family.pointwise_gradient regularizer = Regularizer.get(regularizer) @@ -303,7 +304,7 @@ def local_update(X, y, beta, z, u, rho, f, fprime, solver=fmin_l_bfgs_b): @normalize def lbfgs(X, y, regularizer=None, lamduh=1.0, max_iter=100, tol=1e-4, - family=Logistic, verbose=False, **kwargs): + family='logistic', verbose=False, **kwargs): """L-BFGS solver using scipy.optimize implementation Parameters @@ -322,7 +323,7 @@ def lbfgs(X, y, regularizer=None, lamduh=1.0, max_iter=100, tol=1e-4, ------- beta : array-like, shape (n_features,) """ - + family = Family.get(family) pointwise_loss = family.pointwise_loss pointwise_gradient = family.pointwise_gradient if regularizer is not None: @@ -349,7 +350,7 @@ def compute_loss_grad(beta, X, y): @normalize -def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic, +def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family='logistic', max_iter=100, tol=1e-8, **kwargs): """ @@ -371,7 +372,7 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic, ------- beta : array-like, shape (n_features,) """ - + family = Family.get(family) n, p = X.shape firstBacktrackMult = 0.1 nextBacktrackMult = 0.5 @@ -387,7 +388,7 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic, # Compute the gradient if k % recalcRate == 0: Xbeta = X.dot(beta) - func = family.loglike(Xbeta, y) + func = family.loglikelihood(Xbeta, y) gradient = family.gradient(Xbeta, X, y) @@ -405,7 +406,7 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic, Xbeta, beta = persist(Xbeta, beta) - func = family.loglike(Xbeta, y) + func = family.loglikelihood(Xbeta, y) func = persist(func)[0] func = compute(func)[0] df = lf - func diff --git a/dask_glm/estimators.py b/dask_glm/estimators.py index 9da2dcb..2ac91ad 100644 --- a/dask_glm/estimators.py +++ b/dask_glm/estimators.py @@ -112,7 +112,7 @@ class LogisticRegression(_GLM): @property def family(self): - return families.Logistic + return families.Logistic() def predict(self, X): return self.predict_proba(X) > .5 # TODO: verify, multiclass broken @@ -167,7 +167,7 @@ class LinearRegression(_GLM): """ @property def family(self): - return families.Normal + return families.Normal() def predict(self, X): X_ = self._maybe_add_intercept(X) @@ -219,7 +219,7 @@ class PoissonRegression(_GLM): """ @property def family(self): - return families.Poisson + return families.Poisson() def predict(self, X): X_ = self._maybe_add_intercept(X) diff --git a/dask_glm/families.py b/dask_glm/families.py index 5521939..b4e9572 100644 --- a/dask_glm/families.py +++ b/dask_glm/families.py @@ -1,9 +1,9 @@ from __future__ import absolute_import, division, print_function -from dask_glm.utils import dot, exp, log1p, sigmoid +from dask_glm.utils import dot, exp, log1p, sigmoid, RegistryClass -class Family(object): +class Family(RegistryClass): """Base class methods for distribution. 
This class represents the required methods to add a new distribution to work @@ -38,10 +38,11 @@ class Logistic(Family): .. _Logistic regression: https://en.wikipedia.org/wiki/Logistic_regression """ + name = 'logistic' def loglikelihood(self, Xbeta, y): """ - Evaluate the logistic loglikelihoodliehood + Evaluate the logistic loglikelihoodlihood Parameters ---------- @@ -68,6 +69,7 @@ class Normal(Family): .. _Linear regression: https://en.wikipedia.org/wiki/Linear_regression """ + name = 'normal' def loglikelihood(self, Xbeta, y): return ((y - Xbeta) ** 2).sum() @@ -79,7 +81,7 @@ def hessian(self, Xbeta, X): return 2 * dot(X.T, X) -class Poisson(object): +class Poisson(Family): """ This implements `Poisson regression`_, useful for modelling count data. @@ -87,6 +89,7 @@ class Poisson(object): .. _Poisson regression: https://en.wikipedia.org/wiki/Poisson_regression """ + name = 'poisson' def loglikelihood(self, Xbeta, y): eXbeta = exp(Xbeta) diff --git a/dask_glm/regularizers.py b/dask_glm/regularizers.py index ccbb385..6719bc0 100644 --- a/dask_glm/regularizers.py +++ b/dask_glm/regularizers.py @@ -1,9 +1,10 @@ from __future__ import absolute_import, division, print_function import numpy as np +from dask_glm.utils import RegistryClass -class Regularizer(object): +class Regularizer(RegistryClass): """Abstract base class for regularization object. Defines the set of methods required to create a new regularization object. This includes @@ -121,27 +122,6 @@ def wrapped(beta, *args): return hess(beta, *args) + lam * self.hessian(beta) return wrapped - @classmethod - def get(cls, obj): - """Get the concrete instance for the name ``obj``. - - Parameters - ---------- - obj : Regularizer or str - Valid instances of ``Regularizer`` are passed through. - Strings are looked up according to ``obj.name`` and a - new instance is created - - Returns - ------- - obj : Regularizer - """ - if isinstance(obj, cls): - return obj - elif isinstance(obj, str): - return {o.name: o for o in cls.__subclasses__()}[obj]() - raise TypeError('Not a valid regularizer object.') - class L2(Regularizer): """L2 regularization.""" diff --git a/dask_glm/tests/test_admm.py b/dask_glm/tests/test_admm.py index 7b0373a..1c76a4b 100644 --- a/dask_glm/tests/test_admm.py +++ b/dask_glm/tests/test_admm.py @@ -5,7 +5,7 @@ import numpy as np from dask_glm.algorithms import admm, local_update -from dask_glm.families import Logistic, Normal +from dask_glm.families import Family, Logistic, Normal from dask_glm.regularizers import L1 from dask_glm.utils import make_y @@ -15,7 +15,7 @@ [np.array([-1.5, 3]), np.array([35, 2, 0, -3.2]), np.array([-1e-2, 1e-4, 1.0, 2e-3, -1.2])]) -@pytest.mark.parametrize('family', [Logistic, Normal]) +@pytest.mark.parametrize('family', [Logistic(), Normal()]) def test_local_update(N, beta, family): M = beta.shape[0] X = np.random.random((N, M)) diff --git a/dask_glm/tests/test_algos_families.py b/dask_glm/tests/test_algos_families.py index e04ffd1..6e6a051 100644 --- a/dask_glm/tests/test_algos_families.py +++ b/dask_glm/tests/test_algos_families.py @@ -8,7 +8,7 @@ from dask_glm.algorithms import (newton, lbfgs, proximal_grad, gradient_descent, admm) -from dask_glm.families import Logistic, Normal, Poisson +from dask_glm.families import Family from dask_glm.regularizers import Regularizer from dask_glm.utils import sigmoid, make_y @@ -63,7 +63,7 @@ def test_methods(N, p, seed, opt): ]) @pytest.mark.parametrize('N', [1000]) @pytest.mark.parametrize('nchunks', [1, 10]) -@pytest.mark.parametrize('family', 
[Logistic, Normal, Poisson])
+@pytest.mark.parametrize('family', Family._all_children())
 def test_basic_unreg_descent(func, kwargs, N, nchunks, family):
     beta = np.random.normal(size=2)
     M = len(beta)
@@ -87,9 +87,9 @@ def test_basic_unreg_descent(func, kwargs, N, nchunks, family):
 ])
 @pytest.mark.parametrize('N', [1000])
 @pytest.mark.parametrize('nchunks', [1, 10])
-@pytest.mark.parametrize('family', [Logistic, Normal, Poisson])
+@pytest.mark.parametrize('family', Family._all_children())
 @pytest.mark.parametrize('lam', [0.01, 1.2, 4.05])
-@pytest.mark.parametrize('reg', [r() for r in Regularizer.__subclasses__()])
+@pytest.mark.parametrize('reg', Regularizer._all_children())
 def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg):
     beta = np.random.normal(size=2)
     M = len(beta)
diff --git a/dask_glm/utils.py b/dask_glm/utils.py
index 592c74e..25686c2 100644
--- a/dask_glm/utils.py
+++ b/dask_glm/utils.py
@@ -194,3 +194,33 @@ def package_of(obj):
         return
     base, _sep, _stem = mod.__name__.partition('.')
     return sys.modules[base]
+
+
+class RegistryClass(object):
+    """Convenience class for methods on subclasses."""
+
+    @classmethod
+    def get(cls, obj):
+        """Get the concrete instance for the name ``obj``.
+
+        Parameters
+        ----------
+        obj : RegistryClass or str
+            Valid instances of ``RegistryClass`` are passed through.
+            Strings are looked up according to ``obj.name`` and a
+            new instance is created
+
+        Returns
+        -------
+        obj : RegistryClass
+        """
+        if isinstance(obj, cls):
+            return obj
+        elif isinstance(obj, str):
+            return {o.name: o for o in cls.__subclasses__()}[obj]()
+        raise TypeError('Not a valid {} object.'.format(cls.__name__))
+
+    @classmethod
+    def _all_children(cls):
+        """Return a list of instances of all subclasses."""
+        return [o() for o in cls.__subclasses__()]
From ee12ed2c024ac17d2bd993d87f42302323226119 Mon Sep 17 00:00:00 2001
From: Richard Postelnik 
Date: Wed, 17 May 2017 12:43:51 -0400
Subject: [PATCH 12/17] remove unused import

---
 dask_glm/tests/test_admm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dask_glm/tests/test_admm.py b/dask_glm/tests/test_admm.py
index 1c76a4b..b5f21a3 100644
--- a/dask_glm/tests/test_admm.py
+++ b/dask_glm/tests/test_admm.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 from dask_glm.algorithms import admm, local_update
-from dask_glm.families import Family, Logistic, Normal
+from dask_glm.families import Logistic, Normal
 from dask_glm.regularizers import L1
 from dask_glm.utils import make_y
 
From 324383f53a244884aed831664cb4b58cde232f98 Mon Sep 17 00:00:00 2001
From: Richard Postelnik 
Date: Mon, 29 May 2017 19:13:57 -0400
Subject: [PATCH 13/17] move tests for registry class to test utils

---
 dask_glm/families.py                |  2 +-
 dask_glm/tests/test_regularizers.py | 16 ----------------
 dask_glm/tests/test_utils.py        | 23 +++++++++++++++++++++++
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/dask_glm/families.py b/dask_glm/families.py
index b4e9572..d944b5a 100644
--- a/dask_glm/families.py
+++ b/dask_glm/families.py
@@ -42,7 +42,7 @@ class Logistic(Family):
 
     def loglikelihood(self, Xbeta, y):
         """
-        Evaluate the logistic loglikelihoodlihood
+        Evaluate the logistic loglikelihood
 
         Parameters
         ----------
diff --git a/dask_glm/tests/test_regularizers.py b/dask_glm/tests/test_regularizers.py
index 6c875af..5be3715 100644
--- a/dask_glm/tests/test_regularizers.py
+++ b/dask_glm/tests/test_regularizers.py
@@ -40,22 +40,6 @@ def foo(x):
     assert new_func(2) == 7
 
 
-def test_regularizer_get_passes_through_instance():
-    x = 
FooRegularizer() - assert regs.Regularizer.get(x) == x - - -def test_regularizer_get_unnamed_raises(): - with pytest.raises(KeyError): - regs.Regularizer.get('foo') - - -def test_regularizer_gets_from_name(): - class Foo(regs.Regularizer): - name = 'foo' - assert isinstance(regs.Regularizer.get('foo'), Foo) - - @pytest.mark.parametrize('beta,expected', [ (np.array([0, 0, 0]), 0), (np.array([1, 2, 3]), 7) diff --git a/dask_glm/tests/test_utils.py b/dask_glm/tests/test_utils.py index 813b2fd..f2c2f8f 100644 --- a/dask_glm/tests/test_utils.py +++ b/dask_glm/tests/test_utils.py @@ -78,3 +78,26 @@ def test_sparse(): assert utils.sum(x) == utils.sum(x.todense()) for func in [utils.sigmoid, utils.sum, utils.exp]: assert_eq(func(x), func(y)) + + +@pytest.fixture +def foo_class(): + class FooClass(utils.RegistryClass): + name = 'foo' + return FooClass + + +def test_registryclass_get_passes_through_instance(foo_class): + x = foo_class() + assert utils.RegistryClass.get(x) == x + + +def test_regularizer_get_unnamed_raises(): + with pytest.raises(KeyError): + utils.RegistryClass.get('bar') + + +def test_regularizer_gets_from_name(foo_class): + assert isinstance(utils.RegistryClass.get('foo'), foo_class) + + From 324383f53a244884aed831664cb4b58cde232f98 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Mon, 29 May 2017 20:15:41 -0400 Subject: [PATCH 14/17] add tests for base pointwise functions --- dask_glm/tests/test_algos_families.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/dask_glm/tests/test_algos_families.py b/dask_glm/tests/test_algos_families.py index 6e6a051..516e9f3 100644 --- a/dask_glm/tests/test_algos_families.py +++ b/dask_glm/tests/test_algos_families.py @@ -11,6 +11,30 @@ from dask_glm.families import Family from dask_glm.regularizers import Regularizer from dask_glm.utils import sigmoid, make_y +from unittest.mock import patch + + +def test_family_pointwise_loss(): + beta = np.array([1, 2]) + X = np.array([[1, 2], [3, 4]]) + y = np.array([1, 2]) + with patch('dask_glm.families.Family.loglikelihood') as loglike: + Family().pointwise_loss(beta, X, y) + Xbeta, new_y = loglike.call_args[0] + np.testing.utils.assert_array_equal(Xbeta, X.dot(beta)) + np.testing.utils.assert_array_equal(new_y, y) + + +def test_family_pointwise_loss(): + beta = np.array([1, 2]) + X = np.array([[1, 2], [3, 4]]) + y = np.array([1, 2]) + with patch('dask_glm.families.Family.gradient') as gradient: + Family().pointwise_gradient(beta, X, y) + Xbeta, new_x, new_y = gradient.call_args[0] + np.testing.utils.assert_array_equal(Xbeta, X.dot(beta)) + np.testing.utils.assert_array_equal(new_x, X) + np.testing.utils.assert_array_equal(new_y, y) def add_l1(f, lam): From 53bece493bc0639b8f535a65fb213b6b57842483 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Wed, 31 May 2017 09:55:23 -0400 Subject: [PATCH 15/17] flake8 fix --- dask_glm/tests/test_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dask_glm/tests/test_utils.py b/dask_glm/tests/test_utils.py index f2c2f8f..64ca77f 100644 --- a/dask_glm/tests/test_utils.py +++ b/dask_glm/tests/test_utils.py @@ -99,5 +99,3 @@ def test_regularizer_get_unnamed_raises(): def test_regularizer_gets_from_name(foo_class): assert isinstance(utils.RegistryClass.get('foo'), foo_class) - - From fd6c43879a0ebdd6235fbb8cfab1109d35fb8fc1 Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Thu, 27 Jul 2017 23:52:29 -0400 Subject: [PATCH 16/17] fix tests --- dask_glm/tests/test_algos_families.py | 2 +- dask_glm/tests/test_utils.py | 12 
+++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dask_glm/tests/test_algos_families.py b/dask_glm/tests/test_algos_families.py index 516e9f3..dbe5069 100644 --- a/dask_glm/tests/test_algos_families.py +++ b/dask_glm/tests/test_algos_families.py @@ -25,7 +25,7 @@ def test_family_pointwise_loss(): np.testing.utils.assert_array_equal(new_y, y) -def test_family_pointwise_loss(): +def test_family_pointwise_gradient(): beta = np.array([1, 2]) X = np.array([[1, 2], [3, 4]]) y = np.array([1, 2]) diff --git a/dask_glm/tests/test_utils.py b/dask_glm/tests/test_utils.py index f2c2f8f..6d9aee8 100644 --- a/dask_glm/tests/test_utils.py +++ b/dask_glm/tests/test_utils.py @@ -83,7 +83,10 @@ def test_sparse(): @pytest.fixture def foo_class(): class FooClass(utils.RegistryClass): - name = 'foo' + pass + + class BarClass(FooClass): + name = 'bar' return FooClass @@ -92,12 +95,11 @@ def test_registryclass_get_passes_through_instance(foo_class): assert utils.RegistryClass.get(x) == x -def test_regularizer_get_unnamed_raises(): +def test_regularizer_get_unnamed_raises(foo_class): with pytest.raises(KeyError): - utils.RegistryClass.get('bar') + foo_class().get('foobar') def test_regularizer_gets_from_name(foo_class): - assert isinstance(utils.RegistryClass.get('foo'), foo_class) - + assert isinstance(foo_class().get('bar'), foo_class) From 175b2cfb6bc91a6b4642d3e8cab8c593a1fa210f Mon Sep 17 00:00:00 2001 From: Richard Postelnik Date: Fri, 28 Jul 2017 08:32:27 -0400 Subject: [PATCH 17/17] use mock library instead of builtin to support 2.7 --- dask_glm/tests/test_algos_families.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dask_glm/tests/test_algos_families.py b/dask_glm/tests/test_algos_families.py index dbe5069..1d3aa74 100644 --- a/dask_glm/tests/test_algos_families.py +++ b/dask_glm/tests/test_algos_families.py @@ -11,7 +11,7 @@ from dask_glm.families import Family from dask_glm.regularizers import Regularizer from dask_glm.utils import sigmoid, make_y -from unittest.mock import patch +from mock import patch def test_family_pointwise_loss():
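
Net effect of the series: families and regularizers are both registry
classes, so the solvers accept either an instance or a registered name.
A minimal usage sketch against the final state of this branch (the data
sizes and parameter values are illustrative only):

    import numpy as np
    import dask.array as da
    from dask_glm.algorithms import admm
    from dask_glm.utils import make_y

    X = da.random.random((1000, 2), chunks=(100, 2))
    y = make_y(X, beta=np.array([1.5, -3.0]), chunks=(100,))

    # the strings resolve via Regularizer.get / Family.get
    beta = admm(X, y, regularizer='elastic_net', lamduh=0.1,
                family='logistic')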