15 changes: 11 additions & 4 deletions htmresearch/frameworks/pytorch/cnn_sdr.py
@@ -41,6 +41,7 @@ def __init__(self,
outChannels=20,
k=20,
kernelSize=5,
stride=1,
kInferenceFactor=1.5,
boostStrength=1.0,
useBatchNorm=True,
@@ -57,12 +58,15 @@ def __init__(self,

:param k:
Number of ON (non-zero) units per iteration in this convolutional layer.
The sparsity of this layer will be k / self.outputLength. If k >=
self.outputLength, the layer acts as a traditional convolutional layer.
The sparsity of this layer will be k / self.outputLength. If k <= 0 or
k >= self.outputLength, the layer acts as a traditional convolutional layer.

:param kernelSize:
Size of the CNN kernel.

:param stride:
Stride of the convolution.

:param kInferenceFactor:
During inference (training=False) we increase k by this factor.

@@ -109,10 +113,11 @@ def __init__(self,
self.kInferenceFactor = kInferenceFactor
self.kernelSize = kernelSize
self.imageShape = imageShape
self.stride = 1
self.stride = stride
self.padding = 0

self.cnn = nn.Conv2d(imageShape[0], outChannels, kernel_size=kernelSize)
self.cnn = nn.Conv2d(imageShape[0], outChannels, kernel_size=kernelSize,
stride=stride)

self.bn = None
if useBatchNorm:
Expand All @@ -124,6 +129,8 @@ def __init__(self,
shape = self.outputSize()
self.maxpoolWidth = int(math.floor(shape[2] / 2.0))
self.outputLength = int(self.maxpoolWidth * self.maxpoolWidth * outChannels)
if k <= 0:
self.k = self.outputLength

print("output shape before maxpool:", shape)
print("maxpool width:", self.maxpoolWidth)
6 changes: 4 additions & 2 deletions htmresearch/frameworks/pytorch/linear_sdr.py
@@ -54,8 +54,8 @@ def __init__(self,

:param k:
Number of ON units in this layer. The sparsity of this layer will be
k / n. If k >= n, the layer acts as a traditional fully connected RELU
layer.
k / n. If k <= 0 or k >= n, the layer acts as a traditional fully
connected RELU layer.

:param kInferenceFactor:
During inference (training=False) we increase k by this factor.
Expand All @@ -81,6 +81,8 @@ def __init__(self,
self.l1 = nn.Linear(inputFeatures, self.n)
self.weightSparsity = weightSparsity
self.learningIterations = 0
if self.k <= 0:
self.k = self.n

self.bn = None
if useBatchNorm:
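As in the CNN layer, `k <= 0` now means "no sparsity constraint". A minimal sketch, assuming the layer class is named `LinearSDR` and takes `inputFeatures`, `n`, and `k` as shown in the diff, with other arguments left at their defaults:

```python
# A sketch under the assumptions above; sizes are arbitrary.
import torch
from htmresearch.frameworks.pytorch.linear_sdr import LinearSDR

sparse = LinearSDR(inputFeatures=784, n=500, k=50)  # ~10% of units stay ON
dense = LinearSDR(inputFeatures=784, n=500, k=0)    # k reset to n: plain ReLU layer

x = torch.randn(4, 784)
print(sparse(x).shape, dense(x).shape)              # both torch.Size([4, 500])
```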
2 changes: 2 additions & 0 deletions htmresearch/frameworks/pytorch/mnist_sparse_experiment.py
@@ -137,6 +137,8 @@ def reset(self, params, repetition):
inputSize=c1_input_shape,
outChannels=c1_out_channels,
c_k=c1_k,
kernelSize=[5] * len(c1_k),
stride=[1] * len(c1_k),
dropout=params["dropout"],
n=n,
k=k,
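The experiment keeps one kernel size and one stride per k-sparse CNN layer, matching the length of `c1_k`. A small sketch of how the list arguments line up, with hypothetical values for two CNN layers:

```python
# Hypothetical experiment parameters, for illustration only.
c1_out_channels = [30, 30]
c1_k = [400, 400]

kernelSize = [5] * len(c1_k)   # -> [5, 5], one entry per CNN layer
stride = [1] * len(c1_k)       # -> [1, 1]
```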
43 changes: 35 additions & 8 deletions htmresearch/frameworks/pytorch/sparse_net.py
@@ -56,6 +56,8 @@ def __init__(self,
k=200,
outChannels=0,
c_k=0,
kernelSize=5,
stride=1,
inputSize=28*28,
outputSize=10,
kInferenceFactor=1.0,
@@ -64,7 +66,8 @@
boostStrengthFactor=1.0,
dropout=0.0,
useBatchNorm=True,
normalizeWeights=False):
normalizeWeights=False,
useSoftmax=True):
"""
A network with one or more hidden layers, which can be a sequence of
k-sparse CNN followed by a sequence of k-sparse linear layer with optional
@@ -100,6 +103,14 @@ def __init__(self,
layer acts as a traditional convolutional layer.
:type c_k: int or list[int]

:param kernelSize:
Kernel size to use in each k-sparse convolutional layer.
:type kernelSize: int or list[int]

:param stride:
Stride value to use in each k-sparse convolutional layer.
:type stride: int or list[int]

:param inputSize:
If the CNN layer is enabled this parameter holds a tuple representing
(in_channels,height,width). Otherwise it will hold the total
@@ -142,21 +153,27 @@ def __init__(self,
number of non-zeros instead of the whole input size
:type normalizeWeights: bool

:param useSoftmax:
If True, use softmax to compute probabilities
:type useSoftmax: bool
"""
super(SparseNet, self).__init__()

assert(weightSparsity >= 0)

# Validate CNN sdr params
if isinstance(inputSize, collections.Sequence):
assert(inputSize[1] == inputSize[2],
"sparseCNN only supports square images")
assert inputSize[1] == inputSize[2], "sparseCNN only supports square images"

if type(outChannels) is not list:
outChannels = [outChannels]
if type(c_k) is not list:
c_k = [c_k]
assert(len(outChannels) == len(c_k))
if type(kernelSize) is not list:
kernelSize = [kernelSize]
if type(stride) is not list:
stride = [stride]

# Validate linear sdr params
if type(n) is not list:
@@ -176,9 +193,11 @@ def __init__(self,
self.weightSparsity = weightSparsity # Pct of weights that are non-zero
self.boostStrengthFactor = boostStrengthFactor
self.boostStrength = boostStrength
self.kernelSize = 5
self.kernelSize = kernelSize
self.stride = stride
self.learningIterations = 0


inputFeatures = inputSize
cnnSdr = nn.Sequential()
# CNN Layers
@@ -187,7 +206,8 @@
module = CNNSDR2d(imageShape=inputFeatures,
outChannels=outChannels[i],
k=c_k[i],
kernelSize=self.kernelSize,
kernelSize=self.kernelSize[i],
stride=self.stride[i],
kInferenceFactor=kInferenceFactor,
boostStrength=boostStrength,
useBatchNorm=useBatchNorm,
@@ -228,8 +248,13 @@ def __init__(self,
inputFeatures = n[i]

# Add one fully connected layer after all hidden layers
self.fc = nn.Linear(self.n[-1], outputSize)
self.softmax = nn.LogSoftmax(dim=1)
self.fc = nn.Linear(inputFeatures, outputSize)

# Use softmax to compute probabilities
if useSoftmax:
self.softmax = nn.LogSoftmax(dim=1)
else:
self.softmax = None


def postEpoch(self):
@@ -260,7 +285,9 @@ def forward(self, x):
x = self.flatten(x)
x = self.linearSdr(x)
x = self.fc(x)
x = self.softmax(x)

if self.softmax is not None:
x = self.softmax(x)

if self.training:
batchSize = x.shape[0]
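Taken together, `SparseNet` now accepts per-layer `kernelSize` and `stride` lists and can skip the final log-softmax. A sketch of the new constructor surface, assuming the import path below and using arbitrary layer sizes; with `useSoftmax=False`, `forward()` returns raw logits, so a loss that applies its own softmax (such as `nn.CrossEntropyLoss`) can be used:

```python
# A sketch under the assumptions above, not the experiment configuration.
import torch
import torch.nn as nn
from htmresearch.frameworks.pytorch.sparse_net import SparseNet

net = SparseNet(inputSize=(1, 28, 28), outputSize=10,
                outChannels=[30], c_k=[400],   # one k-sparse CNN layer
                kernelSize=[5], stride=[1],    # per-layer values
                n=[300], k=[50],               # one k-sparse linear layer
                useSoftmax=False)              # forward() returns logits

x = torch.randn(8, 1, 28, 28)
logits = net(x)
loss = nn.CrossEntropyLoss()(logits, torch.randint(0, 10, (8,)))
```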
2 changes: 2 additions & 0 deletions htmresearch/frameworks/pytorch/sparse_speech_experiment.py
@@ -113,6 +113,8 @@ def reset(self, params, repetition):
outputSize=len(self.train_loader.dataset.classes),
outChannels=c1_out_channels,
c_k=c1_k,
kernelSize=[5] * len(c1_k),
stride=[1] * len(c1_k),
dropout=params["dropout"],
n=n,
k=k,
21 changes: 21 additions & 0 deletions htmresearch/frameworks/rl/__init__.py
@@ -0,0 +1,21 @@
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2019, Numenta, Inc. Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program. If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------
from .dqn import DQN
175 changes: 175 additions & 0 deletions htmresearch/frameworks/rl/dqn.py
@@ -0,0 +1,175 @@
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2019, Numenta, Inc. Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program. If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
import collections
import random
import copy

class DQN(object):
"""
Implements **DQN algorithm** described in https://arxiv.org/pdf/1312.5602.pdf
"""


def __init__(self, actions, network,
eps_start=1.0, eps_end=0.01, eps_decay=0.9995,
learning_rate=0.0001, gamma=0.99, tau=1.0, target_update=1000,
batch_size=32, min_steps=10000, replay_size=10000):
"""
:param actions: Number of possible actions
:param network: Neural network to use as a Q-function approximator
:param eps_start: e-greedy exploration start epsilon
:param eps_end: e-greedy exploration end epsilon
:param eps_decay: e-greedy exploration decay
:param learning_rate: optimizer learning rate
:param gamma: future returns discount
:param tau: soft update smoothing coefficient. Use 1.0 for hard target update
:param target_update: target update interval
:param batch_size: batch size
:param min_steps: min number of experiences in replay buffer before learning
:param replay_size: replay memory size
"""

self.actions = actions

# e-greedy parameters
self.eps = eps_start
self.eps_end = eps_end
self.eps_decay = eps_decay

# Experience replay memory: e = (s, a, r, s', done)
self.replay = collections.deque(maxlen=replay_size)
self.min_steps = min_steps
self.batch_size = batch_size

# Initialize Local and Target networks
self.local = network
self.target = copy.deepcopy(self.local)
self.device = next(network.parameters()).device

# Optimizations
self.optimizer = optim.Adam(self.local.parameters(), lr=learning_rate)
self.gamma = gamma
self.tau = tau
self.target_update = target_update
self.steps = 0


def select_action(self, state):
"""
Select an action for the given state, using e-greedy exploration to
balance exploration and exploitation

:return: tuple(action, value)
"""
value = 0
if self.steps < self.min_steps:
action = np.random.randint(self.actions)
else:
self.eps = max(self.eps_end, self.eps * self.eps_decay)
if random.random() < self.eps:
action = np.random.randint(self.actions)
else:
self.local.eval()
with torch.no_grad():
state = torch.tensor(state, device=self.device, dtype=torch.float).unsqueeze(0)
Q = self.local(state)
value, action = torch.max(Q, 1)

return int(action), float(value)


def hard_target_update(self):
"""
Update model parameters every 'target_update' time steps
See https://arxiv.org/abs/1312.5602
The update interval is given by ``self.target_update``
"""
if self.steps % self.target_update == 0:
self.target.load_state_dict(self.local.state_dict())


def soft_target_update(self):
"""
Soft update model parameters:

.. math::
\\theta_target = \\tau \\times \\theta_local + (1 - \\tau) \\times \\theta_target ,
with \\tau \\ll 1

See https://arxiv.org/pdf/1509.02971.pdf
"""
for target_param, local_param in zip(self.target.parameters(), self.local.parameters()):
target_param.data.copy_(self.tau * local_param.data + (1.0 - self.tau) * target_param.data)


def learn(self, state, action, reward, next_state, done):
"""
Update replay memory and learn from a batch of random experiences sampled
from the replay buffer
:return: optimization loss if enough experiences are available, None otherwise
"""
self.steps += 1
self.replay.append((state, action, reward, next_state, done))
if self.steps > self.min_steps and len(self.replay) > self.batch_size:
batch = random.sample(self.replay, self.batch_size)
return self.optimize(batch)

return None


def optimize(self, batch):
state, action, reward, next_state, done = map(
lambda x: torch.tensor(x, device=self.device, dtype=torch.float), zip(*batch))

# Get target values
self.target.eval()
with torch.no_grad():
target_next = self.target(next_state)
target_next = torch.max(target_next, dim=1, keepdim=True)[0]
Q_target = reward.unsqueeze(1) + self.gamma * target_next * (1 - done.unsqueeze(1))

# Get actual values
self.local.train()
Q_local = self.local(state)
Q_local = Q_local.gather(1, action.long().unsqueeze(1))

# Compute Huber loss
loss = F.smooth_l1_loss(Q_local, Q_target)

self.optimizer.zero_grad()
loss.backward()
for param in self.local.parameters():
param.grad.data.clamp_(-1, 1)

self.optimizer.step()

# Update target model
if self.tau < 1:
self.soft_target_update()
else:
self.hard_target_update()

return loss.item()
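A sketch of how the class might be instantiated, assuming a CartPole-like task (4-dimensional state, 2 discrete actions); the MLP below is hypothetical, and any `nn.Module` that maps states to per-action Q-values should work as the `network` argument:

```python
import torch.nn as nn
from htmresearch.frameworks.rl import DQN

# Hypothetical Q-network: state (4 floats) -> one Q-value per action.
q_network = nn.Sequential(
    nn.Linear(4, 64), nn.ReLU(),
    nn.Linear(64, 64), nn.ReLU(),
    nn.Linear(64, 2))

agent = DQN(actions=2, network=q_network,
            gamma=0.99,
            tau=1.0,              # 1.0 selects the hard target update path
            target_update=1000,   # copy local -> target every 1000 steps
            batch_size=32, min_steps=1000, replay_size=10000)
```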

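And a sketch of the intended interplay between `select_action()` and `learn()`, assuming a gym-style environment where `env.reset()` returns a state and `env.step(a)` returns `(next_state, reward, done, info)`; `env` is assumed to exist, `agent` comes from the previous sketch, and the episode count is arbitrary:

```python
for episode in range(500):
    state = env.reset()
    done = False
    while not done:
        # Random actions until min_steps experiences are collected,
        # then e-greedy on the local network.
        action, _value = agent.select_action(state)
        next_state, reward, done, _info = env.step(action)

        # Stores the transition and, once the replay buffer is warm,
        # optimizes on a random minibatch (returns the loss, else None).
        loss = agent.learn(state, action, reward, next_state, done)
        state = next_state
```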