diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 461c6fe..2c5260f 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -28,12 +28,16 @@ jobs:
         uses: docker/setup-qemu-action@v1
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: joerick/cibuildwheel@v2.22.0
         # to supply options, put them in 'env', like:
         env:
-          CIBW_ARCHS_LINUX: ${{matrix.arch}}
-          CIBW_BEFORE_BUILD: pip install numpy setuptools wheel cython
+          CIBW_BEFORE_BUILD: pip install numpy pybind11 setuptools wheel pkginfo twine
+          CIBW_ARCHS_MACOS: "x86_64 arm64"
+          CIBW_ARCHS: auto64
 
-      - uses: actions/upload-artifact@v2
+      - name: Upload built wheels
+        uses: actions/upload-artifact@v4
         with:
+          name: built-wheels-${{ matrix.os }}-${{ matrix.arch }}
           path: ./wheelhouse/*.whl
+          if-no-files-found: warn
\ No newline at end of file
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index de08566..28e9b5c 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04, macos-latest, windows-2019]
+        os: [ubuntu-latest, macos-latest, windows-latest]
         python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
diff --git a/README.md b/README.md
index 33ad0a1..0a42440 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,12 @@ img = load_3d_em_stack()
 img_pyramid = tinybrain.downsample_with_averaging(img, factor=(2,2,1), num_mips=5, sparse=False)
 
 labels = load_3d_labels()
-label_pyramid = tinybrain.downsample_segmentation(labels, factor=(2,2,1), num_mips=5, sparse=False))
+label_pyramid = tinybrain.downsample_segmentation(labels, factor=(2,2,1), num_mips=5, sparse=False)
+
+# We also have a few other types
+img_pyramid = tinybrain.downsample_with_min_pooling(image, factor=(2,2,1), num_mips=5)
+img_pyramid = tinybrain.downsample_with_max_pooling(image, factor=(2,2,1), num_mips=5)
+img_pyramid = tinybrain.downsample_with_striding(image, factor=(2,2,1), num_mips=5)
 ```
 
 ## Installation 
@@ -27,7 +32,7 @@ pip install tinybrain
 ## Motivation
 
 Image hierarchy generation in connectomics uses a few different techniques for
-visualizing data, but predominantly we create image pyramids of uint8 grayscale images using 2x2 average pooling and of uint8 to uint64 segmentation labels using 2x2 mode pooling. When images become very large and people wish to visualze upper mip levels using three axes at once, it becomes desirable to perform 2x2x2 downsamples to maintain isotropy.
+visualizing data, but predominantly we create image pyramids of uint8 grayscale images using 2x2 average pooling and of uint8 to uint64 segmentation labels using 2x2 mode pooling. When images become very large and people wish to visualize upper mip levels using three axes at once, it becomes desirable to perform 2x2x2 downsamples to maintain isotropy.
 
 It's possible to compute both of these using numpy, however as multiple packages found it useful to copy the downsample functions, it makes sense to formalize these functions into a seperate library located on PyPI.
 
diff --git a/automated_test.py b/automated_test.py
index 68b7eba..d15b1f5 100644
--- a/automated_test.py
+++ b/automated_test.py
@@ -125,7 +125,7 @@ def test_even_odd2d():
   assert np.array_equal(oddimg, ans3x3x3)
   assert np.array_equal(oddimgf, ans3x3x3)
 
-@pytest.mark.parametrize("dtype", (np.uint8, np.uint16, np.float32, np.float64))
+@pytest.mark.parametrize("dtype", (np.int8, np.int16, np.uint8, np.uint16, np.float32, np.float64))
 @pytest.mark.parametrize("sparse", [False, True])
 def test_accelerated_vs_numpy_avg_pooling_2x2x1(dtype, sparse):
   image = np.random.randint(0,255, size=(512, 512, 6), dtype=np.uint8).astype(dtype, copy=False)
@@ -147,7 +147,7 @@ def test_accelerated_vs_numpy_avg_pooling_2x2x1(dtype, sparse):
 
   assert np.all(mips[-1] == npimg)
 
-@pytest.mark.parametrize("dtype", (np.uint8, np.uint16, np.float32, np.float64))
+@pytest.mark.parametrize("dtype", (np.int8, np.int16, np.uint8, np.uint16, np.float32, np.float64))
 def test_accelerated_vs_numpy_avg_pooling_2x2x1_simple_sparse(dtype):
   for x in [0,1]:
     for y in [0,1]:
@@ -189,7 +189,7 @@ def test_accelerated_vs_numpy_avg_pooling_2x2x1_simple_sparse(dtype):
   assert np.all(res[1] == ans)
 
 
-@pytest.mark.parametrize("dtype", (np.uint8, np.uint16, np.float32, np.float64))
+@pytest.mark.parametrize("dtype", (np.int8, np.int16, np.uint8, np.uint16, np.float32, np.float64))
 @pytest.mark.parametrize("sx", (6,7,1024,1025))
 @pytest.mark.parametrize("sy", (6,7,1024,1025))
 @pytest.mark.parametrize("sz", (4,5,32,33))
diff --git a/tinybrain/accelerated.hpp b/tinybrain/accelerated.hpp
index 148d2cd..036ce20 100644
--- a/tinybrain/accelerated.hpp
+++ b/tinybrain/accelerated.hpp
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2019, William Silversmith
+Copyright (C) 2019,2025 William Silversmith
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -359,7 +359,7 @@ T* _average_pooling_2x2_single_mip(
                   + static_cast<T2>(d)
                 ) / static_cast<T2>(
                     std::max(
-                      static_cast<T>((a > 0) + (b > 0) + (c > 0) + (d > 0)),
+                      static_cast<T>((a != 0) + (b != 0) + (c != 0) + (d != 0)),
                       static_cast<T>(1)
                     )
                   )
@@ -375,7 +375,7 @@ T* _average_pooling_2x2_single_mip(
                   + static_cast<T2>(b)
                 ) / static_cast<T2>(
                   std::max(
-                    static_cast<T>((a > 0) + (b > 0)),
+                    static_cast<T>((a != 0) + (b != 0)),
                     static_cast<T>(1)
                 )
               ));
@@ -931,8 +931,17 @@ U* denominator_2x2x2(
 
 template <typename T, typename U>
 inline void render_image(T* accum, U* oimg, const uint32_t bitshift, const size_t ovoxels) {
-  for (size_t i = 0; i < ovoxels; i++) {
-    oimg[i] = static_cast<U>(accum[i] >> bitshift);
+  if constexpr (std::is_signed<T>::value) {
+    for (size_t i = 0; i < ovoxels; i++) {
+      oimg[i] = (accum[i] < 0) 
+        ? -static_cast<U>(std::abs(accum[i]) >> bitshift)
+        : static_cast<U>(accum[i] >> bitshift);
+    }
+  } 
+  else {
+    for (size_t i = 0; i < ovoxels; i++) {
+      oimg[i] = static_cast<U>(accum[i] >> bitshift);
+    }
   }
 }
 
diff --git a/tinybrain/accelerated.pyx b/tinybrain/accelerated.pyx
index 9c1a9b8..e646b97 100644
--- a/tinybrain/accelerated.pyx
+++ b/tinybrain/accelerated.pyx
@@ -4,7 +4,7 @@ Cython accelerated routines for common downsampling operations.
 
 Author: William Silversmith
 Affiliation: Seung Lab, Princeton Neuroscience Institute
-Date: March 2019
+Date: March 2019, Februrary 2025
 """
 cimport cython
 from cython cimport floating
@@ -94,8 +94,12 @@ def average_pooling_2x2(channel, size_t num_mips=1, sparse=False):
     results = _average_pooling_2x2_single_mip_py(channel, sparse)
   elif channel.dtype == np.uint8:
     results = _average_pooling_2x2_uint8(channel, num_mips, sparse)
+  elif channel.dtype == np.int8:
+    results = _average_pooling_2x2_int8(channel, num_mips, sparse)
   elif channel.dtype == np.uint16:
     results = _average_pooling_2x2_uint16(channel, num_mips, sparse)
+  elif channel.dtype == np.int16:
+    results = _average_pooling_2x2_int16(channel, num_mips, sparse)
   elif channel.dtype == np.float32:
     results = _average_pooling_2x2_float(channel, num_mips, sparse)
   elif channel.dtype == np.float64:
@@ -109,17 +113,29 @@ def average_pooling_2x2(channel, size_t num_mips=1, sparse=False):
   return results
 
 def _average_pooling_2x2_single_mip_py(np.ndarray[NUMBER, ndim=5] channel, sparse):
+  cdef int8_t[:,:,:,:,:] arr_memview8i
+  cdef int16_t[:,:,:,:,:] arr_memview16i
+  cdef int32_t[:,:,:,:,:] arr_memview32i
+  cdef int64_t[:,:,:,:,:] arr_memview64i
+
   cdef uint8_t[:,:,:,:,:] arr_memview8u
   cdef uint16_t[:,:,:,:,:] arr_memview16u
   cdef uint32_t[:,:,:,:,:] arr_memview32u
   cdef uint64_t[:,:,:,:,:] arr_memview64u
+  
   cdef float[:,:,:,:,:] arr_memviewf
   cdef double[:,:,:,:,:] arr_memviewd
 
+  cdef int8_t[:,:,:,:,:] out_memview8i
+  cdef int16_t[:,:,:,:,:] out_memview16i
+  cdef int32_t[:,:,:,:,:] out_memview32i
+  cdef int64_t[:,:,:,:,:] out_memview64i
+
   cdef uint8_t[:,:,:,:,:] out_memview8u
   cdef uint16_t[:,:,:,:,:] out_memview16u
   cdef uint32_t[:,:,:,:,:] out_memview32u
   cdef uint64_t[:,:,:,:,:] out_memview64u
+  
   cdef float[:,:,:,:,:] out_memviewf
   cdef double[:,:,:,:,:] out_memviewd
 
@@ -149,6 +165,22 @@ def _average_pooling_2x2_single_mip_py(np.ndarray[NUMBER, ndim=5] channel, spars
     arr_memview64u = channel
     out_memview64u = out
     _average_pooling_2x2_single_mip[uint64_t](&arr_memview64u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview64u[0,0,0,0,0], bool(sparse))
+  elif channel.dtype == np.int8:
+    arr_memview8i = channel
+    out_memview8i = out
+    _average_pooling_2x2_single_mip[int8_t](&arr_memview8i[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview8i[0,0,0,0,0], bool(sparse))
+  elif channel.dtype == np.int16:
+    arr_memview16i = channel
+    out_memview16i = out
+    _average_pooling_2x2_single_mip[int16_t](&arr_memview16i[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview16i[0,0,0,0,0], bool(sparse))
+  elif channel.dtype == np.int32:
+    arr_memview32i = channel
+    out_memview32i = out
+    _average_pooling_2x2_single_mip[int32_t](&arr_memview32i[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview32i[0,0,0,0,0], bool(sparse))
+  elif channel.dtype == np.int64:
+    arr_memview64i = channel
+    out_memview64i = out
+    _average_pooling_2x2_single_mip[int64_t](&arr_memview64i[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview64i[0,0,0,0,0], bool(sparse))
   elif channel.dtype == np.float32:
     arr_memviewf = channel
     out_memviewf = out
@@ -230,6 +262,142 @@ def _average_pooling_2x2_uint8(np.ndarray[uint8_t, ndim=5] channel, uint32_t num
 
   return results
 
+def _average_pooling_2x2_int8(np.ndarray[int8_t, ndim=5] channel, uint32_t num_mips, sparse):
+  cdef size_t sx = channel.shape[0]
+  cdef size_t sy = channel.shape[1]
+  cdef size_t sz = channel.shape[2]
+  cdef size_t sw = channel.shape[3]
+  cdef size_t sv = channel.shape[4]
+  cdef size_t sxy = sx * sy
+
+  cdef size_t osx = (sx + 1) // 2
+  cdef size_t osy = (sy + 1) // 2
+  cdef size_t osxy = osx * osy
+  cdef size_t ovoxels = osxy * sz * sw * sv
+
+  cdef int8_t[:,:,:,:,:] channelview = channel
+  cdef int16_t* accum = accumulate_2x2[int8_t, int16_t](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv)
+  cdef int16_t[:] accumview = <int16_t[:ovoxels]>accum
+  cdef int16_t* tmp
+  cdef uint32_t mip, bitshift
+
+  cdef int16_t* denominator
+  if sparse:
+    denominator = denominator_2x2[int8_t, int16_t](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv)
+
+  cdef int8_t[:] oimgview
+
+  results = []
+  for mip in range(num_mips):
+    bitshift = 2 * ((mip % 4) + 1) # integer truncation every 4 mip levels
+    oimg = np.zeros( (ovoxels,), dtype=np.int8, order='F')
+    oimgview = oimg
+
+    if sparse:
+      render_image_sparse[int16_t, int8_t](&accumview[0], denominator, &oimgview[0], ovoxels)
+    else:
+      render_image[int16_t, int8_t](&accumview[0], &oimgview[0], bitshift, ovoxels)
+
+    results.append(
+      oimg.reshape( (osx, osy, sz, sw, sv), order='F' )
+    )
+
+    if mip == num_mips - 1:
+      break
+
+    if bitshift == 8:
+      shift_right[int16_t](accum, ovoxels, bitshift)
+
+    sx = osx 
+    sy = osy 
+    sxy = sx * sy
+    osx = (sx + 1) // 2
+    osy = (sy + 1) // 2
+    osxy = osx * osy
+    ovoxels = osxy * sz * sw * sv
+
+    tmp = accum 
+    accum = accumulate_2x2[int16_t, int16_t](accum, sx, sy, sz, sw, sv)
+    accumview = <int16_t[:ovoxels]>accum
+    PyMem_Free(tmp)
+
+    if sparse:
+      tmp = denominator
+      denominator = accumulate_2x2[int16_t, int16_t](denominator, sx, sy, sz, sw, sv)
+      PyMem_Free(tmp)
+
+  PyMem_Free(accum)
+
+  return results
+
+def _average_pooling_2x2_int16(np.ndarray[int16_t, ndim=5] channel, uint32_t num_mips, sparse):
+  cdef size_t sx = channel.shape[0]
+  cdef size_t sy = channel.shape[1]
+  cdef size_t sz = channel.shape[2]
+  cdef size_t sw = channel.shape[3]
+  cdef size_t sv = channel.shape[4]
+  cdef size_t sxy = sx * sy
+
+  cdef size_t osx = (sx + 1) // 2
+  cdef size_t osy = (sy + 1) // 2
+  cdef size_t osxy = osx * osy
+  cdef size_t ovoxels = osxy * sz * sw * sv
+
+  cdef int16_t[:,:,:,:,:] channelview = channel
+  cdef int32_t* accum = accumulate_2x2[int16_t, int32_t](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv)
+  cdef int32_t[:] accumview = <int32_t[:ovoxels]>accum
+  cdef int32_t* tmp
+  cdef int32_t mip, bitshift
+
+  cdef int32_t* denominator
+  if sparse:
+    denominator = denominator_2x2[int16_t, int32_t](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv)
+
+  cdef int16_t[:] oimgview
+
+  results = []
+  for mip in range(num_mips):
+    bitshift = 2 * ((mip % 4) + 1) # integer truncation every 4 mip levels
+    oimg = np.zeros( (ovoxels,), dtype=np.int16, order='F')
+    oimgview = oimg
+
+    if sparse:
+      render_image_sparse[int32_t, int16_t](&accumview[0], denominator, &oimgview[0], ovoxels)
+    else:
+      render_image[int32_t, int16_t](&accumview[0], &oimgview[0], bitshift, ovoxels)
+
+    results.append(
+      oimg.reshape( (osx, osy, sz, sw, sv), order='F' )
+    )
+
+    if mip == num_mips - 1:
+      break
+
+    if bitshift == 8:
+      shift_right[int32_t](accum, ovoxels, bitshift)
+
+    sx = osx 
+    sy = osy 
+    sxy = sx * sy
+    osx = (sx + 1) // 2
+    osy = (sy + 1) // 2
+    osxy = osx * osy
+    ovoxels = osxy * sz * sw * sv
+
+    tmp = accum 
+    accum = accumulate_2x2[int32_t, int32_t](accum, sx, sy, sz, sw, sv)
+    accumview = <int32_t[:ovoxels]>accum
+    PyMem_Free(tmp)
+
+    if sparse:
+      tmp = denominator
+      denominator = accumulate_2x2[int32_t, int32_t](denominator, sx, sy, sz, sw, sv)
+      PyMem_Free(tmp)
+
+  PyMem_Free(accum)
+
+  return results
+
 def _average_pooling_2x2_uint16(np.ndarray[uint16_t, ndim=5] channel, uint32_t num_mips, sparse):
   cdef size_t sx = channel.shape[0]
   cdef size_t sy = channel.shape[1]
@@ -451,8 +619,12 @@ def average_pooling_2x2x2(channel, size_t num_mips=1, sparse=False):
     results = _average_pooling_2x2x2_single_mip_py(channel)
   elif channel.dtype == np.uint8:
     results = _average_pooling_2x2x2_uint8(channel, num_mips, sparse)
+  elif channel.dtype == np.int8:
+    results = _average_pooling_2x2x2_int8(channel, num_mips, sparse)
   elif channel.dtype == np.uint16:
     results = _average_pooling_2x2x2_uint16(channel, num_mips, sparse)
+  elif channel.dtype == np.int16:
+    results = _average_pooling_2x2x2_int16(channel, num_mips, sparse)
   elif channel.dtype == np.float32:
     results = _average_pooling_2x2x2_float(channel, num_mips)
   elif channel.dtype == np.float64:
@@ -466,6 +638,8 @@ def average_pooling_2x2x2(channel, size_t num_mips=1, sparse=False):
   return results
 
 def _average_pooling_2x2x2_single_mip_py(np.ndarray[NUMBER, ndim=5] channel):
+  cdef int8_t[:,:,:,:,:] arr_memview8i
+  cdef int16_t[:,:,:,:,:] arr_memview16i
   cdef uint8_t[:,:,:,:,:] arr_memview8u
   cdef uint16_t[:,:,:,:,:] arr_memview16u
   cdef uint32_t[:,:,:,:,:] arr_memview32u
@@ -473,6 +647,8 @@ def _average_pooling_2x2x2_single_mip_py(np.ndarray[NUMBER, ndim=5] channel):
   cdef float[:,:,:,:,:] arr_memviewf
   cdef double[:,:,:,:,:] arr_memviewd
 
+  cdef int8_t[:,:,:,:,:] out_memview8i
+  cdef int16_t[:,:,:,:,:] out_memview16i
   cdef uint8_t[:,:,:,:,:] out_memview8u
   cdef uint16_t[:,:,:,:,:] out_memview16u
   cdef uint32_t[:,:,:,:,:] out_memview32u
@@ -494,10 +670,18 @@ def _average_pooling_2x2x2_single_mip_py(np.ndarray[NUMBER, ndim=5] channel):
     arr_memview8u = channel
     out_memview8u = out
     _average_pooling_2x2x2_single_mip[uint8_t](&arr_memview8u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview8u[0,0,0,0,0])
+  elif channel.dtype == np.int8:
+    arr_memview8i = channel
+    out_memview8i = out
+    _average_pooling_2x2x2_single_mip[int8_t](&arr_memview8i[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview8i[0,0,0,0,0])
   elif channel.dtype == np.uint16:
     arr_memview16u = channel
     out_memview16u = out
     _average_pooling_2x2x2_single_mip[uint16_t](&arr_memview16u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview16u[0,0,0,0,0])
+  elif channel.dtype == np.int16:
+    arr_memview16i = channel
+    out_memview16i = out
+    _average_pooling_2x2x2_single_mip[int16_t](&arr_memview16i[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview16i[0,0,0,0,0])
   elif channel.dtype == np.uint32:
     arr_memview32u = channel
     out_memview32u = out
@@ -603,6 +787,90 @@ def _average_pooling_2x2x2_uint8(np.ndarray[uint8_t, ndim=5] channel, uint32_t n
 
   return results
 
+def _average_pooling_2x2x2_int8(np.ndarray[int8_t, ndim=5] channel, uint32_t num_mips, sparse=False):
+  cdef size_t sx = channel.shape[0]
+  cdef size_t sy = channel.shape[1]
+  cdef size_t sz = channel.shape[2]
+  cdef size_t sw = channel.shape[3]
+  cdef size_t sv = channel.shape[4]
+  cdef size_t sxy = sx * sy
+
+  cdef size_t osx = (sx + 1) // 2
+  cdef size_t osy = (sy + 1) // 2
+  cdef size_t osz = (sz + 1) // 2
+  cdef size_t osxy = osx * osy
+  cdef size_t ovoxels = osxy * osz * sw * sv
+
+  cdef int8_t[:,:,:,:,:] channelview = channel
+  cdef int32_t* accum = accumulate_2x2x2[int8_t, int32_t](
+    &channelview[0,0,0,0,0], sx, sy, sz, sw, sv
+  )
+  cdef int32_t[:] accumview = <int32_t[:ovoxels]>accum
+
+  # "denominator"
+  cdef int32_t* denom
+  cdef int32_t[:] denomview
+  if sparse:
+    denom = denominator_2x2x2[int8_t, int32_t](
+      &channelview[0,0,0,0,0], sx, sy, sz, sw, sv
+    )
+    denomview = <int32_t[:ovoxels]>denom
+
+  cdef int32_t* tmp
+  cdef int32_t mip, bitshift
+
+  cdef int8_t[:] oimgview
+
+  results = []
+  for mip in range(num_mips):
+    bitshift = 3 * ((mip % 8) + 1) # integer truncation every 8 mip levels
+    oimg = np.zeros( (ovoxels,), dtype=np.int8, order='F')
+    oimgview = oimg
+
+    if sparse:
+      render_image_sparse[int32_t, int8_t](&accumview[0], &denomview[0], &oimgview[0], ovoxels)
+    else:
+      render_image[int32_t, int8_t](&accumview[0], &oimgview[0], bitshift, ovoxels)
+
+    results.append(
+      oimg.reshape( (osx, osy, osz, sw), order='F' )
+    )
+
+    if mip == num_mips - 1:
+      break
+
+    if bitshift == 24:
+      shift_right[int32_t](accum, ovoxels, bitshift)
+      if sparse:
+        shift_right[int32_t](denom, ovoxels, bitshift)
+
+    sx = osx 
+    sy = osy 
+    sz = osz
+    sxy = sx * sy
+    osx = (sx + 1) // 2
+    osy = (sy + 1) // 2
+    osz = (sz + 1) // 2
+    osxy = osx * osy
+    ovoxels = osxy * osz * sw * sv
+
+    tmp = accum 
+    accum = accumulate_2x2x2[int32_t, int32_t](accum, sx, sy, sz, sw, sv)
+    accumview = <int32_t[:ovoxels]>accum
+    PyMem_Free(tmp)
+
+    if sparse:
+      tmp = denom
+      denom = accumulate_2x2x2[int32_t, int32_t](denom, sx, sy, sz, sw, sv)
+      denomview = <int32_t[:ovoxels]>denom
+      PyMem_Free(tmp)
+
+  PyMem_Free(accum)
+  if sparse:
+    PyMem_Free(denom)
+
+  return results
+
 def _average_pooling_2x2x2_uint16(np.ndarray[uint16_t, ndim=5] channel, uint32_t num_mips, sparse):
   cdef size_t sx = channel.shape[0]
   cdef size_t sy = channel.shape[1]
@@ -686,6 +954,89 @@ def _average_pooling_2x2x2_uint16(np.ndarray[uint16_t, ndim=5] channel, uint32_t
 
   return results
 
+def _average_pooling_2x2x2_int16(np.ndarray[int16_t, ndim=5] channel, uint32_t num_mips, sparse):
+  cdef size_t sx = channel.shape[0]
+  cdef size_t sy = channel.shape[1]
+  cdef size_t sz = channel.shape[2]
+  cdef size_t sw = channel.shape[3]
+  cdef size_t sv = channel.shape[4]
+  cdef size_t sxy = sx * sy
+
+  cdef size_t osx = (sx + 1) // 2
+  cdef size_t osy = (sy + 1) // 2
+  cdef size_t osz = (sz + 1) // 2
+  cdef size_t osxy = osx * osy
+  cdef size_t ovoxels = osxy * osz * sw * sv
+
+  cdef int16_t[:,:,:,:,:] channelview = channel
+  cdef int32_t* accum = accumulate_2x2x2[int16_t, int32_t](
+    &channelview[0,0,0,0,0], sx, sy, sz, sw, sv
+  )
+  cdef int32_t[:] accumview = <int32_t[:ovoxels]>accum
+
+  # "denominator"
+  cdef int32_t* denom
+  cdef int32_t[:] denomview
+  if sparse:
+    denom = denominator_2x2x2[int16_t, int32_t](
+      &channelview[0,0,0,0,0], sx, sy, sz, sw, sv
+    )
+    denomview = <int32_t[:ovoxels]>denom
+
+  cdef int32_t* tmp
+  cdef int32_t mip, bitshift
+
+  cdef int16_t[:] oimgview
+
+  results = []
+  for mip in range(num_mips):
+    bitshift = 3 * ((mip % 5) + 1) # integer truncation every 5 mip levels
+    oimg = np.zeros( (ovoxels,), dtype=np.uint16, order='F')
+    oimgview = oimg
+    if sparse:
+      render_image_sparse[int32_t, int16_t](&accumview[0], &denomview[0], &oimgview[0], ovoxels)
+    else:
+      render_image[int32_t, int16_t](&accumview[0], &oimgview[0], bitshift, ovoxels)
+
+    results.append(
+      oimg.reshape( (osx, osy, osz, sw, sv), order='F' )
+    )
+
+    if mip == num_mips - 1:
+      break
+
+    if bitshift == 15:
+      shift_right[int32_t](accum, ovoxels, bitshift)
+      if sparse:
+        shift_right[int32_t](denom, ovoxels, bitshift)
+
+    sx = osx 
+    sy = osy 
+    sz = osz
+    sxy = sx * sy
+    osx = (sx + 1) // 2
+    osy = (sy + 1) // 2
+    osz = (sz + 1) // 2
+    osxy = osx * osy
+    ovoxels = osxy * osz * sw * sv
+
+    tmp = accum 
+    accum = accumulate_2x2x2[int32_t, int32_t](accum, sx, sy, sz, sw, sv)
+    accumview = <int32_t[:ovoxels]>accum
+    PyMem_Free(tmp)
+
+    if sparse:
+      tmp = denom
+      denom = accumulate_2x2x2[int32_t, int32_t](denom, sx, sy, sz, sw, sv)
+      denomview = <int32_t[:ovoxels]>denom
+      PyMem_Free(tmp)
+
+  PyMem_Free(accum)
+  if sparse:
+    PyMem_Free(denom)
+
+  return results
+
 def _average_pooling_2x2x2_float(np.ndarray[float, ndim=5] channel, uint32_t num_mips):
   cdef size_t sx = channel.shape[0]
   cdef size_t sy = channel.shape[1]
diff --git a/tinybrain/downsample.py b/tinybrain/downsample.py
index e7933e0..2802d92 100644
--- a/tinybrain/downsample.py
+++ b/tinybrain/downsample.py
@@ -45,7 +45,7 @@ def downsample_with_averaging(img, factor, num_mips=1, sparse=False):
   Returns: [ mip0, mip1, mip2, ..., num_mip ]
   """
   if (
-    img.dtype in (np.uint8, np.uint16, np.float32, np.float64)
+    img.dtype in (np.int8, np.int16, np.uint8, np.uint16, np.float32, np.float64)
     or num_mips == 1 # _average_pooling_2x2_single_mip_py supports all primative types
   ):
     img = np.asfortranarray(img)
@@ -136,7 +136,7 @@ def downsample_with_averaging_numpy(array, factor, sparse=False):
     indexing_expr = tuple(np.s_[:s] for s in part.shape)
     temp[indexing_expr] += part
     if sparse:
-      counts[indexing_expr] += part > 0
+      counts[indexing_expr] += part != 0
     else:
       counts[indexing_expr] += 1