From 7de77a30eb7b9ce2ec78da650a5ab5c92a7e8e6d Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Mon, 14 Feb 2022 16:11:08 +0100
Subject: [PATCH 01/18] invented DOnion
---
dask_grblas/base.py | 71 +++++++++++++++++++++++++
dask_grblas/scalar.py | 9 ++--
dask_grblas/utils.py | 8 +++
dask_grblas/vector.py | 87 +++++++++++++++++++------------
tests/from_grblas2/test_vector.py | 53 +++++++++++++++++++
5 files changed, 192 insertions(+), 36 deletions(-)
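
A minimal sketch of the problem DOnion is meant to solve, using only plain dask and
numpy (none of the dask_grblas API, so names here are illustrative): the size of one
lazy array is only known after another lazy array has been computed, so construction
of the first has to be deferred behind the second.

import dask
import dask.array as da
import numpy as np

indices = da.from_array(np.array([0, 1, 3]))
size = da.max(indices) + 1        # lazy 0-d array; its value is unknown until compute()

@dask.delayed
def make_vector(n):
    # only once `n` is concrete can a container of the right size be allocated
    return np.zeros(int(n))

lazy_vec = make_vector(size)      # the "onion": a lazy object wrapping another lazy result
print(lazy_vec.compute())         # [0. 0. 0. 0.]
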
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index e808ade..fe7cfe7 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -1,8 +1,10 @@
from numbers import Number
+from functools import partial
import dask.array as da
import grblas as gb
import numpy as np
from grblas.operator import UNKNOWN_OPCLASS, find_opclass, get_typed_op
+from grblas.dtypes import lookup_dtype
from . import replace as replace_singleton
from .mask import Mask
@@ -210,6 +212,9 @@ def compute_and_store_nvals():
def nvals(self):
from .scalar import PythonScalar
+ if type(self._delayed) is DOnion:
+ return PythonScalar(self._delayed.nvals)
+
delayed = da.core.elemwise(
_nvals,
self._delayed,
@@ -349,6 +354,72 @@ def visualize(self, *args, **kwargs):
return self._delayed.visualize(*args, **kwargs)
+class DOnion:
+ """
+ Dask (or Delayed) Onion (DOnion):
+
+ Encapsulates a dask array whose inner value is also a dask array.
+ Intended to be used in cases where the size of the inner dask
+ array (the seed) depends on the inner value of another dask array
+ (the shroud)
+ """
+ @classmethod
+ def grow(cls, shroud, seed_func, seed_meta, packed_args, packed_kwargs, *args, **kwargs):
+ """
+ Develop a DOnion from dask array `shroud`
+
+ Shroud a dask array (the seed) returned by `seed_func` using another dask array (the
+ shroud)
+ :shroud: dask array whose inner value determines the (size of) seed dask array
+ :seed_func: the function that takes as input the inner value of `shroud` and returns
+ another dask array (the seed)
+ :seed_meta: empty instance of the inner value type of the seed
+ :packed_args: tuple of arguments to `seed_func`
+ :packed_kwargs: dict of keyword arguments to `seed_func`
+ :args: other dask arrays that together with `shroud` determine the (size of) `seed`
+ :kwargs: other named dask arrays that together with `shroud` determine the (size of) `seed`
+ """
+ seed_func = partial(seed_func, *packed_args, **packed_kwargs)
+ dtype = np_dtype(lookup_dtype(shroud.dtype))
+ _meta = np.array([], dtype=dtype)
+ kernel = shroud.map_blocks(seed_func, *args, **kwargs, dtype=dtype, meta=_meta)
+ return DOnion(kernel, meta=seed_meta)
+
+ def __init__(self, kernel, meta=None):
+ self.kernel = kernel
+ self.dtype = kernel.dtype
+ self._meta = meta
+
+ def __eq__(self, other):
+ return self.compute() == other
+
+ def compute(self, *args, **kwargs):
+ value = self.kernel.compute(*args, **kwargs)
+ while hasattr(value, 'compute'):
+ value = value.compute(*args, **kwargs)
+ return value
+
+ def persist(self, *args, **kwargs):
+ return self.kernel.compute(*args, **kwargs).persist(*args, **kwargs)
+
+ def inject(self, func, *args, **kwargs):
+ dtype = np_dtype(lookup_dtype(self.dtype))
+ meta = self._meta
+ return self.kernel.map_blocks(func, *args, **kwargs, dtype=dtype, meta=meta)
+
+ def __getattr__(self, item):
+ func = lambda x: getattr(x, item)
+ return DOnion(self.inject(func))
+
+ def getattr(self, name, packed_args, packed_kwargs, *args, **kwargs):
+ func = partial(Donion.apply, name, *packed_args, **packed_kwargs)
+ return DOnion(self.inject(func, *args, **kwargs))
+
+ @classmethod
+ def apply(cls, name, *args, **kwargs):
+ return getattr(x, name)(*args, **kwargs)
+
+
# Dask task functions
def _clear(x):
x.value.clear()
diff --git a/dask_grblas/scalar.py b/dask_grblas/scalar.py
index 89c74e7..fffc1e6 100644
--- a/dask_grblas/scalar.py
+++ b/dask_grblas/scalar.py
@@ -3,7 +3,7 @@
import numpy as np
from dask.delayed import Delayed, delayed
-from .base import BaseType, InnerBaseType
+from .base import BaseType, InnerBaseType, DOnion
from .expr import AmbiguousAssignOrExtract, GbDelayed
from .utils import get_meta, np_dtype
@@ -67,9 +67,10 @@ def new(cls, dtype, *, name=None):
return new(cls, dtype, name=name)
def __init__(self, delayed, meta=None):
- assert type(delayed) is da.Array, type(delayed)
- assert delayed.ndim == 0
+ assert type(delayed) in {da.Array, DOnion}, type(delayed)
self._delayed = delayed
+ if type(delayed) is da.Array:
+ assert delayed.ndim == 0
if meta is None:
meta = gb.Scalar.new(delayed.dtype)
self._meta = meta
@@ -228,6 +229,8 @@ def __eq__(self, other):
def compute(self, *args, **kwargs):
innerval = self._delayed.compute(*args, **kwargs)
+ if type(self._delayed) is DOnion:
+ return innerval
return innerval.value.value
diff --git a/dask_grblas/utils.py b/dask_grblas/utils.py
index 8cdfdf0..3c87425 100644
--- a/dask_grblas/utils.py
+++ b/dask_grblas/utils.py
@@ -8,6 +8,14 @@
from .io import MMFile
+def package_args(*args):
+ return args
+
+
+def package_kwargs(**kwargs):
+ return kwargs
+
+
def np_dtype(dtype):
return np.dtype(dtype.numba_type.name)
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 58da806..024c2fe 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -1,4 +1,5 @@
from numbers import Number
+from functools import partial
import dask.array as da
import numpy as np
import grblas as gb
@@ -7,11 +8,13 @@
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
-from .base import BaseType, InnerBaseType, _nvals
+from .base import BaseType, InnerBaseType, _nvals, DOnion
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, Assigner
from .mask import StructuralMask, ValueMask
from ._ss.vector import ss
from .utils import (
+ package_args,
+ package_kwargs,
np_dtype,
get_grblas_type,
get_return_type,
@@ -20,6 +23,7 @@
build_chunk_ranges_dask_array,
build_chunk_offsets_dask_array,
)
+from grblas.exceptions import IndexOutOfBound
class InnerVector(InnerBaseType):
@@ -107,31 +111,32 @@ def from_values(
/,
size=None,
*,
- trust_size=False,
dup_op=None,
dtype=None,
chunks="auto",
name=None,
):
- # Note: `trust_size` is a bool parameter that, when True,
- # can be used to avoid expensive computation of max(indices)
- # which is used to verify that `size` is indeed large enough
- # to hold all the given tuples.
- # TODO:
- # dup_op support for dask_array indices/values (use reduce_assign?)
- if dup_op is None and type(indices) is da.Array and type(values) is da.Array:
- if not trust_size or size is None:
- # this branch is an expensive operation:
- implied_size = 1 + da.max(indices).compute()
- if size is not None and implied_size > size:
- raise Exception()
- size = implied_size if size is None else size
-
- idtype = gb.Vector.new(indices.dtype).dtype
- np_idtype_ = np_dtype(idtype)
+ if type(indices) is da.Array and type(values) is da.Array:
+ np_idtype_ = np_dtype(lookup_dtype(indices.dtype))
+ if size is not None:
+ chunks = da.core.normalize_chunks(chunks, (size,), dtype=np_idtype_)
+ else:
+ size = da.max(indices) + 1
+ # Here `size` is a dask 0d-array whose computed value is
+ # used to determine the size of the Vector to be returned.
+ # But since we do not want to compute anything just now,
+ # we instead create a "DOnion" (dask onion) object
+ meta = gb.Vector.new(values.dtype)
+ packed_args = package_args(indices, values)
+ packed_kwargs = package_kwargs(
+ dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
+ )
+ donion = DOnion.grow(size, Vector.from_values, meta, packed_args, packed_kwargs)
+ return Vector(donion, meta=meta)
+
vdtype = gb.Vector.new(values.dtype).dtype
np_vdtype_ = np_dtype(vdtype)
- chunks = da.core.normalize_chunks(chunks, (size,), dtype=np_idtype_)
+
name_ = name
name = str(name) if name else ""
name = name + "-index-ranges" + tokenize(cls, chunks[0])
@@ -141,6 +146,7 @@ def from_values(
*(indices, "j"),
*(values, "j"),
*(index_ranges, "i"),
+ size=size,
dtype=np_vdtype_,
meta=np.array([]),
)
@@ -150,6 +156,7 @@ def from_values(
*(fragments, "ij"),
*(index_ranges, "i"),
concatenate=False,
+ dup_op=dup_op,
gb_dtype=dtype,
dtype=np_vdtype_,
meta=meta,
@@ -185,11 +192,15 @@ def __init__(self, delayed, meta=None, nvals=None):
# if it is already known at the time of initialization of
# this Vector, otherwise its value should be left as None
# (the default)
- assert type(delayed) is da.Array
- assert delayed.ndim == 1
+ assert type(delayed) in {da.Array, DOnion}
self._delayed = delayed
- if meta is None:
- meta = gb.Vector.new(delayed.dtype, delayed.shape[0])
+ if type(delayed) is da.Array:
+ assert delayed.ndim == 1
+ if meta is None:
+ meta = gb.Vector.new(delayed.dtype, delayed.shape[0])
+ else:
+ if meta is None:
+ meta = gb.Vector.new(delayed.dtype)
self._meta = meta
self._size = meta.size
self.dtype = meta.dtype
@@ -225,6 +236,8 @@ def V(self):
@property
def size(self):
+ if type(self._delayed) is DOnion:
+ return self._delayed.size
return self._meta.size
@property
@@ -461,11 +474,11 @@ def build(self, indices, values, *, size=None, chunks=None, dup_op=None, clear=F
x = self._optional_dup()
if type(indices) is list:
if np.max(indices) >= self._size:
- raise gb.exceptions.IndexOutOfBound
+ raise IndexOutOfBound
indices = da.core.from_array(np.array(indices), name="indices-" + tokenize(indices))
else:
if da.max(indices).compute() >= self._size:
- raise gb.exceptions.IndexOutOfBound
+ raise IndexOutOfBound
if type(values) is list:
values = da.core.from_array(np.array(values), name="values-" + tokenize(values))
@@ -503,11 +516,15 @@ def build(self, indices, values, *, size=None, chunks=None, dup_op=None, clear=F
# vector.build(indices, values, dup_op=dup_op)
# self.__init__(Vector.from_vector(vector)._delayed)
- def to_values(self, dtype=None, chunks="auto"):
+ def to_values(self, dtype=None, chunks="auto", status=None):
x = self._delayed
nvals_array = da.core.blockwise(
*(_nvals, "i"), *(x, "i"), adjust_chunks={"i": 1}, dtype=np.int64, meta=np.array([])
- ).compute()
+ )
+ packed_args = package_args()
+ packed_kwargs = package_kwargs(dtype=dtype, chunks=chunks, status="")
+
+ return DOnion.grow(nvals_array, self.to_values, packed_args, packed_kwargs)
stops = np.cumsum(nvals_array)
starts = np.roll(stops, 1)
@@ -609,9 +626,6 @@ def _chunk_diag(
The returned matrix is either empty or contains a piece of
the k-diagonal given by inner_vector
"""
- # This function creates a new matrix chunk with dimensions determined
- # by the input k-diagonal vector chunk. The matrix chunk may or may
- # not include the k-diagonal chunk
vector = inner_vector.value
vec_chunk = input_range[0]
rows = row_range[0]
@@ -749,14 +763,21 @@ def _build_1D_chunk(inner_vector, out_index_range, fragments, dup_op=None):
return InnerVector(inner_vector.value)
-def _from_values1D(fragments, index_range, gb_dtype=None):
+def _from_values1D(fragments, index_range, dup_op=None, gb_dtype=None):
inds = np.concatenate([inds for (inds, _) in fragments])
vals = np.concatenate([vals for (_, vals) in fragments])
size = index_range[0].stop - index_range[0].start
- return InnerVector(gb.Vector.from_values(inds, vals, size=size, dtype=gb_dtype))
+ return InnerVector(gb.Vector.from_values(inds, vals, size=size, dup_op=dup_op, dtype=gb_dtype))
+
+def _pick1D(indices, values, index_range, size):
+ # validate indices
+ indices = np.where(indices < 0, indices + size, indices)
+ bad_indices = (indices < 0) | (size <= indices)
+ if np.any(bad_indices):
+ raise IndexOutOfBound
-def _pick1D(indices, values, index_range):
+ # filter into chunk:
index_range = index_range[0]
indices_in = (index_range.start <= indices) & (indices < index_range.stop)
indices = indices[indices_in] - index_range.start
diff --git a/tests/from_grblas2/test_vector.py b/tests/from_grblas2/test_vector.py
index dd53de4..64c80ee 100644
--- a/tests/from_grblas2/test_vector.py
+++ b/tests/from_grblas2/test_vector.py
@@ -4,6 +4,7 @@
import sys
import weakref
+import dask.array as da
import dask_grblas
import grblas
import numpy as np
@@ -124,6 +125,58 @@ def test_from_values():
Vector.from_values([0], [1, 2])
+def test_from_values_dask():
+ indices = da.from_array(np.array([0, 1, 3]))
+ values = da.from_array(np.array([True, False, True]))
+ u = Vector.from_values(indices, values)
+ assert u.size == 4
+ assert u.nvals == 3
+ assert u.dtype == bool
+ values = da.from_array(np.array([12.3, 12.4, 12.5]))
+ u2 = Vector.from_values(indices, values, size=17)
+ assert u2.size == 17
+ assert u2.nvals == 3
+ assert u2.dtype == float
+ indices = da.from_array(np.array([0, 1, 1]))
+ values = da.from_array(np.array([1, 2, 3]))
+ u3 = Vector.from_values(indices, values, size=10, dup_op=binary.times)
+ assert u3.size == 10
+ assert u3.nvals == 2 # duplicates were combined
+ assert u3.dtype == int
+ assert u3[1].value == 6 # 2*3
+ values = da.from_array(np.array([True, True, True]))
+ with pytest.raises(ValueError, match="Duplicate indices found"):
+ # Duplicate indices requires a dup_op
+ Vector.from_values(indices, values)
+ empty_da = da.from_array(np.array([], dtype=int))
+ with pytest.raises(ValueError, match="No indices provided. Unable to infer size."):
+ Vector.from_values(empty_da, empty_da)
+
+ # Changed: Assume empty value is float64 (like numpy)
+ # with pytest.raises(ValueError, match="No values provided. Unable to determine type"):
+ w = Vector.from_values(empty_da, empty_da, size=10)
+ assert w.size == 10
+ assert w.nvals == 0
+ assert w.dtype == dtypes.FP64
+
+ with pytest.raises(ValueError, match="No indices provided. Unable to infer size"):
+ Vector.from_values(empty_da, empty_da, dtype=dtypes.INT64)
+ u4 = Vector.from_values(empty_da, empty_da, size=10, dtype=dtypes.INT64)
+ u5 = Vector.new(dtypes.INT64, size=10)
+ assert u4.isequal(u5, check_dtype=True)
+
+ # we check the index dtype when given a dask array
+ indices = da.from_array(np.array([1.2, 3.4]))
+ values = da.from_array(np.array([1, 2]))
+ with pytest.raises(ValueError, match="indices must be integers, not float64"):
+ Vector.from_values(indices, values)
+
+ # mis-matched sizes
+ indices = da.from_array(np.array([0]))
+ with pytest.raises(ValueError, match="`indices` and `values` lengths must match"):
+ Vector.from_values(indices, values)
+
+
def test_from_values_scalar():
u = Vector.from_values([0, 1, 3], 7)
assert u.size == 4
From df03363826fbcade1c993b62ee2c3b55486daadd Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Tue, 15 Feb 2022 17:10:19 +0100
Subject: [PATCH 02/18] introduced "DOnion": a dask array whose inner value is
also a dask array
---
dask_grblas/base.py | 33 ++++++--
dask_grblas/vector.py | 122 +++++++++++++++++++++---------
tests/from_grblas2/test_vector.py | 24 +++---
3 files changed, 122 insertions(+), 57 deletions(-)
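
A rough stand-alone sketch (plain dask only, hypothetical names) of the two-layer
laziness this patch formalises: the outer graph yields a value that is itself still
lazy, so it must be computed repeatedly until something concrete appears, which is
the same loop DOnion.compute performs.

import dask
import dask.array as da
import numpy as np

@dask.delayed
def seed_from_size(n):
    # the inner ("seed") collection can only be chunked once the size is concrete
    return da.arange(int(n), chunks=2)

shroud = da.max(da.from_array(np.array([0, 1, 3]))) + 1   # lazy 0-d "shroud"
kernel = seed_from_size(shroud)

value = kernel.compute()
while hasattr(value, "compute"):   # peel the onion until a concrete value remains
    value = value.compute()
print(value)                       # [0 1 2 3]
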
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index fe7cfe7..a744304 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -8,7 +8,9 @@
from . import replace as replace_singleton
from .mask import Mask
-from .utils import get_grblas_type, get_meta, np_dtype, wrap_inner
+from .utils import (
+ package_args, package_kwargs, get_grblas_type, get_meta, np_dtype, wrap_inner
+)
_expect_type = gb.base._expect_type
@@ -364,7 +366,7 @@ class DOnion:
(the shroud)
"""
@classmethod
- def grow(cls, shroud, seed_func, seed_meta, packed_args, packed_kwargs, *args, **kwargs):
+ def sprout(cls, shroud, seed_func, seed_meta, packed_args, packed_kwargs, *args, **kwargs):
"""
Develop a DOnion from dask array `shroud`
@@ -402,18 +404,33 @@ def compute(self, *args, **kwargs):
def persist(self, *args, **kwargs):
return self.kernel.compute(*args, **kwargs).persist(*args, **kwargs)
- def inject(self, func, *args, **kwargs):
- dtype = np_dtype(lookup_dtype(self.dtype))
- meta = self._meta
- return self.kernel.map_blocks(func, *args, **kwargs, dtype=dtype, meta=meta)
+ def extract(self, func, packed_args, packed_kwargs, dtype, meta, *args, **kwargs):
+ func = partial(func, *packed_args, **packed_kwargs)
+ kernel = self.kernel.map_blocks(func, *args, **kwargs, dtype=dtype, meta=meta)
+ return DOnion(kernel, meta=meta)
+
+ @classmethod
+ def extract_shared(
+ cls, donions, func, packed_args, packed_kwargs, dtype, meta, *args, **kwargs
+ ):
+ donions = tuple(donion.kernel for donion in donions)
+ func = partial(func, *packed_args, **packed_kwargs)
+ kernel = da.map_blocks(func, *donions, *args, **kwargs, dtype=dtype, meta=meta)
+ return DOnion(kernel, meta=meta)
def __getattr__(self, item):
func = lambda x: getattr(x, item)
- return DOnion(self.inject(func))
+ # TODO: lookup dtype and meta of attribute!!!
+ dtype = np_dtype(lookup_dtype(self.dtype))
+ meta = self._meta
+ return self.extract(func, package_args(), package_kwargs(), dtype, meta)
def getattr(self, name, packed_args, packed_kwargs, *args, **kwargs):
func = partial(Donion.apply, name, *packed_args, **packed_kwargs)
- return DOnion(self.inject(func, *args, **kwargs))
+ # TODO: lookup dtype and meta of attribute!!!
+ dtype = np_dtype(lookup_dtype(self.dtype))
+ meta = self._meta
+ return self.extract(func, package_args(), package_kwargs(), dtype, meta, *args, **kwargs)
@classmethod
def apply(cls, name, *args, **kwargs):
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 024c2fe..052fbc7 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -24,6 +24,7 @@
build_chunk_offsets_dask_array,
)
from grblas.exceptions import IndexOutOfBound
+from plotly.validators.streamtube import starts
class InnerVector(InnerBaseType):
@@ -116,25 +117,55 @@ def from_values(
chunks="auto",
name=None,
):
+ if hasattr(values, 'dtype'):
+ dtype = lookup_dtype(values.dtype if dtype is None else dtype)
+ meta = gb.Vector.new(dtype)
+ meta_dtype = np_dtype(meta.dtype)
+ packed_kwargs = package_kwargs(
+ size=size, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
+ )
+ if type(indices) is DOnion and type(values) is DOnion:
+ packed_args = package_args()
+ return DOnion.extract_shared(
+ (indices, values), Vector.from_values, packed_args, packed_kwargs, meta_dtype, meta
+ )
+ if type(indices) is DOnion:
+ packed_args = package_args(values)
+ return DOnion.extract_shared(
+ (indices,), Vector.from_values, packed_args, packed_kwargs, meta_dtype, meta
+ )
+ if type(values) is DOnion:
+ packed_args = package_args(indices)
+ return DOnion.extract_shared(
+ (values,), Vector.from_values, packed_args, packed_kwargs, meta_dtype, meta
+ )
if type(indices) is da.Array and type(values) is da.Array:
np_idtype_ = np_dtype(lookup_dtype(indices.dtype))
if size is not None:
chunks = da.core.normalize_chunks(chunks, (size,), dtype=np_idtype_)
else:
+ if indices.size == 0:
+ raise ValueError("No indices provided. Unable to infer size.")
size = da.max(indices) + 1
# Here `size` is a dask 0d-array whose computed value is
# used to determine the size of the Vector to be returned.
# But since we do not want to compute anything just now,
# we instead create a "DOnion" (dask onion) object
- meta = gb.Vector.new(values.dtype)
packed_args = package_args(indices, values)
packed_kwargs = package_kwargs(
dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
)
- donion = DOnion.grow(size, Vector.from_values, meta, packed_args, packed_kwargs)
+ donion = DOnion.sprout(size, Vector.from_values, meta, packed_args, packed_kwargs)
return Vector(donion, meta=meta)
- vdtype = gb.Vector.new(values.dtype).dtype
+ if indices.size > 0:
+ if indices.dtype.kind not in np.typecodes["AllInteger"]:
+ raise ValueError(f"indices must be integers, not {indices.dtype}")
+
+ if indices.size != values.size:
+ raise ValueError("`indices` and `values` lengths must match")
+
+ vdtype = dtype
np_vdtype_ = np_dtype(vdtype)
name_ = name
@@ -516,52 +547,67 @@ def build(self, indices, values, *, size=None, chunks=None, dup_op=None, clear=F
# vector.build(indices, values, dup_op=dup_op)
# self.__init__(Vector.from_vector(vector)._delayed)
- def to_values(self, dtype=None, chunks="auto", status=None):
+ def to_values(self, dtype=None, chunks="auto"):
x = self._delayed
nvals_array = da.core.blockwise(
*(_nvals, "i"), *(x, "i"), adjust_chunks={"i": 1}, dtype=np.int64, meta=np.array([])
)
- packed_args = package_args()
- packed_kwargs = package_kwargs(dtype=dtype, chunks=chunks, status="")
-
- return DOnion.grow(nvals_array, self.to_values, packed_args, packed_kwargs)
- stops = np.cumsum(nvals_array)
- starts = np.roll(stops, 1)
+ stops = da.cumsum(nvals_array)
+ starts = da.roll(stops, 1)
+ starts = starts.copy() if starts.size == 1 else starts # bug!!
starts[0] = 0
nnz = stops[-1]
- starts = starts.reshape(nvals_array.shape)
- starts = da.from_array(starts, chunks=1, name="starts" + tokenize(starts))
- starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta)
+ def _to_values(x, starts, stops, dtype, chunks, nnz):
+ # starts = da.from_array(starts, chunks=1, name="starts" + tokenize(starts))
+ starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta)
+
+ # stops = da.from_array(stops, chunks=1, name="stops" + tokenize(stops))
+ stops = da.core.Array(stops.dask, stops.name, x.chunks, stops.dtype, meta=x._meta)
+
+ chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64)
+ output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-")
+
+ gb_dtype = lookup_dtype(dtype)
+ dtype_ = np_dtype(gb_dtype)
+ index_offsets = build_chunk_offsets_dask_array(x, 0, "index_offset-")
+ x = da.core.blockwise(
+ *(VectorTupleExtractor, "ij"),
+ *(output_ranges, "j"),
+ *(x, "i"),
+ *(index_offsets, "i"),
+ *(starts, "i"),
+ *(stops, "i"),
+ gb_dtype=gb_dtype,
+ dtype=dtype_,
+ meta=np.array([[]]),
+ )
+ return da.reduction(
+ x, _identity, _flatten, axis=0, concatenate=False, dtype=dtype_, meta=np.array([])
+ )
- stops = stops.reshape(nvals_array.shape)
- stops = da.from_array(stops, chunks=1, name="stops" + tokenize(stops))
- stops = da.core.Array(stops.dask, stops.name, x.chunks, stops.dtype, meta=x._meta)
+ def apply(func, dtype, meta, x):
+ return da.map_blocks(func, x, dtype=dtype, meta=meta)
- chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64)
- output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-")
-
- dtype_ = np_dtype(self.dtype)
- index_offsets = build_chunk_offsets_dask_array(x, 0, "index_offset-")
- x = da.core.blockwise(
- *(VectorTupleExtractor, "ij"),
- *(output_ranges, "j"),
- *(x, "i"),
- *(index_offsets, "i"),
- *(starts, "i"),
- *(stops, "i"),
- gb_dtype=dtype,
- dtype=dtype_,
- meta=np.array([[]]),
- )
- x = da.reduction(
- x, _identity, _flatten, axis=0, concatenate=False, dtype=dtype_, meta=np.array([])
- )
+ dtype = self.dtype if dtype is None else dtype
+ packed_args = package_args(x, starts, stops, dtype, chunks)
+ packed_kwargs = package_kwargs()
+ meta = np.array([])
+ iv_donion = DOnion.sprout(nnz, _to_values, meta, packed_args, packed_kwargs)
meta_i, meta_v = self._meta.to_values(dtype)
- indices = da.map_blocks(_get_indices, x, dtype=meta_i.dtype, meta=meta_i)
- values = da.map_blocks(_get_values, x, dtype=meta_v.dtype, meta=meta_v)
+
+ dtype_i = np_dtype(lookup_dtype(meta_i.dtype))
+ packed_args = package_args(_get_indices, dtype_i, meta_i)
+ packed_kwargs = package_kwargs()
+ indices = iv_donion.extract(apply, packed_args, packed_kwargs, dtype_i, meta_i)
+
+ dtype_v = np_dtype(lookup_dtype(meta_v.dtype))
+ packed_args = package_args(_get_values, dtype_v, meta_v)
+ packed_kwargs = package_kwargs()
+ values = iv_donion.extract(apply, packed_args, packed_kwargs, dtype_v, meta_v)
+
return indices, values
# delayed = self._delayed
@@ -767,6 +813,8 @@ def _from_values1D(fragments, index_range, dup_op=None, gb_dtype=None):
inds = np.concatenate([inds for (inds, _) in fragments])
vals = np.concatenate([vals for (_, vals) in fragments])
size = index_range[0].stop - index_range[0].start
+ if inds.size == 0:
+ return InnerVector(gb.Vector.new(gb_dtype, size=size))
return InnerVector(gb.Vector.from_values(inds, vals, size=size, dup_op=dup_op, dtype=gb_dtype))
diff --git a/tests/from_grblas2/test_vector.py b/tests/from_grblas2/test_vector.py
index 64c80ee..fe31783 100644
--- a/tests/from_grblas2/test_vector.py
+++ b/tests/from_grblas2/test_vector.py
@@ -138,7 +138,7 @@ def test_from_values_dask():
assert u2.nvals == 3
assert u2.dtype == float
indices = da.from_array(np.array([0, 1, 1]))
- values = da.from_array(np.array([1, 2, 3]))
+ values = da.from_array(np.array([1, 2, 3], dtype=np.int64))
u3 = Vector.from_values(indices, values, size=10, dup_op=binary.times)
assert u3.size == 10
assert u3.nvals == 2 # duplicates were combined
@@ -147,10 +147,10 @@ def test_from_values_dask():
values = da.from_array(np.array([True, True, True]))
with pytest.raises(ValueError, match="Duplicate indices found"):
# Duplicate indices requires a dup_op
- Vector.from_values(indices, values)
- empty_da = da.from_array(np.array([], dtype=int))
+ Vector.from_values(indices, values).compute()
+ empty_da = da.from_array(np.array([]))
with pytest.raises(ValueError, match="No indices provided. Unable to infer size."):
- Vector.from_values(empty_da, empty_da)
+ Vector.from_values(empty_da, empty_da).compute()
# Changed: Assume empty value is float64 (like numpy)
# with pytest.raises(ValueError, match="No values provided. Unable to determine type"):
@@ -169,12 +169,12 @@ def test_from_values_dask():
indices = da.from_array(np.array([1.2, 3.4]))
values = da.from_array(np.array([1, 2]))
with pytest.raises(ValueError, match="indices must be integers, not float64"):
- Vector.from_values(indices, values)
+ Vector.from_values(indices, values).compute()
# mis-matched sizes
indices = da.from_array(np.array([0]))
with pytest.raises(ValueError, match="`indices` and `values` lengths must match"):
- Vector.from_values(indices, values)
+ Vector.from_values(indices, values).compute()
def test_from_values_scalar():
@@ -270,20 +270,20 @@ def test_build_scalar(v):
def test_extract_values(v):
idx, vals = v.to_values()
- np.testing.assert_array_equal(idx, (1, 3, 4, 6))
- np.testing.assert_array_equal(vals, (1, 1, 2, 0))
+ np.testing.assert_array_equal(idx.compute(), (1, 3, 4, 6))
+ np.testing.assert_array_equal(vals.compute(), (1, 1, 2, 0))
assert idx.dtype == np.uint64
assert vals.dtype == np.int64
idx, vals = v.to_values(dtype=int)
- np.testing.assert_array_equal(idx, (1, 3, 4, 6))
- np.testing.assert_array_equal(vals, (1, 1, 2, 0))
+ np.testing.assert_array_equal(idx.compute(), (1, 3, 4, 6))
+ np.testing.assert_array_equal(vals.compute(), (1, 1, 2, 0))
assert idx.dtype == np.uint64
assert vals.dtype == np.int64
idx, vals = v.to_values(dtype=float)
- np.testing.assert_array_equal(idx, (1, 3, 4, 6))
- np.testing.assert_array_equal(vals, (1, 1, 2, 0))
+ np.testing.assert_array_equal(idx.compute(), (1, 3, 4, 6))
+ np.testing.assert_array_equal(vals.compute(), (1, 1, 2, 0))
assert idx.dtype == np.uint64
assert vals.dtype == np.float64
From 087d0a04832ed10421a8a0c73a65350279a85aa1 Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Tue, 15 Feb 2022 18:56:05 +0100
Subject: [PATCH 03/18] ran black .
---
dask_grblas/_ss/matrix.py | 1 -
dask_grblas/_ss/vector.py | 1 -
dask_grblas/base.py | 17 ++++----
dask_grblas/expr.py | 36 ++++++++--------
dask_grblas/matrix.py | 68 +++++++++++++++++--------------
dask_grblas/ss/_core.py | 1 -
dask_grblas/vector.py | 27 ++++++------
tests/from_grblas2/test_matrix.py | 19 +++++----
tests/from_grblas2/test_vector.py | 4 +-
9 files changed, 94 insertions(+), 80 deletions(-)
diff --git a/dask_grblas/_ss/matrix.py b/dask_grblas/_ss/matrix.py
index 188f94d..8aad87a 100644
--- a/dask_grblas/_ss/matrix.py
+++ b/dask_grblas/_ss/matrix.py
@@ -30,4 +30,3 @@ def diag(self, vector, k=0, chunks="auto", dtype=None):
vector = self._parent._expect_type(vector, dgb.Vector, within="ss.diag", argname="vector")
rv = vector._diag(k, chunks=chunks, dtype=dtype)
self._parent.__init__(rv._delayed, nvals=rv._nvals)
-
diff --git a/dask_grblas/_ss/vector.py b/dask_grblas/_ss/vector.py
index 3766e38..a1ee416 100644
--- a/dask_grblas/_ss/vector.py
+++ b/dask_grblas/_ss/vector.py
@@ -37,4 +37,3 @@ def diag(self, matrix, k=0, chunks="auto", dtype=None):
matrix = matrix._matrix
rv = matrix._diag(k, chunks=chunks, dtype=dtype)
self._parent.__init__(rv._delayed, nvals=rv._nvals)
-
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index a744304..12d7ac7 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -8,9 +8,7 @@
from . import replace as replace_singleton
from .mask import Mask
-from .utils import (
- package_args, package_kwargs, get_grblas_type, get_meta, np_dtype, wrap_inner
-)
+from .utils import package_args, package_kwargs, get_grblas_type, get_meta, np_dtype, wrap_inner
_expect_type = gb.base._expect_type
@@ -134,7 +132,7 @@ def dup(self, dtype=None, *, mask=None, name=None):
nvals = self._nvals
else:
nvals = None
-
+
return type(self)(delayed, nvals=nvals)
else:
return type(self)(delayed)
@@ -202,7 +200,7 @@ def _optional_dup(self):
def compute_and_store_nvals():
"""
compute and store the number of values of this Vector/Matrix
-
+
This could be useful to increase the performance of Aggregators
which inspect ._nvals to determine if a fast path can be taken
to compute the aggregation result.
@@ -358,18 +356,19 @@ def visualize(self, *args, **kwargs):
class DOnion:
"""
- Dask (or Delayed) Onion (DOnion):
-
+ Dask (or Delayed) Onion (DOnion):
+
Encapsulates a dask array whose inner value is also a dask array.
Intended to be used in cases where the size of the inner dask
array (the seed) depends on the inner value of another dask array
(the shroud)
"""
+
@classmethod
def sprout(cls, shroud, seed_func, seed_meta, packed_args, packed_kwargs, *args, **kwargs):
"""
Develop a DOnion from dask array `shroud`
-
+
Shroud a dask array (the seed) returned by `seed_func` using another dask array (the
shroud)
:shroud: dask array whose inner value determines the (size of) seed dask array
@@ -397,7 +396,7 @@ def __eq__(self, other):
def compute(self, *args, **kwargs):
value = self.kernel.compute(*args, **kwargs)
- while hasattr(value, 'compute'):
+ while hasattr(value, "compute"):
value = value.compute(*args, **kwargs)
return value
diff --git a/dask_grblas/expr.py b/dask_grblas/expr.py
index ba618f9..c2bb355 100644
--- a/dask_grblas/expr.py
+++ b/dask_grblas/expr.py
@@ -205,8 +205,10 @@ def _reduce(self, dtype):
)
return delayed
- def _aggregate(self, op, updating=None, dtype=None, mask=None, accum=None, replace=None, name=None):
- """ Handover to the Aggregator to compute the reduction"""
+ def _aggregate(
+ self, op, updating=None, dtype=None, mask=None, accum=None, replace=None, name=None
+ ):
+ """Handover to the Aggregator to compute the reduction"""
if updating is None:
output = self.construct_output(dtype, name=name)
@@ -386,9 +388,9 @@ def _update(self, updating, *, mask=None, accum=None, replace=None):
def construct_output(self, dtype=None, *, name=None):
if dtype is None:
dtype = self.dtype
- return get_return_type(
- self._meta.output_type.new(dtype)
- ).new(dtype, *self._meta.shape, name=name)
+ return get_return_type(self._meta.output_type.new(dtype)).new(
+ dtype, *self._meta.shape, name=name
+ )
@property
def value(self):
@@ -396,22 +398,19 @@ def value(self):
return self.new().value
def _new_scalar(self, dtype, *, name=None):
- """Create a new empty Scalar.
- """
+ """Create a new empty Scalar."""
from .scalar import Scalar
return Scalar.new(dtype, name=name)
def _new_vector(self, dtype, size=0, *, name=None):
- """Create a new empty Vector.
- """
+ """Create a new empty Vector."""
from .vector import Vector
return Vector.new(dtype, size, name=name)
def _new_matrix(self, dtype, nrows=0, ncols=0, *, name=None):
- """Create a new empty Matrix.
- """
+ """Create a new empty Matrix."""
from .matrix import Matrix
return Matrix.new(dtype, nrows, ncols, name=name)
@@ -581,7 +580,7 @@ def __init__(self, parent, *, mask=None, accum=None, replace=False, input_mask=N
self.replace = replace
self._meta = parent._meta(mask=get_meta(mask), accum=accum, replace=replace)
# Aggregator specific attribute requirements:
- self.kwargs = {'mask': mask}
+ self.kwargs = {"mask": mask}
def __delitem__(self, keys):
# Occurs when user calls `del C(params)[index]`
@@ -904,7 +903,7 @@ def _data_x_index_meshpoint_4assign(*args, x_ndim, subassign, obj_offset_axes, o
obj = args[2 * x_ndim + 1]
being assigned are also contained in the returned Fragmenter object.
"""
- x_ranges = args[0 : x_ndim]
+ x_ranges = args[0:x_ndim]
indices = args[x_ndim : 2 * x_ndim]
mask = args[2 * x_ndim]
@@ -1022,7 +1021,7 @@ def _assign(
ot,
):
"""
- Performs the actual GrB_assign:
+ Performs the actual GrB_assign:
old_data(mask, ...)[index] << obj
or GxB_subassign:
old_data[index](mask, ...) << obj
@@ -1104,7 +1103,11 @@ def _upcast(grblas_object, ndim, axis_is_missing):
def _data_x_index_meshpoint_4extract(
- *args, xt, input_mask_type, mask_type, gb_dtype,
+ *args,
+ xt,
+ input_mask_type,
+ mask_type,
+ gb_dtype,
):
"""
Returns only that part of the source inner Vector/Matrix data-chunk x = args[0]
@@ -1852,8 +1855,9 @@ def _reduce_axis_combine(op, x, axis=None, keepdims=None, computing_meta=None, d
"""Combine results from _reduce_axis on each chunk"""
if computing_meta:
return np.empty(0, dtype=dtype)
- axis, = axis
+ (axis,) = axis
if type(x) is list:
+
def _add_blocks(monoid_, x, y):
return x.ewise_add(y, monoid_).new()
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index 88e952d..1f1f80c 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -40,7 +40,7 @@ def __getitem__(self, index):
class Matrix(BaseType):
- __slots__ = "ss",
+ __slots__ = ("ss",)
ndim = 2
_is_transposed = False
@@ -336,12 +336,8 @@ def _diag(self, k=0, dtype=None, chunks="auto"):
col_blockid = np.arange(A.numblocks[1])
# locate first chunk containing diaagonal:
- row_filter = (row_starts <= kdiag_row_start) & (
- kdiag_row_start < row_stops_
- )
- col_filter = (col_starts <= kdiag_col_start) & (
- kdiag_col_start < col_stops_
- )
+ row_filter = (row_starts <= kdiag_row_start) & (kdiag_row_start < row_stops_)
+ col_filter = (col_starts <= kdiag_col_start) & (kdiag_col_start < col_stops_)
(I,) = row_blockid[row_filter]
(J,) = col_blockid[col_filter]
@@ -408,20 +404,10 @@ def _diag_old(self, k=0, dtype=None, chunks="auto"):
meta=np.array([[[]]]),
)
fragments = da.reduction(
- fragments,
- _identity,
- _identity,
- axis=0,
- dtype=dtype_,
- meta=np.array([[]])
+ fragments, _identity, _identity, axis=0, dtype=dtype_, meta=np.array([[]])
)
delayed = da.reduction(
- fragments,
- _identity,
- _identity,
- axis=0,
- dtype=dtype_,
- meta=wrap_inner(meta)
+ fragments, _identity, _identity, axis=0, dtype=dtype_, meta=wrap_inner(meta)
)
nvals = 0 if self._nvals == 0 else None
return get_return_type(meta)(delayed, nvals=nvals)
@@ -502,7 +488,18 @@ def reduce_scalar(self, op=monoid.plus):
meta = self._meta.reduce_scalar(op)
return GbDelayed(self, "reduce_scalar", op, meta=meta)
- def build(self, rows, columns, values, *, dup_op=None, clear=False, nrows=None, ncols=None, chunks=None):
+ def build(
+ self,
+ rows,
+ columns,
+ values,
+ *,
+ dup_op=None,
+ clear=False,
+ nrows=None,
+ ncols=None,
+ chunks=None,
+ ):
if clear:
self.clear()
elif self.nvals.compute() > 0:
@@ -608,7 +605,7 @@ def to_values(self, dtype=None, chunks="auto"):
dtype_ = np_dtype(self.dtype)
# Compute row/col offsets as dask arrays that can align with this
# Matrix's (self's) chunks to convert chunk row/col indices to
- # full dask-grblas Matrix indices.
+ # full dask-grblas Matrix indices.
row_offsets = build_chunk_offsets_dask_array(x, 0, "row_offset-")
col_offsets = build_chunk_offsets_dask_array(x, 1, "col_offset-")
x = da.core.blockwise(
@@ -787,7 +784,7 @@ def _chunk_diag(
j = cols.stop
chunk_kdiag_row_stop = j - k
- # intersect chunk row range with k-diagonal within chunk column bounds
+ # intersect chunk row range with k-diagonal within chunk column bounds
if rows.start < chunk_kdiag_row_stop and chunk_kdiag_row_start < rows.stop:
chunk_kdiag_row_start = max(chunk_kdiag_row_start, rows.start)
chunk_kdiag_row_stop = min(chunk_kdiag_row_stop, rows.stop)
@@ -798,7 +795,7 @@ def _chunk_diag(
vector_kdiag_start = chunk_kdiag_row_start - kdiag_row_start
vector_kdiag_stop = chunk_kdiag_row_stop - kdiag_row_start
- # intersect output-range with row-range of k-diagonal within chunk
+ # intersect output-range with row-range of k-diagonal within chunk
if output_range.start < vector_kdiag_stop and vector_kdiag_start < output_range.stop:
vector_kdiag_start = max(output_range.start, vector_kdiag_start)
vector_kdiag_stop = min(output_range.stop, vector_kdiag_stop)
@@ -815,8 +812,10 @@ def _chunk_diag(
chunk_kdiag_col_start -= cols.start
chunk_kdiag_col_stop -= cols.start
# extract square sub-matrix containing k-diagonal
- matrix = matrix[chunk_kdiag_row_start : chunk_kdiag_row_stop,
- chunk_kdiag_col_start : chunk_kdiag_col_stop]
+ matrix = matrix[
+ chunk_kdiag_row_start:chunk_kdiag_row_stop,
+ chunk_kdiag_col_start:chunk_kdiag_col_stop,
+ ]
# extract its diagonal
vector = gb.ss.diag(matrix.new(), k=0, dtype=gb_dtype)
return wrap_inner(vector)
@@ -907,7 +906,11 @@ def _delitem_in_chunk(inner_mat, row_range, col_range, row, col):
def _build_2D_chunk(
- inner_matrix, out_row_range, out_col_range, fragments, dup_op=None,
+ inner_matrix,
+ out_row_range,
+ out_col_range,
+ fragments,
+ dup_op=None,
):
"""
Reassembles filtered tuples (row, col, val) in the list `fragments`
@@ -921,7 +924,12 @@ def _build_2D_chunk(
nrows = out_row_range[0].stop - out_row_range[0].start
ncols = out_col_range[0].stop - out_col_range[0].start
inner_matrix.value.build(
- rows, cols, vals, nrows=nrows, ncols=ncols, dup_op=dup_op,
+ rows,
+ cols,
+ vals,
+ nrows=nrows,
+ ncols=ncols,
+ dup_op=dup_op,
)
return InnerMatrix(inner_matrix.value)
@@ -1105,9 +1113,9 @@ def __init__(
self.cols += col_offset[0]
start = start - nval_start[0, 0]
stop = stop - nval_start[0, 0]
- self.rows = self.rows[start : stop]
- self.cols = self.cols[start : stop]
- self.vals = self.vals[start : stop]
+ self.rows = self.rows[start:stop]
+ self.cols = self.cols[start:stop]
+ self.vals = self.vals[start:stop]
else:
self.rows = np.array([], dtype=self.rows.dtype)
self.cols = np.array([], dtype=self.cols.dtype)
diff --git a/dask_grblas/ss/_core.py b/dask_grblas/ss/_core.py
index 1356d50..0510062 100644
--- a/dask_grblas/ss/_core.py
+++ b/dask_grblas/ss/_core.py
@@ -56,4 +56,3 @@ def diag(x, k=0, dtype=None, chunks="auto", *, name=None):
rv = Vector.new(dtype, size=size, name=name)
rv.ss.diag(x, k, dtype=dtype, chunks=chunks)
return rv
-
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 052fbc7..da6ef11 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -82,7 +82,7 @@ def __getitem__(self, index):
class Vector(BaseType):
- __slots__ = "ss",
+ __slots__ = ("ss",)
ndim = 1
@classmethod
@@ -117,7 +117,7 @@ def from_values(
chunks="auto",
name=None,
):
- if hasattr(values, 'dtype'):
+ if hasattr(values, "dtype"):
dtype = lookup_dtype(values.dtype if dtype is None else dtype)
meta = gb.Vector.new(dtype)
meta_dtype = np_dtype(meta.dtype)
@@ -152,9 +152,7 @@ def from_values(
# But since we do not want to compute anything just now,
# we instead create a "DOnion" (dask onion) object
packed_args = package_args(indices, values)
- packed_kwargs = package_kwargs(
- dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
- )
+ packed_kwargs = package_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
donion = DOnion.sprout(size, Vector.from_values, meta, packed_args, packed_kwargs)
return Vector(donion, meta=meta)
@@ -208,7 +206,11 @@ def new(cls, dtype, size=0, *, chunks="auto", name=None):
np_vdtype_ = np_dtype(vdtype)
chunksz = build_ranges_dask_array_from_chunks(chunks[0], "ranges-" + tokenize(chunks))
delayed_ = da.map_blocks(
- _new_Vector_chunk, chunksz, gb_dtype=vdtype, dtype=np_vdtype_, meta=InnerVector(meta)
+ _new_Vector_chunk,
+ chunksz,
+ gb_dtype=vdtype,
+ dtype=np_vdtype_,
+ meta=InnerVector(meta),
)
return Vector(delayed_, nvals=0)
@@ -353,7 +355,6 @@ def _diag(self, k=0, dtype=None, chunks="auto"):
nvals = 0 if self._nvals == 0 else None
return get_return_type(meta)(delayed, nvals)
-
def rechunk(self, inplace=False, chunks="auto"):
chunks = da.core.normalize_chunks(chunks, self.shape, dtype=np.int64)
if inplace:
@@ -670,7 +671,7 @@ def _chunk_diag(
x is determined by various conditions.
The returned matrix is either empty or contains a piece of
- the k-diagonal given by inner_vector
+ the k-diagonal given by inner_vector
"""
vector = inner_vector.value
vec_chunk = input_range[0]
@@ -685,8 +686,8 @@ def _chunk_diag(
kdiag_row_stop_ = kdiag_col_stop_ - k
# CHANGE REFERENCE POINT: to global matrix row 0 col 0
- kdiag_chunk_col_start = vec_chunk.start + kdiag_col_start
- kdiag_chunk_col_stop_ = vec_chunk.stop + kdiag_col_start
+ kdiag_chunk_col_start = vec_chunk.start + kdiag_col_start
+ kdiag_chunk_col_stop_ = vec_chunk.stop + kdiag_col_start
# intersect matrix chunk column range with k-diagonal chunk column-range
if cols.start < kdiag_chunk_col_stop_ and kdiag_chunk_col_start < cols.stop:
@@ -723,11 +724,11 @@ def _chunk_diag(
kdiag_nt_col_start = kdiag_nt_row_start + k
kdiag_nt_col_stop_ = kdiag_nt_row_stop_ + k
- # extract intersecting vector and convert to diagonal matrix:
+ # extract intersecting vector and convert to diagonal matrix:
# CHANGE REFERENCE POINT: to vector chunk index 0
vec_nt_start = kdiag_nt_col_start - kdiag_col_start - vec_chunk.start
vec_nt_stop_ = kdiag_nt_col_stop_ - kdiag_col_start - vec_chunk.start
- vector_nt = vector[vec_nt_start : vec_nt_stop_].new()
+ vector_nt = vector[vec_nt_start:vec_nt_stop_].new()
diag_matrix = gb.ss.diag(vector_nt, k=0, dtype=gb_dtype)
# insert diag_matrix into matrix chunk:
@@ -737,7 +738,7 @@ def _chunk_diag(
# destination column index range
j0 = kdiag_nt_col_start - out_col_start
j1 = kdiag_nt_col_stop_ - out_col_start
- matrix[i0 : i1, j0 : j1] << diag_matrix
+ matrix[i0:i1, j0:j1] << diag_matrix
return wrap_inner(matrix)
width = 0
diff --git a/tests/from_grblas2/test_matrix.py b/tests/from_grblas2/test_matrix.py
index 3c46ef4..9d40744 100644
--- a/tests/from_grblas2/test_matrix.py
+++ b/tests/from_grblas2/test_matrix.py
@@ -831,7 +831,7 @@ def test_subassign_row_col(A_chunks):
A.rechunk(chunks=chunks, inplace=True)
m = Vector.from_values([1], [True])
v = Vector.from_values([0, 1], [10, 20])
-
+
A[[0, 1], 0](m.S) << v
result1 = Matrix.from_values(
[0, 0, 0, 1, 1, 1, 2, 2, 2],
@@ -839,7 +839,7 @@ def test_subassign_row_col(A_chunks):
[0, 1, 2, 20, 4, 5, 6, 7, 8],
)
assert A.isequal(result1)
-
+
A[1, [1, 2]](m.V, accum=binary.plus).update(v)
result2 = Matrix.from_values(
[0, 0, 0, 1, 1, 1, 2, 2, 2],
@@ -847,7 +847,7 @@ def test_subassign_row_col(A_chunks):
[0, 1, 2, 20, 4, 25, 6, 7, 8],
)
assert A.isequal(result2)
-
+
A[[0, 1], 0](m.S, binary.plus, replace=True) << v
result3 = Matrix.from_values(
[0, 0, 1, 1, 1, 2, 2, 2],
@@ -855,10 +855,10 @@ def test_subassign_row_col(A_chunks):
[1, 2, 40, 4, 25, 6, 7, 8],
)
assert A.isequal(result3)
-
+
with pytest.raises(DimensionMismatch):
A(m.S)[[0, 1], 0] << v
-
+
A[[0, 1], 0](m.S) << 99
result4 = Matrix.from_values(
[0, 0, 1, 1, 1, 2, 2, 2],
@@ -866,7 +866,7 @@ def test_subassign_row_col(A_chunks):
[1, 2, 99, 4, 25, 6, 7, 8],
)
assert A.isequal(result4)
-
+
A[[1, 2], 0](m.S, binary.plus, replace=True) << 100
result5 = Matrix.from_values(
[0, 0, 1, 1, 2, 2, 2],
@@ -874,7 +874,7 @@ def test_subassign_row_col(A_chunks):
[1, 2, 4, 25, 106, 7, 8],
)
assert A.isequal(result5)
-
+
A[2, [0, 1]](m.S) << -1
result6 = Matrix.from_values(
[0, 0, 1, 1, 2, 2, 2],
@@ -1480,6 +1480,7 @@ def test_reduce_agg_argminmax(A, A_chunks):
with pytest.raises(ValueError, match="Aggregator"):
A.reduce_scalar(silly).new()
+
@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_reduce_agg_firstlast(A, A_chunks):
A_ = A
@@ -2697,7 +2698,9 @@ def test_diag(A, A_chunks, params):
A = A_.dup()
A.rechunk(chunks=in_chunks, inplace=True)
k, indices, values = params
- expected = Vector.from_values(indices, values, dtype=A.dtype, size=max(0, A.nrows - abs(k)))
+ expected = Vector.from_values(
+ indices, values, dtype=A.dtype, size=max(0, A.nrows - abs(k))
+ )
v = dask_grblas.ss.diag(A, k=k, chunks=out_chunks)
assert expected.isequal(v)
v[:] = 0
diff --git a/tests/from_grblas2/test_vector.py b/tests/from_grblas2/test_vector.py
index fe31783..c07afc3 100644
--- a/tests/from_grblas2/test_vector.py
+++ b/tests/from_grblas2/test_vector.py
@@ -1239,7 +1239,9 @@ def test_diag(v, A_chunks):
size = v.size + abs(k)
rows = indices + max(0, -k)
cols = indices + max(0, k)
- expected = Matrix.from_values(rows, cols, values, nrows=size, ncols=size, dtype=v.dtype)
+ expected = Matrix.from_values(
+ rows, cols, values, nrows=size, ncols=size, dtype=v.dtype
+ )
assert expected.isequal(A)
w = dask_grblas.ss.diag(A, Scalar.from_value(k), chunks=in_chunks)
assert v.isequal(w)
From 865aa7d7fd008ad1571ba4a68da573c30de606b0 Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Thu, 17 Feb 2022 06:24:50 +0100
Subject: [PATCH 04/18] ran flake8 and black again
---
dask_grblas/base.py | 8 +-
dask_grblas/io.py | 249 +++++++++++++++++++++++++++++++++++++-----
dask_grblas/matrix.py | 8 +-
dask_grblas/vector.py | 3 -
4 files changed, 230 insertions(+), 38 deletions(-)
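
For context on the MMFile reader vendored below: a Matrix Market file starts with a
banner line and a size line, and that header is all MMFile.info() needs. A tiny
stand-alone sketch of that parsing (illustrative only, not the vendored code):

header = b"%%MatrixMarket matrix coordinate real general"
size_line = b"3 4 5"    # rows cols entries, for the "coordinate" format

mmid, obj, fmt, field, symmetry = [s.decode("latin1") for s in header.split()]
assert mmid.startswith("%%MatrixMarket") and obj.lower() == "matrix"
rows, cols, entries = map(int, size_line.split())
print(fmt, field, symmetry, rows, cols, entries)   # coordinate real general 3 4 5
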
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 759c622..94bf605 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -418,22 +418,22 @@ def extract_shared(
return DOnion(kernel, meta=meta)
def __getattr__(self, item):
- func = lambda x: getattr(x, item)
+ func = partial(getattr, name=item)
# TODO: lookup dtype and meta of attribute!!!
dtype = np_dtype(lookup_dtype(self.dtype))
meta = self._meta
return self.extract(func, package_args(), package_kwargs(), dtype, meta)
def getattr(self, name, packed_args, packed_kwargs, *args, **kwargs):
- func = partial(Donion.apply, name, *packed_args, **packed_kwargs)
+ func = partial(DOnion.extractattr, name, packed_args, packed_kwargs)
# TODO: lookup dtype and meta of attribute!!!
dtype = np_dtype(lookup_dtype(self.dtype))
meta = self._meta
return self.extract(func, package_args(), package_kwargs(), dtype, meta, *args, **kwargs)
@classmethod
- def apply(cls, name, *args, **kwargs):
- return getattr(x, name)(*args, **kwargs)
+ def extractattr(cls, name, packed_args, packed_kwargs, x):
+ return getattr(x, name)(*packed_args, **packed_kwargs)
# Dask task functions
diff --git a/dask_grblas/io.py b/dask_grblas/io.py
index d3c462e..6f183cb 100644
--- a/dask_grblas/io.py
+++ b/dask_grblas/io.py
@@ -2,7 +2,6 @@
from math import floor, sqrt
from numpy import asarray, conj, zeros, concatenate, ones, empty
-from scipy.io import mmio # noqa
def symm_I_J(pos, n):
@@ -97,40 +96,238 @@ def home(stream, search_window_size=8):
# -----------------------------------------------------------------------------
+def asstr(s):
+ if isinstance(s, bytes):
+ return s.decode("latin1")
+ return str(s)
-def mmread(source, *, dup_op=None, name=None, row_begin=0, row_end=None, col_begin=0, col_end=None):
- """
- Read the contents of a Matrix Market filename or file into a new Matrix.
+# -----------------------------------------------------------------------------
- This uses `scipy.io.mmread`:
- https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.mmread.html
- For more information on the Matrix Market format, see:
- https://math.nist.gov/MatrixMarket/formats.html
- """
- from . import Matrix
-
- try:
- from scipy.sparse import coo_matrix # noqa
- except ImportError: # pragma: no cover
- raise ImportError("scipy is required to read Matrix Market files")
- array = MMFile().read(
- source, row_begin=row_begin, row_end=row_end, col_begin=col_begin, col_end=col_end
+class MMFile:
+ __slots__ = ("_rows", "_cols", "_entries", "_format", "_field", "_symmetry")
+
+ @property
+ def rows(self):
+ return self._rows
+
+ @property
+ def cols(self):
+ return self._cols
+
+ @property
+ def entries(self):
+ return self._entries
+
+ @property
+ def format(self):
+ return self._format
+
+ @property
+ def field(self):
+ return self._field
+
+ @property
+ def symmetry(self):
+ return self._symmetry
+
+ @property
+ def has_symmetry(self):
+ return self._symmetry in (
+ self.SYMMETRY_SYMMETRIC,
+ self.SYMMETRY_SKEW_SYMMETRIC,
+ self.SYMMETRY_HERMITIAN,
+ )
+
+ # format values
+ FORMAT_COORDINATE = "coordinate"
+ FORMAT_ARRAY = "array"
+
+ # field values
+ FIELD_INTEGER = "integer"
+ FIELD_UNSIGNED = "unsigned-integer"
+ FIELD_REAL = "real"
+ FIELD_COMPLEX = "complex"
+ FIELD_PATTERN = "pattern"
+ FIELD_VALUES = (FIELD_INTEGER, FIELD_UNSIGNED, FIELD_REAL, FIELD_COMPLEX, FIELD_PATTERN)
+
+ # symmetry values
+ SYMMETRY_GENERAL = "general"
+ SYMMETRY_SYMMETRIC = "symmetric"
+ SYMMETRY_SKEW_SYMMETRIC = "skew-symmetric"
+ SYMMETRY_HERMITIAN = "hermitian"
+ SYMMETRY_VALUES = (
+ SYMMETRY_GENERAL,
+ SYMMETRY_SYMMETRIC,
+ SYMMETRY_SKEW_SYMMETRIC,
+ SYMMETRY_HERMITIAN,
)
- if isinstance(array, coo_matrix):
- nrows, ncols = array.shape
- return Matrix.from_values(
- array.row, array.col, array.data, nrows=nrows, ncols=ncols, dup_op=dup_op, name=name
+
+ @classmethod
+ def info(self, source):
+ """
+ Return size, storage parameters from Matrix Market file-like 'source'.
+
+ Parameters
+ ----------
+ source : str or file-like
+ Matrix Market filename (extension .mtx) or open file-like object
+
+ Returns
+ -------
+ rows : int
+ Number of matrix rows.
+ cols : int
+ Number of matrix columns.
+ entries : int
+ Number of non-zero entries of a sparse matrix
+ or rows*cols for a dense matrix.
+ format : str
+ Either 'coordinate' or 'array'.
+ field : str
+ Either 'real', 'complex', 'pattern', or 'integer'.
+ symmetry : str
+ Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
+ """
+
+ stream, close_it = self._open(source)
+
+ try:
+
+ # read and validate header line
+ line = stream.readline()
+ mmid, matrix, format, field, symmetry = [asstr(part.strip()) for part in line.split()]
+ if not mmid.startswith("%%MatrixMarket"):
+ raise ValueError("source is not in Matrix Market format")
+ if not matrix.lower() == "matrix":
+ raise ValueError("Problem reading file header: " + line)
+
+ # http://math.nist.gov/MatrixMarket/formats.html
+ if format.lower() == "array":
+ format = self.FORMAT_ARRAY
+ elif format.lower() == "coordinate":
+ format = self.FORMAT_COORDINATE
+
+ # skip comments
+ # line.startswith('%')
+ while line and line[0] in ["%", 37]:
+ line = stream.readline()
+
+ # skip empty lines
+ while not line.strip():
+ line = stream.readline()
+
+ split_line = line.split()
+ if format == self.FORMAT_ARRAY:
+ if not len(split_line) == 2:
+ raise ValueError("Header line not of length 2: " + line.decode("ascii"))
+ rows, cols = map(int, split_line)
+ entries = rows * cols
+ else:
+ if not len(split_line) == 3:
+ raise ValueError("Header line not of length 3: " + line.decode("ascii"))
+ rows, cols, entries = map(int, split_line)
+
+ return (rows, cols, entries, format, field.lower(), symmetry.lower())
+
+ finally:
+ if close_it:
+ stream.close()
+
+ @staticmethod
+ def _open(filespec, mode="rb"):
+ """Return an open file stream for reading based on source.
+
+ If source is a file name, open it (after trying to find it with mtx and
+ gzipped mtx extensions). Otherwise, just return source.
+
+ Parameters
+ ----------
+ filespec : str or file-like
+ String giving file name or file-like object
+ mode : str, optional
+ Mode with which to open file, if `filespec` is a file name.
+
+ Returns
+ -------
+ fobj : file-like
+ Open file-like object.
+ close_it : bool
+ True if the calling function should close this file when done,
+ false otherwise.
+ """
+ # If 'filespec' is path-like (str, pathlib.Path, os.DirEntry, other class
+ # implementing a '__fspath__' method), try to convert it to str. If this
+ # fails by throwing a 'TypeError', assume it's an open file handle and
+ # return it as-is.
+ try:
+ filespec = os.fspath(filespec)
+ except TypeError:
+ return filespec, False
+
+ # 'filespec' is definitely a str now
+
+ # open for reading
+ if mode[0] == "r":
+
+ # determine filename plus extension
+ if not os.path.isfile(filespec):
+ if os.path.isfile(filespec + ".mtx"):
+ filespec = filespec + ".mtx"
+ elif os.path.isfile(filespec + ".mtx.gz"):
+ filespec = filespec + ".mtx.gz"
+ elif os.path.isfile(filespec + ".mtx.bz2"):
+ filespec = filespec + ".mtx.bz2"
+ # open filename
+ if filespec.endswith(".gz"):
+ import gzip
+
+ stream = gzip.open(filespec, mode)
+ elif filespec.endswith(".bz2"):
+ import bz2
+
+ stream = bz2.BZ2File(filespec, "rb")
+ else:
+ stream = open(filespec, mode)
+
+ # open for writing
+ else:
+ if filespec[-4:] != ".mtx":
+ filespec = filespec + ".mtx"
+ stream = open(filespec, mode)
+
+ return stream, True
+
+ # -------------------------------------------------------------------------
+ def _parse_header(self, stream):
+ rows, cols, entries, format, field, symmetry = self.__class__.info(stream)
+ self._init_attrs(
+ rows=rows, cols=cols, entries=entries, format=format, field=field, symmetry=symmetry
)
- # SS, SuiteSparse-specific: import_full
- return Matrix.ss.import_fullr(values=array, take_ownership=True, name=name)
+ # -------------------------------------------------------------------------
+ def _init_attrs(self, **kwargs):
+ """
+ Initialize each attributes with the corresponding keyword arg value
+ or a default of None
+ """
-# -----------------------------------------------------------------------------
+ attrs = self.__class__.__slots__
+ public_attrs = [attr[1:] for attr in attrs]
+ invalid_keys = set(kwargs.keys()) - set(public_attrs)
+
+ if invalid_keys:
+ raise ValueError(
+ """found %s invalid keyword arguments, please only
+ use %s"""
+ % (tuple(invalid_keys), public_attrs)
+ )
+ for attr in attrs:
+ setattr(self, attr, kwargs.get(attr[1:], None))
-class MMFile(mmio.MMFile):
+ # -------------------------------------------------------------------------
def get_data_begin(self, source):
"""
Reads the contents of a Matrix Market file-like 'source' into a matrix.
@@ -157,11 +354,13 @@ def get_data_begin(self, source):
stream.close()
# -------------------------------------------------------------------------
+
def _get_data_begin(self, stream):
_ = self.__class__.info(stream)
return stream.tell()
# -----------------------------------------------------------------------------
+
def read_part(self, source, line_start=None, line_stop=None, read_begin=None, read_end=None):
"""
Reads the contents of a Matrix Market file-like 'source' into a matrix.
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index 2261597..7efadfc 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -339,13 +339,8 @@ def _diag(self, k=0, dtype=None, chunks="auto"):
# locate first chunk containing diaagonal:
row_filter = (row_starts <= kdiag_row_start) & (kdiag_row_start < row_stops_)
col_filter = (col_starts <= kdiag_col_start) & (kdiag_col_start < col_stops_)
-<<<<<<< HEAD
- (I,) = row_blockid[row_filter]
- (J,) = col_blockid[col_filter]
-=======
(R,) = row_blockid[row_filter]
(C,) = col_blockid[col_filter]
->>>>>>> refs/heads/da_index
# follow k-diagonal through chunks while constructing dask graph:
# equation of diagonal: i = j - k
@@ -1010,7 +1005,8 @@ def _identity(chunk, keepdims=None, axis=None):
def _concatenate_files(chunk_files, keepdims=None, axis=None):
import os
import shutil
- from scipy.io.mmio import MMFile, mminfo
+ from .io import MMFile
+ from scipy.io import mminfo
chunk_files = chunk_files if type(chunk_files) is list else [chunk_files]
first_chunk_file, _, row_range_first, col_range_first = chunk_files[0]
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 6a742cc..20d9dd3 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -1,5 +1,3 @@
-from numbers import Number
-from functools import partial
import dask.array as da
import numpy as np
import grblas as gb
@@ -23,7 +21,6 @@
build_chunk_offsets_dask_array,
)
from grblas.exceptions import IndexOutOfBound
-from plotly.validators.streamtube import starts
class InnerVector(InnerBaseType):
From 2d8faead9b0c3f2f5048747aa936d715e9dad91d Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Sat, 19 Feb 2022 10:17:23 +0100
Subject: [PATCH 05/18] added flexible_partial()
---
dask_grblas/base.py | 211 ++++++++++++++++++++++++++++--
dask_grblas/utils.py | 6 +-
dask_grblas/vector.py | 51 ++++----
tests/from_grblas2/test_vector.py | 54 ++++++++
4 files changed, 285 insertions(+), 37 deletions(-)
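
A minimal sketch of the idea behind flexible_partial(): pre-bind some positional
slots, mark the remaining ones with a skip sentinel, and fill them in at call time.
Names and behaviour here are simplified; the version added in this patch also
normalises boolean/integer occupancy specs and handles keyword arguments.

from functools import wraps

class _Skip:
    def __repr__(self):
        return "skip"

skip = _Skip()

def flexible_partial(func, *bound):
    """Pre-bind positional slots; slots marked `skip` are supplied at call time."""
    @wraps(func)
    def wrapper(*late):
        late = list(late)
        args = [late.pop(0) if b is skip else b for b in bound]
        return func(*args, *late)
    return wrapper

def combine(a, b, c, d):
    return (a, b, c, d)

f = flexible_partial(combine, 1, skip, 3)
print(f(2, 4))   # (1, 2, 3, 4): 2 fills the skipped slot, 4 is appended after the bound ones
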
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 94bf605..80362fa 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -1,18 +1,24 @@
from numbers import Number
from functools import partial
+from reprlib import recursive_repr
import dask.array as da
import grblas as gb
import numpy as np
+from scipy.sparse import csr_matrix
from grblas.operator import UNKNOWN_OPCLASS, find_opclass, get_typed_op
from grblas.dtypes import lookup_dtype
from . import replace as replace_singleton
from .mask import Mask
-from .utils import package_args, package_kwargs, get_grblas_type, get_meta, np_dtype, wrap_inner
+from .utils import pack_args, pack_kwargs, get_grblas_type, get_meta, np_dtype, wrap_inner
_expect_type = gb.base._expect_type
+def is_type(arg_type, a):
+ return type(a) is arg_type
+
+
def _check_mask(mask, output=None):
if not isinstance(mask, Mask):
if isinstance(mask, BaseType):
@@ -409,12 +415,22 @@ def extract(self, func, packed_args, packed_kwargs, dtype, meta, *args, **kwargs
return DOnion(kernel, meta=meta)
@classmethod
- def extract_shared(
- cls, donions, func, packed_args, packed_kwargs, dtype, meta, *args, **kwargs
+ def joint_access(
+ cls, func, packed_args, packed_kwargs, dtype, meta
):
- donions = tuple(donion.kernel for donion in donions)
- func = partial(func, *packed_args, **packed_kwargs)
- kernel = da.map_blocks(func, *donions, *args, **kwargs, dtype=dtype, meta=meta)
+ """
+ Pass inner values of any DOnions in `packed_args` and/or `packed_kwargs` into `func`.
+
+ :func: Callable that can accept the contents of `packed_args` and/or `packed_kwargs`
+ as parameters
+ :packed_args: a list of positional arguments to `func`
+ :packed_kwargs: a dict of named arguments to `func`
+ """
+ omit_DOnion = is_DOnion
+ func = flexible_partial(func, omit_DOnion, *packed_args, **packed_kwargs)
+ donion_args = tuple(arg.kernel for arg in packed_args if is_DOnion(arg))
+ donion_kwargs = {k: v.kernel for (k, v) in packed_kwargs.items() if is_DOnion(v)}
+ kernel = da.map_blocks(func, *donion_args, **donion_kwargs, dtype=dtype, meta=meta)
return DOnion(kernel, meta=meta)
def __getattr__(self, item):
@@ -422,20 +438,199 @@ def __getattr__(self, item):
# TODO: lookup dtype and meta of attribute!!!
dtype = np_dtype(lookup_dtype(self.dtype))
meta = self._meta
- return self.extract(func, package_args(), package_kwargs(), dtype, meta)
+ return self.extract(func, pack_args(), pack_kwargs(), dtype, meta)
def getattr(self, name, packed_args, packed_kwargs, *args, **kwargs):
func = partial(DOnion.extractattr, name, packed_args, packed_kwargs)
# TODO: lookup dtype and meta of attribute!!!
dtype = np_dtype(lookup_dtype(self.dtype))
meta = self._meta
- return self.extract(func, package_args(), package_kwargs(), dtype, meta, *args, **kwargs)
+ return self.extract(func, pack_args(), pack_kwargs(), dtype, meta, *args, **kwargs)
@classmethod
def extractattr(cls, name, packed_args, packed_kwargs, x):
return getattr(x, name)(*packed_args, **packed_kwargs)
+is_DOnion = partial(is_type, DOnion)
+
+
+class skip:
+ def __repr__(self):
+ return "skip"
+ __str__ = __repr__
+ __reduce__ = __repr__ # This makes it pickle well!
+
+skip = skip()
+
+
+def normalize_occupancies(specs):
+ # Converts any valid `specs` to the form: [True, False, True, ...]
+ if isinstance(specs, Iterable):
+ try:
+ a = np.asarray(specs)
+ except Exception as e:
+ raise e
+
+ if a.ndim == 1:
+ if a.dtype is np.bool_:
+ return a, None
+
+ if a.dtype.kind in np.typecodes["AllInteger"]:
+ pos = csr_matrix(np.ones_like(a, dtype=np.bool_), a, np.array([0]))
+ return pos.toarray(), None
+
+ occupancy = np.array([False if x is skip else True for x in a])
+ args = [x for x in a if x is not skip]
+ return occupancy, args
+
+ if a.ndim == 2 and a.shape[1] == 2:
+ pos = a[:, 0]
+ pos = csr_matrix(np.ones_like(pos, dtype=np.bool_), pos, np.array([0]))
+ return pos.toarray(), a[:, 1]
+
+ raise ValueError(
+ 'specs should be an iterable of any of the following forms:\n'
+ '[True, False, True, ...]\n'
+ '[0, 2, 3, ...]\n'
+ '(skip, b, skip, skip, d, ...)'
+ '((1, b), (3, d), ...)'
+ )
+
+
+################################################################################
+### flexible_partial() argument application
+################################################################################
+
+# Purely functional, no descriptor behaviour
+class flexible_partial:
+ """New function with flexible partial application of the given
+ arguments and keywords.
+ """
+
+ __slots__ = "func", "args", "vacancies", "kwargs", "__dict__", "__weakref__"
+
+ def __new__(cls, func, specs, /, *args, **kwargs):
+ # Validate input parameters:
+ if not callable(func):
+ raise TypeError("the first argument must be callable")
+
+ occupancies, args_ = normalize_occupancies(specs)
+ args = args if args_ is None else args_
+ new_arg = iter(args)
+
+ nfilled = np.count_nonzero(occupancies)
+ nargs = len(args)
+ if nargs != nfilled:
+ raise ValueError(
+ f"Number ({nargs}) of given arguments does not match "
+ f"number ({nfilled}) of argument slots to be occupied."
+ )
+
+ self = super(flexible_partial, cls).__new__(cls)
+
+ if hasattr(func, "func") and hasattr(func, "vacancies") and hasattr(func, "kwargs"):
+ func = func.func
+ self.args = list(func.args)
+ kwargs = {**func.kwargs, **kwargs}
+
+ old_vacancy = iter(func.vacancies)
+ slot_status = iter(occupancies)
+
+ # step through old vacancies:
+ for occupy in occupancies:
+ try:
+ pos = next(old_vacancy)
+ except StopIteration:
+ # old vacancies now exhausted => reset occupancies and continue elsewhere:
+ occupancies = list(slot_status)
+ break
+ else:
+ next(slot_status)
+
+ if occupy:
+ # fill the vacancy:
+ self.args[pos] = next(new_arg)
+ else:
+ # record the vacancy:
+ self.vacancies.append(pos)
+ else:
+ self.args = []
+ self.vacancies = []
+
+ start = len(self.args)
+ for pos, occupied in enumerate(occupancies, start=start):
+ if occupied:
+ self.args.append(next(new_arg))
+ else:
+ # create a vacancy:
+ self.args.append(None)
+ self.vacancies.append(pos)
+
+ self.func = func
+ self.args = tuple(self.args)
+ self.vacancies = tuple(self.vacancies)
+ self.kwargs = kwargs
+ return self
+
+ def __call__(self, /, *args, **kwargs):
+ if len(args) < len(self.vacancies):
+ raise ValueError(f"Expected at least {len(self.vacancies)} positional arguments. "
+ f"Got {len(args)}.")
+
+ new_arg = iter(args)
+ self_args = list(self.args)
+ for pos in self.vacancies:
+ # fill all the vacancies
+ self_args[pos] = next(new_arg)
+
+ # append the remaining arguments and make the call:
+ self_args.extend(new_arg)
+ kwargs = {**self.kwargs, **kwargs}
+ return self.func(*self_args, **kwargs)
+
+ @recursive_repr()
+ def __repr__(self):
+ qualname = type(self).__qualname__
+ args = [repr(self.func)]
+ c = count()
+ args.extend("_" if next(c) in self.vacancies else repr(x) for x in self.args)
+ args.extend(f"{k}={v!r}" for (k, v) in self.kwargs.items())
+ if type(self).__module__ == "functools":
+ return f"functools.{qualname}({', '.join(args)})"
+ return f"{qualname}({', '.join(args)})"
+
+ def __reduce__(self):
+ return type(self), (self.func,), (self.func, self.args, self.vacancies,
+ self.kwargs or None, self.__dict__ or None)
+
+ def __setstate__(self, state):
+ if not isinstance(state, tuple):
+ raise TypeError("argument to __setstate__ must be a tuple")
+ if len(state) != 5:
+ raise TypeError(f"expected 5 items in state, got {len(state)}")
+ func, args, vacs, kwds, namespace = state
+ if (not callable(func) or not isinstance(args, tuple) or not isinstance(vacs, tuple) or
+ (kwds is not None and not isinstance(kwds, dict)) or
+ (namespace is not None and not isinstance(namespace, dict))):
+ raise TypeError("invalid flexible_partial state")
+
+ args = tuple(args) # just in case it's a subclass
+ vacs = tuple(vacs)
+ if kwds is None:
+ kwds = {}
+ elif type(kwds) is not dict: # XXX does it need to be *exactly* dict?
+ kwds = dict(kwds)
+ if namespace is None:
+ namespace = {}
+
+ self.__dict__ = namespace
+ self.func = func
+ self.args = args
+ self.vacancies = vacs
+ self.keywords = kwds
+
+
# Dask task functions
def _clear(x):
x.value.clear()
diff --git a/dask_grblas/utils.py b/dask_grblas/utils.py
index c51269d..964607b 100644
--- a/dask_grblas/utils.py
+++ b/dask_grblas/utils.py
@@ -1,3 +1,5 @@
+import inspect
+from itertools import count
import numpy as np
import pandas as pd
import dask.array as da
@@ -7,11 +9,11 @@
from .io import MMFile
-def package_args(*args):
+def pack_args(*args):
return args
-def package_kwargs(**kwargs):
+def pack_kwargs(**kwargs):
return kwargs
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 20d9dd3..367d7f4 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -6,13 +6,13 @@
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
-from .base import BaseType, InnerBaseType, _nvals, DOnion
+from .base import BaseType, InnerBaseType, _nvals, DOnion, flexible_partial, is_DOnion
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, Assigner
from .mask import StructuralMask, ValueMask
from ._ss.vector import ss
from .utils import (
- package_args,
- package_kwargs,
+ pack_args,
+ pack_kwargs,
np_dtype,
get_return_type,
wrap_inner,
@@ -117,24 +117,21 @@ def from_values(
dtype = lookup_dtype(values.dtype if dtype is None else dtype)
meta = gb.Vector.new(dtype)
meta_dtype = np_dtype(meta.dtype)
- packed_kwargs = package_kwargs(
- size=size, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
- )
- if type(indices) is DOnion and type(values) is DOnion:
- packed_args = package_args()
- return DOnion.extract_shared(
- (indices, values), Vector.from_values, packed_args, packed_kwargs, meta_dtype, meta
- )
- if type(indices) is DOnion:
- packed_args = package_args(values)
- return DOnion.extract_shared(
- (indices,), Vector.from_values, packed_args, packed_kwargs, meta_dtype, meta
+
+ # check for any DOnions:
+ donions = [True for arg in packed_args if is_DOnion(arg)]
+ donions += [True for (k, v) in packed_kwargs.items() if is_DOnion(v)]
+ if np.any(donions):
+ # dive into DOnion(s)
+ packed_args = pack_args(indices, values)
+ packed_kwargs = pack_kwargs(
+ size=size, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
)
- if type(values) is DOnion:
- packed_args = package_args(indices)
- return DOnion.extract_shared(
- (values,), Vector.from_values, packed_args, packed_kwargs, meta_dtype, meta
+ return DOnion.joint_access(
+ Vector.from_values, packed_args, packed_kwargs, meta_dtype, meta
)
+
+ # no DOnions
if type(indices) is da.Array and type(values) is da.Array:
np_idtype_ = np_dtype(lookup_dtype(indices.dtype))
if size is not None:
@@ -147,8 +144,8 @@ def from_values(
# used to determine the size of the Vector to be returned.
# But since we do not want to compute anything just now,
# we instead create a "DOnion" (dask onion) object
- packed_args = package_args(indices, values)
- packed_kwargs = package_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
+ packed_args = pack_args(indices, values)
+ packed_kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
donion = DOnion.sprout(size, Vector.from_values, meta, packed_args, packed_kwargs)
return Vector(donion, meta=meta)
@@ -587,21 +584,21 @@ def apply(func, dtype, meta, x):
return da.map_blocks(func, x, dtype=dtype, meta=meta)
dtype = self.dtype if dtype is None else dtype
- packed_args = package_args(x, starts, stops, dtype, chunks)
- packed_kwargs = package_kwargs()
+ packed_args = pack_args(x, starts, stops, dtype, chunks)
+ packed_kwargs = pack_kwargs()
meta = np.array([])
iv_donion = DOnion.sprout(nnz, _to_values, meta, packed_args, packed_kwargs)
meta_i, meta_v = self._meta.to_values(dtype)
dtype_i = np_dtype(lookup_dtype(meta_i.dtype))
- packed_args = package_args(_get_indices, dtype_i, meta_i)
- packed_kwargs = package_kwargs()
+ packed_args = pack_args(_get_indices, dtype_i, meta_i)
+ packed_kwargs = pack_kwargs()
indices = iv_donion.extract(apply, packed_args, packed_kwargs, dtype_i, meta_i)
dtype_v = np_dtype(lookup_dtype(meta_v.dtype))
- packed_args = package_args(_get_values, dtype_v, meta_v)
- packed_kwargs = package_kwargs()
+ packed_args = pack_args(_get_values, dtype_v, meta_v)
+ packed_kwargs = pack_kwargs()
values = iv_donion.extract(apply, packed_args, packed_kwargs, dtype_v, meta_v)
return indices, values
diff --git a/tests/from_grblas2/test_vector.py b/tests/from_grblas2/test_vector.py
index c07afc3..abb9863 100644
--- a/tests/from_grblas2/test_vector.py
+++ b/tests/from_grblas2/test_vector.py
@@ -177,6 +177,60 @@ def test_from_values_dask():
Vector.from_values(indices, values).compute()
+def test_from_values_DOnion(v):
+ indices = da.from_array(np.array([0, 1, 3]))
+ values = da.from_array(np.array([True, False, True]))
+ u = Vector.from_values(indices, values)
+ indices, values = u.to_values()
+ v = Vector.from_values(indices, values)
+ assert v.size == 4
+ assert v.nvals == 3
+ assert v.dtype == bool
+ values = da.from_array(np.array([12.3, 12.4, 12.5]))
+ u2 = Vector.from_values(indices, values, size=17)
+ assert u2.size == 17
+ assert u2.nvals == 3
+ assert u2.dtype == float
+ indices = da.from_array(np.array([0, 1, 1]))
+ values = da.from_array(np.array([1, 2, 3], dtype=np.int64))
+ u3 = Vector.from_values(indices, values, size=10, dup_op=binary.times)
+ assert u3.size == 10
+ assert u3.nvals == 2 # duplicates were combined
+ assert u3.dtype == int
+ assert u3[1].value == 6 # 2*3
+ values = da.from_array(np.array([True, True, True]))
+ with pytest.raises(ValueError, match="Duplicate indices found"):
+ # Duplicate indices requires a dup_op
+ Vector.from_values(indices, values).compute()
+ empty_da = da.from_array(np.array([]))
+ with pytest.raises(ValueError, match="No indices provided. Unable to infer size."):
+ Vector.from_values(empty_da, empty_da).compute()
+
+ # Changed: Assume empty value is float64 (like numpy)
+ # with pytest.raises(ValueError, match="No values provided. Unable to determine type"):
+ w = Vector.from_values(empty_da, empty_da, size=10)
+ assert w.size == 10
+ assert w.nvals == 0
+ assert w.dtype == dtypes.FP64
+
+ with pytest.raises(ValueError, match="No indices provided. Unable to infer size"):
+ Vector.from_values(empty_da, empty_da, dtype=dtypes.INT64)
+ u4 = Vector.from_values(empty_da, empty_da, size=10, dtype=dtypes.INT64)
+ u5 = Vector.new(dtypes.INT64, size=10)
+ assert u4.isequal(u5, check_dtype=True)
+
+ # we check index dtype if given dask array
+ indices = da.from_array(np.array([1.2, 3.4]))
+ values = da.from_array(np.array([1, 2]))
+ with pytest.raises(ValueError, match="indices must be integers, not float64"):
+ Vector.from_values(indices, values).compute()
+
+ # mis-matched sizes
+ indices = da.from_array(np.array([0]))
+ with pytest.raises(ValueError, match="`indices` and `values` lengths must match"):
+ Vector.from_values(indices, values).compute()
+
+
def test_from_values_scalar():
u = Vector.from_values([0, 1, 3], 7)
assert u.size == 4
From d1ef311e4c512b95903fa375deb8f915e718ae0a Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Sat, 19 Feb 2022 16:25:00 +0100
Subject: [PATCH 06/18] added tests for flexible_partial()
---
dask_grblas/base.py | 186 ++--------------------------------
dask_grblas/functools.py | 213 +++++++++++++++++++++++++++++++++++++++
dask_grblas/vector.py | 2 +-
tests/test_functools.py | 51 ++++++++++
4 files changed, 271 insertions(+), 181 deletions(-)
create mode 100644 dask_grblas/functools.py
create mode 100644 tests/test_functools.py
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 80362fa..1653d90 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -1,15 +1,14 @@
from numbers import Number
from functools import partial
-from reprlib import recursive_repr
import dask.array as da
import grblas as gb
import numpy as np
-from scipy.sparse import csr_matrix
from grblas.operator import UNKNOWN_OPCLASS, find_opclass, get_typed_op
from grblas.dtypes import lookup_dtype
from . import replace as replace_singleton
from .mask import Mask
+from .functools import flexible_partial
from .utils import pack_args, pack_kwargs, get_grblas_type, get_meta, np_dtype, wrap_inner
_expect_type = gb.base._expect_type
@@ -426,8 +425,11 @@ def joint_access(
:packed_args: a list of positional arguments to `func`
:packed_kwargs: a dict of named arguments to `func`
"""
- omit_DOnion = is_DOnion
- func = flexible_partial(func, omit_DOnion, *packed_args, **packed_kwargs)
+ where_non_DOnion = [False if is_DOnion(arg) else True for arg in packed_args]
+ non_Donions = [arg for arg in packed_args if not is_DOnion(arg)]
+ non_DOnion_kwargs = {k: v for (k, v) in packed_kwargs.items() if not is_DOnion(v)}
+ func = flexible_partial(func, where_non_DOnion, *non_Donions, **non_DOnion_kwargs)
+
donion_args = tuple(arg.kernel for arg in packed_args if is_DOnion(arg))
donion_kwargs = {k: v.kernel for (k, v) in packed_kwargs.items() if is_DOnion(v)}
kernel = da.map_blocks(func, *donion_args, **donion_kwargs, dtype=dtype, meta=meta)
@@ -455,182 +457,6 @@ def extractattr(cls, name, packed_args, packed_kwargs, x):
is_DOnion = partial(is_type, DOnion)
-class skip:
- def __repr__(self):
- return "skip"
- __str__ = __repr__
- __reduce__ = __repr__ # This makes it pickle well!
-
-skip = skip()
-
-
-def normalize_occupancies(specs):
- # Converts any valid `specs` to the form: [True, False, True, ...]
- if isinstance(specs, Iterable):
- try:
- a = np.asarray(specs)
- except Exception as e:
- raise e
-
- if a.ndim == 1:
- if a.dtype is np.bool_:
- return a, None
-
- if a.dtype.kind in np.typecodes["AllInteger"]:
- pos = csr_matrix(np.ones_like(a, dtype=np.bool_), a, np.array([0]))
- return pos.toarray(), None
-
- occupancy = np.array([False if x is skip else True for x in a])
- args = [x for x in a if x is not skip]
- return occupancy, args
-
- if a.ndim == 2 and a.shape[1] == 2:
- pos = a[:, 0]
- pos = csr_matrix(np.ones_like(pos, dtype=np.bool_), pos, np.array([0]))
- return pos.toarray(), a[:, 1]
-
- raise ValueError(
- 'specs should be an iterable of any of the following forms:\n'
- '[True, False, True, ...]\n'
- '[0, 2, 3, ...]\n'
- '(skip, b, skip, skip, d, ...)'
- '((1, b), (3, d), ...)'
- )
-
-
-################################################################################
-### flexible_partial() argument application
-################################################################################
-
-# Purely functional, no descriptor behaviour
-class flexible_partial:
- """New function with flexible partial application of the given
- arguments and keywords.
- """
-
- __slots__ = "func", "args", "vacancies", "kwargs", "__dict__", "__weakref__"
-
- def __new__(cls, func, specs, /, *args, **kwargs):
- # Validate input parameters:
- if not callable(func):
- raise TypeError("the first argument must be callable")
-
- occupancies, args_ = normalize_occupancies(specs)
- args = args if args_ is None else args_
- new_arg = iter(args)
-
- nfilled = np.count_nonzero(occupancies)
- nargs = len(args)
- if nargs != nfilled:
- raise ValueError(
- f"Number ({nargs}) of given arguments does not match "
- f"number ({nfilled}) of argument slots to be occupied."
- )
-
- self = super(flexible_partial, cls).__new__(cls)
-
- if hasattr(func, "func") and hasattr(func, "vacancies") and hasattr(func, "kwargs"):
- func = func.func
- self.args = list(func.args)
- kwargs = {**func.kwargs, **kwargs}
-
- old_vacancy = iter(func.vacancies)
- slot_status = iter(occupancies)
-
- # step through old vacancies:
- for occupy in occupancies:
- try:
- pos = next(old_vacancy)
- except StopIteration:
- # old vacancies now exhausted => reset occupancies and continue elsewhere:
- occupancies = list(slot_status)
- break
- else:
- next(slot_status)
-
- if occupy:
- # fill the vacancy:
- self.args[pos] = next(new_arg)
- else:
- # record the vacancy:
- self.vacancies.append(pos)
- else:
- self.args = []
- self.vacancies = []
-
- start = len(self.args)
- for pos, occupied in enumerate(occupancies, start=start):
- if occupied:
- self.args.append(next(new_arg))
- else:
- # create a vacancy:
- self.args.append(None)
- self.vacancies.append(pos)
-
- self.func = func
- self.args = tuple(self.args)
- self.vacancies = tuple(self.vacancies)
- self.kwargs = kwargs
- return self
-
- def __call__(self, /, *args, **kwargs):
- if len(args) < len(self.vacancies):
- raise ValueError(f"Expected at least {len(self.vacancies)} positional arguments. "
- f"Got {len(args)}.")
-
- new_arg = iter(args)
- self_args = list(self.args)
- for pos in self.vacancies:
- # fill all the vacancies
- self_args[pos] = next(new_arg)
-
- # append the remaining arguments and make the call:
- self_args.extend(new_arg)
- kwargs = {**self.kwargs, **kwargs}
- return self.func(*self_args, **kwargs)
-
- @recursive_repr()
- def __repr__(self):
- qualname = type(self).__qualname__
- args = [repr(self.func)]
- c = count()
- args.extend("_" if next(c) in self.vacancies else repr(x) for x in self.args)
- args.extend(f"{k}={v!r}" for (k, v) in self.kwargs.items())
- if type(self).__module__ == "functools":
- return f"functools.{qualname}({', '.join(args)})"
- return f"{qualname}({', '.join(args)})"
-
- def __reduce__(self):
- return type(self), (self.func,), (self.func, self.args, self.vacancies,
- self.kwargs or None, self.__dict__ or None)
-
- def __setstate__(self, state):
- if not isinstance(state, tuple):
- raise TypeError("argument to __setstate__ must be a tuple")
- if len(state) != 5:
- raise TypeError(f"expected 5 items in state, got {len(state)}")
- func, args, vacs, kwds, namespace = state
- if (not callable(func) or not isinstance(args, tuple) or not isinstance(vacs, tuple) or
- (kwds is not None and not isinstance(kwds, dict)) or
- (namespace is not None and not isinstance(namespace, dict))):
- raise TypeError("invalid flexible_partial state")
-
- args = tuple(args) # just in case it's a subclass
- vacs = tuple(vacs)
- if kwds is None:
- kwds = {}
- elif type(kwds) is not dict: # XXX does it need to be *exactly* dict?
- kwds = dict(kwds)
- if namespace is None:
- namespace = {}
-
- self.__dict__ = namespace
- self.func = func
- self.args = args
- self.vacancies = vacs
- self.keywords = kwds
-
-
# Dask task functions
def _clear(x):
x.value.clear()
diff --git a/dask_grblas/functools.py b/dask_grblas/functools.py
new file mode 100644
index 0000000..7274a99
--- /dev/null
+++ b/dask_grblas/functools.py
@@ -0,0 +1,213 @@
+import numpy as np
+
+from reprlib import recursive_repr
+from collections.abc import Iterable
+from itertools import count
+from scipy.sparse import csr_matrix
+
+
+class skip:
+ def __repr__(self):
+ return "skip"
+ __str__ = __repr__
+ __reduce__ = __repr__ # This makes it pickle well!
+
+skip = skip()
+
+
+def normalize_occupancies(specs):
+ """
+ Convert any valid `specs` into the form: [True, False, True, ...]
+ """
+ error_msg = (
+ '`specs` should be array-like with structure matching any of the following forms:\n'
+ '[False, True, False, False, True, ...]\n'
+ '[1, 4, ...]\n'
+ '(skip, b, skip, skip, d, ...)\n'
+ '((1, b), (4, d), ...)'
+ )
+ if isinstance(specs, Iterable):
+ try:
+ a = np.asarray(specs)
+ except Exception:
+ raise ValueError(error_msg)
+
+ if a.ndim == 1:
+ if a.dtype == np.bool_:
+ # specs = [False, True, False, False, True, ...]
+ return a, None
+
+ if a.dtype.kind in np.typecodes["AllInteger"]:
+ # specs = (1, 4, ...)
+ data = np.ones_like(a, dtype=np.bool_)
+ indices = a
+ indptr = np.array([0, data.size])
+ occupancy = csr_matrix((data, indices, indptr)).toarray().squeeze()
+ return occupancy, None
+
+ # specs = (skip, b, skip, skip, d, ...)
+ occupancy = np.array([False if x is skip else True for x in a])
+ args = [x for x in a if x is not skip]
+ return occupancy, args
+
+ if a.ndim == 2 and a.shape[1] == 2:
+ # specs = [(1, b), (4, d), ...]
+ indices = np.asarray(a[:, 0], dtype=int)
+ args = a[:, 1]
+
+ data = np.ones_like(indices, dtype=np.bool_)
+ indptr = np.array([0, data.size])
+ occupancy = csr_matrix((data, indices, indptr)).toarray().squeeze()
+ return occupancy, args
+
+ raise ValueError(error_msg)
+
+ raise TypeError(error_msg)
+
+
+################################################################################
+### flexible_partial() argument application
+################################################################################
+
+# Purely functional, no descriptor behaviour
+class flexible_partial:
+ """New function with flexible partial application of the given
+ arguments and keywords. (Any argument slot of the given function
+ may be occupied.)
+ """
+
+ __slots__ = "base_func", "args", "vacancies", "kwargs", "__dict__", "__weakref__"
+
+ def __new__(cls, func, specs, /, *args, **kwargs):
+ # Validate input parameters:
+ if not callable(func):
+ raise TypeError("the first argument must be callable")
+
+ occupancies, args_ = normalize_occupancies(specs)
+ args = args if args_ is None else args_
+ new_arg = iter(args)
+
+ nfilled = np.count_nonzero(occupancies)
+ nargs = len(args)
+ if nargs != nfilled:
+ raise ValueError(
+ f"Number ({nargs}) of given arguments does not match "
+ f"number ({nfilled}) of argument slots to be occupied."
+ )
+
+ self = super(flexible_partial, cls).__new__(cls)
+
+ _func = None
+ if hasattr(func, "base_func"):
+ # `func` is `flexible_partial`
+ _func = func.base_func
+ elif hasattr(func, "func"):
+ # `func` is `partial`
+ _func = func.func
+ if _func and hasattr(func, "args") and hasattr(func, "kwargs"):
+ self.args = list(func.args)
+ kwargs = {**func.kwargs, **kwargs}
+
+ if hasattr(func, "vacancies"):
+ old_vacancy = iter(func.vacancies)
+ else:
+ old_vacancy = iter([])
+
+ func = _func
+
+ # step through old vacancies:
+ occupancy = iter(occupancies)
+ self.vacancies = []
+ for occupy in occupancies:
+ try:
+ pos = next(old_vacancy)
+ except StopIteration:
+ # inner vacancies now exhausted => continue elsewhere:
+ break
+ else:
+ next(occupancy)
+
+ if occupy:
+ # fill the vacancy:
+ self.args[pos] = next(new_arg)
+ else:
+ # record the vacancy:
+ self.vacancies.append(pos)
+
+ # reset to remaining occupancies
+ occupancies = list(occupancy)
+ else:
+ self.args = []
+ self.vacancies = []
+
+ start = len(self.args)
+ for pos, occupied in enumerate(occupancies, start=start):
+ if occupied:
+ self.args.append(next(new_arg))
+ else:
+ # create a vacancy:
+ self.args.append(None)
+ self.vacancies.append(pos)
+
+ self.base_func = func
+ self.args = tuple(self.args)
+ self.vacancies = tuple(self.vacancies)
+ self.kwargs = kwargs
+ return self
+
+ def __call__(self, /, *args, **kwargs):
+ if len(args) < len(self.vacancies):
+ raise ValueError(f"Expected at least {len(self.vacancies)} positional arguments. "
+ f"Got {len(args)}.")
+
+ new_arg = iter(args)
+ self_args = list(self.args)
+ for pos in self.vacancies:
+ # fill all the vacancies
+ self_args[pos] = next(new_arg)
+
+ # append the remaining arguments and make the call:
+ self_args.extend(new_arg)
+ kwargs = {**self.kwargs, **kwargs}
+ return self.base_func(*self_args, **kwargs)
+
+ @recursive_repr()
+ def __repr__(self):
+ qualname = type(self).__qualname__
+ args = [repr(self.base_func)]
+ c = count()
+ args.extend("_" if next(c) in self.vacancies else repr(x) for x in self.args)
+ args.extend(f"{k}={v!r}" for (k, v) in self.kwargs.items())
+ if type(self).__module__ == "functools":
+ return f"functools.{qualname}({', '.join(args)})"
+ return f"{qualname}({', '.join(args)})"
+
+ def __reduce__(self):
+ return type(self), (self.base_func,), (self.base_func, self.args, self.vacancies,
+ self.kwargs or None, self.__dict__ or None)
+
+ def __setstate__(self, state):
+ if not isinstance(state, tuple):
+ raise TypeError("argument to __setstate__ must be a tuple")
+ if len(state) != 5:
+ raise TypeError(f"expected 5 items in state, got {len(state)}")
+ func, args, vacs, kwds, namespace = state
+ if (not callable(func) or not isinstance(args, tuple) or not isinstance(vacs, tuple) or
+ (kwds is not None and not isinstance(kwds, dict)) or
+ (namespace is not None and not isinstance(namespace, dict))):
+ raise TypeError("invalid flexible_partial state")
+
+ args = tuple(args) # just in case it's a subclass
+ vacs = tuple(vacs)
+ if kwds is None:
+ kwds = {}
+ elif type(kwds) is not dict: # XXX does it need to be *exactly* dict?
+ kwds = dict(kwds)
+ if namespace is None:
+ namespace = {}
+
+ self.__dict__ = namespace
+ self.base_func = func
+ self.args = args
+ self.vacancies = vacs
+ self.keywords = kwds
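As an illustrative sketch (not taken from the patch itself), the four `specs` forms that
normalize_occupancies above accepts are meant to be equivalent; each call below binds slots
b and e of a five-argument function and leaves a, c and d vacant:

    from dask_grblas.functools import flexible_partial, skip

    def f(a, b, c, d, e):
        return a, b, c, d, e

    equivalent_parts = (
        flexible_partial(f, [False, True, False, False, True], 2, 5),  # boolean mask + args
        flexible_partial(f, [1, 4], 2, 5),                             # occupied positions + args
        flexible_partial(f, (skip, 2, skip, skip, 5)),                 # skip sentinels, args inline
        flexible_partial(f, [(1, 2), (4, 5)]),                         # (position, arg) pairs
    )
    for part in equivalent_parts:
        assert part(1, 3, 4) == (1, 2, 3, 4, 5)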
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 367d7f4..1197073 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -6,7 +6,7 @@
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
-from .base import BaseType, InnerBaseType, _nvals, DOnion, flexible_partial, is_DOnion
+from .base import BaseType, InnerBaseType, _nvals, DOnion, is_DOnion
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, Assigner
from .mask import StructuralMask, ValueMask
from ._ss.vector import ss
diff --git a/tests/test_functools.py b/tests/test_functools.py
new file mode 100644
index 0000000..77036fe
--- /dev/null
+++ b/tests/test_functools.py
@@ -0,0 +1,51 @@
+import pytest
+from dask_grblas.functools import flexible_partial, skip
+
+
+def func(a, b, c, d, e, f):
+ return a, b, c, d, e, f
+
+
+def funk(a, b, c, d, e, f, ka='a', kb='b', kc='c'):
+ return a, b, c, d, e, f, ka, kb, kc
+
+
+@pytest.mark.parametrize("specs", [
+ [[False, True, False, False, True], [True, False, True]],
+ [[1, 4], [0, 2]],
+])
+def test_flexible_partial_specs_sans_args(specs):
+ specs0, specs1 = specs
+ with pytest.raises(ValueError):
+ _ = flexible_partial(func, specs0, 1, 2, 3, 4, 5)
+ with pytest.raises(ValueError):
+ _ = flexible_partial(func, specs0, 1)
+
+ # without keyword arguments:
+ part_func = flexible_partial(func, specs0, 2, 5)
+ result = part_func(1, 3, 4, 6)
+ assert result == (1, 2, 3, 4, 5, 6)
+
+ # with keyword arguments:
+ part_funk = flexible_partial(funk, specs0, 2, 5, kb='B')
+ result = part_funk(1, 3, 4, 6, kc='C')
+ assert result == (1, 2, 3, 4, 5, 6, 'a', 'B', 'C')
+
+ # apply a 2nd flexible_partial on first flexible_partial:
+ part_funk2 = flexible_partial(part_funk, specs1, 1, 4, ka='A')
+ result = part_funk2(3, 6, kc='C')
+ assert result == (1, 2, 3, 4, 5, 6, 'A', 'B', 'C')
+
+
+@pytest.mark.parametrize("specs", [
+ (skip, 2, skip, skip, 5),
+ [(1, 2), (4, 5)],
+])
+def test_flexible_partial_specs_with_args(specs):
+ part_func = flexible_partial(func, specs)
+ result = part_func(1, 3, 4, 6)
+ assert result == (1, 2, 3, 4, 5, 6)
+
+ part_funk = flexible_partial(funk, specs, 2, 5, kb='B')
+ result = part_funk(1, 3, 4, 6, kc='C')
+ assert result == (1, 2, 3, 4, 5, 6, 'a', 'B', 'C')
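The pattern joint_access relies on can be pictured with plain dask arrays: bind the
already-known (non-DOnion) arguments with flexible_partial, leave the delayed slot vacant,
and let da.map_blocks fill that slot with the computed block. A rough sketch of this idea
(combine and y_kernel are made-up stand-ins for the wrapped function and a DOnion kernel):

    import numpy as np
    import dask.array as da
    from dask_grblas.functools import flexible_partial

    def combine(x, y, scale=1):
        return (x + y) * scale

    # x and scale are known up front; the slot for the delayed y stays vacant:
    bound = flexible_partial(combine, [True, False], 3, scale=10)

    # a 0-d dask array standing in for a DOnion kernel:
    y_kernel = da.from_array(np.array(2))
    out = da.map_blocks(bound, y_kernel, dtype=np.int64, meta=np.array(0, dtype=np.int64))
    assert out.compute() == 50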
From f8d49510ae883afde3df42b220d4600301a8427e Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Sat, 19 Feb 2022 16:33:33 +0100
Subject: [PATCH 07/18] ran `black .` and `flake8 .`
---
dask_grblas/base.py | 8 +++----
dask_grblas/functools.py | 41 ++++++++++++++++++++++-------------
dask_grblas/utils.py | 2 --
dask_grblas/vector.py | 10 ++++-----
tests/test_functools.py | 46 +++++++++++++++++++++++-----------------
5 files changed, 60 insertions(+), 47 deletions(-)
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 1653d90..27816ac 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -414,14 +414,12 @@ def extract(self, func, packed_args, packed_kwargs, dtype, meta, *args, **kwargs
return DOnion(kernel, meta=meta)
@classmethod
- def joint_access(
- cls, func, packed_args, packed_kwargs, dtype, meta
- ):
+ def joint_access(cls, func, packed_args, packed_kwargs, dtype, meta):
"""
Pass inner values of any DOnions in `packed_args` and/or `packed_kwargs` into `func`.
-
+
:func: Callable that can accept the contents of `packed_args` and/or `packed_kwargs`
- as parameters
+ as parameters
:packed_args: a list of positional arguments to `func`
:packed_kwargs: a dict of named arguments to `func`
"""
diff --git a/dask_grblas/functools.py b/dask_grblas/functools.py
index 7274a99..fea686b 100644
--- a/dask_grblas/functools.py
+++ b/dask_grblas/functools.py
@@ -9,9 +9,11 @@
class skip:
def __repr__(self):
return "skip"
+
__str__ = __repr__
__reduce__ = __repr__ # This makes it pickle well!
+
skip = skip()
@@ -20,11 +22,11 @@ def normalize_occupancies(specs):
Convert any valid `specs` into the form: [True, False, True, ...]
"""
error_msg = (
- '`specs` should be array-like with structure matching any of the following forms:\n'
- '[False, True, False, False, True, ...]\n'
- '[1, 4, ...]\n'
- '(skip, b, skip, skip, d, ...)\n'
- '((1, b), (4, d), ...)'
+ "`specs` should be array-like with structure matching any of the following forms:\n"
+ "[False, True, False, False, True, ...]\n"
+ "[1, 4, ...]\n"
+ "(skip, b, skip, skip, d, ...)\n"
+ "((1, b), (4, d), ...)"
)
if isinstance(specs, Iterable):
try:
@@ -66,7 +68,7 @@ def normalize_occupancies(specs):
################################################################################
-### flexible_partial() argument application
+### flexible_partial() argument application # noqa
################################################################################
# Purely functional, no descriptor behaviour
@@ -157,8 +159,10 @@ def __new__(cls, func, specs, /, *args, **kwargs):
def __call__(self, /, *args, **kwargs):
if len(args) < len(self.vacancies):
- raise ValueError(f"Expected at least {len(self.vacancies)} positional arguments. "
- f"Got {len(args)}.")
+ raise ValueError(
+ f"Expected at least {len(self.vacancies)} positional arguments. "
+ f"Got {len(args)}."
+ )
new_arg = iter(args)
self_args = list(self.args)
@@ -183,8 +187,11 @@ def __repr__(self):
return f"{qualname}({', '.join(args)})"
def __reduce__(self):
- return type(self), (self.base_func,), (self.base_func, self.args, self.vacancies,
- self.kwargs or None, self.__dict__ or None)
+ return (
+ type(self),
+ (self.base_func,),
+ (self.base_func, self.args, self.vacancies, self.kwargs or None, self.__dict__ or None),
+ )
def __setstate__(self, state):
if not isinstance(state, tuple):
@@ -192,16 +199,20 @@ def __setstate__(self, state):
if len(state) != 5:
raise TypeError(f"expected 5 items in state, got {len(state)}")
func, args, vacs, kwds, namespace = state
- if (not callable(func) or not isinstance(args, tuple) or not isinstance(vacs, tuple) or
- (kwds is not None and not isinstance(kwds, dict)) or
- (namespace is not None and not isinstance(namespace, dict))):
+ if (
+ not callable(func)
+ or not isinstance(args, tuple)
+ or not isinstance(vacs, tuple)
+ or (kwds is not None and not isinstance(kwds, dict))
+ or (namespace is not None and not isinstance(namespace, dict))
+ ):
raise TypeError("invalid flexible_partial state")
- args = tuple(args) # just in case it's a subclass
+ args = tuple(args) # just in case it's a subclass
vacs = tuple(vacs)
if kwds is None:
kwds = {}
- elif type(kwds) is not dict: # XXX does it need to be *exactly* dict?
+ elif type(kwds) is not dict: # XXX does it need to be *exactly* dict?
kwds = dict(kwds)
if namespace is None:
namespace = {}
diff --git a/dask_grblas/utils.py b/dask_grblas/utils.py
index 964607b..01b929d 100644
--- a/dask_grblas/utils.py
+++ b/dask_grblas/utils.py
@@ -1,5 +1,3 @@
-import inspect
-from itertools import count
import numpy as np
import pandas as pd
import dask.array as da
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 1197073..86953c0 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -119,14 +119,14 @@ def from_values(
meta_dtype = np_dtype(meta.dtype)
# check for any DOnions:
+ packed_args = pack_args(indices, values)
+ packed_kwargs = pack_kwargs(
+ size=size, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
+ )
donions = [True for arg in packed_args if is_DOnion(arg)]
donions += [True for (k, v) in packed_kwargs.items() if is_DOnion(v)]
if np.any(donions):
- # dive into DOnion(s)
- packed_args = pack_args(indices, values)
- packed_kwargs = pack_kwargs(
- size=size, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
- )
+ # dive into DOnion(s):
return DOnion.joint_access(
Vector.from_values, packed_args, packed_kwargs, meta_dtype, meta
)
diff --git a/tests/test_functools.py b/tests/test_functools.py
index 77036fe..2ab02f0 100644
--- a/tests/test_functools.py
+++ b/tests/test_functools.py
@@ -6,14 +6,17 @@ def func(a, b, c, d, e, f):
return a, b, c, d, e, f
-def funk(a, b, c, d, e, f, ka='a', kb='b', kc='c'):
+def funk(a, b, c, d, e, f, ka="a", kb="b", kc="c"):
return a, b, c, d, e, f, ka, kb, kc
-@pytest.mark.parametrize("specs", [
- [[False, True, False, False, True], [True, False, True]],
- [[1, 4], [0, 2]],
-])
+@pytest.mark.parametrize(
+ "specs",
+ [
+ [[False, True, False, False, True], [True, False, True]],
+ [[1, 4], [0, 2]],
+ ],
+)
def test_flexible_partial_specs_sans_args(specs):
specs0, specs1 = specs
with pytest.raises(ValueError):
@@ -27,25 +30,28 @@ def test_flexible_partial_specs_sans_args(specs):
assert result == (1, 2, 3, 4, 5, 6)
# with keyword arguments:
- part_funk = flexible_partial(funk, specs0, 2, 5, kb='B')
- result = part_funk(1, 3, 4, 6, kc='C')
- assert result == (1, 2, 3, 4, 5, 6, 'a', 'B', 'C')
+ part_funk = flexible_partial(funk, specs0, 2, 5, kb="B")
+ result = part_funk(1, 3, 4, 6, kc="C")
+ assert result == (1, 2, 3, 4, 5, 6, "a", "B", "C")
# apply a 2nd flexible_partial on first flexible_partial:
- part_funk2 = flexible_partial(part_funk, specs1, 1, 4, ka='A')
- result = part_funk2(3, 6, kc='C')
- assert result == (1, 2, 3, 4, 5, 6, 'A', 'B', 'C')
-
-
-@pytest.mark.parametrize("specs", [
- (skip, 2, skip, skip, 5),
- [(1, 2), (4, 5)],
-])
+ part_funk2 = flexible_partial(part_funk, specs1, 1, 4, ka="A")
+ result = part_funk2(3, 6, kc="C")
+ assert result == (1, 2, 3, 4, 5, 6, "A", "B", "C")
+
+
+@pytest.mark.parametrize(
+ "specs",
+ [
+ (skip, 2, skip, skip, 5),
+ [(1, 2), (4, 5)],
+ ],
+)
def test_flexible_partial_specs_with_args(specs):
part_func = flexible_partial(func, specs)
result = part_func(1, 3, 4, 6)
assert result == (1, 2, 3, 4, 5, 6)
- part_funk = flexible_partial(funk, specs, 2, 5, kb='B')
- result = part_funk(1, 3, 4, 6, kc='C')
- assert result == (1, 2, 3, 4, 5, 6, 'a', 'B', 'C')
+ part_funk = flexible_partial(funk, specs, 2, 5, kb="B")
+ result = part_funk(1, 3, 4, 6, kc="C")
+ assert result == (1, 2, 3, 4, 5, 6, "a", "B", "C")
From ef40cd1023635e9fca2403f6c5e39c794978506b Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Mon, 21 Feb 2022 12:16:14 +0100
Subject: [PATCH 08/18] cleaned-up flexible_partial
---
dask_grblas/base.py | 96 ++++++++-------
dask_grblas/functools.py | 188 +++++-------------------------
dask_grblas/vector.py | 80 ++++++-------
tests/from_grblas2/test_vector.py | 10 ++
tests/test_functools.py | 52 +++------
5 files changed, 138 insertions(+), 288 deletions(-)
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 27816ac..9239c25 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -4,12 +4,11 @@
import grblas as gb
import numpy as np
from grblas.operator import UNKNOWN_OPCLASS, find_opclass, get_typed_op
-from grblas.dtypes import lookup_dtype
from . import replace as replace_singleton
from .mask import Mask
-from .functools import flexible_partial
-from .utils import pack_args, pack_kwargs, get_grblas_type, get_meta, np_dtype, wrap_inner
+from .functools import flexible_partial, skip
+from .utils import get_grblas_type, get_meta, np_dtype, wrap_inner
_expect_type = gb.base._expect_type
@@ -359,6 +358,9 @@ def visualize(self, *args, **kwargs):
return self._delayed.visualize(*args, **kwargs)
+_const0_DOnion = {"dtype": np.int32, "meta": np.array(0, dtype=np.int32)}
+
+
class DOnion:
"""
Dask (or Delayed) Onion (DOnion):
@@ -370,30 +372,26 @@ class DOnion:
"""
@classmethod
- def sprout(cls, shroud, seed_func, seed_meta, packed_args, packed_kwargs, *args, **kwargs):
+ def sprout(cls, shroud, seed_meta, seed_func, *args, **kwargs):
"""
- Develop a DOnion from dask array `shroud`
+ Develop a DOnion from dask array `shroud` and function `seed_func`
+
+ Return shroud.map_blocks(seed_func) as a DOnion.
- Shroud a dask array (the seed) returned by `seed_func` using another dask array (the
- shroud)
:shroud: dask array whose inner value determines the (size of) seed dask array
+ :seed_meta: empty instance of the inner value type of the seed
:seed_func: the function that takes as input the inner value of `shroud` and returns
another dask array (the seed)
- :seed_meta: empty instance of the inner value type of the seed
- :packed_args: tuple of arguments to `seed_func`
- :packed_kwargs: dict of keyword arguments to `seed_func`
- :args: other dask arrays that together with `shroud` determine the (size of) `seed`
- :kwargs: other named dask arrays that together with `shroud` determine the (size of) `seed`
+ :args: tuple of arguments to `seed_func`
+ :kwargs: dict of keyword arguments to `seed_func`
"""
- seed_func = partial(seed_func, *packed_args, **packed_kwargs)
- dtype = np_dtype(lookup_dtype(shroud.dtype))
- _meta = np.array([], dtype=dtype)
- kernel = shroud.map_blocks(seed_func, *args, **kwargs, dtype=dtype, meta=_meta)
+ seed_func = partial(seed_func, *args, **kwargs)
+ kernel = shroud.map_blocks(seed_func, **_const0_DOnion)
return DOnion(kernel, meta=seed_meta)
def __init__(self, kernel, meta=None):
self.kernel = kernel
- self.dtype = kernel.dtype
+ self.dtype = meta.dtype
self._meta = meta
def __eq__(self, other):
@@ -408,48 +406,46 @@ def compute(self, *args, **kwargs):
def persist(self, *args, **kwargs):
return self.kernel.compute(*args, **kwargs).persist(*args, **kwargs)
- def extract(self, func, packed_args, packed_kwargs, dtype, meta, *args, **kwargs):
- func = partial(func, *packed_args, **packed_kwargs)
- kernel = self.kernel.map_blocks(func, *args, **kwargs, dtype=dtype, meta=meta)
- return DOnion(kernel, meta=meta)
-
@classmethod
- def joint_access(cls, func, packed_args, packed_kwargs, dtype, meta):
+ def multiple_access(cls, out_meta, func, *args, **kwargs):
"""
- Pass inner values of any DOnions in `packed_args` and/or `packed_kwargs` into `func`.
+ Pass inner values of any DOnions in `args` and/or `kwargs` into `func`.
- :func: Callable that can accept the contents of `packed_args` and/or `packed_kwargs`
+ :func: Callable that can accept the contents of `args` and/or `kwargs`
as parameters
- :packed_args: a list of positional arguments to `func`
- :packed_kwargs: a dict of named arguments to `func`
+ :args: a list of positional arguments to `func`
+ :kwargs: a dict of named arguments to `func`
"""
- where_non_DOnion = [False if is_DOnion(arg) else True for arg in packed_args]
- non_Donions = [arg for arg in packed_args if not is_DOnion(arg)]
- non_DOnion_kwargs = {k: v for (k, v) in packed_kwargs.items() if not is_DOnion(v)}
- func = flexible_partial(func, where_non_DOnion, *non_Donions, **non_DOnion_kwargs)
-
- donion_args = tuple(arg.kernel for arg in packed_args if is_DOnion(arg))
- donion_kwargs = {k: v.kernel for (k, v) in packed_kwargs.items() if is_DOnion(v)}
- kernel = da.map_blocks(func, *donion_args, **donion_kwargs, dtype=dtype, meta=meta)
- return DOnion(kernel, meta=meta)
+ # First, pass non-DOnion args and kwargs to func:
+ skip_Donions = [arg if not is_DOnion(arg) else skip for arg in args]
+ non_DOnion_kwargs = {k: v for (k, v) in kwargs.items() if not is_DOnion(v)}
+ func = flexible_partial(func, *skip_Donions, **non_DOnion_kwargs)
+
+ # Next, pass func and DOnion args and kwargs to map_blocks:
+ donion_args = tuple(arg.kernel for arg in args if is_DOnion(arg))
+ donion_kwargs = {k: v.kernel for (k, v) in kwargs.items() if is_DOnion(v)}
+ kernel = da.map_blocks(func, *donion_args, **donion_kwargs, **_const0_DOnion)
+ return DOnion(kernel, meta=out_meta)
+
+ def deep_extract(self, out_meta, func, *args, **kwargs):
+ func = partial(func, *args, **kwargs)
+ kernel = self.kernel.map_blocks(func, **_const0_DOnion)
+ return DOnion(kernel, meta=out_meta)
def __getattr__(self, item):
- func = partial(getattr, name=item)
- # TODO: lookup dtype and meta of attribute!!!
- dtype = np_dtype(lookup_dtype(self.dtype))
- meta = self._meta
- return self.extract(func, pack_args(), pack_kwargs(), dtype, meta)
-
- def getattr(self, name, packed_args, packed_kwargs, *args, **kwargs):
- func = partial(DOnion.extractattr, name, packed_args, packed_kwargs)
- # TODO: lookup dtype and meta of attribute!!!
- dtype = np_dtype(lookup_dtype(self.dtype))
- meta = self._meta
- return self.extract(func, pack_args(), pack_kwargs(), dtype, meta, *args, **kwargs)
+ # TODO: how to compute meta of attribute?!!!
+ meta = np.array(0)
+        _getattr = flexible_partial(getattr, skip, item)
+ return self.deep_extract(meta, _getattr)
+
+ def getattr(self, meta, name, *args, **kwargs):
+        _getattr = flexible_partial(DOnion._getattr, skip, name, *args, **kwargs)
+ return self.deep_extract(meta, _getattr)
@classmethod
- def extractattr(cls, name, packed_args, packed_kwargs, x):
- return getattr(x, name)(*packed_args, **packed_kwargs)
+ def _getattr(cls, x, name, *args, **kwargs):
+ return getattr(x, name)(*args, **kwargs)
is_DOnion = partial(is_type, DOnion)
diff --git a/dask_grblas/functools.py b/dask_grblas/functools.py
index fea686b..e7cdf90 100644
--- a/dask_grblas/functools.py
+++ b/dask_grblas/functools.py
@@ -1,9 +1,4 @@
-import numpy as np
-
from reprlib import recursive_repr
-from collections.abc import Iterable
-from itertools import count
-from scipy.sparse import csr_matrix
class skip:
@@ -17,171 +12,55 @@ def __repr__(self):
skip = skip()
-def normalize_occupancies(specs):
- """
- Convert any valid `specs` into the form: [True, False, True, ...]
- """
- error_msg = (
- "`specs` should be array-like with structure matching any of the following forms:\n"
- "[False, True, False, False, True, ...]\n"
- "[1, 4, ...]\n"
- "(skip, b, skip, skip, d, ...)\n"
- "((1, b), (4, d), ...)"
- )
- if isinstance(specs, Iterable):
- try:
- a = np.asarray(specs)
- except Exception:
- raise ValueError(error_msg)
-
- if a.ndim == 1:
- if a.dtype == np.bool_:
- # specs = [False, True, False, False, True, ...]
- return a, None
-
- if a.dtype.kind in np.typecodes["AllInteger"]:
- # specs = (1, 4, ...)
- data = np.ones_like(a, dtype=np.bool_)
- indices = a
- indptr = np.array([0, data.size])
- occupancy = csr_matrix((data, indices, indptr)).toarray().squeeze()
- return occupancy, None
-
- # specs = (skip, b, skip, skip, d, ...)
- occupancy = np.array([False if x is skip else True for x in a])
- args = [x for x in a if x is not skip]
- return occupancy, args
-
- if a.ndim == 2 and a.shape[1] == 2:
- # specs = [(1, b), (4, d), ...]
- indices = np.asarray(a[:, 0], dtype=int)
- args = a[:, 1]
-
- data = np.ones_like(indices, dtype=np.bool_)
- indptr = np.array([0, data.size])
- occupancy = csr_matrix((data, indices, indptr)).toarray().squeeze()
- return occupancy, args
-
- raise ValueError(error_msg)
-
- raise TypeError(error_msg)
-
-
-################################################################################
-### flexible_partial() argument application # noqa
-################################################################################
-
-# Purely functional, no descriptor behaviour
class flexible_partial:
"""New function with flexible partial application of the given
- arguments and keywords. (Any argument slot of the given function
- may be occupied.)
+ arguments and keywords. Any argument slot of the given function
+ may be occupied (not just the leading slots). Use the sentinel
+ `skip` to denote vacant argument slots.
"""
- __slots__ = "base_func", "args", "vacancies", "kwargs", "__dict__", "__weakref__"
+ __slots__ = "base_func", "args", "keywords", "__dict__", "__weakref__"
- def __new__(cls, func, specs, /, *args, **kwargs):
- # Validate input parameters:
+ def __new__(cls, func, /, *args, **keywords):
if not callable(func):
raise TypeError("the first argument must be callable")
- occupancies, args_ = normalize_occupancies(specs)
- args = args if args_ is None else args_
- new_arg = iter(args)
-
- nfilled = np.count_nonzero(occupancies)
- nargs = len(args)
- if nargs != nfilled:
- raise ValueError(
- f"Number ({nargs}) of given arguments does not match "
- f"number ({nfilled}) of argument slots to be occupied."
- )
-
- self = super(flexible_partial, cls).__new__(cls)
-
- _func = None
if hasattr(func, "base_func"):
- # `func` is `flexible_partial`
- _func = func.base_func
+ func_ = func.base_func
+ func_is_partial = True
elif hasattr(func, "func"):
- # `func` is `partial`
- _func = func.func
- if _func and hasattr(func, "args") and hasattr(func, "kwargs"):
- self.args = list(func.args)
- kwargs = {**func.kwargs, **kwargs}
-
- if hasattr(func, "vacancies"):
- old_vacancy = iter(func.vacancies)
- else:
- old_vacancy = iter([])
-
- func = _func
-
- # step through old vacancies:
- occupancy = iter(occupancies)
- self.vacancies = []
- for occupy in occupancies:
- try:
- pos = next(old_vacancy)
- except StopIteration:
- # inner vacancies now exhausted => continue elsewhere:
- break
- else:
- next(occupancy)
-
- if occupy:
- # fill the vacancy:
- self.args[pos] = next(new_arg)
- else:
- # record the vacancy:
- self.vacancies.append(pos)
-
- # reset to remaining occupancies
- occupancies = list(occupancy)
+ func_ = func.func
+ func_is_partial = True
else:
- self.args = []
- self.vacancies = []
-
- start = len(self.args)
- for pos, occupied in enumerate(occupancies, start=start):
- if occupied:
- self.args.append(next(new_arg))
- else:
- # create a vacancy:
- self.args.append(None)
- self.vacancies.append(pos)
+ func_is_partial = False
+
+ if func_is_partial:
+ new_arg = iter(args)
+ args = tuple(next(new_arg) if arg is skip else arg for arg in func.args)
+ args += tuple(new_arg)
+ keywords = {**func.keywords, **keywords}
+ func = func_
+
+ self = super(flexible_partial, cls).__new__(cls)
self.base_func = func
- self.args = tuple(self.args)
- self.vacancies = tuple(self.vacancies)
- self.kwargs = kwargs
+ self.args = args
+ self.keywords = keywords
return self
- def __call__(self, /, *args, **kwargs):
- if len(args) < len(self.vacancies):
- raise ValueError(
- f"Expected at least {len(self.vacancies)} positional arguments. "
- f"Got {len(args)}."
- )
-
+ def __call__(self, /, *args, **keywords):
new_arg = iter(args)
- self_args = list(self.args)
- for pos in self.vacancies:
- # fill all the vacancies
- self_args[pos] = next(new_arg)
+ args = (next(new_arg) if arg is skip else arg for arg in self.args)
- # append the remaining arguments and make the call:
- self_args.extend(new_arg)
- kwargs = {**self.kwargs, **kwargs}
- return self.base_func(*self_args, **kwargs)
+ keywords = {**self.keywords, **keywords}
+ return self.base_func(*args, *new_arg, **keywords)
@recursive_repr()
def __repr__(self):
qualname = type(self).__qualname__
args = [repr(self.base_func)]
- c = count()
- args.extend("_" if next(c) in self.vacancies else repr(x) for x in self.args)
- args.extend(f"{k}={v!r}" for (k, v) in self.kwargs.items())
+ args.extend(repr(x) for x in self.args)
+ args.extend(f"{k}={v!r}" for (k, v) in self.keywords.items())
if type(self).__module__ == "functools":
return f"functools.{qualname}({', '.join(args)})"
return f"{qualname}({', '.join(args)})"
@@ -190,26 +69,24 @@ def __reduce__(self):
return (
type(self),
(self.base_func,),
- (self.base_func, self.args, self.vacancies, self.kwargs or None, self.__dict__ or None),
+ (self.base_func, self.args, self.keywords or None, self.__dict__ or None),
)
def __setstate__(self, state):
if not isinstance(state, tuple):
raise TypeError("argument to __setstate__ must be a tuple")
- if len(state) != 5:
- raise TypeError(f"expected 5 items in state, got {len(state)}")
- func, args, vacs, kwds, namespace = state
+ if len(state) != 4:
+ raise TypeError(f"expected 4 items in state, got {len(state)}")
+ func, args, kwds, namespace = state
if (
not callable(func)
or not isinstance(args, tuple)
- or not isinstance(vacs, tuple)
or (kwds is not None and not isinstance(kwds, dict))
or (namespace is not None and not isinstance(namespace, dict))
):
- raise TypeError("invalid flexible_partial state")
+ raise TypeError("invalid partial state")
args = tuple(args) # just in case it's a subclass
- vacs = tuple(vacs)
if kwds is None:
kwds = {}
elif type(kwds) is not dict: # XXX does it need to be *exactly* dict?
@@ -220,5 +97,4 @@ def __setstate__(self, state):
self.__dict__ = namespace
self.base_func = func
self.args = args
- self.vacancies = vacs
self.keywords = kwds
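With this clean-up the specs argument is gone: vacancies are now written directly as skip
sentinels among the positional arguments, so flexible_partial reads like a functools.partial
that may leave holes. A minimal sketch of the intended behaviour:

    from dask_grblas.functools import flexible_partial, skip

    def func(a, b, c, d, e, f):
        return a, b, c, d, e, f

    # occupy slots b and e; skip marks the vacancies:
    part = flexible_partial(func, skip, 2, skip, skip, 5)
    assert part(1, 3, 4, 6) == (1, 2, 3, 4, 5, 6)

    # stacking another flexible_partial fills vacancies left-to-right and keeps the rest:
    part2 = flexible_partial(part, 1, skip, 4)
    assert part2(3, 6) == (1, 2, 3, 4, 5, 6)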
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 86953c0..12d5470 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -116,20 +116,15 @@ def from_values(
if hasattr(values, "dtype"):
dtype = lookup_dtype(values.dtype if dtype is None else dtype)
meta = gb.Vector.new(dtype)
- meta_dtype = np_dtype(meta.dtype)
# check for any DOnions:
- packed_args = pack_args(indices, values)
- packed_kwargs = pack_kwargs(
- size=size, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
- )
- donions = [True for arg in packed_args if is_DOnion(arg)]
- donions += [True for (k, v) in packed_kwargs.items() if is_DOnion(v)]
+ pkd_args = pack_args(indices, values)
+ pkd_kwargs = pack_kwargs(size=size, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
+ donions = [True for arg in pkd_args if is_DOnion(arg)]
+ donions += [True for (k, v) in pkd_kwargs.items() if is_DOnion(v)]
if np.any(donions):
# dive into DOnion(s):
- return DOnion.joint_access(
- Vector.from_values, packed_args, packed_kwargs, meta_dtype, meta
- )
+ return DOnion.multiple_access(meta, Vector.from_values, *pkd_args, **pkd_kwargs)
# no DOnions
if type(indices) is da.Array and type(values) is da.Array:
@@ -139,18 +134,20 @@ def from_values(
else:
if indices.size == 0:
raise ValueError("No indices provided. Unable to infer size.")
- size = da.max(indices) + 1
+            # Note: numpy promotes uint64 + int to float64, which it cannot safely
+            # cast back to uint, so give both summands the same dtype:
+ size = da.max(indices) + np.asarray(1, dtype=indices.dtype)
# Here `size` is a dask 0d-array whose computed value is
# used to determine the size of the Vector to be returned.
# But since we do not want to compute anything just now,
# we instead create a "DOnion" (dask onion) object
- packed_args = pack_args(indices, values)
- packed_kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
- donion = DOnion.sprout(size, Vector.from_values, meta, packed_args, packed_kwargs)
+ args = pack_args(indices, values)
+ kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
+ donion = DOnion.sprout(size, meta, Vector.from_values, *args, **kwargs)
return Vector(donion, meta=meta)
if indices.size > 0:
- if indices.dtype.kind not in np.typecodes["AllInteger"]:
+ if indices.dtype.kind not in "ui":
raise ValueError(f"indices must be integers, not {indices.dtype}")
if indices.size != values.size:
@@ -541,11 +538,23 @@ def build(self, indices, values, *, size=None, chunks=None, dup_op=None, clear=F
# self.__init__(Vector.from_vector(vector)._delayed)
def to_values(self, dtype=None, chunks="auto"):
+ dtype = lookup_dtype(self.dtype if dtype is None else dtype)
+ meta_i, meta_v = self._meta.to_values(dtype)
+
x = self._delayed
+ if type(x) is DOnion:
+ meta = np.array([])
+ result = x.getattr(meta, "to_values", dtype=dtype)
+ indices = result.getattr(meta_i, "__getitem__", 0)
+ values = result.getattr(meta_v, "__getitem__", 1)
+ return indices, values
+
+ # get dask array of nvals for each chunk:
nvals_array = da.core.blockwise(
*(_nvals, "i"), *(x, "i"), adjust_chunks={"i": 1}, dtype=np.int64, meta=np.array([])
)
+        # accumulate the per-chunk nvals to get the index ranges of the output (indices, values)
stops = da.cumsum(nvals_array)
starts = da.roll(stops, 1)
starts = starts.copy() if starts.size == 1 else starts # bug!!
@@ -553,10 +562,7 @@ def to_values(self, dtype=None, chunks="auto"):
nnz = stops[-1]
def _to_values(x, starts, stops, dtype, chunks, nnz):
- # starts = da.from_array(starts, chunks=1, name="starts" + tokenize(starts))
starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta)
-
- # stops = da.from_array(stops, chunks=1, name="stops" + tokenize(stops))
stops = da.core.Array(stops.dask, stops.name, x.chunks, stops.dtype, meta=x._meta)
chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64)
@@ -580,41 +586,21 @@ def _to_values(x, starts, stops, dtype, chunks, nnz):
x, _identity, _flatten, axis=0, concatenate=False, dtype=dtype_, meta=np.array([])
)
- def apply(func, dtype, meta, x):
- return da.map_blocks(func, x, dtype=dtype, meta=meta)
-
- dtype = self.dtype if dtype is None else dtype
- packed_args = pack_args(x, starts, stops, dtype, chunks)
- packed_kwargs = pack_kwargs()
+ # since the size of the output (indices, values) depends on nnz, a delayed quantity,
+ # we need to return (indices, values) as DOnions (twice-delayed dask-array)
meta = np.array([])
- iv_donion = DOnion.sprout(nnz, _to_values, meta, packed_args, packed_kwargs)
-
- meta_i, meta_v = self._meta.to_values(dtype)
+ iv_donion = DOnion.sprout(nnz, meta, _to_values, x, starts, stops, dtype, chunks)
dtype_i = np_dtype(lookup_dtype(meta_i.dtype))
- packed_args = pack_args(_get_indices, dtype_i, meta_i)
- packed_kwargs = pack_kwargs()
- indices = iv_donion.extract(apply, packed_args, packed_kwargs, dtype_i, meta_i)
-
+ indices = iv_donion.deep_extract(
+ meta_i, da.map_blocks, _get_indices, dtype=dtype_i, meta=meta_i
+ )
dtype_v = np_dtype(lookup_dtype(meta_v.dtype))
- packed_args = pack_args(_get_values, dtype_v, meta_v)
- packed_kwargs = pack_kwargs()
- values = iv_donion.extract(apply, packed_args, packed_kwargs, dtype_v, meta_v)
-
+ values = iv_donion.deep_extract(
+ meta_v, da.map_blocks, _get_values, dtype=dtype_v, meta=meta_v
+ )
return indices, values
- # delayed = self._delayed
- # dtype_ = np_dtype(self.dtype)
- # meta_i, meta_v = self._meta.to_values(dtype)
- # meta = np.array([])
- # offsets = build_chunk_offsets_dask_array(delayed, 0, "index_offset-")
- # x = da.map_blocks(
- # TupleExtractor, delayed, offsets, gb_dtype=dtype, dtype=dtype_, meta=meta
- # )
- # indices = da.map_blocks(_get_indices, x, dtype=meta_i.dtype, meta=meta)
- # values = da.map_blocks(_get_values, x, dtype=meta_v.dtype, meta=meta)
- # return indices, values
-
def isequal(self, other, *, check_dtype=False):
other = self._expect_type(other, Vector, within="isequal", argname="other")
return super().isequal(other, check_dtype=check_dtype)
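A usage sketch (with a hypothetical DOnion-backed Vector `u`, mirroring the tests later in this series): the arrays returned by `to_values()` are lazy, so they must be materialized explicitly:

    indices, values = u.to_values()   # both are DOnion-backed (lazy)
    indices = indices.compute()       # concrete index array
    values = values.compute()         # concrete value array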
diff --git a/tests/from_grblas2/test_vector.py b/tests/from_grblas2/test_vector.py
index abb9863..9f7e9a0 100644
--- a/tests/from_grblas2/test_vector.py
+++ b/tests/from_grblas2/test_vector.py
@@ -180,13 +180,23 @@ def test_from_values_dask():
def test_from_values_DOnion(v):
indices = da.from_array(np.array([0, 1, 3]))
values = da.from_array(np.array([True, False, True]))
+ # The following creates a Vector `u` with `type(u._delayed) == DOnion`
+ # because keyword argument `size` has not been specified:
u = Vector.from_values(indices, values)
+ assert u.size == 4
+ assert u.nvals == 3
+ assert u.dtype == bool
+ # The output of `.to_values()` is always a tuple of DOnions
indices, values = u.to_values()
+ # The following creates a Vector `v` with `type(v._delayed) == DOnion`
+ # because arguments `indices` and `values` are DOnions:
v = Vector.from_values(indices, values)
assert v.size == 4
assert v.nvals == 3
assert v.dtype == bool
values = da.from_array(np.array([12.3, 12.4, 12.5]))
+ # The following creates a Vector `u2` with `type(u2._delayed) == DOnion`
+ # because argument `indices` is a DOnion:
u2 = Vector.from_values(indices, values, size=17)
assert u2.size == 17
assert u2.nvals == 3
diff --git a/tests/test_functools.py b/tests/test_functools.py
index 2ab02f0..92dcdd8 100644
--- a/tests/test_functools.py
+++ b/tests/test_functools.py
@@ -1,4 +1,5 @@
import pytest
+from functools import partial
from dask_grblas.functools import flexible_partial, skip
@@ -10,48 +11,29 @@ def funk(a, b, c, d, e, f, ka="a", kb="b", kc="c"):
return a, b, c, d, e, f, ka, kb, kc
-@pytest.mark.parametrize(
- "specs",
- [
- [[False, True, False, False, True], [True, False, True]],
- [[1, 4], [0, 2]],
- ],
-)
-def test_flexible_partial_specs_sans_args(specs):
- specs0, specs1 = specs
- with pytest.raises(ValueError):
- _ = flexible_partial(func, specs0, 1, 2, 3, 4, 5)
- with pytest.raises(ValueError):
- _ = flexible_partial(func, specs0, 1)
-
- # without keyword arguments:
- part_func = flexible_partial(func, specs0, 2, 5)
+def test_flexible_partial():
+ # without keywords
+ part_func = flexible_partial(func, skip, 2, skip, skip, 5)
result = part_func(1, 3, 4, 6)
assert result == (1, 2, 3, 4, 5, 6)
- # with keyword arguments:
- part_funk = flexible_partial(funk, specs0, 2, 5, kb="B")
+ # with keywords
+ part_funk = flexible_partial(funk, skip, 2, skip, skip, 5, kb="B")
result = part_funk(1, 3, 4, 6, kc="C")
assert result == (1, 2, 3, 4, 5, 6, "a", "B", "C")
- # apply a 2nd flexible_partial on first flexible_partial:
- part_funk2 = flexible_partial(part_funk, specs1, 1, 4, ka="A")
+ # apply a 2nd `flexible_partial` on first `flexible_partial`:
+ part_funk2 = flexible_partial(part_funk, 1, skip, 4, ka="A")
result = part_funk2(3, 6, kc="C")
assert result == (1, 2, 3, 4, 5, 6, "A", "B", "C")
+ # or apply a `partial` on first `flexible_partial`:
+ part_funk2 = partial(part_funk, 1, 3, ka="A")
+ result = part_funk2(4, 6, kc="C")
+ assert result == (1, 2, 3, 4, 5, 6, "A", "B", "C")
-@pytest.mark.parametrize(
- "specs",
- [
- (skip, 2, skip, skip, 5),
- [(1, 2), (4, 5)],
- ],
-)
-def test_flexible_partial_specs_with_args(specs):
- part_func = flexible_partial(func, specs)
- result = part_func(1, 3, 4, 6)
- assert result == (1, 2, 3, 4, 5, 6)
-
- part_funk = flexible_partial(funk, specs, 2, 5, kb="B")
- result = part_funk(1, 3, 4, 6, kc="C")
- assert result == (1, 2, 3, 4, 5, 6, "a", "B", "C")
+ # or apply a `flexible_partial` on a `partial`:
+ part_funk = partial(funk, 1, 2, kb="B")
+ part_funk2 = flexible_partial(part_funk, skip, 4, ka="A")
+ result = part_funk2(3, 5, 6, kc="C")
+ assert result == (1, 2, 3, 4, 5, 6, "A", "B", "C")
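The merging rule these tests rely on can be sketched independently of the library code: positional arguments of a later call fill the `skip` slots of the earlier one, left to right, and any surplus is appended (illustration only, with a stand-in `SKIP` sentinel):

    SKIP = object()

    def fill(template, new_args):
        it = iter(new_args)
        filled = tuple(next(it) if a is SKIP else a for a in template)
        return filled + tuple(it)     # leftover new args are appended

    assert fill((SKIP, 2, SKIP, SKIP, 5), (1, 3, 4, 6)) == (1, 2, 3, 4, 5, 6)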
From cba8ee8710f5671d9155962e47289add0aa90c64 Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Mon, 21 Feb 2022 13:28:25 +0100
Subject: [PATCH 09/18] fixed bug in flexible_partial()
---
dask_grblas/base.py | 14 ++++++++------
dask_grblas/functools.py | 18 +++++++++++++++---
2 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 9239c25..8eecc60 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -391,8 +391,11 @@ def sprout(cls, shroud, seed_meta, seed_func, *args, **kwargs):
def __init__(self, kernel, meta=None):
self.kernel = kernel
- self.dtype = meta.dtype
self._meta = meta
+ try:
+ self.dtype = meta.dtype
+ except AttributeError:
+ self.dtype = type(meta)
def __eq__(self, other):
return self.compute() == other
@@ -428,19 +431,18 @@ def multiple_access(cls, out_meta, func, *args, **kwargs):
return DOnion(kernel, meta=out_meta)
def deep_extract(self, out_meta, func, *args, **kwargs):
- func = partial(func, *args, **kwargs)
+ func = flexible_partial(func, *args, **kwargs)
kernel = self.kernel.map_blocks(func, **_const0_DOnion)
return DOnion(kernel, meta=out_meta)
def __getattr__(self, item):
# TODO: how to compute meta of attribute?!!!
- meta = np.array(0)
- _getattr = flexible_partial(getattr, (skip, item))
+ meta = getattr(self._meta, item)
+ _getattr = flexible_partial(getattr, skip, item)
return self.deep_extract(meta, _getattr)
def getattr(self, meta, name, *args, **kwargs):
- where_args = (False,) + (True,) * (1 + len(args))
- _getattr = flexible_partial(DOnion._getattr, where_args, name, *args, **kwargs)
+ _getattr = flexible_partial(DOnion._getattr, skip, name, *args, **kwargs)
return self.deep_extract(meta, _getattr)
@classmethod
diff --git a/dask_grblas/functools.py b/dask_grblas/functools.py
index e7cdf90..257d5ec 100644
--- a/dask_grblas/functools.py
+++ b/dask_grblas/functools.py
@@ -35,9 +35,21 @@ def __new__(cls, func, /, *args, **keywords):
func_is_partial = False
if func_is_partial:
- new_arg = iter(args)
- args = tuple(next(new_arg) if arg is skip else arg for arg in func.args)
- args += tuple(new_arg)
+ old_arg, new_arg = iter(func.args), iter(args)
+ exhausted = False
+ args = ()
+ for arg in func.args:
+ if arg is skip:
+ try:
+ args += (next(new_arg),)
+ except StopIteration:
+ exhausted = True
+ break
+ else:
+ args += (arg,)
+ next(old_arg)
+
+ args += tuple(old_arg if exhausted else new_arg)
keywords = {**func.keywords, **keywords}
func = func_
From 8b259f8c7fdae71ceaaa09136b2a6e3111424386 Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Fri, 25 Feb 2022 16:28:57 +0100
Subject: [PATCH 10/18] cleaned-up .from_values() and .to_values() from which
DOnions sprout
---
dask_grblas/base.py | 77 +++++++-
dask_grblas/matrix.py | 317 +++++++++++++++++++++---------
dask_grblas/vector.py | 58 ++++--
tests/from_grblas2/test_matrix.py | 79 +++++++-
tests/from_grblas2/test_vector.py | 20 +-
5 files changed, 428 insertions(+), 123 deletions(-)
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 8eecc60..5e50550 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -1,4 +1,5 @@
from numbers import Number
+from collections.abc import Iterable
from functools import partial
import dask.array as da
import grblas as gb
@@ -9,6 +10,7 @@
from .mask import Mask
from .functools import flexible_partial, skip
from .utils import get_grblas_type, get_meta, np_dtype, wrap_inner
+from dask.base import is_dask_collection
_expect_type = gb.base._expect_type
@@ -41,6 +43,15 @@ class BaseType:
def isequal(self, other, *, check_dtype=False):
from .scalar import PythonScalar
+ args = [self, other]
+ if np.any([type(arg._delayed) is DOnion for arg in args]):
+ args = [arg._delayed if type(arg._delayed) is DOnion else arg for arg in args]
+ meta = gb.Scalar.new(bool)
+ delayed = DOnion.multiple_access(
+ meta, self.__class__.isequal, *args, check_dtype=check_dtype
+ )
+ return PythonScalar(delayed, meta=meta)
+
# if type(other) is not type(self):
# raise TypeError(f'Argument of isequal must be of type {type(self).__name__}')
if not self._meta.isequal(other._meta):
@@ -349,6 +360,8 @@ def wait(self):
def compute(self, *args, **kwargs):
# kwargs['scheduler'] = 'synchronous'
val = self._delayed.compute(*args, **kwargs)
+ if type(self._delayed) is DOnion:
+ return val
return val.value
def persist(self, *args, **kwargs):
@@ -358,7 +371,8 @@ def visualize(self, *args, **kwargs):
return self._delayed.visualize(*args, **kwargs)
-_const0_DOnion = {"dtype": np.int32, "meta": np.array(0, dtype=np.int32)}
+const_obj = object()
+_const0_DOnion = {"dtype": np.object_, "meta": np.array(const_obj, dtype=np.object_)}
class DOnion:
@@ -374,23 +388,49 @@ class DOnion:
@classmethod
def sprout(cls, shroud, seed_meta, seed_func, *args, **kwargs):
"""
- Develop a DOnion from dask array `shroud` and function `seed_func`
+ Develop a DOnion from dask arrays listed in `shroud` and using function `seed_func`
- Return shroud.map_blocks(seed_func) as a DOnion.
+ Return dask.array.map_blocks(seed_func, shroud) as a DOnion.
- :shroud: dask array whose inner value determines the (size of) seed dask array
+ :shroud: a dask array; or an iterable of multiple such dask arrays; or a tuple (x, y)
+ where x and y are respectively a list of dask arrays and a dict of named dask arrays.
+ The inner values of these arrays determine the (size of) seed dask array
:seed_meta: empty instance of the inner value type of the seed
:seed_func: the function that takes as input the inner value of `shroud` and returns
another dask array (the seed)
- :args: tuple of arguments to `seed_func`
+ :args: tuple of arguments to `seed_func`. May contain one or more `skip` sentinels
+ denoting vacant positions to be taken up by the inner values of the dask arrays in
+ `shroud`.
:kwargs: dict of keyword arguments to `seed_func`
"""
- seed_func = partial(seed_func, *args, **kwargs)
- kernel = shroud.map_blocks(seed_func, **_const0_DOnion)
+ named_shrouds = {}
+ if is_dask_collection(shroud):
+ shroud = [shroud]
+ elif not isinstance(shroud, Iterable):
+ raise ValueError(
+ "`shroud` must be a dask array; a list x of dask arrays or "
+ "a dict y of named dask arrays; or a tuple of both: (x, y)"
+ )
+ elif len(shroud) == 0:
+ raise ValueError("`shroud` must contain at least one dask array!")
+ elif (
+ len(shroud) == 2
+ and isinstance(shroud[0], Iterable)
+ and isinstance(shroud[1], dict)
+ ):
+ named_shrouds = shroud[1]
+ shroud = shroud[0]
+
+ seed_func = flexible_partial(seed_func, *args, **kwargs)
+ kernel = da.map_blocks(seed_func, *shroud, **named_shrouds, **_const0_DOnion)
return DOnion(kernel, meta=seed_meta)
def __init__(self, kernel, meta=None):
self.kernel = kernel
+ # Why ._meta and .dtype? Because Scalar, Vector and Matrix need them
self._meta = meta
try:
self.dtype = meta.dtype
@@ -398,6 +438,8 @@ def __init__(self, kernel, meta=None):
self.dtype = type(meta)
def __eq__(self, other):
+ if type(other) is DOnion:
+ other = other.compute()
return self.compute() == other
def compute(self, *args, **kwargs):
@@ -406,8 +448,27 @@ def compute(self, *args, **kwargs):
value = value.compute(*args, **kwargs)
return value
+ def compute_once(self, *args, **kwargs):
+ value = self.kernel.compute(*args, **kwargs)
+ return value
+
def persist(self, *args, **kwargs):
- return self.kernel.compute(*args, **kwargs).persist(*args, **kwargs)
+ value = self.compute_once(*args, **kwargs)
+ while type(value) is DOnion or (
+ hasattr(value, "_delayed") and type(value._delayed) is DOnion
+ ):
+ if type(value) is DOnion:
+ value = value.compute_once(*args, **kwargs)
+ else:
+ value = value._delayed.compute_once(*args, **kwargs)
+
+ if hasattr(value, "persist"):
+ return value.persist(*args, **kwargs)
+ elif hasattr(value, "_persist") and hasattr(value, "_delayed"):
+ value._persist(*args, **kwargs)
+ return value._delayed
+ else:
+ raise TypeError(f'Something went wrong: {self} cannot be "persisted".')
@classmethod
def multiple_access(cls, out_meta, func, *args, **kwargs):
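To summarize the `shroud` forms `sprout` now accepts (a sketch with hypothetical lazy 0-d dask arrays `n` and `m`, a meta object `meta`, and a seed function `f`; each positional shroud fills one `skip` slot, named shrouds become keyword arguments):

    DOnion.sprout(n, meta, f, skip, 3)                 # a single dask array
    DOnion.sprout([n, m], meta, f, skip, skip)         # a list of dask arrays, filled in order
    DOnion.sprout(([n], {"size": m}), meta, f, skip)   # positional plus named dask arrays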
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index 7efadfc..deaed0b 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -1,18 +1,23 @@
import dask.array as da
import numpy as np
import grblas as gb
-from dask.base import tokenize
+
+from numbers import Integral
+from dask.base import tokenize, is_dask_collection
from dask.delayed import Delayed, delayed
from dask.highlevelgraph import HighLevelGraph
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
+from grblas.exceptions import IndexOutOfBound
-from .base import BaseType, InnerBaseType
+from .base import BaseType, InnerBaseType, DOnion, is_DOnion, skip
from .base import _nvals as _nvals_in_chunk
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater
from .mask import StructuralMask, ValueMask
from ._ss.matrix import ss
from .utils import (
+ pack_args,
+ pack_kwargs,
np_dtype,
get_return_type,
get_grblas_type,
@@ -121,40 +126,84 @@ def from_values(
nrows=None,
ncols=None,
*,
- trust_shape=False,
dup_op=None,
dtype=None,
chunks="auto",
name=None,
):
- # Note: `trust_shape` is a bool parameter that, when True,
- # can be used to avoid expensive computation of max(rows)
- # and max(columns) which are used to verify that `nrows`
- # and `ncols` are indeed large enough to hold all the given
- # tuples.
- if (
- dup_op is None
- and type(rows) is da.Array
- and type(columns) is da.Array
- and type(values) is da.Array
- ):
- if not trust_shape or nrows is None or ncols is None:
- # this branch is an expensive operation:
- implied_nrows = 1 + da.max(rows).compute()
- implied_ncols = 1 + da.max(columns).compute()
- if nrows is not None and implied_nrows > nrows:
- raise Exception()
- if ncols is not None and implied_ncols > ncols:
- raise Exception()
- nrows = implied_nrows if nrows is None else nrows
- ncols = implied_ncols if ncols is None else ncols
-
- idtype = gb.Matrix.new(rows.dtype).dtype
- np_idtype_ = np_dtype(idtype)
- vdtype = gb.Matrix.new(values.dtype).dtype
- np_vdtype_ = np_dtype(vdtype)
+ if hasattr(values, "dtype"):
+ dtype = lookup_dtype(values.dtype if dtype is None else dtype)
+
+ if nrows is None:
+ if ncols is None:
+ meta = gb.Matrix.new(dtype)
+ else:
+ meta = gb.Matrix.new(dtype, ncols=ncols)
+ else:
+ if ncols is None:
+ meta = gb.Matrix.new(dtype, nrows=nrows)
+ else:
+ meta = gb.Matrix.new(dtype, nrows=nrows, ncols=ncols)
+
+ # check for any DOnions:
+ pkd_args = pack_args(rows, columns, values)
+ pkd_kwargs = pack_kwargs(
+ nrows=nrows, ncols=ncols, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
+ )
+ donions = [True for arg in pkd_args if is_DOnion(arg)]
+ donions += [True for (k, v) in pkd_kwargs.items() if is_DOnion(v)]
+ if np.any(donions):
+ # dive into DOnion(s):
+ out_donion = DOnion.multiple_access(meta, Matrix.from_values, *pkd_args, **pkd_kwargs)
+ return Matrix(out_donion, meta=meta)
+
+ # no DOnions
+ if type(rows) is da.Array and type(columns) is da.Array and type(values) is da.Array:
+ np_idtype_ = np_dtype(lookup_dtype(rows.dtype))
+ if nrows is not None and ncols is not None:
+ chunks = da.core.normalize_chunks(chunks, (nrows, ncols), dtype=np_idtype_)
+ else:
+ if nrows is None and rows.size == 0:
+ raise ValueError("No row indices provided. Unable to infer nrows.")
+
+ if ncols is None and columns.size == 0:
+ raise ValueError("No column indices provided. Unable to infer ncols.")
- chunks = da.core.normalize_chunks(chunks, (nrows, ncols), dtype=np_idtype_)
+ if not (rows.size == columns.size and columns.size == values.size):
+ raise ValueError(
+ "`rows` and `columns` and `values` lengths must match: "
+ f"{rows.size}, {columns.size}, {values.size}"
+ )
+
+ if rows.dtype.kind not in "ui":
+ raise ValueError(f"rows must be integers, not {rows.dtype}")
+
+ if columns.dtype.kind not in "ui":
+ raise ValueError(f"columns must be integers, not {columns.dtype}")
+
+ if nrows is None:
+ nrows = da.max(rows) + np.asarray(1, dtype=rows.dtype)
+
+ if ncols is None:
+ ncols = da.max(columns) + np.asarray(1, dtype=columns.dtype)
+
+ # use the inner value of `nrows` or `ncols` to create the new Matrix:
+ shape = (nrows, ncols)
+ _shape = [skip if is_dask_collection(x) else x for x in shape]
+ dasks = [x for x in shape if is_dask_collection(x)]
+ args = pack_args(rows, columns, values, *_shape)
+ kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
+ donion = DOnion.sprout(dasks, meta, Matrix.from_values, *args, **kwargs)
+ return Matrix(donion, meta=meta)
+
+ if rows.size > 0 and rows.size != values.size:
+ raise ValueError("`rows` and `values` lengths must match")
+
+ if columns.size > 0 and columns.size != values.size:
+ raise ValueError("`columns` and `values` lengths must match")
+
+ vdtype = dtype
+ np_vdtype_ = np_dtype(vdtype)
name_ = name
name = str(name) if name else ""
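A worked instance of the shape handling above, assuming `nrows` is lazy (a 0-d dask array) while `ncols=3` is known:

    shape = (nrows, 3)
    _shape = [skip if is_dask_collection(x) else x for x in shape]   # [skip, 3]
    dasks = [x for x in shape if is_dask_collection(x)]              # [nrows]
    # sprout later substitutes the computed value of nrows for the skip before
    # calling Matrix.from_values(rows, columns, values, <nrows>, 3, ...)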
@@ -169,16 +218,18 @@ def from_values(
*(values, "k"),
*(row_ranges, "i"),
*(col_ranges, "j"),
+ shape=(nrows, ncols),
dtype=np_idtype_,
meta=np.array([]),
)
- meta = InnerMatrix(gb.Matrix.new(vdtype))
+ meta = InnerMatrix(gb.Matrix.new(vdtype, nrows=nrows, ncols=ncols))
delayed = da.core.blockwise(
*(_from_values2D, "ij"),
*(fragments, "ijk"),
*(row_ranges, "i"),
*(col_ranges, "j"),
concatenate=False,
+ dup_op=dup_op,
gb_dtype=vdtype,
dtype=np_vdtype_,
meta=meta,
@@ -234,11 +285,15 @@ def __init__(self, delayed, meta=None, nvals=None):
# if it is already known at the time of initialization of
# this Matrix, otherwise its value should be left as None
# (the default)
- assert type(delayed) is da.Array
- assert delayed.ndim == 2
+ assert type(delayed) in {da.Array, DOnion}
self._delayed = delayed
- if meta is None:
- meta = gb.Matrix.new(delayed.dtype, *delayed.shape)
+ if type(delayed) is da.Array:
+ assert delayed.ndim == 2
+ if meta is None:
+ meta = gb.Matrix.new(delayed.dtype, *delayed.shape)
+ else:
+ if meta is None:
+ meta = gb.Matrix.new(delayed.dtype)
self._meta = meta
self._nrows = meta.nrows
self._ncols = meta.ncols
@@ -257,18 +312,26 @@ def V(self):
@property
def T(self):
+ if is_DOnion(self._delayed):
+ return TransposedMatrix(self._delayed.T)
return TransposedMatrix(self)
@property
def nrows(self):
+ if is_DOnion(self._delayed):
+ return self._delayed.nrows
return self._meta.nrows
@property
def ncols(self):
+ if is_DOnion(self._delayed):
+ return self._delayed.ncols
return self._meta.ncols
@property
def shape(self):
+ if is_DOnion(self._delayed):
+ return self._delayed.shape
return (self._meta.nrows, self._meta.ncols)
def resize(self, nrows, ncols, inplace=True, chunks="auto"):
@@ -414,6 +477,24 @@ def _diag_old(self, k=0, dtype=None, chunks="auto"):
return get_return_type(meta)(delayed, nvals=nvals)
def __getitem__(self, index):
+ if type(self._delayed) is DOnion:
+ from .scalar import Scalar, PythonScalar
+
+ if type(index) is tuple and len(index) == 2:
+ if isinstance(index[0], (Integral, Scalar, PythonScalar)):
+ if isinstance(index[1], (Integral, Scalar, PythonScalar)):
+ meta = gb.Scalar.new(self._meta.dtype)
+ else:
+ meta = gb.Vector.new(self._meta.dtype)
+ else:
+ if isinstance(index[1], (Integral, Scalar, PythonScalar)):
+ meta = gb.Vector.new(self._meta.dtype)
+ else:
+ meta = gb.Matrix.new(self._meta.dtype)
+ return self._delayed.getattr(meta, "__getitem__", index)
+ if type(index) is DOnion:
+ meta = self._meta
+ return DOnion.multiple_access(meta, self.__getitem__, index)
return AmbiguousAssignOrExtract(self, index)
def __delitem__(self, keys):
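The dispatch above chooses the meta from the index arity (sketch; `A` is a hypothetical DOnion-backed Matrix):

    A[1, 2]        # both integral   -> Scalar meta
    A[1, :]        # one integral    -> Vector meta
    A[:, [0, 2]]   # none integral   -> Matrix meta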
@@ -568,7 +649,18 @@ def build(
self.__init__(delayed)
def to_values(self, dtype=None, chunks="auto"):
+ dtype = lookup_dtype(self.dtype if dtype is None else dtype)
+ meta_i, _, meta_v = self._meta.to_values(dtype)
+
x = self._delayed
+ if type(x) is DOnion:
+ meta = np.array([])
+ result = x.getattr(meta, "to_values", dtype=dtype, chunks=chunks)
+ rows = result.getattr(meta_i, "__getitem__", 0)
+ columns = result.getattr(meta_i, "__getitem__", 1)
+ values = result.getattr(meta_v, "__getitem__", 2)
+ return rows, columns, values
+
# first find the number of values in each chunk and return
# them as a 2D numpy array whose shape is equal to x.numblocks
nvals_2D = da.core.blockwise(
@@ -577,61 +669,64 @@ def to_values(self, dtype=None, chunks="auto"):
adjust_chunks={"i": 1, "j": 1},
dtype=np.int64,
meta=np.array([[]]),
- ).compute()
+ )
# use the above array to determine the output tuples' array
- # bounds (`starts` and `stops`) for each chunk of this
+ # bounds (`starts` and `stops_`) for each chunk of this
# Matrix (self)
- nvals_1D = nvals_2D.flatten()
-
- stops = np.cumsum(nvals_1D)
- starts = np.roll(stops, 1)
+ stops_ = da.cumsum(nvals_2D) # BEWARE: this function rechunks!
+ starts = da.roll(stops_, 1)
+ starts = starts.copy() if starts.size == 1 else starts # bug!!
starts[0] = 0
- nnz = stops[-1]
-
- # convert numpy 2D-arrays (`starts` and `stops`) to 2D dask Arrays
- # of ranges. Don't forget to fix their `chunks` in oder to enable
- # them to align with x
- starts = starts.reshape(nvals_2D.shape)
- starts = da.from_array(starts, chunks=1, name="starts" + tokenize(starts))
- starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta)
-
- stops = stops.reshape(nvals_2D.shape)
- stops = da.from_array(stops, chunks=1, name="stops" + tokenize(stops))
- stops = da.core.Array(stops.dask, stops.name, x.chunks, stops.dtype, meta=x._meta)
-
- chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64)
- output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-")
+ nnz = stops_[-1]
+ starts = starts.reshape(nvals_2D.shape).rechunk(1)
+ stops_ = stops_.reshape(nvals_2D.shape).rechunk(1)
+
+ def _to_values(x, starts, stops_, dtype, chunks, nnz):
+ # the following changes the `.chunks` attribute of `starts` and `stops_` so that
+ # `blockwise()` can align them with `x`
+ starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta)
+ stops_ = da.core.Array(stops_.dask, stops_.name, x.chunks, stops_.dtype, meta=x._meta)
+
+ chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64)
+ output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-")
+
+ gb_dtype = lookup_dtype(dtype)
+ dtype_ = np_dtype(gb_dtype)
+ # Compute row/col offsets as dask arrays that can align with this
+ # Matrix's (self's) chunks to convert chunk row/col indices to
+ # full dask-grblas Matrix indices.
+ row_offsets = build_chunk_offsets_dask_array(x, 0, "row_offset-")
+ col_offsets = build_chunk_offsets_dask_array(x, 1, "col_offset-")
+ x = da.core.blockwise(
+ *(MatrixTupleExtractor, "ijk"),
+ *(output_ranges, "k"),
+ *(x, "ij"),
+ *(row_offsets, "i"),
+ *(col_offsets, "j"),
+ *(starts, "ij"),
+ *(stops_, "ij"),
+ gb_dtype=dtype,
+ dtype=dtype_,
+ meta=np.array([[[]]]),
+ )
+ x = da.reduction(
+ x, _identity, _flatten, axis=1, concatenate=False, dtype=dtype_, meta=np.array([[]])
+ )
+ return da.reduction(
+ x, _identity, _flatten, axis=0, concatenate=False, dtype=dtype_, meta=np.array([])
+ )
- dtype_ = np_dtype(self.dtype)
- # Compute row/col offsets as dask arrays that can align with this
- # Matrix's (self's) chunks to convert chunk row/col indices to
- # full dask-grblas Matrix indices.
- row_offsets = build_chunk_offsets_dask_array(x, 0, "row_offset-")
- col_offsets = build_chunk_offsets_dask_array(x, 1, "col_offset-")
- x = da.core.blockwise(
- *(MatrixTupleExtractor, "ijk"),
- *(output_ranges, "k"),
- *(x, "ij"),
- *(row_offsets, "i"),
- *(col_offsets, "j"),
- *(starts, "ij"),
- *(stops, "ij"),
- gb_dtype=dtype,
- dtype=dtype_,
- meta=np.array([[[]]]),
- )
- x = da.reduction(
- x, _identity, _flatten, axis=1, concatenate=False, dtype=dtype_, meta=np.array([[]])
- )
- x = da.reduction(
- x, _identity, _flatten, axis=0, concatenate=False, dtype=dtype_, meta=np.array([])
- )
+ # since the size of the output (rows, columns, values) depends on nnz, a delayed quantity,
+ # we need to return the output as DOnions (twice-delayed dask-arrays)
+ meta = np.array([])
+ rcv_donion = DOnion.sprout(nnz, meta, _to_values, x, starts, stops_, dtype, chunks)
- meta_i, meta_j, meta_v = self._meta.to_values(dtype)
- rows = da.map_blocks(_get_rows, x, dtype=meta_i.dtype, meta=meta_i)
- cols = da.map_blocks(_get_cols, x, dtype=meta_j.dtype, meta=meta_j)
- vals = da.map_blocks(_get_vals, x, dtype=meta_v.dtype, meta=meta_v)
+ dtype_i = np_dtype(lookup_dtype(meta_i.dtype))
+ rows = rcv_donion.deep_extract(meta_i, da.map_blocks, _get_rows, dtype=dtype_i, meta=meta_i)
+ cols = rcv_donion.deep_extract(meta_i, da.map_blocks, _get_cols, dtype=dtype_i, meta=meta_i)
+ dtype_v = np_dtype(lookup_dtype(meta_v.dtype))
+ vals = rcv_donion.deep_extract(meta_v, da.map_blocks, _get_vals, dtype=dtype_v, meta=meta_v)
return rows, cols, vals
def rechunk(self, inplace=False, chunks="auto"):
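The bookkeeping above boils down to this NumPy pattern: per-chunk nvals are accumulated into half-open output ranges, and nnz is the final stop:

    nvals_per_chunk = np.array([2, 0, 3])
    stops = np.cumsum(nvals_per_chunk)   # [2, 2, 5]
    starts = np.roll(stops, 1)
    starts[0] = 0                        # [0, 2, 2]
    # chunk k writes its tuples into output slots starts[k]:stops[k]; nnz == stops[-1] == 5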
@@ -717,6 +812,8 @@ def new(self, *, dtype=None, mask=None):
@property
def T(self):
+ if is_DOnion(self._matrix._delayed):
+ return Matrix(self._matrix._delayed.T)
return self._matrix
@property
@@ -724,15 +821,35 @@ def dtype(self):
return self._meta.dtype
def to_values(self, dtype=None, chunks="auto"):
- # TODO: make this lazy; can we do something smart with this?
+ if is_DOnion(self._matrix._delayed):
+ return self._matrix.to_values(dtype=dtype, chunks=chunks)
rows, cols, vals = self._matrix.to_values(dtype=dtype, chunks=chunks)
return cols, rows, vals
# Properties
- nrows = Matrix.nrows
- ncols = Matrix.ncols
- shape = Matrix.shape
- nvals = Matrix.nvals
+ @property
+ def nrows(self):
+ if is_DOnion(self._matrix._delayed):
+ return self._matrix._delayed.nrows
+ return self._meta.nrows
+
+ @property
+ def ncols(self):
+ if is_DOnion(self._matrix._delayed):
+ return self._matrix._delayed.ncols
+ return self._meta.ncols
+
+ @property
+ def shape(self):
+ if is_DOnion(self._matrix._delayed):
+ return self._matrix._delayed.shape
+ return self._meta.shape
+
+ @property
+ def nvals(self):
+ if is_DOnion(self._matrix._delayed):
+ return self._matrix._delayed.nvals
+ return self._matrix.nvals
# Delayed methods
ewise_add = Matrix.ewise_add
@@ -944,7 +1061,7 @@ def _new_Matrix_chunk(out_row_range, out_col_range, gb_dtype=None):
return InnerMatrix(gb.Matrix.new(gb_dtype, nrows=nrows, ncols=ncols))
-def _from_values2D(fragments, out_row_range, out_col_range, gb_dtype=None):
+def _from_values2D(fragments, out_row_range, out_col_range, dup_op=None, gb_dtype=None):
"""
Reassembles filtered tuples (row, col, val) in the list `fragments`
obtained from _pick2D() for the chunk within the given row and column
@@ -956,17 +1073,33 @@ def _from_values2D(fragments, out_row_range, out_col_range, gb_dtype=None):
vals = np.concatenate([vals for (_, _, vals) in fragments])
nrows = out_row_range[0].stop - out_row_range[0].start
ncols = out_col_range[0].stop - out_col_range[0].start
+ if rows.size == 0 or cols.size == 0:
+ return InnerMatrix(gb.Matrix.new(gb_dtype, nrows=nrows, ncols=ncols))
return InnerMatrix(
- gb.Matrix.from_values(rows, cols, vals, nrows=nrows, ncols=ncols, dtype=gb_dtype)
+ gb.Matrix.from_values(
+ rows, cols, vals, nrows=nrows, ncols=ncols, dup_op=dup_op, dtype=gb_dtype
+ )
)
-def _pick2D(rows, cols, values, row_range, col_range):
+def _pick2D(rows, cols, values, row_range, col_range, shape):
"""
Filters out only those tuples (row, col, val) that lie within
the given row and column ranges. Indices are also offset
appropriately.
"""
+ # validate indices:
+ rows = np.where(rows < 0, rows + shape[0], rows)
+ bad_indices = (rows < 0) | (shape[0] <= rows)
+ if np.any(bad_indices):
+ raise IndexOutOfBound
+
+ cols = np.where(cols < 0, cols + shape[1], cols)
+ bad_indices = (cols < 0) | (shape[1] <= cols)
+ if np.any(bad_indices):
+ raise IndexOutOfBound
+
+ # filter into chunk:
row_range, col_range = row_range[0], col_range[0]
rows_in = (row_range.start <= rows) & (rows < row_range.stop)
cols_in = (col_range.start <= cols) & (cols < col_range.stop)
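The validation above follows the usual NumPy convention for negative indices; for example, with shape[0] == 4:

    rows = np.array([0, -1, 2])
    rows = np.where(rows < 0, rows + 4, rows)      # [0, 3, 2]
    assert not np.any((rows < 0) | (4 <= rows))    # in bounds; 4 or -5 would raise IndexOutOfBound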
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 12d5470..fc8d536 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -1,10 +1,13 @@
import dask.array as da
import numpy as np
import grblas as gb
+
+from numbers import Integral
from dask.base import tokenize
from dask.delayed import Delayed, delayed
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
+from grblas.exceptions import IndexOutOfBound
from .base import BaseType, InnerBaseType, _nvals, DOnion, is_DOnion
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, Assigner
@@ -20,7 +23,6 @@
build_chunk_ranges_dask_array,
build_chunk_offsets_dask_array,
)
-from grblas.exceptions import IndexOutOfBound
class InnerVector(InnerBaseType):
@@ -115,7 +117,8 @@ def from_values(
):
if hasattr(values, "dtype"):
dtype = lookup_dtype(values.dtype if dtype is None else dtype)
- meta = gb.Vector.new(dtype)
+
+ meta = gb.Vector.new(dtype) if size is None else gb.Vector.new(dtype, size=size)
# check for any DOnions:
pkd_args = pack_args(indices, values)
@@ -124,7 +127,8 @@ def from_values(
donions += [True for (k, v) in pkd_kwargs.items() if is_DOnion(v)]
if np.any(donions):
# dive into DOnion(s):
- return DOnion.multiple_access(meta, Vector.from_values, *pkd_args, **pkd_kwargs)
+ out_donion = DOnion.multiple_access(meta, Vector.from_values, *pkd_args, **pkd_kwargs)
+ return Vector(out_donion, meta=meta)
# no DOnions
if type(indices) is da.Array and type(values) is da.Array:
@@ -134,22 +138,25 @@ def from_values(
else:
if indices.size == 0:
raise ValueError("No indices provided. Unable to infer size.")
+
+ if indices.dtype.kind not in "ui":
+ raise ValueError(f"indices must be integers, not {indices.dtype}")
+
# Note: uint + int = float which numpy cannot cast to uint. So we
# ensure the same dtype for each summand here:
size = da.max(indices) + np.asarray(1, dtype=indices.dtype)
# Here `size` is a dask 0d-array whose computed value is
# used to determine the size of the Vector to be returned.
# But since we do not want to compute anything just now,
- # we instead create a "DOnion" (dask onion) object
+ # we instead create a "DOnion" (dask onion) object. This
+ # effectively means that we will use the inner value of
+ # `size` to create the new Vector:
args = pack_args(indices, values)
kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
donion = DOnion.sprout(size, meta, Vector.from_values, *args, **kwargs)
return Vector(donion, meta=meta)
if indices.size > 0:
- if indices.dtype.kind not in "ui":
- raise ValueError(f"indices must be integers, not {indices.dtype}")
-
if indices.size != values.size:
raise ValueError("`indices` and `values` lengths must match")
@@ -169,7 +176,7 @@ def from_values(
dtype=np_vdtype_,
meta=np.array([]),
)
- meta = InnerVector(gb.Vector.new(vdtype))
+ meta = InnerVector(gb.Vector.new(vdtype, size=size))
delayed = da.core.blockwise(
*(_from_values1D, "i"),
*(fragments, "ij"),
@@ -359,6 +366,17 @@ def rechunk(self, inplace=False, chunks="auto"):
# return new
def __getitem__(self, index):
+ if type(self._delayed) is DOnion:
+ from .scalar import Scalar, PythonScalar
+
+ if isinstance(index, (Integral, Scalar, PythonScalar)):
+ meta = gb.Scalar.new(self._meta.dtype)
+ else:
+ meta = gb.Vector.new(self._meta.dtype)
+ return self._delayed.getattr(meta, "__getitem__", index)
+ if type(index) is DOnion:
+ meta = self._meta
+ return DOnion.multiple_access(meta, self.__getitem__, index)
return AmbiguousAssignOrExtract(self, index)
def __delitem__(self, keys):
@@ -544,7 +562,7 @@ def to_values(self, dtype=None, chunks="auto"):
x = self._delayed
if type(x) is DOnion:
meta = np.array([])
- result = x.getattr(meta, "to_values", dtype=dtype)
+ result = x.getattr(meta, "to_values", dtype=dtype, chunks=chunks)
indices = result.getattr(meta_i, "__getitem__", 0)
values = result.getattr(meta_v, "__getitem__", 1)
return indices, values
@@ -555,15 +573,19 @@ def to_values(self, dtype=None, chunks="auto"):
)
# accumulate dask array to get index-ranges of the output (indices, values)
- stops = da.cumsum(nvals_array)
- starts = da.roll(stops, 1)
+ stops_ = da.cumsum(nvals_array) # BEWARE: this function rechunks!
+ starts = da.roll(stops_, 1)
starts = starts.copy() if starts.size == 1 else starts # bug!!
starts[0] = 0
- nnz = stops[-1]
+ nnz = stops_[-1]
+ starts = starts.rechunk(1)
+ stops_ = stops_.rechunk(1)
- def _to_values(x, starts, stops, dtype, chunks, nnz):
+ def _to_values(x, starts, stops_, dtype, chunks, nnz):
+ # the following changes the `.chunks` attribute of `starts` and `stops_` so that
+ # `blockwise()` can align them with `x`
starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta)
- stops = da.core.Array(stops.dask, stops.name, x.chunks, stops.dtype, meta=x._meta)
+ stops_ = da.core.Array(stops_.dask, stops_.name, x.chunks, stops_.dtype, meta=x._meta)
chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64)
output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-")
@@ -577,7 +599,7 @@ def _to_values(x, starts, stops, dtype, chunks, nnz):
*(x, "i"),
*(index_offsets, "i"),
*(starts, "i"),
- *(stops, "i"),
+ *(stops_, "i"),
gb_dtype=gb_dtype,
dtype=dtype_,
meta=np.array([[]]),
@@ -587,9 +609,9 @@ def _to_values(x, starts, stops, dtype, chunks, nnz):
)
# since the size of the output (indices, values) depends on nnz, a delayed quantity,
- # we need to return (indices, values) as DOnions (twice-delayed dask-array)
+ # we need to return (indices, values) as DOnions (twice-delayed dask-arrays)
meta = np.array([])
- iv_donion = DOnion.sprout(nnz, meta, _to_values, x, starts, stops, dtype, chunks)
+ iv_donion = DOnion.sprout(nnz, meta, _to_values, x, starts, stops_, dtype, chunks)
dtype_i = np_dtype(lookup_dtype(meta_i.dtype))
indices = iv_donion.deep_extract(
@@ -796,7 +818,7 @@ def _from_values1D(fragments, index_range, dup_op=None, gb_dtype=None):
def _pick1D(indices, values, index_range, size):
- # validate indices
+ # validate indices:
indices = np.where(indices < 0, indices + size, indices)
bad_indices = (indices < 0) | (size <= indices)
if np.any(bad_indices):
diff --git a/tests/from_grblas2/test_matrix.py b/tests/from_grblas2/test_matrix.py
index 9d40744..66f21fe 100644
--- a/tests/from_grblas2/test_matrix.py
+++ b/tests/from_grblas2/test_matrix.py
@@ -4,6 +4,7 @@
import sys
import weakref
+import dask.array as da
import dask_grblas
import grblas
import numpy as np
@@ -139,6 +140,78 @@ def test_from_values():
Matrix.from_values([0], [1, 2], [0])
+def test_from_values_dask():
+ rows = da.from_array(np.array([0, 1, 3]))
+ cols = da.from_array(np.array([1, 1, 2]))
+ vals = da.from_array(np.array([True, False, True]))
+ C = Matrix.from_values(rows, cols, vals)
+ assert C.nrows == 4
+ assert C.ncols == 3
+ assert C.nvals == 3
+ assert C.dtype == bool
+
+ vals = da.from_array(np.array([12.3, 12.4, 12.5]))
+ C2 = Matrix.from_values(rows, cols, vals, nrows=17, ncols=3)
+ assert C2.nrows == 17
+ assert C2.ncols == 3
+ assert C2.nvals == 3
+ assert C2.dtype == float
+
+ rows = da.from_array(np.array([0, 1, 1]))
+ cols = da.from_array(np.array([2, 1, 1]))
+ vals = da.from_array(np.array([1, 2, 3], dtype=np.int64))
+ C3 = Matrix.from_values(rows, cols, vals, nrows=10, dup_op=binary.times)
+ assert C3.nrows == 10
+ assert C3.ncols == 3
+ assert C3.nvals == 2 # duplicates were combined
+ assert C3.dtype == int
+ assert C3[1, 1].value == 6 # 2*3
+ C3monoid = Matrix.from_values(rows, cols, vals, nrows=10, dup_op=monoid.times)
+ assert C3.isequal(C3monoid)
+
+ vals = da.from_array(np.array([True, True, True]))
+ with pytest.raises(ValueError, match="Duplicate indices found"):
+ # Duplicate indices requires a dup_op
+ Matrix.from_values(rows, cols, vals).compute()
+
+ rows = da.from_array(np.array([0, 1, 3]))
+ cols = da.from_array(np.array([1, 1, 2]))
+ vals = da.from_array(np.array([12.3, 12.4, 12.5]))
+ with pytest.raises(IndexOutOfBound):
+ # Specified ncols can't hold provided indexes
+ Matrix.from_values(rows, cols, vals, nrows=17, ncols=2).compute()
+
+ empty_da = da.from_array(np.array([]))
+ with pytest.raises(ValueError, match="No row indices provided. Unable to infer nrows."):
+ Matrix.from_values(empty_da, empty_da, empty_da)
+
+ # Changed: Assume empty value is float64 (like numpy)
+ # with pytest.raises(ValueError, match="No vals provided. Unable to determine type"):
+ empty1 = Matrix.from_values(empty_da, empty_da, empty_da, nrows=3, ncols=4)
+ assert empty1.dtype == dtypes.FP64
+ assert empty1.nrows == 3
+ assert empty1.ncols == 4
+ assert empty1.nvals == 0
+
+ with pytest.raises(ValueError, match="Unable to infer"):
+ Matrix.from_values(empty_da, empty_da, empty_da, dtype=dtypes.INT64)
+
+ zero_da = da.from_array(np.array([0]))
+ with pytest.raises(ValueError, match="Unable to infer"):
+ # could also raise b/c rows and columns are different sizes
+ Matrix.from_values(zero_da, empty_da, zero_da, dtype=dtypes.INT64)
+
+ C4 = Matrix.from_values(empty_da, empty_da, empty_da, nrows=3, ncols=4, dtype=dtypes.INT64)
+ C5 = Matrix.new(dtypes.INT64, nrows=3, ncols=4)
+ assert C4.isequal(C5, check_dtype=True)
+
+ cols = da.from_array(np.array([1, 2]))
+ with pytest.raises(
+ ValueError, match="`rows` and `columns` and `values` lengths must match: 1, 2, 1"
+ ):
+ Matrix.from_values(zero_da, cols, zero_da)
+
+
@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_from_values_scalar():
C = Matrix.from_values([0, 1, 3], [1, 1, 2], 7)
@@ -316,9 +389,9 @@ def test_extract_values(A, A_chunks):
assert cols.dtype == np.uint64
assert vals.dtype == np.int64
Trows, Tcols, Tvals = A.T.to_values(dtype=float)
- np.testing.assert_array_equal(rows, Tcols)
- np.testing.assert_array_equal(cols, Trows)
- np.testing.assert_array_equal(vals, Tvals)
+ np.testing.assert_array_equal(rows.compute(), Tcols.compute())
+ np.testing.assert_array_equal(cols.compute(), Trows.compute())
+ np.testing.assert_array_equal(vals.compute(), Tvals.compute())
assert Trows.dtype == np.uint64
assert Tcols.dtype == np.uint64
assert Tvals.dtype == np.float64
diff --git a/tests/from_grblas2/test_vector.py b/tests/from_grblas2/test_vector.py
index 9f7e9a0..72338b9 100644
--- a/tests/from_grblas2/test_vector.py
+++ b/tests/from_grblas2/test_vector.py
@@ -180,14 +180,17 @@ def test_from_values_dask():
def test_from_values_DOnion(v):
indices = da.from_array(np.array([0, 1, 3]))
values = da.from_array(np.array([True, False, True]))
+
# The following creates a Vector `u` with `type(u._delayed) == DOnion`
# because keyword argument `size` has not been specified:
u = Vector.from_values(indices, values)
assert u.size == 4
assert u.nvals == 3
assert u.dtype == bool
+
# The output of `.to_values()` is always a tuple of DOnions
indices, values = u.to_values()
+
# The following creates a Vector `v` with `type(v._delayed) == DOnion`
# because arguments `indices` and `values` are DOnions:
v = Vector.from_values(indices, values)
@@ -195,24 +198,33 @@ def test_from_values_DOnion(v):
assert v.nvals == 3
assert v.dtype == bool
values = da.from_array(np.array([12.3, 12.4, 12.5]))
+
# The following creates a Vector `u2` with `type(u2._delayed) == DOnion`
# because argument `indices` is a DOnion:
u2 = Vector.from_values(indices, values, size=17)
assert u2.size == 17
assert u2.nvals == 3
assert u2.dtype == float
+
indices = da.from_array(np.array([0, 1, 1]))
+ indices_ = da.from_array(np.array([1, 2, 3]))
+ i0 = Vector.from_values(indices_, indices)
+ _, indices = i0.to_values()
values = da.from_array(np.array([1, 2, 3], dtype=np.int64))
+
+ # The following creates a Vector `u3` with `type(u3._delayed) == DOnion`
+ # because arguments `indices` and `values` are DOnions:
u3 = Vector.from_values(indices, values, size=10, dup_op=binary.times)
assert u3.size == 10
assert u3.nvals == 2 # duplicates were combined
assert u3.dtype == int
assert u3[1].value == 6 # 2*3
+
values = da.from_array(np.array([True, True, True]))
with pytest.raises(ValueError, match="Duplicate indices found"):
# Duplicate indices requires a dup_op
Vector.from_values(indices, values).compute()
- empty_da = da.from_array(np.array([]))
+ _, empty_da = Vector.new(float).to_values()
with pytest.raises(ValueError, match="No indices provided. Unable to infer size."):
Vector.from_values(empty_da, empty_da).compute()
@@ -224,7 +236,7 @@ def test_from_values_DOnion(v):
assert w.dtype == dtypes.FP64
with pytest.raises(ValueError, match="No indices provided. Unable to infer size"):
- Vector.from_values(empty_da, empty_da, dtype=dtypes.INT64)
+ Vector.from_values(empty_da, empty_da, dtype=dtypes.INT64).compute()
u4 = Vector.from_values(empty_da, empty_da, size=10, dtype=dtypes.INT64)
u5 = Vector.new(dtypes.INT64, size=10)
assert u4.isequal(u5, check_dtype=True)
@@ -232,11 +244,15 @@ def test_from_values_DOnion(v):
# we check index dtype if given dask array
indices = da.from_array(np.array([1.2, 3.4]))
values = da.from_array(np.array([1, 2]))
+ i0 = Vector.from_values(values, indices)
+ _, indices = i0.to_values()
with pytest.raises(ValueError, match="indices must be integers, not float64"):
Vector.from_values(indices, values).compute()
# mis-matched sizes
indices = da.from_array(np.array([0]))
+ i0 = Vector.from_values(indices, indices)
+ indices, _ = i0.to_values()
with pytest.raises(ValueError, match="`indices` and `values` lengths must match"):
Vector.from_values(indices, values).compute()
From 120b8f2783f6157c3b600318b39d17e2f09b1a8d Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Sat, 5 Mar 2022 15:09:17 +0100
Subject: [PATCH 11/18] more DOnion support
---
dask_grblas/_ss/matrix.py | 24 +
dask_grblas/base.py | 148 ++++-
dask_grblas/expr.py | 102 ++--
dask_grblas/mask.py | 4 +-
dask_grblas/matrix.py | 511 ++++++++++++-----
dask_grblas/scalar.py | 3 +-
tests/from_grblas2/test_matrix.py | 909 +++++++++++++++++-------------
7 files changed, 1116 insertions(+), 585 deletions(-)
diff --git a/dask_grblas/_ss/matrix.py b/dask_grblas/_ss/matrix.py
index 8aad87a..3c63154 100644
--- a/dask_grblas/_ss/matrix.py
+++ b/dask_grblas/_ss/matrix.py
@@ -30,3 +30,27 @@ def diag(self, vector, k=0, chunks="auto", dtype=None):
vector = self._parent._expect_type(vector, dgb.Vector, within="ss.diag", argname="vector")
rv = vector._diag(k, chunks=chunks, dtype=dtype)
self._parent.__init__(rv._delayed, nvals=rv._nvals)
+
+ def build_scalar(
+ self,
+ rows,
+ columns,
+ values,
+ *,
+ dup_op=None,
+ clear=False,
+ nrows=None,
+ ncols=None,
+ chunks=None,
+ in_DOnion=False, # not part of the API
+ ):
+ self._parent.build(
+ rows,
+ columns,
+ values,
+ dup_op=dup_op,
+ clear=clear,
+ nrows=nrows,
+ ncols=ncols,
+ chunks=chunks,
+ )
\ No newline at end of file
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 5e50550..cef30fc 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -1,5 +1,6 @@
from numbers import Number
from collections.abc import Iterable
+from tlz import compose
from functools import partial
import dask.array as da
import grblas as gb
@@ -108,7 +109,30 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
)
return PythonScalar(delayed)
+ def _clear(self):
+ delayed = self._optional_dup()
+ # for a function like this, what's the difference between `map_blocks` and `elemwise`?
+ if self.ndim == 0:
+ return self.__class__(
+ delayed.map_blocks(
+ _clear,
+ dtype=np_dtype(self.dtype),
+ )
+ )
+ else:
+ return self.__class__(
+ delayed.map_blocks(
+ _clear,
+ dtype=np_dtype(self.dtype),
+ ),
+ nvals=0,
+ )
+
def clear(self):
+ if is_DOnion(self._delayed):
+ self.__init__(self._delayed.getattr(self._meta, '_clear'), meta=self._meta, nvals=0)
+ return
+
# Should we copy and mutate or simply create new chunks?
delayed = self._optional_dup()
# for a function like this, what's the difference between `map_blocks` and `elemwise`?
@@ -127,6 +151,19 @@ def clear(self):
)
def dup(self, dtype=None, *, mask=None, name=None):
+ if is_DOnion(self._delayed):
+ mask_meta = mask._meta if mask else None
+ meta = self._meta.dup(dtype=dtype, mask=mask_meta, name=name)
+ donion = DOnion.multiple_access(
+ meta, self.__class__.dup, self._delayed, dtype=dtype, mask=mask, name=name
+ )
+ return self.__class__(donion, meta=meta)
+
+ if mask and is_DOnion(mask.mask):
+ meta = self._meta.dup(dtype=dtype, name=name)
+ donion = DOnion.multiple_access(meta, self.dup, dtype=dtype, mask=mask.mask, name=name)
+ return self.__class__(donion, meta=meta)
+
if mask is not None:
if not isinstance(mask, Mask):
self._meta.dup(dtype=dtype, mask=mask, name=name) # should raise
@@ -259,7 +296,29 @@ def _name_html(self):
return self.name
return f"{split[0]}{split[1]}"
- def update(self, expr):
+ def update(self, expr, in_DOnion=False):
+ typ = type(expr)
+ if (
+ is_DOnion(self._delayed)
+ or typ is GbDelayed and is_DOnion(expr.parent)
+ or typ is AmbiguousAssignOrExtract and is_DOnion(expr._donion)
+ or typ is type(self) and is_DOnion(expr._delayed)
+ ):
+ self_ = self.__class__(self._delayed, meta=self._meta)
+ self_ = self_._delayed if is_DOnion(self_._delayed) else self_
+ expr_ = expr
+ if typ is GbDelayed and is_DOnion(expr.parent):
+ expr_ = expr.parent
+ elif typ is AmbiguousAssignOrExtract and is_DOnion(expr._donion):
+ expr_ = expr._donion
+ elif typ is type(self) and is_DOnion(expr._delayed):
+ expr_ = expr._delayed
+ donion = DOnion.multiple_access(
+ self._meta, BaseType.update, self_, expr_, in_DOnion=True
+ )
+ self.__init__(donion, self._meta)
+ return
+
if isinstance(expr, Number):
if self.ndim == 2:
raise TypeError(
@@ -271,10 +330,12 @@ def update(self, expr):
"\n\n M[:, :] = s"
)
Updater(self)[...] << expr
+ if in_DOnion:
+ return self.__class__(self._delayed, meta=self._meta)
return
+
self._meta.update(expr._meta)
self._meta.clear()
- typ = type(expr)
if typ is AmbiguousAssignOrExtract:
# Extract (w << v[index])
# Is it safe/reasonable to simply replace `_delayed`?
@@ -295,12 +356,46 @@ def update(self, expr):
else:
# Anything else we need to handle?
raise TypeError()
+ if in_DOnion:
+ return self.__class__(self._delayed, meta=self._meta)
+
+ def _update(self, expr, *, mask=None, accum=None, replace=None, in_DOnion=False):
+ typ = type(expr)
+ if (
+ is_DOnion(self._delayed)
+ or mask is not None and is_DOnion(mask.mask)
+ or typ is GbDelayed and is_DOnion(expr.parent)
+ or typ is AmbiguousAssignOrExtract and is_DOnion(expr._donion)
+ or typ is type(self) and is_DOnion(expr._delayed)
+ ):
+ self_ = self._delayed if is_DOnion(self._delayed) else self
+ mask_ = mask.mask if mask is not None and is_DOnion(mask.mask) else mask
+ expr_ = expr
+ if typ is GbDelayed and is_DOnion(expr.parent):
+ expr_ = expr.parent
+ elif typ is AmbiguousAssignOrExtract and is_DOnion(expr._donion):
+ expr_ = expr._donion
+ elif typ is type(self) and is_DOnion(expr._delayed):
+ expr_ = expr._delayed
+
+ donion = DOnion.multiple_access(
+ self._meta,
+ BaseType._update,
+ self_,
+ expr_,
+ mask=mask_,
+ accum=accum,
+ replace=replace,
+ in_DOnion=True,
+ )
+ self.__init__(donion, meta=self._meta)
+ return
- def _update(self, expr, *, mask=None, accum=None, replace=None):
if mask is None and accum is None:
self.update(expr)
+ if in_DOnion:
+ return self
return
- typ = type(expr)
if typ is AmbiguousAssignOrExtract:
# Extract (w(mask=mask, accum=accum) << v[index])
delayed = self._optional_dup()
@@ -353,6 +448,9 @@ def _update(self, expr, *, mask=None, accum=None, replace=None):
else:
raise NotImplementedError(f"{typ}")
+ if in_DOnion:
+ return self
+
def wait(self):
# TODO: What should this do?
self._meta.wait()
@@ -371,6 +469,19 @@ def visualize(self, *args, **kwargs):
return self._delayed.visualize(*args, **kwargs)
+class Box:
+ """
+ An arbitrary wrapper to wrap around the inner values of
+ an Array object to prevent dask from post-processing the
+ Array at the end of compute()
+ """
+ def __init__(self, content):
+ self.content = content
+
+ def __getattr__(self, item):
+ return getattr(self.content, item)
+
+
const_obj = object()
_const0_DOnion = {"dtype": np.object_, "meta": np.array(const_obj, dtype=np.object_)}
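A small sketch of what `Box` buys us: dask post-processes array-typed block results at the end of compute(), so the inner value is wrapped, attribute access is forwarded, and the content is unwrapped again after compute():

    boxed = Box(np.arange(3))
    boxed.dtype      # forwarded to the wrapped object via __getattr__
    boxed.content    # the original, unwrapped value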
@@ -446,10 +557,14 @@ def compute(self, *args, **kwargs):
value = self.kernel.compute(*args, **kwargs)
while hasattr(value, "compute"):
value = value.compute(*args, **kwargs)
+ if type(value) is Box:
+ value = value.content
return value
def compute_once(self, *args, **kwargs):
value = self.kernel.compute(*args, **kwargs)
+ if type(value) is Box:
+ value = value.content
return value
def persist(self, *args, **kwargs):
@@ -493,27 +608,44 @@ def multiple_access(cls, out_meta, func, *args, **kwargs):
def deep_extract(self, out_meta, func, *args, **kwargs):
func = flexible_partial(func, *args, **kwargs)
+ if not isinstance(out_meta, (gb.base.BaseType, gb.mask.Mask, gb.matrix.TransposedMatrix)):
+ func = compose(Box, func)
kernel = self.kernel.map_blocks(func, **_const0_DOnion)
return DOnion(kernel, meta=out_meta)
+ def __call__(self, *args, **kwargs):
+ meta = self._meta(*args, **kwargs)
+ return self.getattr(meta, '__call__', *args, **kwargs)
+
def __getattr__(self, item):
# TODO: how to compute meta of attribute?!!!
meta = getattr(self._meta, item)
_getattr = flexible_partial(getattr, skip, item)
return self.deep_extract(meta, _getattr)
- def getattr(self, meta, name, *args, **kwargs):
- _getattr = flexible_partial(DOnion._getattr, skip, name, *args, **kwargs)
+ def getattr(self, meta, attr_name, *args, **kwargs):
+ _getattr = flexible_partial(DOnion._getattr, skip, attr_name, *args, **kwargs)
return self.deep_extract(meta, _getattr)
@classmethod
- def _getattr(cls, x, name, *args, **kwargs):
- return getattr(x, name)(*args, **kwargs)
+ def _getattr(cls, x, attr_name, *args, **kwargs):
+ return getattr(x, attr_name)(*args, **kwargs)
is_DOnion = partial(is_type, DOnion)
+def like_DOnion(what):
+ return (
+ is_DOnion(what)
+ or isinstance(what, BaseType) and is_DOnion(what._delayed)
+ or hasattr(what, '_matrix') and is_DOnion(what._matrix)
+ or hasattr(what, 'parent') and is_DOnion(what.parent)
+ or hasattr(what, 'mask') and is_DOnion(what.mask)
+ or hasattr(what, '_donion') and is_DOnion(what._donion)
+ )
+
+
# Dask task functions
def _clear(x):
x.value.clear()
diff --git a/dask_grblas/expr.py b/dask_grblas/expr.py
index e226ef0..6636cda 100644
--- a/dask_grblas/expr.py
+++ b/dask_grblas/expr.py
@@ -9,7 +9,7 @@
from grblas.exceptions import DimensionMismatch
from dask.base import tokenize
-from .base import BaseType, InnerBaseType, _check_mask
+from .base import BaseType, InnerBaseType, _check_mask, DOnion, is_DOnion, like_DOnion
from .mask import Mask
from .utils import (
get_grblas_type,
@@ -231,6 +231,16 @@ def _aggregate(
def new(self, dtype=None, *, mask=None, name=None):
if mask is not None:
_check_mask(mask)
+
+ if is_DOnion(self.parent) or mask is not None and is_DOnion(mask.mask):
+ meta = self._meta.new(dtype=dtype)
+ ret_type = get_return_type(meta)
+ donion = DOnion.multiple_access(
+ meta, self.__class__.new, self.parent, dtype=dtype, mask=mask, name=name
+ )
+ return ret_type(donion, meta=meta)
+
+ if mask is not None:
meta = self._meta.new(dtype=dtype, mask=mask._meta)
delayed_mask = mask.mask._delayed
grblas_mask_type = get_grblas_type(mask)
@@ -420,12 +430,12 @@ def _new_matrix(self, dtype, nrows=0, ncols=0, *, name=None):
class IndexerResolver:
- def __init__(self, obj, indices):
+ def __init__(self, obj, indices, check_shape=True):
self.obj = obj
if indices is Ellipsis:
from .vector import Vector
- if type(obj) is Vector:
+ if type(obj) in {Vector, gb.Vector}:
normalized = slice(None).indices(obj._size)
self.indices = [AxisIndex(obj._size, slice(*normalized))]
else:
@@ -436,7 +446,7 @@ def __init__(self, obj, indices):
AxisIndex(obj._ncols, slice(*normalized1)),
]
else:
- self.indices = self.parse_indices(indices, obj.shape)
+ self.indices = self.parse_indices(indices, obj.shape, check_shape)
@property
def is_single_element(self):
@@ -445,7 +455,7 @@ def is_single_element(self):
return False
return True
- def parse_indices(self, indices, shape):
+ def parse_indices(self, indices, shape, check_shape=True):
"""
Returns
[(rows, rowsize), (cols, colsize)] for Matrix
@@ -469,20 +479,22 @@ def parse_indices(self, indices, shape):
raise TypeError(
f"Index in position {i} cannot be a tuple; must use slice or list or int"
)
- out.append(self.parse_index(idx, typ, shape[i]))
+ out.append(self.parse_index(idx, typ, shape[i], check_shape))
return out
- def parse_index(self, index, typ, size):
+ def parse_index(self, index, typ, size, check_shape=True):
if np.issubdtype(typ, np.integer):
if index >= size:
- raise IndexError(f"Index out of range: index={index}, size={size}")
+ if check_shape:
+ raise IndexError(f"Index out of range: index={index}, size={size}")
if index < 0:
index += size
if index < 0:
- raise IndexError(f"Index out of range: index={index - size}, size={size}")
+ if check_shape:
+ raise IndexError(f"Index out of range: index={index - size}, size={size}")
return AxisIndex(None, IndexerResolver.normalize_index(index, size))
if typ is list:
- index = [IndexerResolver.normalize_index(i, size) for i in index]
+ index = [IndexerResolver.normalize_index(i, size, check_shape) for i in index]
return AxisIndex(len(index), index)
elif typ is slice:
normalized = index.indices(size)
@@ -530,7 +542,7 @@ def parse_index(self, index, typ, size):
f"`x(mask={index.name}) << value`."
)
raise TypeError(f"Invalid type for index: {typ}; unable to convert to list")
- index = [IndexerResolver.normalize_index(i, size) for i in index]
+ index = [IndexerResolver.normalize_index(i, size, check_shape) for i in index]
return AxisIndex(len(index), index)
def get_index(self, dim):
@@ -548,18 +560,20 @@ def validate_types(cls, indices):
return
@classmethod
- def normalize_index(cls, index, size):
+ def normalize_index(cls, index, size, check_size=True):
if type(index) is get_return_type(gb.Scalar.new(int)):
# This branch needs a second look: How to work with the lazy index?
index = index.value.compute()
if not isinstance(index, Integral):
raise TypeError("An integer is required for indexing")
if index >= size:
- raise IndexError(f"Index out of range: index={index}, size={size}")
+ if check_size:
+ raise IndexError(f"Index out of range: index={index}, size={size}")
if index < 0:
index += size
if index < 0:
- raise IndexError(f"Index out of range: index={index - size}, size={size}")
+ if check_size:
+ raise IndexError(f"Index out of range: index={index - size}, size={size}")
return int(index)
@@ -637,12 +651,17 @@ def update(self, delayed):
if self.mask is None and self.accum is None:
return self.parent.update(delayed)
- self.parent._meta._update(
- get_meta(delayed),
- mask=get_meta(self.mask),
- accum=self.accum,
- replace=self.replace,
- )
+
+ if like_DOnion(self.parent) or like_DOnion(delayed):
+ self.parent._meta = delayed._meta.new()
+ else:
+ self.parent._meta._update(
+ get_meta(delayed),
+ mask=get_meta(self.mask),
+ accum=self.accum,
+ replace=self.replace,
+ )
+
if self.parent._meta._is_scalar:
self.parent._update(delayed, accum=self.accum)
else:
@@ -1246,23 +1265,32 @@ def _defrag_to_index_chunk(*args, x_chunks, dtype=None):
class AmbiguousAssignOrExtract:
- def __init__(self, parent, index):
- self.resolved_indices = IndexerResolver(parent, index)
+ def __init__(self, parent, index, self_donion=None, meta=None):
self.parent = parent
self.index = index
- # IndexerResolver.validate_types(self.index)
- self._meta = parent._meta[index]
- # infix expression requirements:
- shape = tuple(i.size for i in self.resolved_indices.indices if i.size)
- self.ndim = len(shape)
- self.output_type = _get_grblas_type_with_ndims(self.ndim)
- if self.ndim == 1:
- self._size = shape[0]
- elif self.ndim == 2:
- self._nrows = shape[0]
- self._ncols = shape[1]
+ self._donion = self_donion
+ self._meta = parent._meta[index] if meta is None else meta
+ if (
+ not (hasattr(parent, '_delayed') and is_DOnion(parent._delayed))
+ and not (hasattr(parent, '_matrix') and is_DOnion(parent._matrix))
+ ):
+ self.resolved_indices = IndexerResolver(parent, index)
+ # infix expression requirements:
+ shape = tuple(i.size for i in self.resolved_indices.indices if i.size)
+ self.ndim = len(shape)
+ self.output_type = _get_grblas_type_with_ndims(self.ndim)
+ if self.ndim == 1:
+ self._size = shape[0]
+ elif self.ndim == 2:
+ self._nrows = shape[0]
+ self._ncols = shape[1]
def new(self, *, dtype=None, mask=None, input_mask=None, name=None):
+ if self._donion is not None:
+ return get_return_type(self._meta.new(dtype=dtype))(
+ self._donion.new(dtype=dtype, mask=mask, input_mask=input_mask, name=name)
+ )
+
parent = self.parent
xt = False # xt = parent._is_transposed
dxn = 1 # dxn = -1 if xt else 1
@@ -1447,6 +1475,10 @@ def __call__(self, *args, **kwargs):
return Assigner(self.parent(*args, **kwargs), self.index, subassign=True)
def update(self, obj):
+ if is_DOnion(self.parent._delayed):
+ self.parent.__setitem__(self.index, obj)
+ return
+
if getattr(self.parent, "_is_transposed", False):
raise TypeError("'TransposedMatrix' object does not support item assignment")
Assigner(Updater(self.parent), self.index).update(obj)
@@ -1456,6 +1488,10 @@ def __lshift__(self, rhs):
@property
def value(self):
+ if self._donion is not None:
+ ret_type = get_return_type(self._meta.new())
+ return ret_type(self._donion.new()).value
+
self._meta.value
return self.new().value
diff --git a/dask_grblas/mask.py b/dask_grblas/mask.py
index 314781a..51c4e3b 100644
--- a/dask_grblas/mask.py
+++ b/dask_grblas/mask.py
@@ -7,11 +7,13 @@ class Mask:
value = False
def __init__(self, mask):
- from . import matrix, vector
+ from . import matrix, vector, base
assert type(mask) in {vector.Vector, matrix.Matrix}
self.mask = mask
self._meta = get_grblas_type(self)(mask._meta)
+ if base.is_DOnion(mask._delayed):
+ self.mask = mask._delayed.deep_extract(self._meta, self.__class__)
class StructuralMask(Mask):
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index deaed0b..9740776 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -2,15 +2,15 @@
import numpy as np
import grblas as gb
-from numbers import Integral
+from numbers import Integral, Number
from dask.base import tokenize, is_dask_collection
from dask.delayed import Delayed, delayed
from dask.highlevelgraph import HighLevelGraph
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
-from grblas.exceptions import IndexOutOfBound
+from grblas.exceptions import IndexOutOfBound, EmptyObject, DimensionMismatch
-from .base import BaseType, InnerBaseType, DOnion, is_DOnion, skip
+from .base import BaseType, InnerBaseType, DOnion, is_DOnion, like_DOnion, skip
from .base import _nvals as _nvals_in_chunk
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater
from .mask import StructuralMask, ValueMask
@@ -27,6 +27,7 @@
build_chunk_ranges_dask_array,
wrap_dataframe,
)
+from builtins import isinstance
class InnerMatrix(InnerBaseType):
@@ -131,19 +132,12 @@ def from_values(
chunks="auto",
name=None,
):
- if hasattr(values, "dtype"):
+ if isinstance(values, Number):
+ dtype = lookup_dtype(type(values) if dtype is None else dtype)
+ elif hasattr(values, "dtype"):
dtype = lookup_dtype(values.dtype if dtype is None else dtype)
- if nrows is None:
- if ncols is None:
- meta = gb.Matrix.new(dtype)
- else:
- meta = gb.Matrix.new(dtype, ncols=ncols)
- else:
- if ncols is None:
- meta = gb.Matrix.new(dtype, nrows=nrows)
- else:
- meta = gb.Matrix.new(dtype, nrows=nrows, ncols=ncols)
+ meta = gb.Matrix.new(dtype, nrows=nrows or 0, ncols=ncols or 0)
# check for any DOnions:
pkd_args = pack_args(rows, columns, values)
@@ -158,7 +152,9 @@ def from_values(
return Matrix(out_donion, meta=meta)
# no DOnions
- if type(rows) is da.Array and type(columns) is da.Array and type(values) is da.Array:
+ if type(rows) is da.Array and type(columns) is da.Array and (
+ type(values) is da.Array or isinstance(values, Number)
+ ):
np_idtype_ = np_dtype(lookup_dtype(rows.dtype))
if nrows is not None and ncols is not None:
chunks = da.core.normalize_chunks(chunks, (nrows, ncols), dtype=np_idtype_)
@@ -169,11 +165,17 @@ def from_values(
if ncols is None and columns.size == 0:
raise ValueError("No column indices provided. Unable to infer ncols.")
- if not (rows.size == columns.size and columns.size == values.size):
+ if type(values) is da.Array and (
+ rows.size != columns.size or columns.size != values.size
+ ):
raise ValueError(
"`rows` and `columns` and `values` lengths must match: "
f"{rows.size}, {columns.size}, {values.size}"
)
+ elif rows.size != columns.size:
+ raise ValueError(
+ f"`rows` and `columns` lengths must match: {rows.size}, {columns.size}"
+ )
if rows.dtype.kind not in "ui":
raise ValueError(f"rows must be integers, not {rows.dtype}")
@@ -196,12 +198,6 @@ def from_values(
donion = DOnion.sprout(dasks, meta, Matrix.from_values, *args, **kwargs)
return Matrix(donion, meta=meta)
- if rows.size > 0 and rows.size != values.size:
- raise ValueError("`rows` and `values` lengths must match")
-
- if columns.size > 0 and columns.size != values.size:
- raise ValueError("`columns` and `values` lengths must match")
-
vdtype = dtype
np_vdtype_ = np_dtype(vdtype)
@@ -215,7 +211,7 @@ def from_values(
*(_pick2D, "ijk"),
*(rows, "k"),
*(columns, "k"),
- *(values, "k"),
+ *(values, "k" if type(values) is da.Array else None),
*(row_ranges, "i"),
*(col_ranges, "j"),
shape=(nrows, ncols),
@@ -225,6 +221,7 @@ def from_values(
meta = InnerMatrix(gb.Matrix.new(vdtype, nrows=nrows, ncols=ncols))
delayed = da.core.blockwise(
*(_from_values2D, "ij"),
+ *(values if isinstance(values, Number) else None, None),
*(fragments, "ijk"),
*(row_ranges, "i"),
*(col_ranges, "j"),
@@ -243,6 +240,119 @@ def from_values(
)
return cls.from_matrix(matrix, chunks=chunks, name=name)
+ def build(
+ self,
+ rows,
+ columns,
+ values,
+ *,
+ dup_op=None,
+ clear=False,
+ nrows=None,
+ ncols=None,
+ chunks=None,
+ in_DOnion=False, # not part of the API
+ ):
+ if not clear and self._nvals != 0:
+ raise gb.exceptions.OutputNotEmpty()
+
+ nrows = nrows or self._nrows
+ ncols = ncols or self._ncols
+ meta = self._meta
+ meta.resize(nrows, ncols)
+
+ # check for any DOnions:
+ self_ = self._delayed if is_DOnion(self._delayed) else self
+ pkd_args = pack_args(self_, rows, columns, values)
+ pkd_kwargs = pack_kwargs(
+ dup_op=dup_op, clear=clear, nrows=nrows, ncols=ncols, chunks=chunks, in_DOnion=True
+ )
+ donions = [True for arg in pkd_args if is_DOnion(arg)]
+ donions += [True for (k, v) in pkd_kwargs.items() if is_DOnion(v)]
+ if np.any(donions):
+ # dive into DOnion(s):
+ out_donion = DOnion.multiple_access(meta, Matrix.build, *pkd_args, **pkd_kwargs)
+ self.__init__(out_donion, meta=meta)
+ return
+
+ # no DOnions
+ if clear:
+ self.clear()
+
+ self.resize(nrows, ncols)
+
+ if chunks is not None:
+ self.rechunk(inplace=True, chunks=chunks)
+
+ x = self._optional_dup()
+ if type(rows) is list:
+ if np.max(rows) >= self._nrows:
+ raise gb.exceptions.IndexOutOfBound
+ rows = da.core.from_array(np.array(rows), name="rows-" + tokenize(rows))
+
+ if type(columns) is list:
+ if np.max(columns) >= self._ncols:
+ raise gb.exceptions.IndexOutOfBound
+ columns = da.core.from_array(np.array(columns), name="columns-" + tokenize(columns))
+
+ if type(values) is list:
+ values = da.core.from_array(np.array(values), name="values-" + tokenize(values))
+
+ if type(values) is da.Array and (
+ rows.size != columns.size or columns.size != values.size
+ ):
+ raise ValueError(
+ "`rows` and `columns` and `values` lengths must match: "
+ f"{rows.size}, {columns.size}, {values.size}"
+ )
+ elif rows.size != columns.size:
+ raise ValueError(
+ f"`rows` and `columns` lengths must match: {rows.size}, {columns.size}"
+ )
+ elif values is None:
+ raise EmptyObject()
+
+ idtype = gb.Matrix.new(rows.dtype).dtype
+ np_idtype_ = np_dtype(idtype)
+ vdtype = (
+ lookup_dtype(type(values)) if isinstance(values, Number)
+ else gb.Matrix.new(values.dtype).dtype
+ )
+ np_vdtype_ = np_dtype(vdtype)
+
+ rname = "-row-ranges" + tokenize(x, x.chunks[0])
+ cname = "-col-ranges" + tokenize(x, x.chunks[1])
+ row_ranges = build_chunk_ranges_dask_array(x, 0, rname)
+ col_ranges = build_chunk_ranges_dask_array(x, 1, cname)
+ fragments = da.core.blockwise(
+ *(_pick2D, "ijk"),
+ *(rows, "k"),
+ *(columns, "k"),
+ *(values, None if isinstance(values, Number) else "k"),
+ *(row_ranges, "i"),
+ *(col_ranges, "j"),
+ shape=(nrows, ncols),
+ dtype=np_idtype_,
+ meta=np.array([]),
+ )
+ meta = InnerMatrix(gb.Matrix.new(vdtype))
+ delayed = da.core.blockwise(
+ *(_build_2D_chunk, "ij"),
+ *(x, "ij"),
+ *(row_ranges, "i"),
+ *(col_ranges, "j"),
+ *(fragments, "ijk"),
+ values=values if isinstance(values, Number) else None,
+ dup_op=dup_op,
+ clear=False,
+ concatenate=False,
+ dtype=np_vdtype_,
+ meta=meta,
+ )
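+        # `in_DOnion=True` means this call runs inside a DOnion on the concrete
+        # matrix; return the result so it can be re-wrapped instead of mutating self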
+ if in_DOnion:
+ return Matrix(delayed)
+ self.__init__(delayed)
+
@classmethod
def new(cls, dtype, nrows=0, ncols=0, *, chunks="auto", name=None):
dtype = dtype.lower() if isinstance(dtype, str) else dtype
@@ -335,6 +445,16 @@ def shape(self):
return (self._meta.nrows, self._meta.ncols)
def resize(self, nrows, ncols, inplace=True, chunks="auto"):
+ if is_DOnion(self._delayed):
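+            # defer to the concrete matrix: `getattr` wraps the resize call in a new
+            # DOnion that is evaluated once the inner matrix is known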
+ donion = self._delayed.getattr(
+ self._meta, 'resize', nrows, ncols, inplace=False, chunks=chunks
+ )
+ if inplace:
+ self.__init__(donion, meta=self._meta)
+ return
+ else:
+ return Matrix(donion, meta=self._meta)
+
chunks = da.core.normalize_chunks(chunks, (nrows, ncols), dtype=np.int64)
output_row_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_row_ranges-")
output_col_ranges = build_ranges_dask_array_from_chunks(chunks[1], "output_col_ranges-")
@@ -374,6 +494,23 @@ def resize(self, nrows, ncols, inplace=True, chunks="auto"):
else:
return Matrix(x, nvals=nvals)
+ def rechunk(self, inplace=False, chunks="auto"):
+ if is_DOnion(self._delayed):
+ meta = self._meta
+ donion = self._delayed.getattr(meta, 'rechunk', inplace=False, chunks=chunks)
+ if inplace:
+ self.__init__(donion, meta=meta)
+ return
+ else:
+ return Matrix(donion, meta=meta)
+
+ chunks = da.core.normalize_chunks(chunks, self.shape, dtype=np.int64)
+ if inplace:
+ self.resize(*self.shape, chunks=chunks)
+ return
+ else:
+ return self.resize(*self.shape, chunks=chunks, inplace=False)
+
def _diag(self, k=0, dtype=None, chunks="auto"):
kdiag_row_start = max(0, -k)
kdiag_col_start = max(0, k)
@@ -477,31 +614,84 @@ def _diag_old(self, k=0, dtype=None, chunks="auto"):
return get_return_type(meta)(delayed, nvals=nvals)
def __getitem__(self, index):
- if type(self._delayed) is DOnion:
+ if (
+ type(self._delayed) is DOnion
+ or type(index) is tuple and len(index) == 2
+ and (is_DOnion(index[0]) or is_DOnion(index[1]))
+ ):
from .scalar import Scalar, PythonScalar
+ from .expr import IndexerResolver
+ self_delayed = self._delayed if type(self._delayed) is DOnion else self
if type(index) is tuple and len(index) == 2:
- if isinstance(index[0], (Integral, Scalar, PythonScalar)):
- if isinstance(index[1], (Integral, Scalar, PythonScalar)):
- meta = gb.Scalar.new(self._meta.dtype)
- else:
- meta = gb.Vector.new(self._meta.dtype)
- else:
- if isinstance(index[1], (Integral, Scalar, PythonScalar)):
- meta = gb.Vector.new(self._meta.dtype)
- else:
- meta = gb.Matrix.new(self._meta.dtype)
- return self._delayed.getattr(meta, "__getitem__", index)
- if type(index) is DOnion:
- meta = self._meta
- return DOnion.multiple_access(meta, self.__getitem__, index)
+ def getitem(self_, i0, i1):
+ return self.__class__.__getitem__(self_, (i0, i1))
+
+            # Since grblas does not support indices that are dask arrays,
+            # meta deduction is complicated here. We therefore substitute
+            # any non-Integral-type indices with `slice(None)`.
+ meta_index = tuple(
+ x if isinstance(x, (Integral, Scalar, PythonScalar))
+ else slice(None) for x in index
+ )
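+            # e.g. an index like (2, <dask array>) is viewed as (2, slice(None)) for
+            # meta deduction only; the real indices are applied inside the DOnion below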
+ # Next, we resize `meta` to accept any Integral-type indices:
+ numbers = [x for x in index if isinstance(x, (Integral, Scalar, PythonScalar))]
+ max_index = np.max(numbers) if numbers else None
+ if max_index is not None:
+ self._meta.resize(nrows=max_index + 1, ncols=max_index + 1)
+ meta = self._meta[meta_index]
+
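+            # run the resolver purely to validate the index types; bounds checks are
+            # disabled because meta's shape is only a placeholder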
+ IndexerResolver(self._meta, index, check_shape=False)
+ donion = DOnion.multiple_access(meta, getitem, self_delayed, index[0], index[1])
+ return AmbiguousAssignOrExtract(self, index, self_donion=donion, meta=meta)
+ else:
+ raise ValueError("Matrix indices must be a 2-tuple.")
+
return AmbiguousAssignOrExtract(self, index)
- def __delitem__(self, keys):
+ def __delitem__(self, keys, in_DOnion=False):
+ if is_DOnion(self._delayed):
+ good_keys = [x for x in keys if isinstance(x, Integral)]
+ if len(good_keys) != 2:
+ raise TypeError("Remove Element only supports scalars.")
+
+ donion = self._delayed.getattr(
+ self._meta, '__delitem__', keys, in_DOnion=True
+ )
+ self.__init__(donion, meta=self._meta)
+ return
+
del Updater(self)[keys]
+ if in_DOnion:
+ return self
+
+ def __setitem__(self, index, delayed, in_DOnion=False):
+ if is_DOnion(self._delayed):
+ donion = self._delayed.getattr(
+ self._meta, '__setitem__', index, delayed, in_DOnion=True
+ )
+ self.__init__(donion, meta=self._meta)
+ return
+
+ dlayd_is_donion = like_DOnion(delayed)
+ if dlayd_is_donion:
+ delayed = (
+ delayed._delayed if hasattr(delayed, '_delayed') and is_DOnion(delayed._delayed)
+ else delayed
+ )
+ if dlayd_is_donion or type(index) is tuple and len(index) == 2 and (
+ is_DOnion(index[0]) or is_DOnion(index[1])
+ ):
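+            # the assigned value or one of the indices is DOnion-wrapped, so defer the
+            # whole assignment into a DOnion that performs the real __setitem__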
+ def func(i0, i1, delayed):
+ return self.__setitem__((i0, i1), delayed)
+
+ donion = DOnion.multiple_access(self._meta, func, index[0], index[1], delayed)
+ self.__init__(donion, meta=self._meta)
+ return
- def __setitem__(self, index, delayed):
Updater(self)[index] = delayed
+ if in_DOnion:
+ return self
def __contains__(self, index):
extractor = self[index]
@@ -519,11 +709,41 @@ def __iter__(self):
def ewise_add(self, other, op=monoid.plus, *, require_monoid=True):
assert type(other) is Matrix # TODO: or TransposedMatrix
+
+ self_delayed = self._matrix if self._is_transposed else self._delayed
+ other_delayed = other._matrix if other._is_transposed else other._delayed
+ if is_DOnion(self_delayed) or is_DOnion(other_delayed):
+ self_ = self_delayed if is_DOnion(self_delayed) else self
+ other_ = other_delayed if is_DOnion(other_delayed) else other
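+            # a DOnion operand's meta may carry a placeholder shape, so the meta-level
+            # ewise_add can raise DimensionMismatch; fall back to self's meta (dtype only)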
+ try:
+ meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
+ except DimensionMismatch:
+ meta = self._meta.ewise_add(self._meta, op=op, require_monoid=require_monoid)
+
+ donion = DOnion.multiple_access(
+ meta, Matrix.ewise_add, self_, other_, op=op, require_monoid=require_monoid
+ )
+ return GbDelayed(donion, 'ewise_add', op, meta=meta)
+
meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
return GbDelayed(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta)
def ewise_mult(self, other, op=binary.times):
assert type(other) is Matrix # TODO: or TransposedMatrix
+
+ self_delayed = self._matrix if self._is_transposed else self._delayed
+ other_delayed = other._matrix if other._is_transposed else other._delayed
+ if is_DOnion(self_delayed) or is_DOnion(other_delayed):
+ self_ = self_delayed if is_DOnion(self_delayed) else self
+ other_ = other_delayed if is_DOnion(other_delayed) else other
+ try:
+ meta = self._meta.ewise_mult(other._meta, op=op)
+ except DimensionMismatch:
+ meta = self._meta.ewise_mult(self._meta, op=op)
+
+ donion = DOnion.multiple_access(meta, Matrix.ewise_mult, self_, other_, op=op)
+ return GbDelayed(donion, 'ewise_mult', op, meta=meta)
+
meta = self._meta.ewise_mult(other._meta, op=op)
return GbDelayed(self, "ewise_mult", other, op, meta=meta)
@@ -531,11 +751,40 @@ def mxv(self, other, op=semiring.plus_times):
from .vector import Vector
assert type(other) is Vector
+
+ self_delayed = self._matrix if self._is_transposed else self._delayed
+ if is_DOnion(self_delayed) or is_DOnion(other._delayed):
+ self_ = self_delayed if is_DOnion(self_delayed) else self
+ other_ = other._delayed if is_DOnion(other._delayed) else other
+ try:
+ meta = self._meta.mxv(other._meta, op=op)
+ except DimensionMismatch:
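+                # other's meta has a placeholder size; synthesize a size-compatible
+                # Vector meta so that only the result dtype is deduced here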
+ other_meta = gb.Vector.new(dtype=other._meta.dtype, size=self._meta.ncols)
+ meta = self._meta.mxv(other_meta, op=op)
+ donion = DOnion.multiple_access(meta, Matrix.mxv, self_, other_, op=op)
+ return GbDelayed(donion, 'mxv', op, meta=meta)
+
meta = self._meta.mxv(other._meta, op=op)
return GbDelayed(self, "mxv", other, op, meta=meta)
def mxm(self, other, op=semiring.plus_times):
assert type(other) in (Matrix, TransposedMatrix)
+
+ self_delayed = self._matrix if self._is_transposed else self._delayed
+ other_delayed = other._matrix if other._is_transposed else other._delayed
+ if is_DOnion(self_delayed) or is_DOnion(other_delayed):
+ self_ = self_delayed if is_DOnion(self_delayed) else self
+ other_ = other_delayed if is_DOnion(other_delayed) else other
+ try:
+ meta = self._meta.mxm(other._meta, op=op)
+ except DimensionMismatch:
+ other_meta = gb.Matrix.new(
+ dtype=other._meta.dtype, nrows=self._meta.ncols, ncols=other._meta.ncols
+ )
+ meta = self._meta.mxm(other_meta, op=op)
+ donion = DOnion.multiple_access(meta, Matrix.mxm, self_, other_, op=op)
+ return GbDelayed(donion, 'mxm', op, meta=meta)
+
meta = self._meta.mxm(other._meta, op=op)
return GbDelayed(self, "mxm", other, op, meta=meta)
@@ -567,87 +816,14 @@ def reduce_columnwise(self, op=monoid.plus):
return GbDelayed(self, "reduce_columnwise", op, meta=meta)
def reduce_scalar(self, op=monoid.plus):
+ if is_DOnion(self._delayed):
+ meta = self._meta.reduce_scalar(op)
+ donion = self._delayed.getattr(meta, 'reduce_scalar', op=op)
+ return GbDelayed(donion, 'reduce_scalar', op, meta=meta)
+
meta = self._meta.reduce_scalar(op)
return GbDelayed(self, "reduce_scalar", op, meta=meta)
- def build(
- self,
- rows,
- columns,
- values,
- *,
- dup_op=None,
- clear=False,
- nrows=None,
- ncols=None,
- chunks=None,
- ):
- if clear:
- self.clear()
- elif self.nvals.compute() > 0:
- raise gb.exceptions.OutputNotEmpty
-
- if nrows is not None or ncols is not None:
- if nrows is None:
- nrows = self._nrows
- if ncols is None:
- ncols = self._ncols
- self.resize(nrows, ncols)
-
- if chunks is not None:
- self.rechunk(inplace=True, chunks=chunks)
-
- x = self._optional_dup()
- if type(rows) is list:
- if np.max(rows) >= self._nrows:
- raise gb.exceptions.IndexOutOfBound
- rows = da.core.from_array(np.array(rows), name="rows-" + tokenize(rows))
- else:
- if da.max(rows).compute() >= self._nrows:
- raise gb.exceptions.IndexOutOfBound
- if type(columns) is list:
- if np.max(columns) >= self._ncols:
- raise gb.exceptions.IndexOutOfBound
- columns = da.core.from_array(np.array(columns), name="columns-" + tokenize(columns))
- else:
- if da.max(columns).compute() >= self._ncols:
- raise gb.exceptions.IndexOutOfBound
- if type(values) is list:
- values = da.core.from_array(np.array(values), name="values-" + tokenize(values))
-
- idtype = gb.Matrix.new(rows.dtype).dtype
- np_idtype_ = np_dtype(idtype)
- vdtype = gb.Matrix.new(values.dtype).dtype
- np_vdtype_ = np_dtype(vdtype)
-
- rname = "-row-ranges" + tokenize(x, x.chunks[0])
- cname = "-col-ranges" + tokenize(x, x.chunks[1])
- row_ranges = build_chunk_ranges_dask_array(x, 0, rname)
- col_ranges = build_chunk_ranges_dask_array(x, 1, cname)
- fragments = da.core.blockwise(
- *(_pick2D, "ijk"),
- *(rows, "k"),
- *(columns, "k"),
- *(values, "k"),
- *(row_ranges, "i"),
- *(col_ranges, "j"),
- dtype=np_idtype_,
- meta=np.array([]),
- )
- meta = InnerMatrix(gb.Matrix.new(vdtype))
- delayed = da.core.blockwise(
- *(_build_2D_chunk, "ij"),
- *(x, "ij"),
- *(row_ranges, "i"),
- *(col_ranges, "j"),
- *(fragments, "ijk"),
- dup_op=dup_op,
- concatenate=False,
- dtype=np_vdtype_,
- meta=meta,
- )
- self.__init__(delayed)
-
def to_values(self, dtype=None, chunks="auto"):
dtype = lookup_dtype(self.dtype if dtype is None else dtype)
meta_i, _, meta_v = self._meta.to_values(dtype)
@@ -729,13 +905,6 @@ def _to_values(x, starts, stops_, dtype, chunks, nnz):
vals = rcv_donion.deep_extract(meta_v, da.map_blocks, _get_vals, dtype=dtype_v, meta=meta_v)
return rows, cols, vals
- def rechunk(self, inplace=False, chunks="auto"):
- chunks = da.core.normalize_chunks(chunks, self.shape, dtype=np.int64)
- if inplace:
- self.resize(*self.shape, chunks=chunks)
- else:
- return self.resize(*self.shape, chunks=chunks, inplace=False)
-
def isequal(self, other, *, check_dtype=False):
other = self._expect_type(
other, (Matrix, TransposedMatrix), within="isequal", argname="other"
@@ -779,13 +948,13 @@ class TransposedMatrix:
_is_transposed = True
def __init__(self, matrix):
- assert type(matrix) is Matrix
+ assert type(matrix) in {Matrix, DOnion}
self._matrix = matrix
self._meta = matrix._meta.T
# Aggregator-specific requirements:
- self._nrows = self.nrows
- self._ncols = self.ncols
+ self._nrows = self._meta.nrows
+ self._ncols = self._meta.ncols
def new(self, *, dtype=None, mask=None):
gb_dtype = self._matrix.dtype if dtype is None else lookup_dtype(dtype)
@@ -821,8 +990,15 @@ def dtype(self):
return self._meta.dtype
def to_values(self, dtype=None, chunks="auto"):
- if is_DOnion(self._matrix._delayed):
- return self._matrix.to_values(dtype=dtype, chunks=chunks)
+ if is_DOnion(self._matrix):
+ out_meta = np.array([])
+ result = self._matrix.getattr(out_meta, 'to_values', dtype=dtype, chunks=chunks)
+ meta_i, _, meta_v = self._meta.to_values(dtype)
+            # swap rows and columns so the result reflects the transpose,
+            # mirroring the eager path below
+            rows = result.getattr(meta_i, "__getitem__", 1)
+            columns = result.getattr(meta_i, "__getitem__", 0)
+            values = result.getattr(meta_v, "__getitem__", 2)
+ return rows, columns, values
+
rows, cols, vals = self._matrix.to_values(dtype=dtype, chunks=chunks)
return cols, rows, vals
@@ -851,6 +1027,42 @@ def nvals(self):
return self._matrix._delayed.nvals
return self._meta.shape
+ def __getitem__(self, index):
+ if (
+ type(self._matrix) is DOnion
+ or type(index) is tuple and len(index) == 2
+ and (is_DOnion(index[0]) or is_DOnion(index[1]))
+ ):
+ from .scalar import Scalar, PythonScalar
+ from .expr import IndexerResolver
+
+ self_delayed = self._matrix if type(self._matrix) is DOnion else self
+ if type(index) is tuple and len(index) == 2:
+ def getitem(self_, i0, i1):
+ return self.__class__.__getitem__(self_, (i0, i1))
+
+            # Since grblas does not support indices that are dask arrays,
+            # meta deduction is complicated here. We therefore substitute
+            # any non-Integral-type indices with `slice(None)`.
+ meta_index = tuple(
+ x if isinstance(x, (Integral, Scalar, PythonScalar))
+ else slice(None) for x in index
+ )
+ # Next, we resize `meta` to accept any Integral-type indices:
+ numbers = [x for x in index if isinstance(x, (Integral, Scalar, PythonScalar))]
+ max_index = np.max(numbers) if numbers else None
+ if max_index is not None:
+ self._meta.resize(nrows=max_index + 1, ncols=max_index + 1)
+ meta = self._meta[meta_index]
+
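+            # as in Matrix.__getitem__: validate index types only, since bounds are
+            # unknown while the matrix is DOnion-wrapped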
+ IndexerResolver(self._meta, index, check_shape=False)
+ donion = DOnion.multiple_access(meta, getitem, self_delayed, index[0], index[1])
+ return AmbiguousAssignOrExtract(self, index, self_donion=donion, meta=meta)
+ else:
+ raise ValueError("Matrix indices must be a 2-tuple.")
+
+ return AmbiguousAssignOrExtract(self, index)
+
# Delayed methods
ewise_add = Matrix.ewise_add
ewise_mult = Matrix.ewise_mult
@@ -865,7 +1077,6 @@ def nvals(self):
# Misc.
isequal = Matrix.isequal
isclose = Matrix.isclose
- __getitem__ = Matrix.__getitem__
__array__ = Matrix.__array__
name = Matrix.name
@@ -1028,7 +1239,9 @@ def _build_2D_chunk(
out_row_range,
out_col_range,
fragments,
+ values,
dup_op=None,
+ clear=False,
):
"""
Reassembles filtered tuples (row, col, val) in the list `fragments`
@@ -1038,17 +1251,25 @@ def _build_2D_chunk(
"""
rows = np.concatenate([rows for (rows, _, _) in fragments])
cols = np.concatenate([cols for (_, cols, _) in fragments])
- vals = np.concatenate([vals for (_, _, vals) in fragments])
nrows = out_row_range[0].stop - out_row_range[0].start
ncols = out_col_range[0].stop - out_col_range[0].start
- inner_matrix.value.build(
- rows,
- cols,
- vals,
- nrows=nrows,
- ncols=ncols,
- dup_op=dup_op,
- )
+ if not clear and inner_matrix.value.nvals > 0:
+ raise gb.exceptions.OutputNotEmpty()
+
+ if values is None:
+ vals = np.concatenate([vals for (_, _, vals) in fragments])
+ inner_matrix.value.build(
+ rows,
+ cols,
+ vals,
+ nrows=nrows,
+ ncols=ncols,
+ dup_op=dup_op,
+ clear=clear,
+ )
+ else:
+ vals = values
+ inner_matrix.value.ss.build_scalar(rows, cols, vals)
return InnerMatrix(inner_matrix.value)
@@ -1061,7 +1282,7 @@ def _new_Matrix_chunk(out_row_range, out_col_range, gb_dtype=None):
return InnerMatrix(gb.Matrix.new(gb_dtype, nrows=nrows, ncols=ncols))
-def _from_values2D(fragments, out_row_range, out_col_range, dup_op=None, gb_dtype=None):
+def _from_values2D(values, fragments, out_row_range, out_col_range, dup_op=None, gb_dtype=None):
"""
Reassembles filtered tuples (row, col, val) in the list `fragments`
obtained from _pick2D() for the chunk within the given row and column
@@ -1070,7 +1291,10 @@ def _from_values2D(fragments, out_row_range, out_col_range, dup_op=None, gb_dtyp
"""
rows = np.concatenate([rows for (rows, _, _) in fragments])
cols = np.concatenate([cols for (_, cols, _) in fragments])
- vals = np.concatenate([vals for (_, _, vals) in fragments])
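+    # `values` is a scalar only in the iso case (Matrix.from_values called with a
+    # single Number); otherwise each fragment carries its own value array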
+ if values is None:
+ vals = np.concatenate([vals for (_, _, vals) in fragments])
+ else:
+ vals = values
nrows = out_row_range[0].stop - out_row_range[0].start
ncols = out_col_range[0].stop - out_col_range[0].start
if rows.size == 0 or cols.size == 0:
@@ -1105,7 +1329,8 @@ def _pick2D(rows, cols, values, row_range, col_range, shape):
cols_in = (col_range.start <= cols) & (cols < col_range.stop)
rows = rows[rows_in & cols_in] - row_range.start
cols = cols[rows_in & cols_in] - col_range.start
- values = values[rows_in & cols_in]
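+    # a scalar (iso) value applies to every retained entry, so only array values
+    # need to be filtered down to this chunk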
+ if isinstance(values, np.ndarray):
+ values = values[rows_in & cols_in]
return (rows, cols, values)
diff --git a/dask_grblas/scalar.py b/dask_grblas/scalar.py
index fffc1e6..16bef6c 100644
--- a/dask_grblas/scalar.py
+++ b/dask_grblas/scalar.py
@@ -230,7 +230,8 @@ def __eq__(self, other):
def compute(self, *args, **kwargs):
innerval = self._delayed.compute(*args, **kwargs)
if type(self._delayed) is DOnion:
- return innerval
+ return innerval.value if hasattr(innerval, 'value') else innerval
+
return innerval.value.value
diff --git a/tests/from_grblas2/test_matrix.py b/tests/from_grblas2/test_matrix.py
index 66f21fe..00a85b9 100644
--- a/tests/from_grblas2/test_matrix.py
+++ b/tests/from_grblas2/test_matrix.py
@@ -23,6 +23,7 @@
from .conftest import autocompute, compute
from dask_grblas import Matrix, Scalar, Vector # isort:skip
+from dask_grblas.base import is_DOnion
@pytest.fixture
@@ -43,9 +44,33 @@ def A():
return Matrix.from_values(*data)
+@pytest.fixture
+def A_dask():
+ # 0 1 2 3 4 5 6
+ # 0 [- 2 - 3 - - -]
+ # 1 [- - - - 8 - 4]
+ # 2 [- - - - - 1 -]
+ # 3 [3 - 3 - - - -]
+ # 4 [- - - - - 7 -]
+ # 5 [- - 1 - - - -]
+ # 6 [- - 5 7 3 - -]
+ data = [
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4],
+ ]
+ data = [da.from_array(np.array(a, dtype=np.int64)) for a in data]
+ return Matrix.from_values(*data)
+
+
+@pytest.fixture
+def As(A, A_dask):
+ return [A, A_dask]
+
+
@pytest.fixture
def A_chunks():
- return [7, 4, 3]
+ return [7, 3]
@pytest.fixture
@@ -54,6 +79,18 @@ def v():
return Vector.from_values(*data)
+@pytest.fixture
+def v_dask():
+ data = [[1, 3, 4, 6], [1, 1, 2, 0]]
+ data = [da.from_array(a) for a in data]
+ return Vector.from_values(*data)
+
+
+@pytest.fixture
+def vs(v, v_dask):
+ return [v, v_dask]
+
+
def test_new():
C = Matrix.new(dtypes.INT8, 17, 12)
assert C.dtype == "INT8"
@@ -62,30 +99,44 @@ def test_new():
assert C.ncols == 12
-def test_dup(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- C = A.dup()
- assert C is not A
- assert C.dtype == A.dtype
- assert C.nvals == A.nvals
- assert C.nrows == A.nrows
- assert C.ncols == A.ncols
- # Ensure they are not the same backend object
- A[0, 0] = 1000
- assert C[0, 0].value != 1000
- # extended functionality
- D = Matrix.from_values([0, 1], [0, 1], [0, 2.5], dtype=dtypes.FP64)
- E = D.dup(dtype=dtypes.INT64)
- assert E.isequal(
- Matrix.from_values([0, 1], [0, 1], [0, 2], dtype=dtypes.INT64), check_dtype=True
+def test_dup(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ C = A.dup()
+ assert C is not A
+ assert C.dtype == A.dtype
+ assert C.nvals == A.nvals
+ assert C.nrows == A.nrows
+ assert C.ncols == A.ncols
+ # Ensure they are not the same backend object
+ A[0, 0] = 1000
+ assert A[0, 0].value == 1000
+ assert C[0, 0].value != 1000
+
+ # extended functionality
+ Ds = [Matrix.from_values([0, 1], [0, 1], [0, 2.5], dtype=dtypes.FP64)]
+ Ds.append(
+ Matrix.from_values(
+ da.from_array([0, 1]),
+ da.from_array([0, 1]),
+ da.from_array([0, 2.5]),
+ dtype=dtypes.FP64
)
- E = D.dup(mask=D.V)
- assert E.isequal(Matrix.from_values([1], [1], [2.5], dtype=dtypes.FP64), check_dtype=True)
- E = D.dup(dtype=dtypes.INT64, mask=D.V)
- assert E.isequal(Matrix.from_values([1], [1], [2], dtype=dtypes.INT64), check_dtype=True)
+ )
+ for D_ in Ds:
+ for chunks in A_chunks:
+ D = D_.dup()
+ D.rechunk(chunks=chunks, inplace=True)
+ E = D.dup(dtype=dtypes.INT64)
+ assert E.isequal(
+ Matrix.from_values([0, 1], [0, 1], [0, 2], dtype=dtypes.INT64), check_dtype=True
+ )
+ E = D.dup(mask=D.V)
+ assert E.isequal(Matrix.from_values([1], [1], [2.5], dtype=dtypes.FP64), check_dtype=True)
+ E = D.dup(dtype=dtypes.INT64, mask=D.V)
+ assert E.isequal(Matrix.from_values([1], [1], [2], dtype=dtypes.INT64), check_dtype=True)
def test_from_values():
@@ -212,139 +263,166 @@ def test_from_values_dask():
Matrix.from_values(zero_da, cols, zero_da)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_from_values_scalar():
- C = Matrix.from_values([0, 1, 3], [1, 1, 2], 7)
- assert C.nrows == 4
- assert C.ncols == 3
- assert C.nvals == 3
- assert C.dtype == dtypes.INT64
- assert C.ss.is_iso
- assert C.reduce_scalar(monoid.any).new() == 7
+ Cs = [Matrix.from_values([0, 1, 3], [1, 1, 2], 7)]
+ Cs.append(Matrix.from_values(
+ da.from_array([0, 1, 3]),
+ da.from_array([1, 1, 2]),
+ 7,
+ ))
+ for C in Cs:
+ assert C.nrows == 4
+ assert C.ncols == 3
+ assert C.nvals == 3
+ assert C.dtype == dtypes.INT64
+ # assert C.ss.is_iso
+ assert C.reduce_scalar(monoid.any).new() == 7
    # iso drops duplicates
C = Matrix.from_values([0, 1, 3, 0], [1, 1, 2, 1], 7)
- assert C.nrows == 4
- assert C.ncols == 3
- assert C.nvals == 3
- assert C.dtype == dtypes.INT64
- assert C.ss.is_iso
- assert C.reduce_scalar(monoid.any).new() == 7
- with pytest.raises(ValueError, match="dup_op must be None"):
- Matrix.from_values([0, 1, 3, 0], [1, 1, 2, 1], 7, dup_op=binary.plus)
+ Cs.append(Matrix.from_values(
+ da.from_array([0, 1, 3, 0]),
+ da.from_array([1, 1, 2, 1]),
+ 7,
+ ))
+ for C in Cs:
+ assert C.nrows == 4
+ assert C.ncols == 3
+ assert C.nvals == 3
+ assert C.dtype == dtypes.INT64
+ # assert C.ss.is_iso
+ assert C.reduce_scalar(monoid.any).new() == 7
+ with pytest.raises(ValueError, match="dup_op must be None"):
+ Matrix.from_values([0, 1, 3, 0], [1, 1, 2, 1], 7, dup_op=binary.plus)
+
+
+def test_clear(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ A.clear()
+ assert A.nvals == 0
+ assert A.nrows == 7
+ assert A.ncols == 7
-def test_clear(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- A.clear()
- assert A.nvals == 0
- assert A.nrows == 7
- assert A.ncols == 7
+def test_resize(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A.nrows == 7
+ assert A.ncols == 7
+ assert A.nvals.compute() == 12
+ A.resize(10, 11)
+ assert A.nrows == 10
+ assert A.ncols == 11
+ assert A.nvals.compute() == 12
+ assert compute(A[9, 9].value) is None
+ A.resize(4, 1)
+ assert A.nrows == 4
+ assert A.ncols == 1
+ assert A.nvals.compute() == 1
+ A = A_.dup()
+ assert A.nrows == 7
+ assert A.ncols == 7
+ assert A.nvals.compute() == 12
+ A.resize(6, 11, chunks=4)
+ assert A.nrows == 6
+ assert A.ncols == 11
+ assert A.nvals.compute() == 9
+ if type(A._delayed) is da.Array:
+ assert A._delayed.chunks == ((4, 2), (4, 4, 3))
+ else:
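+                # DOnion-backed: the chunk structure lives on the inner matrix, so
+                # peel the onion with deep_extract to inspect it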
+ assert A._delayed.deep_extract(
+ None, lambda x: x._delayed.chunks
+ ) == ((4, 2), (4, 4, 3))
+ assert compute(A[3, 2].value) == 3
+ assert compute(A[5, 7].value) is None
-def test_resize(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A.nrows == 7
- assert A.ncols == 7
- assert A.nvals.compute() == 12
- A.resize(10, 11)
- assert A.nrows == 10
- assert A.ncols == 11
- assert A.nvals.compute() == 12
- assert compute(A[9, 9].value) is None
- A.resize(4, 1)
- assert A.nrows == 4
- assert A.ncols == 1
- assert A.nvals.compute() == 1
+ A = A_.dup()
+ A.resize(11, 3, chunks=4)
+ assert A.nrows == 11
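+        # index resolution and the infix shape attributes need concrete shapes,
+        # which are unavailable while the parent is still DOnion-wrapped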
+ assert A.ncols == 3
+ assert A.nvals.compute() == 5
+ if type(A._delayed) is da.Array:
+ assert A._delayed.chunks == ((4, 4, 3), (3,))
+ else:
+ assert A._delayed.deep_extract(
+ None, lambda x: x._delayed.chunks
+ ) == ((4, 4, 3), (3,))
+ assert compute(A[3, 2].value) == 3
+ assert compute(A[7, 2].value) is None
- A = A_.dup()
- assert A.nrows == 7
- assert A.ncols == 7
- assert A.nvals.compute() == 12
- A.resize(6, 11, chunks=4)
- assert A.nrows == 6
- assert A.ncols == 11
- assert A.nvals.compute() == 9
- assert A._delayed.chunks == ((4, 2), (4, 4, 3))
- assert compute(A[3, 2].value) == 3
- assert compute(A[5, 7].value) is None
+def test_rechunk(As, A_chunks):
+ for A_ in As:
A = A_.dup()
- A.resize(11, 3, chunks=4)
- assert A.nrows == 11
- assert A.ncols == 3
- assert A.nvals.compute() == 5
- assert A._delayed.chunks == ((4, 4, 3), (3,))
- assert compute(A[3, 2].value) == 3
- assert compute(A[7, 2].value) is None
+ for chunks in A_chunks + A_chunks[::-1]:
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A.nrows == 7
+ assert A.ncols == 7
+ assert A.nvals == 12
-def test_rechunk(A, A_chunks):
- A_ = A.dup()
- for chunks in A_chunks + A_chunks[::-1]:
- A_.rechunk(chunks=chunks, inplace=True)
- assert A_.nrows == 7
- assert A_.ncols == 7
- assert A_.nvals.compute() == 12
-
+def test_nrows(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A.nrows == 7
-def test_nrows(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A.nrows == 7
+def test_ncols(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A.ncols == 7
-def test_ncols(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A.ncols == 7
+def test_nvals(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A.nvals == 12
-def test_nvals(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A.nvals == 12
+def test_build(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A.nvals == 12
+ A.clear()
+ A.build([0, 6], [0, 1], [1, 2])
+ assert A.nvals == 2
+ with pytest.raises(OutputNotEmpty):
+ A.build([1, 5], [2, 3], [3, 4])
+ assert A.nvals == 2 # nothing should be modified
+ # We can clear though
+ A.build([1, 2, 5], [1, 2, 3], [2, 3, 4], clear=True)
+ assert A.nvals == 3
+ A.clear()
+ if is_DOnion(A._delayed):
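+                # a DOnion-backed matrix's true shape is unknown until compute time,
+                # so the out-of-bound error only surfaces on .compute()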
+ A.build([0, 11], [0, 0], [1, 1])
+ with pytest.raises(IndexOutOfBound):
+ A.compute()
+ else:
+ with pytest.raises(IndexOutOfBound):
+ A.build([0, 11], [0, 0], [1, 1])
-def test_build(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A.nvals == 12
- A.clear()
- A.build([0, 6], [0, 1], [1, 2])
- assert A.nvals == 2
- with pytest.raises(OutputNotEmpty):
- A.build([1, 5], [2, 3], [3, 4])
- assert A.nvals == 2 # nothing should be modified
- # We can clear though
- A.build([1, 2, 5], [1, 2, 3], [2, 3, 4], clear=True)
- assert A.nvals == 3
- A.clear()
- with pytest.raises(IndexOutOfBound):
- A.build([0, 11], [0, 0], [1, 1])
- B = Matrix.new(int, nrows=2, ncols=2)
- B.build([0, 11], [0, 0], [1, 1], nrows=12)
- assert B.isequal(Matrix.from_values([0, 11], [0, 0], [1, 1], ncols=2))
- C = Matrix.new(int, nrows=2, ncols=2)
- C.build([0, 0], [0, 11], [1, 1], ncols=12)
- assert C.isequal(Matrix.from_values([0, 0], [0, 11], [1, 1], nrows=2))
+ B = Matrix.new(int, nrows=2, ncols=2)
+ B.build([0, 11], [0, 0], [1, 1], nrows=12)
+ assert B.isequal(Matrix.from_values([0, 11], [0, 0], [1, 1], ncols=2))
+ C = Matrix.new(int, nrows=2, ncols=2)
+ C.build([0, 0], [0, 11], [1, 1], ncols=12)
+ assert C.isequal(Matrix.from_values([0, 0], [0, 11], [1, 1], nrows=2))
-@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_build_scalar(A, A_chunks):
A_ = A
for chunks in A_chunks:
@@ -356,7 +434,7 @@ def test_build_scalar(A, A_chunks):
A.clear()
A.ss.build_scalar([0, 6], [0, 1], 1)
assert A.nvals == 2
- assert A.ss.is_iso
+ # assert A.ss.is_iso
A.clear()
with pytest.raises(ValueError, match="lengths must match"):
A.ss.build_scalar([0, 6], [0, 1, 2], 1)
@@ -364,116 +442,116 @@ def test_build_scalar(A, A_chunks):
A.ss.build_scalar([0, 5], [0, 1], None)
-def test_extract_values(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- rows, cols, vals = A.to_values(dtype=int)
- rcv = set(
- zip(
- rows.compute(),
- cols.compute(),
- vals.compute(),
+def test_extract_values(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ rows, cols, vals = A.to_values(dtype=int)
+ rcv = set(
+ zip(
+ rows.compute(),
+ cols.compute(),
+ vals.compute(),
+ )
)
- )
- expected = set(
- zip(
- (0, 0, 1, 1, 2, 3, 3, 4, 5, 6, 6, 6),
- (1, 3, 4, 6, 5, 0, 2, 5, 2, 2, 3, 4),
- (2, 3, 8, 4, 1, 3, 3, 7, 1, 5, 7, 3),
+ expected = set(
+ zip(
+ (0, 0, 1, 1, 2, 3, 3, 4, 5, 6, 6, 6),
+ (1, 3, 4, 6, 5, 0, 2, 5, 2, 2, 3, 4),
+ (2, 3, 8, 4, 1, 3, 3, 7, 1, 5, 7, 3),
+ )
)
- )
- assert rcv == expected
- assert rows.dtype == np.uint64
- assert cols.dtype == np.uint64
- assert vals.dtype == np.int64
- Trows, Tcols, Tvals = A.T.to_values(dtype=float)
- np.testing.assert_array_equal(rows.compute(), Tcols.compute())
- np.testing.assert_array_equal(cols.compute(), Trows.compute())
- np.testing.assert_array_equal(vals.compute(), Tvals.compute())
- assert Trows.dtype == np.uint64
- assert Tcols.dtype == np.uint64
- assert Tvals.dtype == np.float64
-
-
-def test_extract_element(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A[3, 0].new() == 3
- assert A[1, 6].new() == 4
- assert A[1, 6].value == 4
- assert A.T[6, 1].value == 4
- s = A[0, 0].new()
- assert compute(s.value) is None
- assert s.dtype == "INT64"
- s = A[1, 6].new(dtype=float)
- assert s.value == 4.0
- assert s.dtype == "FP64"
-
-
-def test_set_element(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert compute(A[1, 1].value) is None
- assert A[3, 0].value == 3
- A[1, 1].update(21)
- A[3, 0] << -5
- assert A[1, 1].value == 21
- assert A[3, 0].new() == -5
-
-
-def test_remove_element(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A[3, 0].value == 3
- del A[3, 0]
- assert compute(A[3, 0].value) is None
- assert A[6, 3].value == 7
- with pytest.raises(TypeError, match="Remove Element only supports"):
- del A[3:5, 3]
-
-
-def test_mxm(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- C = A.mxm(A, semiring.plus_times).new()
- result = Matrix.from_values(
- [0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 6],
- [0, 2, 4, 6, 2, 3, 4, 5, 2, 1, 3, 5, 2, 5, 0, 2, 5],
- [9, 9, 16, 8, 20, 28, 12, 56, 1, 6, 9, 3, 7, 1, 21, 21, 26],
- )
- assert C.isequal(result)
+ assert rcv == expected
+ assert rows.dtype == np.uint64
+ assert cols.dtype == np.uint64
+ assert vals.dtype == np.int64
+ Trows, Tcols, Tvals = A.T.to_values(dtype=float)
+ np.testing.assert_array_equal(rows.compute(), Tcols.compute())
+ np.testing.assert_array_equal(cols.compute(), Trows.compute())
+ np.testing.assert_array_equal(vals.compute(), Tvals.compute())
+ assert Trows.dtype == np.uint64
+ assert Tcols.dtype == np.uint64
+ assert Tvals.dtype == np.float64
+
+
+def test_extract_element(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A[3, 0].new() == 3
+ assert A[1, 6].new() == 4
+ assert A[1, 6].value == 4
+ assert A.T[6, 1].value == 4
+ s = A[0, 0].new()
+ assert compute(s.value) is None
+ assert s.dtype == "INT64"
+ s = A[1, 6].new(dtype=float)
+ assert s.value == 4.0
+ assert s.dtype == "FP64"
+
+
+def test_set_element(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert compute(A[1, 1].value) is None
+ assert A[3, 0].value == 3
+ A[1, 1].update(21)
+ A[3, 0] << -5
+ assert A[1, 1].value == 21
+ assert A[3, 0].new() == -5
+
+
+def test_remove_element(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A[3, 0].value == 3
+ del A[3, 0]
+ assert compute(A[3, 0].value) is None
+ assert A[6, 3].value == 7
+ with pytest.raises(TypeError, match="Remove Element only supports"):
+ del A[3:5, 3]
+
+
+def test_mxm(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ C = A.mxm(A, semiring.plus_times).new()
+ result = Matrix.from_values(
+ [0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 6],
+ [0, 2, 4, 6, 2, 3, 4, 5, 2, 1, 3, 5, 2, 5, 0, 2, 5],
+ [9, 9, 16, 8, 20, 28, 12, 56, 1, 6, 9, 3, 7, 1, 21, 21, 26],
+ )
+ assert C.isequal(result)
-def test_mxm_transpose(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- C = A.dup()
- C << A.mxm(A.T, semiring.plus_times)
- result = Matrix.from_values(
- [0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6],
- [0, 6, 1, 6, 2, 4, 3, 5, 6, 2, 4, 3, 5, 6, 0, 1, 3, 5, 6],
- [13, 21, 80, 24, 1, 7, 18, 3, 15, 7, 49, 3, 1, 5, 21, 24, 15, 5, 83],
- )
- assert C.isequal(result)
- C << A.T.mxm(A, semiring.plus_times)
- result2 = Matrix.from_values(
- [0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 6],
- [0, 2, 1, 3, 0, 2, 3, 4, 1, 2, 3, 4, 2, 3, 4, 6, 5, 4, 6],
- [9, 9, 4, 6, 9, 35, 35, 15, 6, 35, 58, 21, 15, 21, 73, 32, 50, 32, 16],
- )
- assert C.isequal(result2)
+def test_mxm_transpose(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ C = A.dup()
+ C << A.mxm(A.T, semiring.plus_times)
+ result = Matrix.from_values(
+ [0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6],
+ [0, 6, 1, 6, 2, 4, 3, 5, 6, 2, 4, 3, 5, 6, 0, 1, 3, 5, 6],
+ [13, 21, 80, 24, 1, 7, 18, 3, 15, 7, 49, 3, 1, 5, 21, 24, 15, 5, 83],
+ )
+ assert C.isequal(result)
+ C << A.T.mxm(A, semiring.plus_times)
+ result2 = Matrix.from_values(
+ [0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 6],
+ [0, 2, 1, 3, 0, 2, 3, 4, 1, 2, 3, 4, 2, 3, 4, 6, 5, 4, 6],
+ [9, 9, 4, 6, 9, 35, 35, 15, 6, 35, 58, 21, 15, 21, 73, 32, 50, 32, 16],
+ )
+ assert C.isequal(result2)
def test_mxm_nonsquare():
@@ -488,170 +566,188 @@ def test_mxm_nonsquare():
assert C2.nrows == 5
assert C2.ncols == 5
-
-def test_mxm_mask(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- val_mask = Matrix.from_values([0, 3, 4], [2, 3, 2], [True, True, True], nrows=7, ncols=7)
- struct_mask = Matrix.from_values([0, 3, 4], [2, 3, 2], [1, 0, 0], nrows=7, ncols=7)
- C = A.dup()
- C(val_mask.V) << A.mxm(A, semiring.plus_times)
- result = Matrix.from_values(
- [0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 6, 6, 6],
- [1, 2, 3, 4, 6, 5, 0, 2, 3, 2, 5, 2, 2, 3, 4],
- [2, 9, 3, 8, 4, 1, 3, 3, 9, 7, 7, 1, 5, 7, 3],
- )
- assert C.isequal(result)
- C = A.dup()
- C(~val_mask.V) << A.mxm(A, semiring.plus_times)
- result2 = Matrix.from_values(
- [0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 5, 6, 6, 6],
- [0, 4, 6, 2, 3, 4, 5, 2, 1, 5, 5, 0, 2, 5],
- [9, 16, 8, 20, 28, 12, 56, 1, 6, 3, 1, 21, 21, 26],
- )
- assert C.isequal(result2)
- C = A.dup()
- C(struct_mask.S, replace=True).update(A.mxm(A, semiring.plus_times))
- result3 = Matrix.from_values([0, 3, 4], [2, 3, 2], [9, 9, 7], nrows=7, ncols=7)
- assert C.isequal(result3)
- C2 = A.mxm(A, semiring.plus_times).new(mask=struct_mask.S)
- assert C2.isequal(result3)
- with pytest.raises(TypeError, match="Mask must indicate"):
- A.mxm(A).new(mask=struct_mask)
-
-
-def test_mxm_accum(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- A(binary.plus) << A.mxm(A, semiring.plus_times)
- # fmt: off
- result = Matrix.from_values(
- [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6],
- [0, 1, 2, 3, 4, 6, 2, 3, 4, 5, 6, 2, 5, 0, 1, 2, 3, 5, 2, 5, 2, 5, 0, 2, 3, 4, 5],
- [9, 2, 9, 3, 16, 8, 20, 28, 20, 56, 4, 1, 1, 3, 6, 3, 9, 3, 7, 7, 1, 1, 21, 26, 7, 3, 26],
- )
- # fmt: on
- assert A.isequal(result)
-
-
-def test_mxv(A, A_chunks, v):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- w = A.mxv(v, semiring.plus_times).new()
- result = Vector.from_values([0, 1, 6], [5, 16, 13])
- assert w.isequal(result)
-
-
-def test_ewise_mult(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- # Binary, Monoid, and Semiring
- B = Matrix.from_values([0, 0, 5], [1, 2, 2], [5, 4, 8], nrows=7, ncols=7)
- result = Matrix.from_values([0, 5], [1, 2], [10, 8], nrows=7, ncols=7)
- C = A.ewise_mult(B, binary.times).new()
- assert C.isequal(result)
- C() << A.ewise_mult(B, monoid.times)
- assert C.isequal(result)
- with pytest.raises(TypeError, match="Expected type: BinaryOp, Monoid"):
- A.ewise_mult(B, semiring.plus_times)
-
-
-def test_ewise_add(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- # Binary, Monoid, and Semiring
- B = Matrix.from_values([0, 0, 5], [1, 2, 2], [5, 4, 8], nrows=7, ncols=7)
- result = Matrix.from_values(
- [0, 3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [2, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [4, 3, 5, 3, 8, 5, 3, 7, 8, 3, 1, 7, 4],
- )
- with pytest.raises(TypeError, match="require_monoid"):
- A.ewise_add(B, binary.second)
- # surprising that SECOND(x, empty) == x, which is why user
- # must opt-in to using binary ops in ewise_add
- C = A.ewise_add(B, binary.second, require_monoid=False).new()
- assert C.isequal(result)
- C << A.ewise_add(B, monoid.max)
- assert C.isequal(result)
- C << A.ewise_add(B, binary.max)
- assert C.isequal(result)
- with pytest.raises(TypeError, match="Expected type: Monoid"):
- A.ewise_add(B, semiring.max_minus)
+ A = Matrix.from_values(
+ da.from_array([0, 0, 0]),
+ da.from_array([0, 2, 4]),
+ da.from_array([1, 2, 3]),
+ nrows=1,
+ ncols=5,
+ )
+ B = Matrix.from_values([0, 2, 4], [0, 0, 0], [10, 20, 30], nrows=5, ncols=1)
+ C = Matrix.new(A.dtype, nrows=1, ncols=1)
+ C << A.mxm(B, semiring.max_plus)
+ assert C[0, 0].value == 33
+ C1 = A.mxm(B, semiring.max_plus).new()
+ assert C1.isequal(C)
+ C2 = A.T.mxm(B.T, semiring.max_plus).new()
+ assert C2.nrows == 5
+ assert C2.ncols == 5
-def test_extract(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- C = Matrix.new(A.dtype, 3, 4)
- result = Matrix.from_values(
- [0, 0, 1, 2, 2, 2], [0, 2, 1, 1, 2, 3], [2, 3, 3, 5, 7, 3], nrows=3, ncols=4
- )
- C << A[[0, 3, 6], [1, 2, 3, 4]]
- assert C.isequal(result)
- C << A[0::3, 1:5]
- assert C.isequal(result)
- C << A[[0, 3, 6], 1:5:1]
- assert C.isequal(result)
- C2 = A[[0, 3, 6], [1, 2, 3, 4]].new()
- assert C2.isequal(result)
+def test_mxm_mask(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ val_mask = Matrix.from_values([0, 3, 4], [2, 3, 2], [True, True, True], nrows=7, ncols=7)
+ struct_mask = Matrix.from_values([0, 3, 4], [2, 3, 2], [1, 0, 0], nrows=7, ncols=7)
+ C = A.dup()
+ C(val_mask.V) << A.mxm(A, semiring.plus_times)
+ result = Matrix.from_values(
+ [0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 6, 6, 6],
+ [1, 2, 3, 4, 6, 5, 0, 2, 3, 2, 5, 2, 2, 3, 4],
+ [2, 9, 3, 8, 4, 1, 3, 3, 9, 7, 7, 1, 5, 7, 3],
+ )
+ assert C.isequal(result)
+ C = A.dup()
+ C(~val_mask.V) << A.mxm(A, semiring.plus_times)
+ result2 = Matrix.from_values(
+ [0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 5, 6, 6, 6],
+ [0, 4, 6, 2, 3, 4, 5, 2, 1, 5, 5, 0, 2, 5],
+ [9, 16, 8, 20, 28, 12, 56, 1, 6, 3, 1, 21, 21, 26],
+ )
+ assert C.isequal(result2)
+ C = A.dup()
+ C(struct_mask.S, replace=True).update(A.mxm(A, semiring.plus_times))
+ result3 = Matrix.from_values([0, 3, 4], [2, 3, 2], [9, 9, 7], nrows=7, ncols=7)
+ assert C.isequal(result3)
+ C2 = A.mxm(A, semiring.plus_times).new(mask=struct_mask.S)
+ assert C2.isequal(result3)
+ with pytest.raises(TypeError, match="Mask must indicate"):
+ A.mxm(A).new(mask=struct_mask)
+
+
+def test_mxm_accum(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ A(binary.plus) << A.mxm(A, semiring.plus_times)
+ # fmt: off
+ result = Matrix.from_values(
+ [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6],
+ [0, 1, 2, 3, 4, 6, 2, 3, 4, 5, 6, 2, 5, 0, 1, 2, 3, 5, 2, 5, 2, 5, 0, 2, 3, 4, 5],
+ [9, 2, 9, 3, 16, 8, 20, 28, 20, 56, 4, 1, 1, 3, 6, 3, 9, 3, 7, 7, 1, 1, 21, 26, 7, 3, 26],
+ )
+ # fmt: on
+ assert A.isequal(result)
-def test_extract_row(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- w = Vector.new(A.dtype, 3)
- result = Vector.from_values([1, 2], [5, 3], size=3)
- w << A[6, [0, 2, 4]]
- assert w.isequal(result)
- w << A[6, :5:2]
- assert w.isequal(result)
- w << A.T[[0, 2, 4], 6]
- assert w.isequal(result)
- w2 = A[6, [0, 2, 4]].new()
- assert w2.isequal(result)
- with pytest.raises(TypeError):
- # Should be list, not tuple (although tuple isn't so bad)
- A[6, (0, 2, 4)]
- w3 = A[6, np.array([0, 2, 4])].new()
- assert w3.isequal(result)
- with pytest.raises(TypeError, match="Invalid dtype"):
- A[6, np.array([0, 2, 4], dtype=float)]
- with pytest.raises(TypeError, match="Invalid number of dimensions"):
- A[6, np.array([[0, 2, 4]])]
+def test_mxv(As, A_chunks, v):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ w = A.mxv(v, semiring.plus_times).new()
+ result = Vector.from_values([0, 1, 6], [5, 16, 13])
+ assert w.isequal(result)
-def test_extract_column(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- w = Vector.new(A.dtype, 3)
- result = Vector.from_values([1, 2], [3, 1], size=3)
- w << A[[1, 3, 5], 2]
- assert w.isequal(result)
- w << A[1:6:2, 2]
- assert w.isequal(result)
- w << A.T[2, [1, 3, 5]]
- assert w.isequal(result)
- w2 = A[1:6:2, 2].new()
- assert w2.isequal(result)
+def test_ewise_mult(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ # Binary, Monoid, and Semiring
+ B = Matrix.from_values([0, 0, 5], [1, 2, 2], [5, 4, 8], nrows=7, ncols=7)
+ result = Matrix.from_values([0, 5], [1, 2], [10, 8], nrows=7, ncols=7)
+ C = A.ewise_mult(B, binary.times).new()
+ assert C.isequal(result)
+ C() << A.ewise_mult(B, monoid.times)
+ assert C.isequal(result)
+ with pytest.raises(TypeError, match="Expected type: BinaryOp, Monoid"):
+ A.ewise_mult(B, semiring.plus_times)
+
+
+def test_ewise_add(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ # Binary, Monoid, and Semiring
+ B = Matrix.from_values([0, 0, 5], [1, 2, 2], [5, 4, 8], nrows=7, ncols=7)
+ result = Matrix.from_values(
+ [0, 3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [2, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [4, 3, 5, 3, 8, 5, 3, 7, 8, 3, 1, 7, 4],
+ )
+ with pytest.raises(TypeError, match="require_monoid"):
+ A.ewise_add(B, binary.second)
+ # surprising that SECOND(x, empty) == x, which is why user
+ # must opt-in to using binary ops in ewise_add
+ C = A.ewise_add(B, binary.second, require_monoid=False).new()
+ assert C.isequal(result)
+ C << A.ewise_add(B, monoid.max)
+ assert C.isequal(result)
+ C << A.ewise_add(B, binary.max)
+ assert C.isequal(result)
+ with pytest.raises(TypeError, match="Expected type: Monoid"):
+ A.ewise_add(B, semiring.max_minus)
+
+
+def test_extract(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ C = Matrix.new(A.dtype, 3, 4)
+ result = Matrix.from_values(
+ [0, 0, 1, 2, 2, 2], [0, 2, 1, 1, 2, 3], [2, 3, 3, 5, 7, 3], nrows=3, ncols=4
+ )
+ C << A[[0, 3, 6], [1, 2, 3, 4]]
+ assert C.isequal(result)
+ C << A[0::3, 1:5]
+ assert C.isequal(result)
+ C << A[[0, 3, 6], 1:5:1]
+ assert C.isequal(result)
+ C2 = A[[0, 3, 6], [1, 2, 3, 4]].new()
+ assert C2.isequal(result)
+
+
+def test_extract_row(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ w = Vector.new(A.dtype, 3)
+ result = Vector.from_values([1, 2], [5, 3], size=3)
+ w << A[6, [0, 2, 4]]
+ assert w.isequal(result)
+ w << A[6, :5:2]
+ assert w.isequal(result)
+ w << A.T[[0, 2, 4], 6]
+ assert w.isequal(result)
+ w2 = A[6, [0, 2, 4]].new()
+ assert w2.isequal(result)
+ with pytest.raises(TypeError):
+ # Should be list, not tuple (although tuple isn't so bad)
+ A[6, (0, 2, 4)]
+ w3 = A[6, np.array([0, 2, 4])].new()
+ assert w3.isequal(result)
+ with pytest.raises(TypeError, match="Invalid dtype"):
+ A[6, np.array([0, 2, 4], dtype=float)]
+ with pytest.raises(TypeError, match="Invalid number of dimensions"):
+ A[6, np.array([[0, 2, 4]])]
+
+
+def test_extract_column(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ w = Vector.new(A.dtype, 3)
+ result = Vector.from_values([1, 2], [3, 1], size=3)
+ w << A[[1, 3, 5], 2]
+ assert w.isequal(result)
+ w << A[1:6:2, 2]
+ assert w.isequal(result)
+ w << A.T[2, [1, 3, 5]]
+ assert w.isequal(result)
+ w2 = A[1:6:2, 2].new()
+ assert w2.isequal(result)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_extract_input_mask():
# A M
# 0 1 2 _ 0 1
@@ -811,6 +907,7 @@ def test_extract_input_mask():
assert result.isequal(expected)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_extract_with_matrix(A, A_chunks):
A_ = A
for chunks in A_chunks:
@@ -824,6 +921,7 @@ def test_extract_with_matrix(A, A_chunks):
A[[0], A.V].new()
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_assign(A, A_chunks):
A_ = A
for chunks in A_chunks:
@@ -867,6 +965,7 @@ def test_assign(A, A_chunks):
assert C.isequal(result)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_assign_wrong_dims(A, A_chunks):
A_ = A
for chunks in A_chunks:
@@ -877,6 +976,7 @@ def test_assign_wrong_dims(A, A_chunks):
A[[0, 2, 4], [0, 5]] = B
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_assign_row(A, A_chunks, v):
A_ = A
for chunks in A_chunks:
@@ -892,6 +992,7 @@ def test_assign_row(A, A_chunks, v):
assert C.isequal(result)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_subassign_row_col(A_chunks):
A = Matrix.from_values(
[0, 0, 0, 1, 1, 1, 2, 2, 2],
@@ -957,6 +1058,7 @@ def test_subassign_row_col(A_chunks):
assert A.isequal(result6)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_subassign_matrix():
A = Matrix.from_values(
[0, 0, 0, 1, 1, 1, 2, 2, 2],
@@ -1032,6 +1134,7 @@ def test_subassign_matrix():
assert A.isequal(result6)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_assign_column(A, A_chunks, v):
A_ = A
for chunks in A_chunks:
@@ -1047,6 +1150,7 @@ def test_assign_column(A, A_chunks, v):
assert C.isequal(result)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_assign_row_scalar(A, A_chunks, v):
A_ = A
for chunks in A_chunks:
@@ -1093,6 +1197,7 @@ def test_assign_row_scalar(A, A_chunks, v):
assert C.isequal(result)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_assign_row_col_matrix_mask():
# A B v1 v2
# 0 1 4 _ 100 10
@@ -1235,6 +1340,7 @@ def test_assign_row_col_matrix_mask():
C[0, 0](B.S) << 100
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_assign_column_scalar(A, A_chunks, v):
A_ = A
for chunks in A_chunks:
@@ -1265,6 +1371,7 @@ def test_assign_column_scalar(A, A_chunks, v):
assert C.isequal(result)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_assign_scalar(A, A_chunks):
A_ = A
for chunks in A_chunks:
@@ -1318,6 +1425,7 @@ def test_assign_scalar(A, A_chunks):
assert C.isequal(result_column)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_assign_bad(A, A_chunks):
A_ = A
for chunks in A_chunks:
@@ -1344,6 +1452,7 @@ def test_assign_bad(A, A_chunks):
A[:, :] = v
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_apply(A, A_chunks):
A_ = A
for chunks in A_chunks:
@@ -1358,6 +1467,7 @@ def test_apply(A, A_chunks):
assert C.isequal(result)
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_apply_binary(A, A_chunks):
A_ = A
for chunks in A_chunks:
@@ -1396,7 +1506,7 @@ def test_apply_binary(A, A_chunks):
assert w1.isequal(w2)
assert w1.isequal(w3)
-
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_reduce_row(A, A_chunks):
A_ = A
for chunks in A_chunks:
@@ -1410,6 +1520,7 @@ def test_reduce_row(A, A_chunks):
@pytest.mark.slow
+@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_reduce_agg(A, A_chunks):
A_ = A
for chunks in A_chunks:
From d5336d836f6cb5233ac760208205a80855694d57 Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Wed, 9 Mar 2022 22:23:40 +0100
Subject: [PATCH 12/18] simplified dOnion technology

Many tests in from_grblas2/test_matrix now work with dOnions.
---
dask_grblas/_ss/matrix.py | 2 +-
dask_grblas/base.py | 321 ++++-
dask_grblas/expr.py | 275 +++-
dask_grblas/mask.py | 10 +
dask_grblas/matrix.py | 287 ++--
dask_grblas/scalar.py | 160 ++-
dask_grblas/vector.py | 43 +-
tests/from_grblas2/test_matrix.py | 2092 ++++++++++++++++-------------
8 files changed, 1906 insertions(+), 1284 deletions(-)
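
Throughout this patch the same dispatch idiom recurs: every operand exposes
is_dOnion and dOnion_if, and whenever any operand is a dOnion the whole
operation is re-routed through DOnion.multiple_access(meta, func, *args,
**kwargs), which re-applies func once the wrapped values are known. The
snippet below is a rough, self-contained sketch of that idiom using only
dask.delayed; the names Lazy and dispatch are illustrative stand-ins, not
part of dask_grblas.

    import dask

    class Lazy:
        # Stand-in for DOnion: wraps a value that is only known at compute time.
        is_lazy = True

        def __init__(self, delayed):
            self.delayed = delayed

        def compute(self):
            return self.delayed.compute()

    def dispatch(func, *args, **kwargs):
        # Stand-in for DOnion.multiple_access: if any positional argument is
        # lazy, defer the whole call and rewrap the result; otherwise run now.
        if any(getattr(a, "is_lazy", False) for a in args):
            unwrapped = [a.delayed if getattr(a, "is_lazy", False) else a for a in args]
            return Lazy(dask.delayed(func)(*unwrapped, **kwargs))
        return func(*args, **kwargs)

    # usage: the result's size is only known after n has been computed
    n = Lazy(dask.delayed(lambda: 5)())
    out = dispatch(lambda k: list(range(k)), n)
    assert out.compute() == [0, 1, 2, 3, 4]
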
diff --git a/dask_grblas/_ss/matrix.py b/dask_grblas/_ss/matrix.py
index 3c63154..d312360 100644
--- a/dask_grblas/_ss/matrix.py
+++ b/dask_grblas/_ss/matrix.py
@@ -53,4 +53,4 @@ def build_scalar(
nrows=nrows,
ncols=ncols,
chunks=chunks,
- )
\ No newline at end of file
+ )
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index cef30fc..4dfa6a7 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -41,6 +41,14 @@ class BaseType:
_expect_type = _expect_type
_is_scalar = False
+ @property
+ def is_dOnion(self):
+ return is_DOnion(self._delayed)
+
+ @property
+ def dOnion_if(self):
+ return self._delayed if self.is_dOnion else self
+
def isequal(self, other, *, check_dtype=False):
from .scalar import PythonScalar
@@ -88,6 +96,20 @@ def isequal(self, other, *, check_dtype=False):
def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
from .scalar import PythonScalar
+ args = [self, other]
+ if np.any([type(arg._delayed) is DOnion for arg in args]):
+ args = [arg._delayed if type(arg._delayed) is DOnion else arg for arg in args]
+ meta = gb.Scalar.new(bool)
+ delayed = DOnion.multiple_access(
+ meta,
+ self.__class__.isclose,
+ *args,
+ rel_tol=rel_tol,
+ abs_tol=abs_tol,
+ check_dtype=check_dtype,
+ )
+ return PythonScalar(delayed, meta=meta)
+
# if type(other) is not type(self):
# raise TypeError(f'Argument of isclose must be of type {type(self).__name__}')
if not self._meta.isequal(other._meta):
@@ -114,7 +136,7 @@ def _clear(self):
# for a function like this, what's the difference between `map_blocks` and `elemwise`?
if self.ndim == 0:
return self.__class__(
- delayed.map_blocks(
+ delayed.map_blocks(
_clear,
dtype=np_dtype(self.dtype),
)
@@ -130,7 +152,7 @@ def _clear(self):
def clear(self):
if is_DOnion(self._delayed):
- self.__init__(self._delayed.getattr(self._meta, '_clear'), meta=self._meta, nvals=0)
+ self.__init__(self._delayed.getattr(self._meta, "_clear"), meta=self._meta, nvals=0)
return
# Should we copy and mutate or simply create new chunks?
@@ -297,44 +319,117 @@ def _name_html(self):
return f"{split[0]}{split[1]}"
def update(self, expr, in_DOnion=False):
+ if isinstance(expr, Number):
+ if self.ndim == 2:
+ raise TypeError(
+ "Warning: updating a Matrix with a scalar without a mask will "
+ "make the Matrix dense. This may use a lot of memory and probably "
+ "isn't what you want. Perhaps you meant:"
+ "\n\n M(M.S) << s\n\n"
+ "If you do wish to make a dense matrix, then please be explicit:"
+ "\n\n M[:, :] = s"
+ )
typ = type(expr)
if (
- is_DOnion(self._delayed)
- or typ is GbDelayed and is_DOnion(expr.parent)
- or typ is AmbiguousAssignOrExtract and is_DOnion(expr._donion)
- or typ is type(self) and is_DOnion(expr._delayed)
+ self.is_dOnion
+ or typ is AmbiguousAssignOrExtract
+ and expr.has_dOnion
+ or typ is GbDelayed
+ and expr.has_dOnion
+ or typ is type(self)
+ and expr.is_dOnion
+ or typ is TransposedMatrix
+ and expr.is_dOnion
):
- self_ = self.__class__(self._delayed, meta=self._meta)
- self_ = self_._delayed if is_DOnion(self_._delayed) else self_
+ self_copy = self.__class__(self._delayed, meta=self._meta)
expr_ = expr
- if typ is GbDelayed and is_DOnion(expr.parent):
- expr_ = expr.parent
- elif typ is AmbiguousAssignOrExtract and is_DOnion(expr._donion):
- expr_ = expr._donion
- elif typ is type(self) and is_DOnion(expr._delayed):
+ if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
+
+ def update_by_aae(c, p, t, k_0, k_1):
+ p = p.T if t else p
+ keys = k_0 if k_1 is None else (k_0, k_1)
+ aae = AmbiguousAssignOrExtract(p, keys)
+ return c.update(aae, in_DOnion=True)
+
+ aae_parent = expr_.parent.dOnion_if
+ aae_parent_is_T = expr_.parent.is_dOnion and getattr(
+ expr_.parent, "_is_transposed", False
+ )
+ if type(expr_.index) is tuple and len(expr_.index) == 2:
+ keys_0, keys_1 = expr_.index[0], expr_.index[1]
+ else:
+ keys_0, keys_1 = expr_.index, None
+
+ donion = DOnion.multiple_access(
+ self._meta,
+ update_by_aae,
+ self_copy.dOnion_if,
+ aae_parent,
+ aae_parent_is_T,
+ *(keys_0, keys_1),
+ )
+ self.__init__(donion, self._meta)
+ return
+
+ if typ is GbDelayed and expr.has_dOnion:
+
+ def update_by_gbd(c, t, *args, **kwargs):
+ args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
+ return c.update(gbd, in_DOnion=True)
+
+ gbd_parent = expr_.parent.dOnion_if
+ gbd_method = expr.method_name
+ gbd_args = (gbd_parent, gbd_method) + tuple(
+ getattr(x, "dOnion_if", x) for x in expr.args
+ )
+ gbd_parent_is_T = expr_.parent.is_dOnion and getattr(
+ expr_.parent, "_is_transposed", False
+ )
+ is_T = (gbd_parent_is_T, False) + tuple(
+ getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
+ for x in expr.args
+ )
+ gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in expr.kwargs.items()}
+ donion = DOnion.multiple_access(
+ self._meta,
+ update_by_gbd,
+ self_copy.dOnion_if,
+ is_T,
+ *gbd_args,
+ **gbd_kwargs,
+ )
+ self.__init__(donion, self._meta)
+ return
+ elif typ is type(self) and expr.is_dOnion:
expr_ = expr._delayed
+ elif typ is TransposedMatrix and expr.is_dOnion:
+
+ def update_T(lhs, rhs):
+ return BaseType.update(lhs, rhs.T, in_DOnion=True)
+
+ donion = DOnion.multiple_access(
+ self._meta, update_T, self_copy.dOnion_if, expr_.dOnion_if
+ )
+ self.__init__(donion, self._meta)
+ return
+
donion = DOnion.multiple_access(
- self._meta, BaseType.update, self_, expr_, in_DOnion=True
+ self._meta, BaseType.update, self_copy.dOnion_if, expr_, in_DOnion=True
)
self.__init__(donion, self._meta)
return
+ if typ is Box:
+ expr = expr.content
+ typ = type(expr)
+
if isinstance(expr, Number):
- if self.ndim == 2:
- raise TypeError(
- "Warning: updating a Matrix with a scalar without a mask will "
- "make the Matrix dense. This may use a lot of memory and probably "
- "isn't what you want. Perhaps you meant:"
- "\n\n M(M.S) << s\n\n"
- "If you do wish to make a dense matrix, then please be explicit:"
- "\n\n M[:, :] = s"
- )
Updater(self)[...] << expr
if in_DOnion:
return self.__class__(self._delayed, meta=self._meta)
return
- self._meta.update(expr._meta)
self._meta.clear()
if typ is AmbiguousAssignOrExtract:
# Extract (w << v[index])
@@ -351,7 +446,7 @@ def update(self, expr, in_DOnion=False):
expr._update(self)
elif typ is TransposedMatrix:
# "C << A.T"
- C = expr.new()
+ C = expr.new(dtype=self.dtype)
self.__init__(C._delayed)
else:
# Anything else we need to handle?
@@ -362,26 +457,91 @@ def update(self, expr, in_DOnion=False):
def _update(self, expr, *, mask=None, accum=None, replace=None, in_DOnion=False):
typ = type(expr)
if (
- is_DOnion(self._delayed)
- or mask is not None and is_DOnion(mask.mask)
- or typ is GbDelayed and is_DOnion(expr.parent)
- or typ is AmbiguousAssignOrExtract and is_DOnion(expr._donion)
- or typ is type(self) and is_DOnion(expr._delayed)
+ self.is_dOnion
+ or mask is not None
+ and mask.is_dOnion
+ or typ is AmbiguousAssignOrExtract
+ and expr.has_dOnion
+ or typ is GbDelayed
+ and expr.has_dOnion
+ or typ is type(self)
+ and expr.is_dOnion
):
- self_ = self._delayed if is_DOnion(self._delayed) else self
- mask_ = mask.mask if mask is not None and is_DOnion(mask.mask) else mask
+ self_copy = self.__class__(self._delayed, meta=self._meta)
+ mask_ = mask.dOnion_if if mask is not None else None
expr_ = expr
- if typ is GbDelayed and is_DOnion(expr.parent):
- expr_ = expr.parent
- elif typ is AmbiguousAssignOrExtract and is_DOnion(expr._donion):
- expr_ = expr._donion
- elif typ is type(self) and is_DOnion(expr._delayed):
+ if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
+
+ def _update_by_aae(c, p, t, k_0, k_1, mask=None, accum=None, replace=None):
+ p = p.T if t else p
+ keys = k_0 if k_1 is None else (k_0, k_1)
+ aae = AmbiguousAssignOrExtract(p, keys)
+ return c.update(aae, mask=mask, accum=accum, replace=replace, in_DOnion=True)
+
+ aae_parent = expr_.parent.dOnion_if
+ aae_parent_is_T = expr_.parent.is_dOnion and getattr(
+ expr_.parent, "_is_transposed", False
+ )
+ if type(expr_.index) is tuple and len(expr_.index) == 2:
+ keys_0, keys_1 = expr_.index[0], expr_.index[1]
+ else:
+ keys_0, keys_1 = expr_.index, None
+
+ donion = DOnion.multiple_access(
+ self._meta,
+ _update_by_aae,
+ self_copy.dOnion_if,
+ aae_parent,
+ aae_parent_is_T,
+ *(keys_0, keys_1),
+ mask=mask_,
+ accum=accum,
+ replace=replace,
+ )
+ self.__init__(donion, self._meta)
+ return
+
+ if typ is GbDelayed and expr.has_dOnion:
+
+ def _update_by_gbd(c, t, *args, mask=None, accum=None, replace=None, **kwargs):
+ args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
+ return c._update(gbd, mask=mask, accum=accum, replace=replace, in_DOnion=True)
+
+ gbd_parent = expr_.parent.dOnion_if
+ gbd_method = expr.method_name
+ gbd_args = (gbd_parent, gbd_method) + tuple(
+ getattr(x, "dOnion_if", x) for x in expr.args
+ )
+ gbd_parent_is_T = expr_.parent.is_dOnion and getattr(
+ expr_.parent, "_is_transposed", False
+ )
+ is_T = (gbd_parent_is_T, False) + tuple(
+ getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
+ for x in expr.args
+ )
+ gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in expr.kwargs.items()}
+ donion = DOnion.multiple_access(
+ self._meta,
+ _update_by_gbd,
+ self_copy.dOnion_if,
+ is_T,
+ *gbd_args,
+ mask=mask_,
+ accum=accum,
+ replace=replace,
+ **gbd_kwargs,
+ )
+ self.__init__(donion, self._meta)
+ return
+
+ elif typ is type(self) and expr.is_dOnion:
expr_ = expr._delayed
donion = DOnion.multiple_access(
self._meta,
BaseType._update,
- self_,
+ self_copy.dOnion_if,
expr_,
mask=mask_,
accum=accum,
@@ -391,6 +551,10 @@ def _update(self, expr, *, mask=None, accum=None, replace=None, in_DOnion=False)
self.__init__(donion, meta=self._meta)
return
+ if typ is Box:
+ expr = expr.content
+ typ = type(expr)
+
if mask is None and accum is None:
self.update(expr)
if in_DOnion:
@@ -398,27 +562,42 @@ def _update(self, expr, *, mask=None, accum=None, replace=None, in_DOnion=False)
return
if typ is AmbiguousAssignOrExtract:
# Extract (w(mask=mask, accum=accum) << v[index])
- delayed = self._optional_dup()
- expr_delayed = expr.new(dtype=self.dtype)._delayed
- self._meta(mask=get_meta(mask), accum=accum, replace=replace)
- if mask is not None:
- delayed_mask = mask.mask._delayed
- grblas_mask_type = get_grblas_type(mask)
+ expr_new = expr.new(dtype=self.dtype)
+ if expr_new.is_dOnion:
+ self_ = self.__class__(self._delayed, meta=self._meta)
+ donion = DOnion.multiple_access(
+ self._meta,
+ BaseType._update,
+ self_,
+ expr_new,
+ mask=mask,
+ accum=accum,
+ replace=replace,
+ in_DOnion=True,
+ )
+ self.__init__(donion, meta=self._meta)
else:
- delayed_mask = None
- grblas_mask_type = None
- self.__init__(
- da.core.elemwise(
- _update_assign,
- delayed,
- accum,
- delayed_mask,
- grblas_mask_type,
- replace,
- expr_delayed,
- dtype=np_dtype(self._meta.dtype),
+ expr_delayed = expr_new._delayed
+ delayed = self._optional_dup()
+ self._meta(mask=get_meta(mask), accum=accum, replace=replace)
+ if mask is not None:
+ delayed_mask = mask.mask._delayed
+ grblas_mask_type = get_grblas_type(mask)
+ else:
+ delayed_mask = None
+ grblas_mask_type = None
+ self.__init__(
+ da.core.elemwise(
+ _update_assign,
+ delayed,
+ accum,
+ delayed_mask,
+ grblas_mask_type,
+ replace,
+ expr_delayed,
+ dtype=np_dtype(self._meta.dtype),
+ )
)
- )
elif typ is GbDelayed:
# v(mask=mask) << left.ewise_mult(right)
# Meta check handled in Updater
@@ -449,7 +628,7 @@ def _update(self, expr, *, mask=None, accum=None, replace=None, in_DOnion=False)
raise NotImplementedError(f"{typ}")
if in_DOnion:
- return self
+ return self.__class__(self._delayed, meta=self._meta)
def wait(self):
# TODO: What should this do?
@@ -475,6 +654,7 @@ class Box:
an Array object to prevent dask from post-processing the
Array at the end of compute()
"""
+
def __init__(self, content):
self.content = content
@@ -496,6 +676,8 @@ class DOnion:
(the shroud)
"""
+ is_dOnion = True
+
@classmethod
def sprout(cls, shroud, seed_meta, seed_func, *args, **kwargs):
"""
@@ -608,15 +790,17 @@ def multiple_access(cls, out_meta, func, *args, **kwargs):
def deep_extract(self, out_meta, func, *args, **kwargs):
func = flexible_partial(func, *args, **kwargs)
- if not isinstance(out_meta, (gb.base.BaseType, gb.mask.Mask, gb.matrix.TransposedMatrix)):
+ if not isinstance(
+ out_meta, (np.ndarray, gb.base.BaseType, gb.mask.Mask, gb.matrix.TransposedMatrix)
+ ):
func = compose(Box, func)
kernel = self.kernel.map_blocks(func, **_const0_DOnion)
return DOnion(kernel, meta=out_meta)
def __call__(self, *args, **kwargs):
meta = self._meta(*args, **kwargs)
- return self.getattr(meta, '__call__', *args, **kwargs)
-
+ return self.getattr(meta, "__call__", *args, **kwargs)
+
def __getattr__(self, item):
# TODO: how to compute meta of attribute?!!!
meta = getattr(self._meta, item)
@@ -638,11 +822,16 @@ def _getattr(cls, x, attr_name, *args, **kwargs):
def like_DOnion(what):
return (
is_DOnion(what)
- or isinstance(what, BaseType) and is_DOnion(what._delayed)
- or hasattr(what, '_matrix') and is_DOnion(what._matrix)
- or hasattr(what, 'parent') and is_DOnion(what.parent)
- or hasattr(what, 'mask') and is_DOnion(what.mask)
- or hasattr(what, '_donion') and is_DOnion(what._donion)
+ or isinstance(what, BaseType)
+ and is_DOnion(what._delayed)
+ or hasattr(what, "_matrix")
+ and is_DOnion(what._matrix)
+ or hasattr(what, "parent")
+ and is_DOnion(what.parent)
+ or hasattr(what, "mask")
+ and is_DOnion(what.mask)
+ or hasattr(what, "_donion")
+ and is_DOnion(what._donion)
)
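
Several of the consumers changed above (update, _update, Scalar.isequal,
Matrix.new and Vector.new) now begin by unwrapping a possible Box before
inspecting the expression type, since values travelling through a dOnion may
arrive boxed. A toy illustration of that unwrap convention follows; Box here
is a simplified stand-in for dask_grblas.base.Box, not the class itself.

    class Box:
        # simplified stand-in: holds an arbitrary payload
        def __init__(self, content):
            self.content = content

    def consume(expr):
        # mirror of the pattern added in update()/_update(): unwrap, then
        # dispatch on the type of the payload
        if type(expr) is Box:
            expr = expr.content
        return expr

    assert consume(Box(42)) == 42
    assert consume(42) == 42
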
diff --git a/dask_grblas/expr.py b/dask_grblas/expr.py
index 6636cda..1c07374 100644
--- a/dask_grblas/expr.py
+++ b/dask_grblas/expr.py
@@ -24,7 +24,8 @@
class GbDelayed:
- def __init__(self, parent, method_name, *args, meta, **kwargs):
+ def __init__(self, parent, method_name, *args, meta=None, **kwargs):
+ self.has_dOnion = np.any([getattr(x, "is_dOnion", False) for x in (parent,) + args])
self.parent = parent
self.method_name = method_name
self.args = args
@@ -232,13 +233,85 @@ def new(self, dtype=None, *, mask=None, name=None):
if mask is not None:
_check_mask(mask)
- if is_DOnion(self.parent) or mask is not None and is_DOnion(mask.mask):
- meta = self._meta.new(dtype=dtype)
- ret_type = get_return_type(meta)
+ if self.has_dOnion or mask is not None and mask.is_dOnion:
+
+ def GbDelayed_new(p, pt, m, t, *args, dtype=None, mask=None, **kwargs):
+ p = p.T if pt else p
+ args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ gbd = getattr(p, m)(*args, **kwargs)
+ return gbd.new(dtype=dtype, mask=mask)
+
+ gbd_args = tuple(getattr(x, "dOnion_if", x) for x in self.args)
+ is_T = tuple(
+ getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
+ for x in self.args
+ )
+ gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in self.kwargs.items()}
+ meta_kwargs = {k: getattr(v, "_meta", v) for k, v in self.kwargs.items()}
+
+ if self.method_name.startswith(("reduce", "apply")):
+ # unary operations
+ a = self.parent
+ op = self.args[0]
+ args = self.args[1:]
+ if self.method_name == "apply":
+ # grblas does not like empty Scalars!
+ if "left" in meta_kwargs and type(meta_kwargs["left"]) is gb.Scalar:
+ meta_kwargs["left"] = gb.Scalar.from_value(
+ 1, dtype=meta_kwargs["left"].dtype
+ )
+ if "right" in meta_kwargs and type(meta_kwargs["right"]) is gb.Scalar:
+ meta_kwargs["right"] = gb.Scalar.from_value(
+ 1, dtype=meta_kwargs["right"].dtype
+ )
+ elif self.method_name.startswith("reduce"):
+ # grblas bug occurs when shape is (0, 0)
+ if a._meta.shape == (0,) * a.ndim:
+ a._meta.resize(*((1,) * a.ndim))
+ meta = getattr(a._meta, self.method_name)(op, *args, **meta_kwargs).new(dtype=dtype)
+ meta.clear()
+ else:
+ # binary operations
+ a = self.parent
+ b = self.args[0]
+ op = self.args[1]
+
+ try:
+ meta = getattr(a._meta, self.method_name)(b._meta, op=op, **meta_kwargs).new(
+ dtype=dtype
+ )
+ except DimensionMismatch:
+ if self.method_name == "mxm":
+ b_meta = gb.Matrix.new(
+ dtype=b._meta.dtype, nrows=a._meta.ncols, ncols=b._meta.ncols
+ )
+ elif self.method_name == "vxm":
+ b_meta = gb.Matrix.new(
+ dtype=b._meta.dtype, nrows=a._meta.size, ncols=b._meta.ncols
+ )
+ elif self.method_name == "mxv":
+ b_meta = gb.Vector.new(dtype=b._meta.dtype, size=a._meta.ncols)
+
+ elif self.method_name in ("ewise_add", "ewise_mult"):
+ b_meta = a._meta.dup(dtype=b._meta.dtype)
+
+ meta = getattr(a._meta, self.method_name)(b_meta, op=op, **meta_kwargs).new(
+ dtype=dtype
+ )
+
donion = DOnion.multiple_access(
- meta, self.__class__.new, self.parent, dtype=dtype, mask=mask, name=name
+ meta,
+ GbDelayed_new,
+ a.dOnion_if,
+ a.is_dOnion and getattr(a, "_is_transposed", False),
+ self.method_name,
+ is_T,
+ *gbd_args,
+ dtype=dtype,
+ mask=None if mask is None else mask.dOnion_if,
+ **gbd_kwargs,
)
- return ret_type(donion, meta=meta)
+ return get_return_type(meta)(donion, meta=meta)
if mask is not None:
meta = self._meta.new(dtype=dtype, mask=mask._meta)
@@ -283,7 +356,6 @@ def new(self, dtype=None, *, mask=None, name=None):
dtype=np_dtype(meta.dtype),
)
elif self.method_name in {"vxm", "mxv", "mxm"}:
- # TODO: handle dtype and mask
delayed = self._matmul2(meta, mask=mask)
else:
raise ValueError(self.method_name)
@@ -446,7 +518,11 @@ def __init__(self, obj, indices, check_shape=True):
AxisIndex(obj._ncols, slice(*normalized1)),
]
else:
- self.indices = self.parse_indices(indices, obj.shape, check_shape)
+ if not check_shape and hasattr(obj, "_meta"):
+ shape = obj._meta.shape
+ else:
+ shape = obj.shape
+ self.indices = self.parse_indices(indices, shape, check_shape)
@property
def is_single_element(self):
@@ -492,13 +568,16 @@ def parse_index(self, index, typ, size, check_shape=True):
if index < 0:
if check_shape:
raise IndexError(f"Index out of range: index={index - size}, size={size}")
- return AxisIndex(None, IndexerResolver.normalize_index(index, size))
+ return AxisIndex(None, IndexerResolver.normalize_index(index, size, check_shape))
if typ is list:
index = [IndexerResolver.normalize_index(i, size, check_shape) for i in index]
return AxisIndex(len(index), index)
elif typ is slice:
- normalized = index.indices(size)
- return AxisIndex(len(range(*normalized)), slice(*normalized))
+ if check_shape:
+ normalized = index.indices(size)
+ return AxisIndex(len(range(*normalized)), slice(*normalized))
+ else:
+ return AxisIndex(None, index)
elif typ in {np.ndarray, da.Array}:
if len(index.shape) != 1:
@@ -506,6 +585,10 @@ def parse_index(self, index, typ, size, check_shape=True):
if not np.issubdtype(index.dtype, np.integer):
raise TypeError(f"Invalid dtype for index: {index.dtype}")
return AxisIndex(index.shape[0], index)
+
+ elif is_DOnion(index):
+ return AxisIndex(None, index)
+
else:
from .scalar import Scalar
@@ -627,6 +710,10 @@ def update(self, delayed):
if self.input_mask is not None:
if type(delayed) is AmbiguousAssignOrExtract:
# w(input_mask) << v[index]
+ if self.parent is delayed.parent:
+ delayed.parent = delayed.parent.__class__(
+ delayed.parent._delayed, delayed.parent._meta
+ )
self.parent._update(
delayed.new(mask=self.mask, input_mask=self.input_mask),
accum=self.accum,
@@ -640,7 +727,7 @@ def update(self, delayed):
if isinstance(delayed, Number) or (
isinstance(delayed, BaseType) and get_meta(delayed)._is_scalar
):
- ndim = len(self.parent.shape)
+ ndim = self.parent.ndim
if ndim > 0:
self.__setitem__(_squeeze((slice(None),) * ndim), delayed)
elif self.accum is not None:
@@ -652,9 +739,7 @@ def update(self, delayed):
if self.mask is None and self.accum is None:
return self.parent.update(delayed)
- if like_DOnion(self.parent) or like_DOnion(delayed):
- self.parent._meta = delayed._meta.new()
- else:
+ if not (like_DOnion(self.parent) or like_DOnion(delayed)):
self.parent._meta._update(
get_meta(delayed),
mask=get_meta(self.mask),
@@ -1264,17 +1349,49 @@ def _defrag_to_index_chunk(*args, x_chunks, dtype=None):
return wrap_inner(fused_fragments[index_tuple].new())
+def _adjust_meta_to_index(meta, index):
+ from .scalar import Scalar, PythonScalar
+
+ # Since grblas does not support indices that are dask arrays
+ # this complicates meta deduction. We therefore substitute
+ # any non-Integral type indices with `slice(None)`
+ index = index if type(index) is tuple else (index,)
+ # Next, we resize `meta` to accept any Integral-type indices:
+ numbers = [x for x in index if isinstance(x, (Integral, Scalar, PythonScalar))]
+ max_index = np.max(numbers) if numbers else None
+ meta = meta.dup()
+ if max_index is not None:
+ if len(index) == 1:
+ meta.resize(max_index + 1)
+ else:
+ meta.resize(max_index + 1, max_index + 1)
+
+ meta_index = tuple(
+ x if isinstance(x, (Integral, Scalar, PythonScalar)) else slice(None) for x in index
+ )
+ return meta[_squeeze(meta_index)]
+
+
class AmbiguousAssignOrExtract:
- def __init__(self, parent, index, self_donion=None, meta=None):
+ def __init__(self, parent, index, meta=None):
self.parent = parent
self.index = index
- self._donion = self_donion
- self._meta = parent._meta[index] if meta is None else meta
- if (
- not (hasattr(parent, '_delayed') and is_DOnion(parent._delayed))
- and not (hasattr(parent, '_matrix') and is_DOnion(parent._matrix))
- ):
+ input_ndim = parent.ndim
+ self.keys_0_is_dOnion = input_ndim == 1 and is_DOnion(index)
+ self.keys_1_is_dOnion = (
+ input_ndim == 2
+ and type(index) is tuple
+ and len(index) == 2
+ and (is_DOnion(index[0]) or is_DOnion(index[1]))
+ )
+ if parent.is_dOnion or self.keys_0_is_dOnion or self.keys_1_is_dOnion:
+ IndexerResolver(self.parent, index, check_shape=False)
+ self._meta = _adjust_meta_to_index(parent._meta, index)
+ self.has_dOnion = True
+ else:
self.resolved_indices = IndexerResolver(parent, index)
+ self._meta = parent._meta[index] if meta is None else meta
+ self.has_dOnion = False
# infix expression requirements:
shape = tuple(i.size for i in self.resolved_indices.indices if i.size)
self.ndim = len(shape)
@@ -1286,11 +1403,46 @@ def __init__(self, parent, index, self_donion=None, meta=None):
self._ncols = shape[1]
def new(self, *, dtype=None, mask=None, input_mask=None, name=None):
- if self._donion is not None:
- return get_return_type(self._meta.new(dtype=dtype))(
- self._donion.new(dtype=dtype, mask=mask, input_mask=input_mask, name=name)
+ def getitem(parent, at, keys_0, keys_1, dtype, mask, input_mask):
+ keys = keys_0 if keys_1 is None else (keys_0, keys_1)
+ return AmbiguousAssignOrExtract(parent.T if at else parent, keys).new(
+ dtype=dtype, mask=mask, input_mask=input_mask
)
+ if mask is not None:
+ _check_mask(mask)
+ if input_mask is not None:
+ _check_mask(input_mask)
+
+ mask_is_DOnion = mask is not None and mask.is_dOnion
+ input_mask_is_DOnion = input_mask is not None and input_mask.is_dOnion
+ if (
+ self.parent.is_dOnion
+ or self.keys_0_is_dOnion
+ or self.keys_1_is_dOnion
+ or mask_is_DOnion
+ or input_mask_is_DOnion
+ ):
+ meta = self._meta.new(dtype=dtype)
+
+ if type(self.index) is tuple and len(self.index) == 2:
+ keys_0, keys_1 = self.index[0], self.index[1]
+ else:
+ keys_0, keys_1 = self.index, None
+
+ donion = DOnion.multiple_access(
+ meta,
+ getitem,
+ self.parent.dOnion_if,
+ self.parent.is_dOnion and getattr(self.parent, "_is_transposed", False),
+ *(keys_0, keys_1),
+ dtype=dtype,
+ mask=None if mask is None else mask.dOnion_if,
+ input_mask=None if input_mask is None else input_mask.dOnion_if,
+ )
+ return get_return_type(meta)(donion)
+
+ # no dOnions
parent = self.parent
xt = False # xt = parent._is_transposed
dxn = 1 # dxn = -1 if xt else 1
@@ -1475,12 +1627,13 @@ def __call__(self, *args, **kwargs):
return Assigner(self.parent(*args, **kwargs), self.index, subassign=True)
def update(self, obj):
+ if getattr(self.parent, "_is_transposed", False):
+ raise TypeError("'TransposedMatrix' object does not support item assignment")
+
if is_DOnion(self.parent._delayed):
self.parent.__setitem__(self.index, obj)
return
- if getattr(self.parent, "_is_transposed", False):
- raise TypeError("'TransposedMatrix' object does not support item assignment")
Assigner(Updater(self.parent), self.index).update(obj)
def __lshift__(self, rhs):
@@ -1488,12 +1641,9 @@ def __lshift__(self, rhs):
@property
def value(self):
- if self._donion is not None:
- ret_type = get_return_type(self._meta.new())
- return ret_type(self._donion.new()).value
-
self._meta.value
- return self.new().value
+ scalar = self.new()
+ return scalar.value
def _uniquify(ndim, index, obj, mask=None, ot=False):
@@ -1543,12 +1693,69 @@ class Assigner:
def __init__(self, updater, index, subassign=False):
self.updater = updater
self.parent = updater.parent
- self.resolved_indices = IndexerResolver(self.parent, index).indices
- self.index = tuple(i.index for i in self.resolved_indices)
self._meta = updater.parent._meta
self.subassign = subassign
+ input_ndim = self.parent.ndim
+ self.keys_0_is_dOnion = input_ndim == 1 and is_DOnion(index)
+ self.keys_1_is_dOnion = (
+ input_ndim == 2
+ and type(index) is tuple
+ and len(index) == 2
+ and (is_DOnion(index[0]) or is_DOnion(index[1]))
+ )
+ if self.parent.is_dOnion or self.keys_0_is_dOnion or self.keys_1_is_dOnion:
+ IndexerResolver(self.parent, index, check_shape=False)
+ self.index = index
+ else:
+ self.resolved_indices = IndexerResolver(self.parent, index).indices
+ self.index = tuple(i.index for i in self.resolved_indices)
+
def update(self, obj):
+ def setitem(lhs, mask, accum, replace, keys_0, keys_1, obj, ot, subassign, in_dOnion=False):
+ keys = (keys_0,) if keys_1 is None else (keys_0, keys_1)
+ updater = Updater(lhs, mask=mask, accum=accum, replace=replace)
+ Assigner(updater, keys, subassign=subassign).update(obj.T if ot else obj)
+ if in_dOnion:
+ return lhs
+
+ # check for dOnions:
+ lhs = self.parent
+ updater = self.updater
+ if (
+ lhs.is_dOnion
+ or updater.mask is not None
+ and updater.mask.is_dOnion
+ or self.keys_0_is_dOnion
+ or self.keys_1_is_dOnion
+ or getattr(obj, "is_dOnion", False)
+ ):
+ lhs_ = lhs.__class__(lhs._delayed, meta=lhs._meta)
+ mask = None if updater.mask is None else updater.mask.dOnion_if
+
+ if type(self.index) is tuple and len(self.index) == 2:
+ keys_0, keys_1 = self.index[0], self.index[1]
+ else:
+ keys_0, keys_1 = self.index, None
+
+ donion = DOnion.multiple_access(
+ lhs._meta,
+ setitem,
+ lhs_.dOnion_if,
+ mask,
+ updater.accum,
+ updater.replace,
+ keys_0,
+ keys_1,
+ getattr(obj, "dOnion_if", obj),
+ getattr(obj, "is_dOnion", False) and getattr(obj, "_is_transposed", False),
+ self.subassign,
+ in_dOnion=True,
+ )
+ lhs.__init__(donion, meta=lhs._meta)
+ return
+
+ # no dOnions
if not (isinstance(obj, BaseType) or isinstance(obj, Number)):
try:
obj_transposed = obj._is_transposed
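
_adjust_meta_to_index above works around the fact that grblas cannot be
indexed with dask arrays: integral positions are kept (and the meta object is
resized so they stay in range), while every lazily-known index is replaced by
slice(None) before the meta is indexed. A rough standalone sketch of just the
substitution step, with no grblas objects involved and the real function's
Scalar/PythonScalar handling and meta resizing omitted:

    from numbers import Integral

    def adjust_index_for_meta(index):
        # keep integral positions; replace anything lazily known with slice(None)
        index = index if isinstance(index, tuple) else (index,)
        return tuple(i if isinstance(i, Integral) else slice(None) for i in index)

    lazy_columns = object()  # stand-in for a dOnion-valued column selection
    assert adjust_index_for_meta((6, lazy_columns)) == (6, slice(None))
    assert adjust_index_for_meta(lazy_columns) == (slice(None),)
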
diff --git a/dask_grblas/mask.py b/dask_grblas/mask.py
index 51c4e3b..715ba8f 100644
--- a/dask_grblas/mask.py
+++ b/dask_grblas/mask.py
@@ -15,6 +15,16 @@ def __init__(self, mask):
if base.is_DOnion(mask._delayed):
self.mask = mask._delayed.deep_extract(self._meta, self.__class__)
+ @property
+ def is_dOnion(self):
+ from .base import is_DOnion
+
+ return is_DOnion(self.mask)
+
+ @property
+ def dOnion_if(self):
+ return self.mask if self.is_dOnion else self
+
class StructuralMask(Mask):
complement = False
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index 9740776..286cb62 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -10,7 +10,7 @@
from grblas.dtypes import lookup_dtype
from grblas.exceptions import IndexOutOfBound, EmptyObject, DimensionMismatch
-from .base import BaseType, InnerBaseType, DOnion, is_DOnion, like_DOnion, skip
+from .base import BaseType, InnerBaseType, DOnion, is_DOnion, Box, skip
from .base import _nvals as _nvals_in_chunk
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater
from .mask import StructuralMask, ValueMask
@@ -152,8 +152,10 @@ def from_values(
return Matrix(out_donion, meta=meta)
# no DOnions
- if type(rows) is da.Array and type(columns) is da.Array and (
- type(values) is da.Array or isinstance(values, Number)
+ if (
+ type(rows) is da.Array
+ and type(columns) is da.Array
+ and (type(values) is da.Array or isinstance(values, Number))
):
np_idtype_ = np_dtype(lookup_dtype(rows.dtype))
if nrows is not None and ncols is not None:
@@ -298,9 +300,7 @@ def build(
if type(values) is list:
values = da.core.from_array(np.array(values), name="values-" + tokenize(values))
- if type(values) is da.Array and (
- rows.size != columns.size or columns.size != values.size
- ):
+ if type(values) is da.Array and (rows.size != columns.size or columns.size != values.size):
raise ValueError(
"`rows` and `columns` and `values` lengths must match: "
f"{rows.size}, {columns.size}, {values.size}"
@@ -315,7 +315,8 @@ def build(
idtype = gb.Matrix.new(rows.dtype).dtype
np_idtype_ = np_dtype(idtype)
vdtype = (
- lookup_dtype(type(values)) if isinstance(values, Number)
+ lookup_dtype(type(values))
+ if isinstance(values, Number)
else gb.Matrix.new(values.dtype).dtype
)
np_vdtype_ = np_dtype(vdtype)
@@ -355,6 +356,19 @@ def build(
@classmethod
def new(cls, dtype, nrows=0, ncols=0, *, chunks="auto", name=None):
+ if is_DOnion(nrows) or is_DOnion(ncols):
+ meta = gb.Matrix.new(dtype)
+ donion = DOnion.multiple_access(
+ meta, cls.new, dtype, nrows=nrows, ncols=ncols, chunks=chunks, name=name
+ )
+ return Matrix(donion, meta=meta)
+
+ if type(nrows) is Box:
+ nrows = nrows.content
+
+ if type(ncols) is Box:
+ ncols = ncols.content
+
dtype = dtype.lower() if isinstance(dtype, str) else dtype
if nrows == 0 and ncols == 0:
matrix = gb.Matrix.new(dtype, nrows, ncols)
@@ -422,8 +436,6 @@ def V(self):
@property
def T(self):
- if is_DOnion(self._delayed):
- return TransposedMatrix(self._delayed.T)
return TransposedMatrix(self)
@property
@@ -447,7 +459,7 @@ def shape(self):
def resize(self, nrows, ncols, inplace=True, chunks="auto"):
if is_DOnion(self._delayed):
donion = self._delayed.getattr(
- self._meta, 'resize', nrows, ncols, inplace=False, chunks=chunks
+ self._meta, "resize", nrows, ncols, inplace=False, chunks=chunks
)
if inplace:
self.__init__(donion, meta=self._meta)
@@ -497,7 +509,7 @@ def resize(self, nrows, ncols, inplace=True, chunks="auto"):
def rechunk(self, inplace=False, chunks="auto"):
if is_DOnion(self._delayed):
meta = self._meta
- donion = self._delayed.getattr(meta, 'rechunk', inplace=False, chunks=chunks)
+ donion = self._delayed.getattr(meta, "rechunk", inplace=False, chunks=chunks)
if inplace:
self.__init__(donion, meta=meta)
return
@@ -614,39 +626,6 @@ def _diag_old(self, k=0, dtype=None, chunks="auto"):
return get_return_type(meta)(delayed, nvals=nvals)
def __getitem__(self, index):
- if (
- type(self._delayed) is DOnion
- or type(index) is tuple and len(index) == 2
- and (is_DOnion(index[0]) or is_DOnion(index[1]))
- ):
- from .scalar import Scalar, PythonScalar
- from .expr import IndexerResolver
-
- self_delayed = self._delayed if type(self._delayed) is DOnion else self
- if type(index) is tuple and len(index) == 2:
- def getitem(self_, i0, i1):
- return self.__class__.__getitem__(self_, (i0, i1))
-
- # Since grblas does not support indices that are dask arrays
- # this complicates meta deduction. We therefore substitute
- # any non-Integral type indices with `slice(None)`
- meta_index = tuple(
- x if isinstance(x, (Integral, Scalar, PythonScalar))
- else slice(None) for x in index
- )
- # Next, we resize `meta` to accept any Integral-type indices:
- numbers = [x for x in index if isinstance(x, (Integral, Scalar, PythonScalar))]
- max_index = np.max(numbers) if numbers else None
- if max_index is not None:
- self._meta.resize(nrows=max_index + 1, ncols=max_index + 1)
- meta = self._meta[meta_index]
-
- IndexerResolver(self._meta, index, check_shape=False)
- donion = DOnion.multiple_access(meta, getitem, self_delayed, index[0], index[1])
- return AmbiguousAssignOrExtract(self, index, self_donion=donion, meta=meta)
- else:
- raise ValueError("Matrix indices must be a 2-tuple.")
-
return AmbiguousAssignOrExtract(self, index)
def __delitem__(self, keys, in_DOnion=False):
@@ -655,9 +634,7 @@ def __delitem__(self, keys, in_DOnion=False):
if len(good_keys) != 2:
raise TypeError("Remove Element only supports scalars.")
- donion = self._delayed.getattr(
- self._meta, '__delitem__', keys, in_DOnion=True
- )
+ donion = self._delayed.getattr(self._meta, "__delitem__", keys, in_DOnion=True)
self.__init__(donion, meta=self._meta)
return
@@ -666,32 +643,7 @@ def __delitem__(self, keys, in_DOnion=False):
return self
def __setitem__(self, index, delayed, in_DOnion=False):
- if is_DOnion(self._delayed):
- donion = self._delayed.getattr(
- self._meta, '__setitem__', index, delayed, in_DOnion=True
- )
- self.__init__(donion, meta=self._meta)
- return
-
- dlayd_is_donion = like_DOnion(delayed)
- if dlayd_is_donion:
- delayed = (
- delayed._delayed if hasattr(delayed, '_delayed') and is_DOnion(delayed._delayed)
- else delayed
- )
- if dlayd_is_donion or type(index) is tuple and len(index) == 2 and (
- is_DOnion(index[0]) or is_DOnion(index[1])
- ):
- def func(i0, i1, delayed):
- return self.__setitem__((i0, i1), delayed)
-
- donion = DOnion.multiple_access(self._meta, func, index[0], index[1], delayed)
- self.__init__(donion, meta=self._meta)
- return
-
Updater(self)[index] = delayed
- if in_DOnion:
- return self
def __contains__(self, index):
extractor = self[index]
@@ -710,41 +662,20 @@ def __iter__(self):
def ewise_add(self, other, op=monoid.plus, *, require_monoid=True):
assert type(other) is Matrix # TODO: or TransposedMatrix
- self_delayed = self._matrix if self._is_transposed else self._delayed
- other_delayed = other._matrix if other._is_transposed else other._delayed
- if is_DOnion(self_delayed) or is_DOnion(other_delayed):
- self_ = self_delayed if is_DOnion(self_delayed) else self
- other_ = other_delayed if is_DOnion(other_delayed) else other
- try:
- meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
- except DimensionMismatch:
- meta = self._meta.ewise_add(self._meta, op=op, require_monoid=require_monoid)
-
- donion = DOnion.multiple_access(
- meta, Matrix.ewise_add, self_, other_, op=op, require_monoid=require_monoid
- )
- return GbDelayed(donion, 'ewise_add', op, meta=meta)
-
- meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
+ try:
+ meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
+ except DimensionMismatch:
+ meta = self._meta.ewise_add(self._meta, op=op, require_monoid=require_monoid)
return GbDelayed(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta)
def ewise_mult(self, other, op=binary.times):
- assert type(other) is Matrix # TODO: or TransposedMatrix
+ assert type(other) is Matrix
- self_delayed = self._matrix if self._is_transposed else self._delayed
- other_delayed = other._matrix if other._is_transposed else other._delayed
- if is_DOnion(self_delayed) or is_DOnion(other_delayed):
- self_ = self_delayed if is_DOnion(self_delayed) else self
- other_ = other_delayed if is_DOnion(other_delayed) else other
- try:
- meta = self._meta.ewise_mult(other._meta, op=op)
- except DimensionMismatch:
- meta = self._meta.ewise_mult(self._meta, op=op)
-
- donion = DOnion.multiple_access(meta, Matrix.ewise_mult, self_, other_, op=op)
- return GbDelayed(donion, 'ewise_mult', op, meta=meta)
+ try:
+ meta = self._meta.ewise_mult(other._meta, op=op)
+ except DimensionMismatch:
+ meta = self._meta.ewise_mult(self._meta, op=op)
- meta = self._meta.ewise_mult(other._meta, op=op)
return GbDelayed(self, "ewise_mult", other, op, meta=meta)
def mxv(self, other, op=semiring.plus_times):
@@ -752,40 +683,24 @@ def mxv(self, other, op=semiring.plus_times):
assert type(other) is Vector
- self_delayed = self._matrix if self._is_transposed else self._delayed
- if is_DOnion(self_delayed) or is_DOnion(other._delayed):
- self_ = self_delayed if is_DOnion(self_delayed) else self
- other_ = other._delayed if is_DOnion(other._delayed) else other
- try:
- meta = self._meta.mxv(other._meta, op=op)
- except DimensionMismatch:
- other_meta = gb.Vector.new(dtype=other._meta.dtype, size=self._meta.ncols)
- meta = self._meta.mxv(other_meta, op=op)
- donion = DOnion.multiple_access(meta, Matrix.mxv, self_, other_, op=op)
- return GbDelayed(donion, 'mxv', op, meta=meta)
-
- meta = self._meta.mxv(other._meta, op=op)
+ try:
+ meta = self._meta.mxv(other._meta, op=op)
+ except DimensionMismatch:
+ other_meta = gb.Vector.new(dtype=other._meta.dtype, size=self._meta.ncols)
+ meta = self._meta.mxv(other_meta, op=op)
+
return GbDelayed(self, "mxv", other, op, meta=meta)
def mxm(self, other, op=semiring.plus_times):
assert type(other) in (Matrix, TransposedMatrix)
- self_delayed = self._matrix if self._is_transposed else self._delayed
- other_delayed = other._matrix if other._is_transposed else other._delayed
- if is_DOnion(self_delayed) or is_DOnion(other_delayed):
- self_ = self_delayed if is_DOnion(self_delayed) else self
- other_ = other_delayed if is_DOnion(other_delayed) else other
- try:
- meta = self._meta.mxm(other._meta, op=op)
- except DimensionMismatch:
- other_meta = gb.Matrix.new(
- dtype=other._meta.dtype, nrows=self._meta.ncols, ncols=other._meta.ncols
- )
- meta = self._meta.mxm(other_meta, op=op)
- donion = DOnion.multiple_access(meta, Matrix.mxm, self_, other_, op=op)
- return GbDelayed(donion, 'mxm', op, meta=meta)
-
- meta = self._meta.mxm(other._meta, op=op)
+ try:
+ meta = self._meta.mxm(other._meta, op=op)
+ except DimensionMismatch:
+ other_meta = gb.Matrix.new(
+ dtype=other._meta.dtype, nrows=self._meta.ncols, ncols=other._meta.ncols
+ )
+ meta = self._meta.mxm(other_meta, op=op)
return GbDelayed(self, "mxm", other, op, meta=meta)
def kronecker(self, other, op=binary.times):
@@ -804,6 +719,8 @@ def apply(self, op, right=None, *, left=None):
if type(right) is Scalar:
right_meta = right.dtype.np_type(0)
+ if self._meta.shape == (0,) * self.ndim:
+ self._meta.resize(*((1,) * self.ndim))
meta = self._meta.apply(op=op, left=left_meta, right=right_meta)
return GbDelayed(self, "apply", op, right, meta=meta, left=left)
@@ -816,11 +733,6 @@ def reduce_columnwise(self, op=monoid.plus):
return GbDelayed(self, "reduce_columnwise", op, meta=meta)
def reduce_scalar(self, op=monoid.plus):
- if is_DOnion(self._delayed):
- meta = self._meta.reduce_scalar(op)
- donion = self._delayed.getattr(meta, 'reduce_scalar', op=op)
- return GbDelayed(donion, 'reduce_scalar', op, meta=meta)
-
meta = self._meta.reduce_scalar(op)
return GbDelayed(self, "reduce_scalar", op, meta=meta)
@@ -947,16 +859,37 @@ class TransposedMatrix:
ndim = 2
_is_transposed = True
- def __init__(self, matrix):
- assert type(matrix) in {Matrix, DOnion}
+ def __init__(self, matrix, meta=None):
+ assert type(matrix) is Matrix
self._matrix = matrix
- self._meta = matrix._meta.T
+ self._meta = matrix._meta.T if meta is None else meta
# Aggregator-specific requirements:
self._nrows = self._meta.nrows
self._ncols = self._meta.ncols
+ @property
+ def is_dOnion(self):
+ return is_DOnion(self._matrix._delayed)
+
+ @property
+ def dOnion_if(self):
+ return self._matrix._delayed if self.is_dOnion else self
+
def new(self, *, dtype=None, mask=None):
+ mask_is_DOnion = mask is not None and mask.is_dOnion
+ if self.is_dOnion or mask_is_DOnion:
+
+ def T(matrix, dtype=None, mask=None):
+ return TransposedMatrix(matrix).new(dtype=dtype, mask=mask)
+
+ _matrix = self._matrix._delayed if self.is_dOnion else self._matrix
+ mask = mask.mask if mask_is_DOnion else mask
+ donion = DOnion.multiple_access(
+ self._meta.new(dtype), T, _matrix, dtype=dtype, mask=mask
+ )
+ return Matrix(donion)
+
gb_dtype = self._matrix.dtype if dtype is None else lookup_dtype(dtype)
dtype = np_dtype(gb_dtype)
@@ -981,8 +914,6 @@ def new(self, *, dtype=None, mask=None):
@property
def T(self):
- if is_DOnion(self._matrix._delayed):
- return Matrix(self._matrix._delayed.T)
return self._matrix
@property
@@ -990,77 +921,53 @@ def dtype(self):
return self._meta.dtype
def to_values(self, dtype=None, chunks="auto"):
- if is_DOnion(self._matrix):
+ if self.is_dOnion:
out_meta = np.array([])
- result = self._matrix.getattr(out_meta, 'to_values', dtype=dtype, chunks=chunks)
+ result = self.dOnion_if.getattr(out_meta, "to_values", dtype=dtype, chunks=chunks)
meta_i, _, meta_v = self._meta.to_values(dtype)
rows = result.getattr(meta_i, "__getitem__", 0)
- columns = result.getattr(meta_i, "__getitem__", 1)
- values = result.getattr(meta_v, "__getitem__", 2)
- return rows, columns, values
-
- rows, cols, vals = self._matrix.to_values(dtype=dtype, chunks=chunks)
+ cols = result.getattr(meta_i, "__getitem__", 1)
+ vals = result.getattr(meta_v, "__getitem__", 2)
+ else:
+ rows, cols, vals = self._matrix.to_values(dtype=dtype, chunks=chunks)
return cols, rows, vals
# Properties
@property
def nrows(self):
- if is_DOnion(self._matrix._delayed):
- return self._matrix._delayed.nrows
+ if self.is_dOnion:
+ return DOnion.multiple_access(
+ self._meta.nrows, lambda x: x.ncols, self._matrix._delayed
+ )
return self._meta.nrows
@property
def ncols(self):
- if is_DOnion(self._matrix._delayed):
- return self._matrix._delayed.ncols
+ if self.is_dOnion:
+ return DOnion.multiple_access(
+ self._meta.ncols, lambda x: x.nrows, self._matrix._delayed
+ )
return self._meta.ncols
@property
def shape(self):
- if is_DOnion(self._matrix._delayed):
- return self._matrix._delayed.shape
+ if self.is_dOnion:
+
+ def shape(matrix):
+ return matrix.shape[::-1]
+
+ return DOnion.multiple_access(self._meta.shape, shape, self._matrix._delayed)
return self._meta.shape
@property
def nvals(self):
- if is_DOnion(self._matrix._delayed):
- return self._matrix._delayed.nvals
- return self._meta.shape
+ if self.is_dOnion:
+ return DOnion.multiple_access(
+ self._meta.nvals, lambda x: x.nvals, self._matrix._delayed
+ )
+ return self._meta.nvals
def __getitem__(self, index):
- if (
- type(self._matrix) is DOnion
- or type(index) is tuple and len(index) == 2
- and (is_DOnion(index[0]) or is_DOnion(index[1]))
- ):
- from .scalar import Scalar, PythonScalar
- from .expr import IndexerResolver
-
- self_delayed = self._matrix if type(self._matrix) is DOnion else self
- if type(index) is tuple and len(index) == 2:
- def getitem(self_, i0, i1):
- return self.__class__.__getitem__(self_, (i0, i1))
-
- # Since grblas does not support indices that are dask arrays
- # this complicates meta deduction. We therefore substitute
- # any non-Integral type indices with `slice(None)`
- meta_index = tuple(
- x if isinstance(x, (Integral, Scalar, PythonScalar))
- else slice(None) for x in index
- )
- # Next, we resize `meta` to accept any Integral-type indices:
- numbers = [x for x in index if isinstance(x, (Integral, Scalar, PythonScalar))]
- max_index = np.max(numbers) if numbers else None
- if max_index is not None:
- self._meta.resize(nrows=max_index + 1, ncols=max_index + 1)
- meta = self._meta[meta_index]
-
- IndexerResolver(self._meta, index, check_shape=False)
- donion = DOnion.multiple_access(meta, getitem, self_delayed, index[0], index[1])
- return AmbiguousAssignOrExtract(self, index, self_donion=donion, meta=meta)
- else:
- raise ValueError("Matrix indices must be a 2-tuple.")
-
return AmbiguousAssignOrExtract(self, index)
# Delayed methods
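
The TransposedMatrix properties rewritten above defer to the wrapped matrix
and swap the axes once its shape is known: nrows reads the inner ncols, ncols
reads the inner nrows, and shape reverses the inner shape. The same idea
expressed with plain dask.delayed and a numpy array standing in for the inner
matrix (a sketch, not the library code):

    import dask
    import numpy as np

    inner = dask.delayed(np.ones)((3, 7))  # inner shape only known at compute time
    t_shape = dask.delayed(lambda m: m.shape[::-1])(inner)
    t_nrows = dask.delayed(lambda m: m.shape[1])(inner)

    assert t_shape.compute() == (7, 3)
    assert t_nrows.compute() == 7
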
diff --git a/dask_grblas/scalar.py b/dask_grblas/scalar.py
index 16bef6c..bdd3159 100644
--- a/dask_grblas/scalar.py
+++ b/dask_grblas/scalar.py
@@ -3,7 +3,7 @@
import numpy as np
from dask.delayed import Delayed, delayed
-from .base import BaseType, InnerBaseType, DOnion
+from .base import BaseType, InnerBaseType, DOnion, Box
from .expr import AmbiguousAssignOrExtract, GbDelayed
from .utils import get_meta, np_dtype
@@ -76,13 +76,96 @@ def __init__(self, delayed, meta=None):
self._meta = meta
self.dtype = meta.dtype
- def update(self, expr):
+ def update(self, expr, in_DOnion=False):
+ typ = type(expr)
+ if (
+ self.is_dOnion
+ or typ is AmbiguousAssignOrExtract
+ and expr.has_dOnion
+ or typ is GbDelayed
+ and expr.has_dOnion
+ or typ is Scalar
+ and expr.is_dOnion
+ ):
+ self_copy = self.__class__(self._delayed, meta=self._meta)
+ expr_ = expr
+ if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
+
+ def update_by_aae(c, p, t, k_0, k_1):
+ p = p.T if t else p
+ keys = k_0 if k_1 is None else (k_0, k_1)
+ aae = AmbiguousAssignOrExtract(p, keys)
+ return c.update(aae, in_DOnion=True)
+
+ aae_parent = expr_.parent.dOnion_if
+ aae_parent_is_T = expr_.parent.is_dOnion and getattr(
+ expr_.parent, "_is_transposed", False
+ )
+ if type(expr_.index) is tuple and len(expr_.index) == 2:
+ keys_0, keys_1 = expr_.index[0], expr_.index[1]
+ else:
+ keys_0, keys_1 = expr_.index, None
+
+ donion = DOnion.multiple_access(
+ self._meta,
+ update_by_aae,
+ self_copy.dOnion_if,
+ aae_parent,
+ aae_parent_is_T,
+ *(keys_0, keys_1),
+ )
+ self.__init__(donion, self._meta)
+ return
+
+ if typ is GbDelayed and expr.has_dOnion:
+
+ def update_by_gbd(c, t, *args, **kwargs):
+ args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
+ return c.update(gbd, in_DOnion=True)
+
+ gbd_parent = expr_.parent.dOnion_if
+ gbd_method = expr.method_name
+ gbd_args = (gbd_parent, gbd_method) + tuple(
+ getattr(x, "dOnion_if", x) for x in expr.args
+ )
+ gbd_parent_is_T = expr_.parent.is_dOnion and getattr(
+ expr_.parent, "_is_transposed", False
+ )
+ is_T = (gbd_parent_is_T, False) + tuple(
+ getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
+ for x in expr.args
+ )
+ gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in expr.kwargs.items()}
+ donion = DOnion.multiple_access(
+ self._meta,
+ update_by_gbd,
+ self_copy.dOnion_if,
+ is_T,
+ *gbd_args,
+ **gbd_kwargs,
+ )
+ self.__init__(donion, self._meta)
+ return
+
+ elif typ is Scalar and expr.is_dOnion:
+ expr_ = expr._delayed
+ donion = DOnion.multiple_access(
+ self._meta, Scalar.update, self_copy.dOnion_if, expr_, in_DOnion=True
+ )
+ self.__init__(donion, self._meta)
+ return
+
+ if typ is Box:
+ expr = expr.content
+ typ = type(expr)
+
self._meta.update(get_meta(expr))
self._meta.clear()
- typ = type(expr)
if typ is AmbiguousAssignOrExtract:
# Extract (s << v[index])
- self.value = expr.new(dtype=self.dtype).value
+ expr_new = expr.new(dtype=self.dtype)
+ self.value = expr_new.value
elif typ is Scalar:
# Simple assignment (s << t)
self.value = expr.value
@@ -92,11 +175,63 @@ def update(self, expr):
else:
# Try simple assignment (s << 1)
self.value = expr
+ if in_DOnion:
+ return self.__class__(self._delayed, meta=self._meta)
- def _update(self, delayed, *, accum):
+ def _update(self, rhs, *, accum, in_DOnion=False):
# s(accum=accum) << v.reduce()
- assert type(delayed) is GbDelayed
- delayed._update(self, accum=accum)
+ typ = type(rhs)
+ if typ is Box:
+ rhs = rhs.content
+
+ assert type(rhs) is GbDelayed
+
+ if self.is_dOnion or rhs.parent.is_dOnion:
+ self_copy = self.__class__(self._delayed, meta=self._meta)
+ self_ = self_copy.dOnion_if
+ rhs_ = rhs
+ if typ is GbDelayed and rhs.has_dOnion:
+
+ def _update_by_gbd(c, t, *args, accum=None, **kwargs):
+ args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
+ return c._update(gbd, accum=accum, in_DOnion=True)
+
+ gbd_parent = rhs_.parent.dOnion_if
+ gbd_method = rhs.method_name
+ gbd_args = (gbd_parent, gbd_method) + tuple(
+ getattr(x, "dOnion_if", x) for x in rhs.args
+ )
+ gbd_parent_is_T = rhs_.parent.is_dOnion and getattr(
+ rhs_.parent, "_is_transposed", False
+ )
+ is_T = (gbd_parent_is_T, False) + tuple(
+ getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
+ for x in rhs.args
+ )
+ gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in rhs.kwargs.items()}
+ donion = DOnion.multiple_access(
+ self._meta,
+ _update_by_gbd,
+ self_copy.dOnion_if,
+ is_T,
+ *gbd_args,
+ accum=accum,
+ **gbd_kwargs,
+ )
+ self.__init__(donion, self._meta)
+ return
+
+ rhs_ = rhs.parent.dOnion_if
+ donion = DOnion.multiple_access(
+ self._meta, Scalar._update, self_, rhs_, accum=accum, in_DOnion=True
+ )
+ self.__init__(donion, self._meta)
+ return
+
+ rhs._update(self, accum=accum)
+ if in_DOnion:
+ return self.__class__(self._delayed, meta=self._meta)
def dup(self, dtype=None, *, name=None):
if dtype is None:
@@ -155,6 +290,8 @@ def __array__(self, dtype=None):
def isequal(self, other, *, check_dtype=False):
if other is None:
return self.is_empty
+ if type(other) is Box:
+ other = other.content
if type(other) is not Scalar:
self._meta.isequal(get_meta(other))
other = Scalar.from_value(other)
@@ -199,6 +336,11 @@ def value(self):
@value.setter
def value(self, val):
+ if type(self._delayed) is DOnion:
+ donion = DOnion.multiple_access(self._meta, Scalar.from_value, val)
+ self.__init__(donion, meta=self._meta)
+ return
+
scalar = Scalar.from_value(val, dtype=self.dtype)
self._delayed = scalar._delayed
@@ -230,8 +372,8 @@ def __eq__(self, other):
def compute(self, *args, **kwargs):
innerval = self._delayed.compute(*args, **kwargs)
if type(self._delayed) is DOnion:
- return innerval.value if hasattr(innerval, 'value') else innerval
-
+ return innerval.value if hasattr(innerval, "value") else innerval
+
return innerval.value.value
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index fc8d536..fdf655f 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -2,14 +2,13 @@
import numpy as np
import grblas as gb
-from numbers import Integral
from dask.base import tokenize
from dask.delayed import Delayed, delayed
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
from grblas.exceptions import IndexOutOfBound
-from .base import BaseType, InnerBaseType, _nvals, DOnion, is_DOnion
+from .base import BaseType, InnerBaseType, _nvals, DOnion, is_DOnion, Box
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, Assigner
from .mask import StructuralMask, ValueMask
from ._ss.vector import ss
@@ -196,6 +195,16 @@ def from_values(
@classmethod
def new(cls, dtype, size=0, *, chunks="auto", name=None):
+ if is_DOnion(size):
+ meta = gb.Vector.new(dtype)
+ donion = DOnion.multiple_access(
+ meta, cls.new, dtype, size=size, chunks=chunks, name=name
+ )
+ return Vector(donion, meta=meta)
+
+ if type(size) is Box:
+ size = size.content
+
if size > 0:
chunks = da.core.normalize_chunks(chunks, (size,), dtype=int)
meta = gb.Vector.new(dtype)
@@ -357,41 +366,13 @@ def rechunk(self, inplace=False, chunks="auto"):
self.resize(*self.shape, chunks=chunks)
else:
return self.resize(*self.shape, chunks=chunks, inplace=False)
- # chunks = da.core.normalize_chunks(chunks, self.shape, dtype=np.int64)
- # id = self.to_values()
- # new = Vector.from_values(*id, *self.shape, trust_size=True, chunks=chunks)
- # if inplace:
- # self.__init__(new._delayed)
- # else:
- # return new
def __getitem__(self, index):
- if type(self._delayed) is DOnion:
- from .scalar import Scalar, PythonScalar
-
- if isinstance(index, (Integral, Scalar, PythonScalar)):
- meta = gb.Scalar.new(self._meta.dtype)
- else:
- meta = gb.Vector.new(self._meta.dtype)
- return self._delayed.getattr(meta, "__getitem__", index)
- if type(index) is DOnion:
- meta = self._meta
- return DOnion.multiple_access(meta, self.__getitem__, index)
return AmbiguousAssignOrExtract(self, index)
def __delitem__(self, keys):
del Updater(self)[keys]
- # del self._meta[index]
- # delayed = self._optional_dup()
- # TODO: normalize index
- # delayed = delayed.map_blocks(
- # _delitem,
- # index,
- # dtype=np_dtype(self.dtype),
- # )
- # raise NotImplementedError()
-
def __setitem__(self, index, delayed):
Assigner(Updater(self), index).update(delayed)
@@ -411,11 +392,13 @@ def __iter__(self):
def ewise_add(self, other, op=monoid.plus, *, require_monoid=True):
assert type(other) is Vector
+
meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
return GbDelayed(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta)
def ewise_mult(self, other, op=binary.times):
assert type(other) is Vector
+
meta = self._meta.ewise_mult(other._meta, op=op)
return GbDelayed(self, "ewise_mult", other, op, meta=meta)
diff --git a/tests/from_grblas2/test_matrix.py b/tests/from_grblas2/test_matrix.py
index 00a85b9..ffe12a5 100644
--- a/tests/from_grblas2/test_matrix.py
+++ b/tests/from_grblas2/test_matrix.py
@@ -23,7 +23,7 @@
from .conftest import autocompute, compute
from dask_grblas import Matrix, Scalar, Vector # isort:skip
-from dask_grblas.base import is_DOnion
+from dask_grblas.base import is_DOnion, like_DOnion
@pytest.fixture
@@ -119,10 +119,7 @@ def test_dup(As, A_chunks):
Ds = [Matrix.from_values([0, 1], [0, 1], [0, 2.5], dtype=dtypes.FP64)]
Ds.append(
Matrix.from_values(
- da.from_array([0, 1]),
- da.from_array([0, 1]),
- da.from_array([0, 2.5]),
- dtype=dtypes.FP64
+ da.from_array([0, 1]), da.from_array([0, 1]), da.from_array([0, 2.5]), dtype=dtypes.FP64
)
)
for D_ in Ds:
@@ -134,9 +131,13 @@ def test_dup(As, A_chunks):
Matrix.from_values([0, 1], [0, 1], [0, 2], dtype=dtypes.INT64), check_dtype=True
)
E = D.dup(mask=D.V)
- assert E.isequal(Matrix.from_values([1], [1], [2.5], dtype=dtypes.FP64), check_dtype=True)
+ assert E.isequal(
+ Matrix.from_values([1], [1], [2.5], dtype=dtypes.FP64), check_dtype=True
+ )
E = D.dup(dtype=dtypes.INT64, mask=D.V)
- assert E.isequal(Matrix.from_values([1], [1], [2], dtype=dtypes.INT64), check_dtype=True)
+ assert E.isequal(
+ Matrix.from_values([1], [1], [2], dtype=dtypes.INT64), check_dtype=True
+ )
def test_from_values():
@@ -265,11 +266,13 @@ def test_from_values_dask():
def test_from_values_scalar():
Cs = [Matrix.from_values([0, 1, 3], [1, 1, 2], 7)]
- Cs.append(Matrix.from_values(
- da.from_array([0, 1, 3]),
- da.from_array([1, 1, 2]),
- 7,
- ))
+ Cs.append(
+ Matrix.from_values(
+ da.from_array([0, 1, 3]),
+ da.from_array([1, 1, 2]),
+ 7,
+ )
+ )
for C in Cs:
assert C.nrows == 4
assert C.ncols == 3
@@ -280,11 +283,13 @@ def test_from_values_scalar():
# iso drumps duplicates
C = Matrix.from_values([0, 1, 3, 0], [1, 1, 2, 1], 7)
- Cs.append(Matrix.from_values(
- da.from_array([0, 1, 3, 0]),
- da.from_array([1, 1, 2, 1]),
- 7,
- ))
+ Cs.append(
+ Matrix.from_values(
+ da.from_array([0, 1, 3, 0]),
+ da.from_array([1, 1, 2, 1]),
+ 7,
+ )
+ )
for C in Cs:
assert C.nrows == 4
assert C.ncols == 3
@@ -336,9 +341,10 @@ def test_resize(As, A_chunks):
if type(A._delayed) is da.Array:
assert A._delayed.chunks == ((4, 2), (4, 4, 3))
else:
- assert A._delayed.deep_extract(
- None, lambda x: x._delayed.chunks
- ) == ((4, 2), (4, 4, 3))
+ assert A._delayed.deep_extract(None, lambda x: x._delayed.chunks) == (
+ (4, 2),
+ (4, 4, 3),
+ )
assert compute(A[3, 2].value) == 3
assert compute(A[5, 7].value) is None
@@ -350,9 +356,10 @@ def test_resize(As, A_chunks):
if type(A._delayed) is da.Array:
assert A._delayed.chunks == ((4, 4, 3), (3,))
else:
- assert A._delayed.deep_extract(
- None, lambda x: x._delayed.chunks
- ) == ((4, 4, 3), (3,))
+ assert A._delayed.deep_extract(None, lambda x: x._delayed.chunks) == (
+ (4, 4, 3),
+ (3,),
+ )
assert compute(A[3, 2].value) == 3
assert compute(A[7, 2].value) is None
@@ -589,7 +596,9 @@ def test_mxm_mask(As, A_chunks):
for chunks in A_chunks:
A = A_.dup()
A.rechunk(chunks=chunks, inplace=True)
- val_mask = Matrix.from_values([0, 3, 4], [2, 3, 2], [True, True, True], nrows=7, ncols=7)
+ val_mask = Matrix.from_values(
+ [0, 3, 4], [2, 3, 2], [True, True, True], nrows=7, ncols=7
+ )
struct_mask = Matrix.from_values([0, 3, 4], [2, 3, 2], [1, 0, 0], nrows=7, ncols=7)
C = A.dup()
C(val_mask.V) << A.mxm(A, semiring.plus_times)
@@ -747,266 +756,439 @@ def test_extract_column(As, A_chunks):
assert w2.isequal(result)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_extract_input_mask():
# A M
# 0 1 2 _ 0 1
# 3 4 5 2 3 _
- A = Matrix.from_values(
- [0, 0, 0, 1, 1, 1],
- [0, 1, 2, 0, 1, 2],
- [0, 1, 2, 3, 4, 5],
- )
- M = Matrix.from_values(
- [0, 0, 1, 1],
- [1, 2, 0, 1],
- [0, 1, 2, 3],
- )
- m = M[0, :].new()
- MT = M.T.new()
- # Matrix structure mask
- result = A[0, [0, 1]].new(input_mask=M.S)
- expected = Vector.from_values([1], [1])
- assert result.isequal(expected)
- # again
- result.clear()
- result(input_mask=M.S) << A[0, [0, 1]]
- assert result.isequal(expected)
-
- # Vector mask
- result = A[0, [0, 1]].new(input_mask=m.S)
- assert result.isequal(expected)
- # again
- result.clear()
- result(input_mask=m.S) << A[0, [0, 1]]
- assert result.isequal(expected)
-
- # Matrix value mask
- result = A[0, [1, 2]].new(input_mask=M.V)
- expected = Vector.from_values([1], [2], size=2)
- assert result.isequal(expected)
- # again
- result.clear()
- result(input_mask=M.V) << A[0, [1, 2]]
- assert result.isequal(expected)
-
- with pytest.raises(ValueError, match="Shape of `input_mask` does not match shape of input"):
- A[0, [0, 1]].new(input_mask=MT.S)
- with pytest.raises(ValueError, match="Shape of `input_mask` does not match shape of input"):
- m(input_mask=MT.S) << A[0, [0, 1]]
- with pytest.raises(
- ValueError, match="Size of `input_mask` Vector does not match ncols of Matrix"
- ):
- A[0, [0]].new(input_mask=expected.S)
- with pytest.raises(
- ValueError, match="Size of `input_mask` Vector does not match ncols of Matrix"
- ):
- m(input_mask=expected.S) << A[0, [0]]
- with pytest.raises(
- ValueError, match="Size of `input_mask` Vector does not match nrows of Matrix"
- ):
- A[[0], 0].new(input_mask=m.S)
- with pytest.raises(
- ValueError, match="Size of `input_mask` Vector does not match nrows of Matrix"
- ):
- m(input_mask=m.S) << A[[0], 0]
- with pytest.raises(
- TypeError, match="Got Vector `input_mask` when extracting a submatrix from a Matrix"
- ):
- A[[0], [0]].new(input_mask=expected.S)
- with pytest.raises(
- TypeError, match="Got Vector `input_mask` when extracting a submatrix from a Matrix"
- ):
- A(input_mask=expected.S) << A[[0], [0]]
- with pytest.raises(TypeError, match="mask is not allowed for single element extraction"):
- A[0, 0].new(input_mask=M.S)
- with pytest.raises(TypeError, match="mask and input_mask arguments cannot both be given"):
- A[0, [0, 1]].new(input_mask=M.S, mask=expected.S)
- with pytest.raises(TypeError, match="mask and input_mask arguments cannot both be given"):
- A(input_mask=M.S, mask=expected.S)
- with pytest.raises(TypeError, match=r"Mask must indicate values \(M.V\) or structure \(M.S\)"):
- A[0, [0, 1]].new(input_mask=M)
- with pytest.raises(TypeError, match=r"Mask must indicate values \(M.V\) or structure \(M.S\)"):
- A(input_mask=M)
- with pytest.raises(TypeError, match="Mask object must be type Vector"):
- expected[[0, 1]].new(input_mask=M.S)
- with pytest.raises(TypeError, match="Mask object must be type Vector"):
- expected(input_mask=M.S) << expected[[0, 1]]
- with pytest.raises(TypeError, match=r"new\(\) got an unexpected keyword argument 'input_mask'"):
- A.new(input_mask=M.S)
- with pytest.raises(TypeError, match="`input_mask` argument may only be used for extract"):
- A(input_mask=M.S) << A.apply(unary.ainv)
- with pytest.raises(TypeError, match="`input_mask` argument may only be used for extract"):
- A(input_mask=M.S)[[0], [0]] = 1
- with pytest.raises(TypeError, match="`input_mask` argument may only be used for extract"):
- A(input_mask=M.S)[[0], [0]]
+ As = [
+ Matrix.from_values(
+ [0, 0, 0, 1, 1, 1],
+ [0, 1, 2, 0, 1, 2],
+ [0, 1, 2, 3, 4, 5],
+ )
+ ]
+ As += [
+ Matrix.from_values(
+ da.from_array([0, 0, 0, 1, 1, 1]),
+ da.from_array([0, 1, 2, 0, 1, 2]),
+ da.from_array([0, 1, 2, 3, 4, 5]),
+ )
+ ]
+ Ms = [
+ Matrix.from_values(
+ [0, 0, 1, 1],
+ [1, 2, 0, 1],
+ [0, 1, 2, 3],
+ )
+ ]
+ Ms += [
+ Matrix.from_values(
+ da.from_array([0, 0, 1, 1]),
+ da.from_array([1, 2, 0, 1]),
+ da.from_array([0, 1, 2, 3]),
+ )
+ ]
+ for A_ in As:
+ for M_ in Ms:
+ A = A_.dup()
+ M = M_.dup()
+ m = M[0, :].new()
+ MT = M.T.new()
+ # Matrix structure mask
+ result = A[0, [0, 1]].new(input_mask=M.S)
+ expected = Vector.from_values([1], [1])
+ assert result.isequal(expected)
+ # again
+ result.clear()
+ result(input_mask=M.S) << A[0, [0, 1]]
+ assert result.isequal(expected)
+
+ # Vector mask
+ result = A[0, [0, 1]].new(input_mask=m.S)
+ assert result.isequal(expected)
+ # again
+ result.clear()
+ result(input_mask=m.S) << A[0, [0, 1]]
+ assert result.isequal(expected)
+
+ # Matrix value mask
+ result = A[0, [1, 2]].new(input_mask=M.V)
+ expected = Vector.from_values([1], [2], size=2)
+ assert result.isequal(expected)
+ # again
+ result.clear()
+ result(input_mask=M.V) << A[0, [1, 2]]
+ assert result.isequal(expected)
+
+ with pytest.raises(
+ ValueError, match="Shape of `input_mask` does not match shape of input"
+ ):
+ A[0, [0, 1]].new(input_mask=MT.S).compute()
- # With transpose input value
- # Matrix structure mask
- result = A.T[[0, 1], 0].new(input_mask=MT.S)
- expected = Vector.from_values([1], [1])
- assert result.isequal(expected)
- # again
- result.clear()
- result(input_mask=MT.S) << A.T[[0, 1], 0]
- assert result.isequal(expected)
-
- # Vector mask
- result = A.T[[0, 1], 0].new(input_mask=m.S)
- assert result.isequal(expected)
- # again
- result.clear()
- result(input_mask=m.S) << A.T[[0, 1], 0]
- assert result.isequal(expected)
-
- # Matrix value mask
- result = A.T[[1, 2], 0].new(input_mask=MT.V)
- expected = Vector.from_values([1], [2], size=2)
- assert result.isequal(expected)
- # again
- result.clear()
- result(input_mask=MT.V) << A.T[[1, 2], 0]
- assert result.isequal(expected)
+ with pytest.raises(
+ ValueError, match="Shape of `input_mask` does not match shape of input"
+ ):
+ m(input_mask=MT.S) << A[0, [0, 1]]
+ m.compute()
- # With transpose input value
- # Matrix structure mask
- A = Matrix.from_values(
- [0, 0, 0, 1, 1, 1],
- [0, 1, 2, 0, 1, 2],
- [0, 1, 2, 3, 4, 5],
- )
- M = Matrix.from_values(
- [0, 0, 1, 1],
- [1, 2, 0, 1],
- [0, 1, 2, 3],
- )
- A.rechunk(chunks=((1, 1), (2, 1)), inplace=True)
- result = A.T[[0, 1], 0].new(input_mask=MT.S)
- expected = Vector.from_values([1], [1])
- assert result.isequal(expected)
- # again
- result.clear()
- result(input_mask=MT.S) << A.T[[0, 1], 0]
- assert result.isequal(expected)
-
- # Vector mask
- result = A.T[[0, 1], 0].new(input_mask=m.S)
- assert result.isequal(expected)
- # again
- result.clear()
- result(input_mask=m.S) << A.T[[0, 1], 0]
- assert result.isequal(expected)
-
- # Matrix value mask
- result = A.T[[1, 2], 0].new(input_mask=MT.V)
- expected = Vector.from_values([1], [2], size=2)
- assert result.isequal(expected)
- # again
- result.clear()
- result(input_mask=MT.V) << A.T[[1, 2], 0]
- assert result.isequal(expected)
+ with pytest.raises(
+ ValueError, match="Size of `input_mask` Vector does not match ncols of Matrix"
+ ):
+ A[0, [0]].new(input_mask=expected.S).compute()
+ m = M[0, :].new()
+ with pytest.raises(
+ ValueError, match="Size of `input_mask` Vector does not match ncols of Matrix"
+ ):
+ m(input_mask=expected.S) << A[0, [0]]
+ m.compute()
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_extract_with_matrix(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- with pytest.raises(TypeError, match="Invalid type for index"):
- A[A.T, 1].new()
- with pytest.raises(TypeError, match="Invalid type for index"):
- A[A, [1]].new()
- with pytest.raises(TypeError, match="Invalid type for index"):
- A[[0], A.V].new()
+ m = M[0, :].new()
+ with pytest.raises(
+ ValueError, match="Size of `input_mask` Vector does not match nrows of Matrix"
+ ):
+ A[[0], 0].new(input_mask=m.S).compute()
+ m = M[0, :].new()
+ with pytest.raises(
+ ValueError, match="Size of `input_mask` Vector does not match nrows of Matrix"
+ ):
+ m(input_mask=m.S) << A[[0], 0]
+ m.compute()
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_assign(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- B = Matrix.from_values([0, 0, 1], [0, 1, 0], [9, 8, 7])
- result = Matrix.from_values(
- [0, 0, 2, 3, 0, 3, 5, 6, 0, 6, 1, 6, 4, 1],
- [0, 5, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 6],
- [9, 8, 7, 3, 2, 3, 1, 5, 3, 7, 8, 3, 7, 4],
- )
- C = A.dup()
- C()[[0, 2], [0, 5]] = B
- assert C.isequal(result)
- C = A.dup()
- C[:3:2, :6:5]() << B
- assert C.isequal(result)
- with pytest.raises(TypeError, match="will make the Matrix dense"):
- C << 1
- nvals = C.nvals
- C(C.S) << 1
- assert C.nvals == nvals
- assert C.reduce_scalar().new() == nvals
- with pytest.raises(TypeError, match="Invalid type for index"):
- C[C, [1]] = C
-
- B = B.T.new()
- C = A.dup()
- C()[[0, 2], [0, 5]] = B.T
- assert C.isequal(result)
- C = A.dup()
- C[:3:2, :6:5]() << B.T
- assert C.isequal(result)
-
- B.rechunk(chunks=1)
- C = A.dup()
- C()[[0, 2], [0, 5]] = B.T
- assert C.isequal(result)
- C = A.dup()
- C[:3:2, :6:5]() << B.T
- assert C.isequal(result)
+ with pytest.raises(
+ TypeError, match="Got Vector `input_mask` when extracting a submatrix from a Matrix"
+ ):
+ A[[0], [0]].new(input_mask=expected.S).compute()
+ with pytest.raises(
+ TypeError, match="Got Vector `input_mask` when extracting a submatrix from a Matrix"
+ ):
+ A(input_mask=expected.S) << A[[0], [0]]
+ A.compute()
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_assign_wrong_dims(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- B = Matrix.from_values([0, 0, 1], [0, 1, 0], [9, 8, 7])
- with pytest.raises(DimensionMismatch):
- A[[0, 2, 4], [0, 5]] = B
+ A = A_.dup()
+ with pytest.raises(
+ TypeError, match="mask is not allowed for single element extraction"
+ ):
+ A[0, 0].new(input_mask=M.S).compute()
+ with pytest.raises(
+ TypeError, match="mask and input_mask arguments cannot both be given"
+ ):
+ A[0, [0, 1]].new(input_mask=M.S, mask=expected.S).compute()
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_assign_row(A, A_chunks, v):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- result = Matrix.from_values(
- [3, 3, 5, 6, 6, 1, 6, 2, 4, 1, 0, 0, 0, 0],
- [0, 2, 2, 2, 3, 4, 4, 5, 5, 6, 1, 3, 4, 6],
- [3, 3, 1, 5, 7, 8, 3, 1, 7, 4, 1, 1, 2, 0],
+ with pytest.raises(
+ TypeError, match="mask and input_mask arguments cannot both be given"
+ ):
+ A(input_mask=M.S, mask=expected.S).compute()
+
+ with pytest.raises(
+ TypeError, match=r"Mask must indicate values \(M.V\) or structure \(M.S\)"
+ ):
+ A[0, [0, 1]].new(input_mask=M).compute()
+
+ with pytest.raises(
+ TypeError, match=r"Mask must indicate values \(M.V\) or structure \(M.S\)"
+ ):
+ A(input_mask=M).compute()
+
+ with pytest.raises(TypeError, match="Mask object must be type Vector"):
+ expected[[0, 1]].new(input_mask=M.S).compute()
+
+ with pytest.raises(TypeError, match="Mask object must be type Vector"):
+ expected(input_mask=M.S) << expected[[0, 1]]
+ expected.compute()
+
+ with pytest.raises(
+ TypeError, match=r"new\(\) got an unexpected keyword argument 'input_mask'"
+ ):
+ A.new(input_mask=M.S).compute()
+
+ with pytest.raises(
+ TypeError, match="`input_mask` argument may only be used for extract"
+ ):
+ A(input_mask=M.S) << A.apply(unary.ainv)
+ A.compute()
+
+ A = A_.dup()
+ with pytest.raises(
+ TypeError, match="`input_mask` argument may only be used for extract"
+ ):
+ A(input_mask=M.S)[[0], [0]] = 1
+ A.compute()
+
+ A = A_.dup()
+ with pytest.raises(
+ TypeError, match="`input_mask` argument may only be used for extract"
+ ):
+ A(input_mask=M.S)[[0], [0]]
+ A.compute()
+
+ A = A_.dup()
+ m = M[0, :].new()
+ # With transpose input value
+ # Matrix structure mask
+ result = A.T[[0, 1], 0].new(input_mask=MT.S)
+ expected = Vector.from_values([1], [1])
+ assert result.isequal(expected)
+ # again
+ result.clear()
+ result(input_mask=MT.S) << A.T[[0, 1], 0]
+ assert result.isequal(expected)
+
+ # Vector mask
+ result = A.T[[0, 1], 0].new(input_mask=m.S)
+ assert result.isequal(expected)
+ # again
+ result.clear()
+ result(input_mask=m.S) << A.T[[0, 1], 0]
+ assert result.isequal(expected)
+
+ # Matrix value mask
+ result = A.T[[1, 2], 0].new(input_mask=MT.V)
+ expected = Vector.from_values([1], [2], size=2)
+ assert result.isequal(expected)
+ # again
+ result.clear()
+ result(input_mask=MT.V) << A.T[[1, 2], 0]
+ assert result.isequal(expected)
+
+ # With transpose input value
+ # Matrix structure mask
+ As = [
+ Matrix.from_values(
+ [0, 0, 0, 1, 1, 1],
+ [0, 1, 2, 0, 1, 2],
+ [0, 1, 2, 3, 4, 5],
+ )
+ ]
+ As += [
+ Matrix.from_values(
+ da.from_array([0, 0, 0, 1, 1, 1]),
+ da.from_array([0, 1, 2, 0, 1, 2]),
+ da.from_array([0, 1, 2, 3, 4, 5]),
+ )
+ ]
+ Ms = [
+ Matrix.from_values(
+ [0, 0, 1, 1],
+ [1, 2, 0, 1],
+ [0, 1, 2, 3],
+ )
+ ]
+ Ms += [
+ Matrix.from_values(
+ da.from_array([0, 0, 1, 1]),
+ da.from_array([1, 2, 0, 1]),
+ da.from_array([0, 1, 2, 3]),
)
- C = A.dup()
- C[0, :] = v
- assert C.isequal(result)
+ ]
+ for A_ in As:
+ for M_ in Ms:
+ A = A_.dup()
+ M = M_.dup()
+ A.rechunk(chunks=((1, 1), (2, 1)), inplace=True)
+ result = A.T[[0, 1], 0].new(input_mask=MT.S)
+ expected = Vector.from_values([1], [1])
+ assert result.isequal(expected)
+ # again
+ result.clear()
+ result(input_mask=MT.S) << A.T[[0, 1], 0]
+ assert result.isequal(expected)
+
+ # Vector mask
+ result = A.T[[0, 1], 0].new(input_mask=m.S)
+ assert result.isequal(expected)
+ # again
+ result.clear()
+ result(input_mask=m.S) << A.T[[0, 1], 0]
+ assert result.isequal(expected)
+
+ # Matrix value mask
+ result = A.T[[1, 2], 0].new(input_mask=MT.V)
+ expected = Vector.from_values([1], [2], size=2)
+ assert result.isequal(expected)
+ # again
+ result.clear()
+ result(input_mask=MT.V) << A.T[[1, 2], 0]
+ assert result.isequal(expected)
+
+
+def test_extract_with_matrix(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ with pytest.raises(TypeError, match="Invalid type for index"):
+ A[A.T, 1].new()
+ with pytest.raises(TypeError, match="Invalid type for index"):
+ A[A, [1]].new()
+ with pytest.raises(TypeError, match="Invalid type for index"):
+ A[[0], A.V].new()
+
+
+def test_assign(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ B = Matrix.from_values([0, 0, 1], [0, 1, 0], [9, 8, 7])
+ result = Matrix.from_values(
+ [0, 0, 2, 3, 0, 3, 5, 6, 0, 6, 1, 6, 4, 1],
+ [0, 5, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 6],
+ [9, 8, 7, 3, 2, 3, 1, 5, 3, 7, 8, 3, 7, 4],
+ )
+ C = A.dup()
+ C()[[0, 2], [0, 5]] = B
+ assert C.isequal(result)
+ C = A.dup()
+ C[:3:2, :6:5]() << B
+ assert C.isequal(result)
+ with pytest.raises(TypeError, match="will make the Matrix dense"):
+ C << 1
+ nvals = C.nvals
+ C(C.S) << 1
+ assert C.nvals == nvals
+ assert C.reduce_scalar().new() == nvals
+ with pytest.raises(TypeError, match="Invalid type for index"):
+ C[C, [1]] = C
+ B = B.T.new()
+ C = A.dup()
+ C()[[0, 2], [0, 5]] = B.T
+ assert C.isequal(result)
+ C = A.dup()
+ C[:3:2, :6:5]() << B.T
+ assert C.isequal(result)
+
+ B.rechunk(chunks=1)
+ C = A.dup()
+ C()[[0, 2], [0, 5]] = B.T
+ assert C.isequal(result)
+ C = A.dup()
+ C[:3:2, :6:5]() << B.T
+ assert C.isequal(result)
+
+
+def test_assign_wrong_dims(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ B = Matrix.from_values([0, 0, 1], [0, 1, 0], [9, 8, 7])
+ with pytest.raises(DimensionMismatch):
+ A[[0, 2, 4], [0, 5]] = B
+ A.compute()
+
+
+def test_assign_row(As, A_chunks, v):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ result = Matrix.from_values(
+ [3, 3, 5, 6, 6, 1, 6, 2, 4, 1, 0, 0, 0, 0],
+ [0, 2, 2, 2, 3, 4, 4, 5, 5, 6, 1, 3, 4, 6],
+ [3, 3, 1, 5, 7, 8, 3, 1, 7, 4, 1, 1, 2, 0],
+ )
+ C = A.dup()
+ C[0, :] = v
+ assert C.isequal(result)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_subassign_row_col(A_chunks):
- A = Matrix.from_values(
+ A_0 = Matrix.from_values(
[0, 0, 0, 1, 1, 1, 2, 2, 2],
[0, 1, 2, 0, 1, 2, 0, 1, 2],
[0, 1, 2, 3, 4, 5, 6, 7, 8],
)
- A_ = A
- for chunks in [3, 2, 1]:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- m = Vector.from_values([1], [True])
- v = Vector.from_values([0, 1], [10, 20])
+ A_1 = Matrix.from_values(
+ da.from_array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+ da.from_array([0, 1, 2, 0, 1, 2, 0, 1, 2]),
+ da.from_array([0, 1, 2, 3, 4, 5, 6, 7, 8]),
+ )
+ As = [A_0, A_1]
+ for A_ in As:
+ for chunks in [3, 2, 1]:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ m = Vector.from_values([1], [True])
+ v = Vector.from_values([0, 1], [10, 20])
+
+ A[[0, 1], 0](m.S) << v
+ result1 = Matrix.from_values(
+ [0, 0, 0, 1, 1, 1, 2, 2, 2],
+ [0, 1, 2, 0, 1, 2, 0, 1, 2],
+ [0, 1, 2, 20, 4, 5, 6, 7, 8],
+ )
+ assert A.isequal(result1)
+
+ A[1, [1, 2]](m.V, accum=binary.plus).update(v)
+ result2 = Matrix.from_values(
+ [0, 0, 0, 1, 1, 1, 2, 2, 2],
+ [0, 1, 2, 0, 1, 2, 0, 1, 2],
+ [0, 1, 2, 20, 4, 25, 6, 7, 8],
+ )
+ assert A.isequal(result2)
+
+ A[[0, 1], 0](m.S, binary.plus, replace=True) << v
+ result3 = Matrix.from_values(
+ [0, 0, 1, 1, 1, 2, 2, 2],
+ [1, 2, 0, 1, 2, 0, 1, 2],
+ [1, 2, 40, 4, 25, 6, 7, 8],
+ )
+ assert A.isequal(result3)
+
+ _A = A.dup()
+ with pytest.raises(DimensionMismatch):
+ A(m.S)[[0, 1], 0] << v
+ A.compute()
+
+ A = _A
+ A[[0, 1], 0](m.S) << 99
+ result4 = Matrix.from_values(
+ [0, 0, 1, 1, 1, 2, 2, 2],
+ [1, 2, 0, 1, 2, 0, 1, 2],
+ [1, 2, 99, 4, 25, 6, 7, 8],
+ )
+ assert A.isequal(result4)
+
+ A[[1, 2], 0](m.S, binary.plus, replace=True) << 100
+ result5 = Matrix.from_values(
+ [0, 0, 1, 1, 2, 2, 2],
+ [1, 2, 1, 2, 0, 1, 2],
+ [1, 2, 4, 25, 106, 7, 8],
+ )
+ assert A.isequal(result5)
+
+ A[2, [0, 1]](m.S) << -1
+ result6 = Matrix.from_values(
+ [0, 0, 1, 1, 2, 2, 2],
+ [1, 2, 1, 2, 0, 1, 2],
+ [1, 2, 4, 25, 106, -1, 8],
+ )
+ assert A.isequal(result6)
+
- A[[0, 1], 0](m.S) << v
+def test_subassign_matrix():
+ A_0 = Matrix.from_values(
+ [0, 0, 0, 1, 1, 1, 2, 2, 2],
+ [0, 1, 2, 0, 1, 2, 0, 1, 2],
+ [0, 1, 2, 3, 4, 5, 6, 7, 8],
+ )
+ A_1 = Matrix.from_values(
+ da.from_array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+ da.from_array([0, 1, 2, 0, 1, 2, 0, 1, 2]),
+ da.from_array([0, 1, 2, 3, 4, 5, 6, 7, 8]),
+ )
+ As = [A_0, A_1]
+ for A_i in As:
+ A = A_i.dup()
+ m = Matrix.from_values([1], [0], [True])
+ v = Matrix.from_values([0, 1], [0, 0], [10, 20])
+ mT = m.T.new()
+
+ A[[0, 1], [0]](m.S) << v
result1 = Matrix.from_values(
[0, 0, 0, 1, 1, 1, 2, 2, 2],
[0, 1, 2, 0, 1, 2, 0, 1, 2],
@@ -1014,7 +1196,11 @@ def test_subassign_row_col(A_chunks):
)
assert A.isequal(result1)
- A[1, [1, 2]](m.V, accum=binary.plus).update(v)
+ A_ = A.dup()
+ _A = A.dup()
+ _A_ = A.dup()
+
+ A[[1], [1, 2]](mT.V, accum=binary.plus) << v.T
result2 = Matrix.from_values(
[0, 0, 0, 1, 1, 1, 2, 2, 2],
[0, 1, 2, 0, 1, 2, 0, 1, 2],
@@ -1022,7 +1208,16 @@ def test_subassign_row_col(A_chunks):
)
assert A.isequal(result2)
- A[[0, 1], 0](m.S, binary.plus, replace=True) << v
+ A_[[1], 1:3](mT.V, accum=binary.plus) << v.T
+ assert A_.isequal(result2)
+
+ _A[1:2, [1, 2]](mT.V, accum=binary.plus) << v.T
+ assert _A.isequal(result2)
+
+ _A_[1:2, 1:3](mT.V, accum=binary.plus) << v.T
+ assert _A_.isequal(result2)
+
+ A[[0, 1], [0]](m.S, binary.plus, replace=True) << v
result3 = Matrix.from_values(
[0, 0, 1, 1, 1, 2, 2, 2],
[1, 2, 0, 1, 2, 0, 1, 2],
@@ -1030,10 +1225,13 @@ def test_subassign_row_col(A_chunks):
)
assert A.isequal(result3)
+ A__ = A.dup()
with pytest.raises(DimensionMismatch):
- A(m.S)[[0, 1], 0] << v
+ A(m.S)[[0, 1], [0]] << v
+ A.compute()
- A[[0, 1], 0](m.S) << 99
+ A = A__
+ A[[0, 1], [0]](m.S) << 99
result4 = Matrix.from_values(
[0, 0, 1, 1, 1, 2, 2, 2],
[1, 2, 0, 1, 2, 0, 1, 2],
@@ -1041,7 +1239,7 @@ def test_subassign_row_col(A_chunks):
)
assert A.isequal(result4)
- A[[1, 2], 0](m.S, binary.plus, replace=True) << 100
+ A[[1, 2], [0]](m.S, binary.plus, replace=True) << 100
result5 = Matrix.from_values(
[0, 0, 1, 1, 2, 2, 2],
[1, 2, 1, 2, 0, 1, 2],
@@ -1049,7 +1247,7 @@ def test_subassign_row_col(A_chunks):
)
assert A.isequal(result5)
- A[2, [0, 1]](m.S) << -1
+ A[[2], [0, 1]](mT.S) << -1
result6 = Matrix.from_values(
[0, 0, 1, 1, 2, 2, 2],
[1, 2, 1, 2, 0, 1, 2],
@@ -1058,555 +1256,539 @@ def test_subassign_row_col(A_chunks):
assert A.isequal(result6)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_subassign_matrix():
- A = Matrix.from_values(
- [0, 0, 0, 1, 1, 1, 2, 2, 2],
- [0, 1, 2, 0, 1, 2, 0, 1, 2],
- [0, 1, 2, 3, 4, 5, 6, 7, 8],
- )
- m = Matrix.from_values([1], [0], [True])
- v = Matrix.from_values([0, 1], [0, 0], [10, 20])
- mT = m.T.new()
+def test_assign_column(As, A_chunks, v):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ result = Matrix.from_values(
+ [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4, 6],
+ [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1, 1],
+ [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 1, 1, 2, 0],
+ )
+ C = A.dup()
+ C[:, 1] = v
+ assert C.isequal(result)
- A[[0, 1], [0]](m.S) << v
- result1 = Matrix.from_values(
- [0, 0, 0, 1, 1, 1, 2, 2, 2],
- [0, 1, 2, 0, 1, 2, 0, 1, 2],
- [0, 1, 2, 20, 4, 5, 6, 7, 8],
- )
- assert A.isequal(result1)
- A_ = A.dup()
- _A = A.dup()
- _A_ = A.dup()
+def test_assign_row_scalar(As, A_chunks, v):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ C = A.dup()
+ C[0, :](v.S) << v
+ D = A.dup()
+ D(v.S)[0, :] << v
+ assert C.isequal(D)
- A[[1], [1, 2]](mT.V, accum=binary.plus) << v.T
- result2 = Matrix.from_values(
- [0, 0, 0, 1, 1, 1, 2, 2, 2],
- [0, 1, 2, 0, 1, 2, 0, 1, 2],
- [0, 1, 2, 20, 4, 25, 6, 7, 8],
- )
- assert A.isequal(result2)
+ C[:, :](C.S) << 1
- A_[[1], 1:3](mT.V, accum=binary.plus) << v.T
- assert A_.isequal(result2)
+ C_ = C.dup()
+ with pytest.raises(
+ TypeError, match="Unable to use Vector mask on Matrix assignment to a Matrix"
+ ):
+ C[:, :](v.S) << 1
+ C.compute()
- _A[1:2, [1, 2]](mT.V, accum=binary.plus) << v.T
- assert _A.isequal(result2)
+ C = C_.dup()
+ with pytest.raises(
+ TypeError,
+ match="Unable to use Vector mask on single element assignment to a Matrix",
+ ):
+ C[0, 0](v.S) << 1
+ C.compute()
- _A_[1:2, 1:3](mT.V, accum=binary.plus) << v.T
- assert _A_.isequal(result2)
+ C = C_.dup()
+ with pytest.raises(TypeError):
+ C[0, 0](v.S) << v
+ C.compute()
- A[[0, 1], [0]](m.S, binary.plus, replace=True) << v
- result3 = Matrix.from_values(
- [0, 0, 1, 1, 1, 2, 2, 2],
- [1, 2, 0, 1, 2, 0, 1, 2],
- [1, 2, 40, 4, 25, 6, 7, 8],
- )
- assert A.isequal(result3)
+ C = C_.dup()
+ with pytest.raises(TypeError):
+ C(v.S)[0, 0] << v
+ C.compute()
- with pytest.raises(DimensionMismatch):
- A(m.S)[[0, 1], [0]] << v
+ C = C_.dup()
+ with pytest.raises(TypeError):
+ C[0, 0](C.S) << v
+ C.compute()
- A[[0, 1], [0]](m.S) << 99
- result4 = Matrix.from_values(
- [0, 0, 1, 1, 1, 2, 2, 2],
- [1, 2, 0, 1, 2, 0, 1, 2],
- [1, 2, 99, 4, 25, 6, 7, 8],
- )
- assert A.isequal(result4)
+ C = C_.dup()
+ with pytest.raises(TypeError):
+ C(C.S)[0, 0] << v
+ C.compute()
- A[[1, 2], [0]](m.S, binary.plus, replace=True) << 100
- result5 = Matrix.from_values(
- [0, 0, 1, 1, 2, 2, 2],
- [1, 2, 1, 2, 0, 1, 2],
- [1, 2, 4, 25, 106, 7, 8],
- )
- assert A.isequal(result5)
+ C = C_.dup()
+ with pytest.raises(TypeError):
+ C[0, 0](v.S) << C
+ C.compute()
- A[[2], [0, 1]](mT.S) << -1
- result6 = Matrix.from_values(
- [0, 0, 1, 1, 2, 2, 2],
- [1, 2, 1, 2, 0, 1, 2],
- [1, 2, 4, 25, 106, -1, 8],
- )
- assert A.isequal(result6)
+ C = C_.dup()
+ with pytest.raises(TypeError):
+ C[0, 0](C.S) << C
+ C.compute()
+ C = A.dup()
+ C(v.S)[0, :] = 10
+ result = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 0, 0],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 4, 6],
+ [3, 10, 3, 1, 5, 10, 7, 8, 3, 1, 7, 4, 10, 10],
+ )
+ assert C.isequal(result)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_assign_column(A, A_chunks, v):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- result = Matrix.from_values(
- [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4, 6],
- [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1, 1],
- [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 1, 1, 2, 0],
- )
- C = A.dup()
- C[:, 1] = v
- assert C.isequal(result)
+def test_assign_row_col_matrix_mask():
+ # A B v1 v2
+ # 0 1 4 _ 100 10
+ # 2 _ 0 5 20
+ A_0 = Matrix.from_values([0, 0, 1], [0, 1, 0], [0, 1, 2])
+ B_0 = Matrix.from_values([0, 1, 1], [0, 0, 1], [4, 0, 5])
+ v1_0 = Vector.from_values([0], [100])
+ v2_0 = Vector.from_values([0, 1], [10, 20])
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_assign_row_scalar(A, A_chunks, v):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- C = A.dup()
- C[0, :](v.S) << v
- D = A.dup()
- D(v.S)[0, :] << v
- assert C.isequal(D)
-
- C[:, :](C.S) << 1
-
- with pytest.raises(
- TypeError, match="Unable to use Vector mask on Matrix assignment to a Matrix"
- ):
- C[:, :](v.S) << 1
- with pytest.raises(
- TypeError, match="Unable to use Vector mask on single element assignment to a Matrix"
- ):
- C[0, 0](v.S) << 1
-
- with pytest.raises(TypeError):
- C[0, 0](v.S) << v
- with pytest.raises(TypeError):
- C(v.S)[0, 0] << v
- with pytest.raises(TypeError):
- C[0, 0](C.S) << v
- with pytest.raises(TypeError):
- C(C.S)[0, 0] << v
-
- with pytest.raises(TypeError):
- C[0, 0](v.S) << C
- with pytest.raises(TypeError):
- C[0, 0](C.S) << C
-
- C = A.dup()
- C(v.S)[0, :] = 10
- result = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 0, 0],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 4, 6],
- [3, 10, 3, 1, 5, 10, 7, 8, 3, 1, 7, 4, 10, 10],
- )
- assert C.isequal(result)
-
-
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_assign_row_col_matrix_mask():
- # A B v1 v2
- # 0 1 4 _ 100 10
- # 2 _ 0 5 20
- A = Matrix.from_values([0, 0, 1], [0, 1, 0], [0, 1, 2])
- B = Matrix.from_values([0, 1, 1], [0, 0, 1], [4, 0, 5])
- v1 = Vector.from_values([0], [100])
- v2 = Vector.from_values([0, 1], [10, 20])
-
- # row assign
- C = A.dup()
- C(B.S)[0, :] << v2
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 2])
- assert C.isequal(result)
-
- C = A.dup()
- C(B.S, accum=binary.plus)[1, :] = v2
- result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 12, 20])
- assert C.isequal(result)
-
- C = A.dup()
- C(B.S, replace=True)[1, :] << v2
- result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 10, 20])
- assert C.isequal(result)
-
- # col assign
- C = A.dup()
- C(B.S)[:, 0] = v2
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 20])
- assert C.isequal(result)
-
- C = A.dup()
- C(B.S, accum=binary.plus)[:, 1] << v2
- result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 20])
- assert C.isequal(result)
-
- C = A.dup()
- C(B.S, replace=True)[:, 1] = v2
- result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 2, 20])
- assert C.isequal(result)
-
- # row assign scalar (as a sanity check)
- C = A.dup()
- C(B.S)[0, :] = 100
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
- assert C.isequal(result)
-
- C = A.dup()
- C(B.S, accum=binary.plus)[1, :] << 100
- result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 102, 100])
- assert C.isequal(result)
-
- C = A.dup()
- C(B.S, replace=True)[1, :] = 100
- result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 100, 100])
- assert C.isequal(result)
-
- # col assign scalar (as a sanity check)
- C = A.dup()
- C(B.S)[:, 0] << 100
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 100])
- assert C.isequal(result)
-
- C = A.dup()
- C(B.S, accum=binary.plus)[:, 1] = 100
- result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 100])
- assert C.isequal(result)
-
- C = A.dup()
- C(B.S, replace=True)[:, 1] << 100
- result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 2, 100])
- assert C.isequal(result)
-
- # row subassign
- C = A.dup()
- C[0, :](v2.S) << v2
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 20, 2])
- assert C.isequal(result)
-
- C = A.dup()
- C[0, [0]](v1.S) << v1
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
- assert C.isequal(result)
-
- with pytest.raises(
- TypeError, match="Indices for subassign imply Vector submask, but got Matrix mask instead"
- ):
- C[0, :](B.S) << v2
-
- # col subassign
- C = A.dup()
- C[:, 0](v2.S) << v2
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 20])
- assert C.isequal(result)
-
- C = A.dup()
- C[[0], 0](v1.S) << v1
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
- assert C.isequal(result)
-
- with pytest.raises(
- TypeError, match="Indices for subassign imply Vector submask, but got Matrix mask instead"
- ):
- C[:, 0](B.S) << v2
-
- # row subassign scalar
- C = A.dup()
- C[0, :](v2.S) << 100
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 100, 2])
- assert C.isequal(result)
-
- C = A.dup()
- C[0, [0]](v1.S) << 100
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
- assert C.isequal(result)
-
- with pytest.raises(
- TypeError, match="Indices for subassign imply Vector submask, but got Matrix mask instead"
- ):
- C[:, 0](B.S) << 100
-
- # col subassign scalar
- C = A.dup()
- C[:, 0](v2.S) << 100
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 100])
- assert C.isequal(result)
-
- C = A.dup()
- C[[0], 0](v1.S) << 100
- result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
- assert C.isequal(result)
-
- with pytest.raises(
- TypeError, match="Indices for subassign imply Vector submask, but got Matrix mask instead"
- ):
- C[:, 0](B.S) << 100
-
- # Bad subassign
- with pytest.raises(TypeError, match="Single element assign does not accept a submask"):
- C[0, 0](B.S) << 100
+ A_1 = Matrix.from_values(
+ da.from_array([0, 0, 1]), da.from_array([0, 1, 0]), da.from_array([0, 1, 2])
+ )
+ B_1 = Matrix.from_values(
+ da.from_array([0, 1, 1]), da.from_array([0, 0, 1]), da.from_array([4, 0, 5])
+ )
+ v1_1 = Vector.from_values(da.from_array([0]), da.from_array([100]))
+ v2_1 = Vector.from_values(da.from_array([0, 1]), da.from_array([10, 20]))
+
+ As = [A_0, A_1]
+ Bs = [B_0, B_1]
+ v1s = [v1_0, v1_1]
+ v2s = [v2_0, v2_1]
+
+ for A in As:
+ for B in Bs:
+ for v1 in v1s:
+ for v2 in v2s:
+ # row assign
+ C = A.dup()
+ C(B.S)[0, :] << v2
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 2])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C(B.S, accum=binary.plus)[1, :] = v2
+ result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 12, 20])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C(B.S, replace=True)[1, :] << v2
+ result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 10, 20])
+ assert C.isequal(result)
+
+ # col assign
+ C = A.dup()
+ C(B.S)[:, 0] = v2
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 20])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C(B.S, accum=binary.plus)[:, 1] << v2
+ result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 20])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C(B.S, replace=True)[:, 1] = v2
+ result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 2, 20])
+ assert C.isequal(result)
+
+ # row assign scalar (as a sanity check)
+ C = A.dup()
+ C(B.S)[0, :] = 100
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C(B.S, accum=binary.plus)[1, :] << 100
+ result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 102, 100])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C(B.S, replace=True)[1, :] = 100
+ result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 100, 100])
+ assert C.isequal(result)
+
+ # col assign scalar (as a sanity check)
+ C = A.dup()
+ C(B.S)[:, 0] << 100
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 100])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C(B.S, accum=binary.plus)[:, 1] = 100
+ result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 100])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C(B.S, replace=True)[:, 1] << 100
+ result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 2, 100])
+ assert C.isequal(result)
+
+ # row subassign
+ C = A.dup()
+ C[0, :](v2.S) << v2
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 20, 2])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C[0, [0]](v1.S) << v1
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
+ assert C.isequal(result)
+
+ with pytest.raises(
+ TypeError,
+ match="Indices for subassign imply Vector submask, but got Matrix mask instead",
+ ):
+ C[0, :](B.S) << v2
+ C.compute()
+
+ # col subassign
+ C = A.dup()
+ C[:, 0](v2.S) << v2
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 20])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C[[0], 0](v1.S) << v1
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
+ assert C.isequal(result)
+
+ with pytest.raises(
+ TypeError,
+ match="Indices for subassign imply Vector submask, but got Matrix mask instead",
+ ):
+ C[:, 0](B.S) << v2
+ C.compute()
+
+ # row subassign scalar
+ C = A.dup()
+ C[0, :](v2.S) << 100
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 100, 2])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C[0, [0]](v1.S) << 100
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
+ assert C.isequal(result)
+
+ with pytest.raises(
+ TypeError,
+ match="Indices for subassign imply Vector submask, but got Matrix mask instead",
+ ):
+ C[:, 0](B.S) << 100
+ C.compute()
+
+ # col subassign scalar
+ C = A.dup()
+ C[:, 0](v2.S) << 100
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 100])
+ assert C.isequal(result)
+
+ C = A.dup()
+ C[[0], 0](v1.S) << 100
+ result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2])
+ assert C.isequal(result)
+
+ with pytest.raises(
+ TypeError,
+ match="Indices for subassign imply Vector submask, but got Matrix mask instead",
+ ):
+ C[:, 0](B.S) << 100
+ C.compute()
+
+ # Bad subassign
+ C = A.dup()
+ with pytest.raises(
+ TypeError, match="Single element assign does not accept a submask"
+ ):
+ C[0, 0](B.S) << 100
+ C.compute()
+
+
+def test_assign_column_scalar(As, A_chunks, v):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ C = A.dup()
+ C[:, 0](v.S) << v
+ D = A.dup()
+ D(v.S)[:, 0] << v
+ assert C.isequal(D)
+ C = A.dup()
+ C[:, 1] = v
+ C(v.S)[:, 1] = 10
+ result = Matrix.from_values(
+ [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4, 6],
+ [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1, 1],
+ [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 10, 10, 10, 10],
+ )
+ assert C.isequal(result)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_assign_column_scalar(A, A_chunks, v):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- C = A.dup()
- C[:, 0](v.S) << v
- D = A.dup()
- D(v.S)[:, 0] << v
- assert C.isequal(D)
-
- C = A.dup()
- C[:, 1] = v
- C(v.S)[:, 1] = 10
- result = Matrix.from_values(
- [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4, 6],
- [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1, 1],
- [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 10, 10, 10, 10],
- )
- assert C.isequal(result)
+ C(v.V, replace=True, accum=binary.plus)[:, 1] = 20
+ result = Matrix.from_values(
+ [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4],
+ [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1],
+ [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 30, 30, 30],
+ )
+ assert C.isequal(result)
- C(v.V, replace=True, accum=binary.plus)[:, 1] = 20
- result = Matrix.from_values(
- [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4],
- [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1],
- [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 30, 30, 30],
- )
- assert C.isequal(result)
+def test_assign_scalar(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ # Test block
+ result_block = Matrix.from_values(
+ [3, 0, 6, 0, 6, 6, 2, 4, 1, 1, 3, 5, 1, 3, 5],
+ [0, 1, 2, 3, 3, 4, 5, 5, 6, 2, 2, 2, 4, 4, 4],
+ [3, 2, 5, 3, 7, 3, 1, 7, 4, 0, 0, 0, 0, 0, 0],
+ )
+ C = A.dup()
+ C[[1, 3, 5], [2, 4]] = 0
+ assert C.isequal(result_block)
+ C = A.dup()
+ C[[1, 3, 5], [2, 4]] = Scalar.from_value(0)
+ assert C.isequal(result_block)
+ C = A.dup()
+ C[1::2, 2:5:2] = 0
+ assert C.isequal(result_block)
+ C = A.dup()
+ C[1::2, 2:5:2] = Scalar.from_value(0)
+ assert C.isequal(result_block)
+ # Test row
+ result_row = Matrix.from_values(
+ [3, 0, 6, 0, 6, 6, 2, 4, 1, 3, 5, 1, 1],
+ [0, 1, 2, 3, 3, 4, 5, 5, 6, 2, 2, 2, 4],
+ [3, 2, 5, 3, 7, 3, 1, 7, 4, 3, 1, 0, 0],
+ )
+ C = A.dup()
+ C[1, [2, 4]] = 0
+ assert C.isequal(result_row)
+ C = A.dup()
+ C[1, 2] = Scalar.from_value(0)
+ C[1, 4] = Scalar.from_value(0)
+ assert C.isequal(result_row)
+ C = A.dup()
+ C[1, 2:5:2] = 0
+ assert C.isequal(result_row)
+ # Test column
+ result_column = Matrix.from_values(
+ [3, 0, 6, 0, 6, 6, 2, 4, 1, 1, 1, 3, 5],
+ [0, 1, 2, 3, 3, 4, 5, 5, 6, 4, 2, 2, 2],
+ [3, 2, 5, 3, 7, 3, 1, 7, 4, 8, 0, 0, 0],
+ )
+ C = A.dup()
+ C[[1, 3, 5], 2] = 0
+ assert C.isequal(result_column)
+ C = A.dup()
+ C[1::2, 2] = 0
+ assert C.isequal(result_column)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_assign_scalar(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- # Test block
- result_block = Matrix.from_values(
- [3, 0, 6, 0, 6, 6, 2, 4, 1, 1, 3, 5, 1, 3, 5],
- [0, 1, 2, 3, 3, 4, 5, 5, 6, 2, 2, 2, 4, 4, 4],
- [3, 2, 5, 3, 7, 3, 1, 7, 4, 0, 0, 0, 0, 0, 0],
- )
- C = A.dup()
- C[[1, 3, 5], [2, 4]] = 0
- assert C.isequal(result_block)
- C = A.dup()
- C[[1, 3, 5], [2, 4]] = Scalar.from_value(0)
- assert C.isequal(result_block)
- C = A.dup()
- C[1::2, 2:5:2] = 0
- assert C.isequal(result_block)
- C = A.dup()
- C[1::2, 2:5:2] = Scalar.from_value(0)
- assert C.isequal(result_block)
- # Test row
- result_row = Matrix.from_values(
- [3, 0, 6, 0, 6, 6, 2, 4, 1, 3, 5, 1, 1],
- [0, 1, 2, 3, 3, 4, 5, 5, 6, 2, 2, 2, 4],
- [3, 2, 5, 3, 7, 3, 1, 7, 4, 3, 1, 0, 0],
- )
- C = A.dup()
- C[1, [2, 4]] = 0
- assert C.isequal(result_row)
- C = A.dup()
- C[1, 2] = Scalar.from_value(0)
- C[1, 4] = Scalar.from_value(0)
- assert C.isequal(result_row)
- C = A.dup()
- C[1, 2:5:2] = 0
- assert C.isequal(result_row)
- # Test column
- result_column = Matrix.from_values(
- [3, 0, 6, 0, 6, 6, 2, 4, 1, 1, 1, 3, 5],
- [0, 1, 2, 3, 3, 4, 5, 5, 6, 4, 2, 2, 2],
- [3, 2, 5, 3, 7, 3, 1, 7, 4, 8, 0, 0, 0],
- )
- C = A.dup()
- C[[1, 3, 5], 2] = 0
- assert C.isequal(result_column)
- C = A.dup()
- C[1::2, 2] = 0
- assert C.isequal(result_column)
+def test_assign_bad(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ with pytest.raises(TypeError, match="Bad type"):
+ A[0, 0] = object()
+ A.compute()
+ A = A_.dup()
+ with pytest.raises(TypeError, match="Bad type"):
+ A[:, 0] = object()
+ A.compute()
+ A = A_.dup()
+ with pytest.raises(TypeError, match="Bad type"):
+ A[0, :] = object()
+ A.compute()
+ A = A_.dup()
+ with pytest.raises(TypeError, match="Bad type"):
+ A[:, :] = object()
+ A.compute()
+ A = A_.dup()
+ with pytest.raises(TypeError, match="Bad type"):
+ A[0, 0] = A
+ A.compute()
+ A = A_.dup()
+ with pytest.raises(TypeError, match="Bad type"):
+ A[:, 0] = A
+ A.compute()
+ A = A_.dup()
+ with pytest.raises(TypeError, match="Bad type"):
+ A[0, :] = A
+ A.compute()
+ A = A_.dup()
+ v = A[0, :].new()
+ with pytest.raises(TypeError, match="Bad type"):
+ A[0, 0] = v
+ A.compute()
+ A = A_.dup()
+ with pytest.raises(TypeError, match="Bad type"):
+ A[:, :] = v
+ A.compute()
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_assign_bad(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- with pytest.raises(TypeError, match="Bad type"):
- A[0, 0] = object()
- with pytest.raises(TypeError, match="Bad type"):
- A[:, 0] = object()
- with pytest.raises(TypeError, match="Bad type"):
- A[0, :] = object()
- with pytest.raises(TypeError, match="Bad type"):
- A[:, :] = object()
- with pytest.raises(TypeError, match="Bad type"):
- A[0, 0] = A
- with pytest.raises(TypeError, match="Bad type"):
- A[:, 0] = A
- with pytest.raises(TypeError, match="Bad type"):
- A[0, :] = A
- v = A[0, :].new()
- with pytest.raises(TypeError, match="Bad type"):
- A[0, 0] = v
- with pytest.raises(TypeError, match="Bad type"):
- A[:, :] = v
+def test_apply(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ result = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [-3, -2, -3, -1, -5, -3, -7, -8, -3, -1, -7, -4],
+ )
+ C = A.apply(unary.ainv).new()
+ assert C.isequal(result)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_apply(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- result = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [-3, -2, -3, -1, -5, -3, -7, -8, -3, -1, -7, -4],
- )
- C = A.apply(unary.ainv).new()
- assert C.isequal(result)
+def test_apply_binary(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ result_right = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
+ dtype=bool,
+ )
+ w_right = A.apply(binary.gt, right=1).new()
+ w_right2 = A.apply(binary.gt, right=Scalar.from_value(1)).new()
+ assert w_right.isequal(result_right)
+ assert w_right2.isequal(result_right)
+ result_left = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [5, 6, 5, 7, 3, 5, 1, 0, 5, 7, 1, 4],
+ )
+ w_left = A.apply(binary.minus, left=8).new()
+ w_left2 = A.apply(binary.minus, left=Scalar.from_value(8)).new()
+ assert w_left.isequal(result_left)
+ assert w_left2.isequal(result_left)
+ with pytest.raises(TypeError):
+ A.apply(binary.plus, left=A)
+ with pytest.raises(TypeError):
+ A.apply(binary.plus, right=A)
+ with pytest.raises(TypeError, match="Cannot provide both"):
+ A.apply(binary.plus, left=1, right=1)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_apply_binary(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- result_right = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
- dtype=bool,
- )
- w_right = A.apply(binary.gt, right=1).new()
- w_right2 = A.apply(binary.gt, right=Scalar.from_value(1)).new()
- assert w_right.isequal(result_right)
- assert w_right2.isequal(result_right)
- result_left = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [5, 6, 5, 7, 3, 5, 1, 0, 5, 7, 1, 4],
- )
- w_left = A.apply(binary.minus, left=8).new()
- w_left2 = A.apply(binary.minus, left=Scalar.from_value(8)).new()
- assert w_left.isequal(result_left)
- assert w_left2.isequal(result_left)
- with pytest.raises(TypeError):
- A.apply(binary.plus, left=A)
- with pytest.raises(TypeError):
- A.apply(binary.plus, right=A)
- with pytest.raises(TypeError, match="Cannot provide both"):
- A.apply(binary.plus, left=1, right=1)
+ # allow monoids
+ w1 = A.apply(binary.plus, left=1).new()
+ w2 = A.apply(monoid.plus, left=1).new()
+ w3 = A.apply(monoid.plus, right=1).new()
+ assert w1.isequal(w2)
+ assert w1.isequal(w3)
- # allow monoids
- w1 = A.apply(binary.plus, left=1).new()
- w2 = A.apply(monoid.plus, left=1).new()
- w3 = A.apply(monoid.plus, right=1).new()
- assert w1.isequal(w2)
- assert w1.isequal(w3)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_reduce_row(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15])
- w = A.reduce_rowwise(monoid.plus).new()
- assert w.isequal(result)
- w2 = A.reduce_rowwise(binary.plus).new()
- assert w2.isequal(result)
+def test_reduce_row(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15])
+ w = A.reduce_rowwise(monoid.plus).new()
+ assert w.isequal(result)
+ w2 = A.reduce_rowwise(binary.plus).new()
+ assert w2.isequal(result)
@pytest.mark.slow
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_reduce_agg(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15])
- w1 = A.reduce_rowwise(agg.sum).new()
- assert w1.isequal(result)
- w2 = A.T.reduce_columnwise(agg.sum).new()
- assert w2.isequal(result)
-
- counts = A.dup(dtype=bool).reduce_rowwise(monoid.plus[int]).new()
- w3 = A.reduce_rowwise(agg.count).new()
- assert w3.isequal(counts)
- w4 = A.T.reduce_columnwise(agg.count).new()
- assert w4.isequal(counts)
-
- Asquared = monoid.times(A & A).new()
- squared = Asquared.reduce_rowwise(monoid.plus).new()
- expected = unary.sqrt[float](squared).new()
- w5 = A.reduce_rowwise(agg.hypot).new()
- assert w5.isclose(expected)
- w6 = A.reduce_rowwise(monoid.numpy.hypot[float]).new()
- assert w6.isclose(expected)
- w7 = Vector.new(w5.dtype, size=w5.size)
- w7 << A.reduce_rowwise(agg.hypot)
- assert w7.isclose(expected)
-
- w8 = A.reduce_rowwise(agg.logaddexp).new()
- expected = A.reduce_rowwise(monoid.numpy.logaddexp[float]).new()
- assert w8.isclose(w8)
-
- result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 9, 10, 11, 8, 4])
- w9 = A.reduce_columnwise(agg.sum).new()
- assert w9.isequal(result)
- w10 = A.T.reduce_rowwise(agg.sum).new()
- assert w10.isequal(result)
-
- counts = A.dup(dtype=bool).reduce_columnwise(monoid.plus[int]).new()
- w11 = A.reduce_columnwise(agg.count).new()
- assert w11.isequal(counts)
- w12 = A.T.reduce_rowwise(agg.count).new()
- assert w12.isequal(counts)
-
- w13 = A.reduce_rowwise(agg.mean).new()
- expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [2.5, 6, 1, 3, 7, 1, 5])
- assert w13.isequal(expected)
- w14 = A.reduce_columnwise(agg.mean).new()
- expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 3, 5, 5.5, 4, 4])
- assert w14.isequal(expected)
-
- w15 = A.reduce_rowwise(agg.exists).new()
- w16 = A.reduce_columnwise(agg.exists).new()
- expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 1, 1, 1, 1, 1, 1])
- assert w15.isequal(expected)
- assert w16.isequal(expected)
-
- assert A.reduce_scalar(agg.sum).new() == 47
- assert A.reduce_scalar(agg.prod).new() == 1270080
- assert A.reduce_scalar(agg.count).new() == 12
- assert A.reduce_scalar(agg.count_nonzero).new() == 12
- assert A.reduce_scalar(agg.count_zero).new() == 0
- assert A.reduce_scalar(agg.sum_of_squares).new() == 245
- assert A.reduce_scalar(agg.hypot).new().isclose(245 ** 0.5)
- assert A.reduce_scalar(agg.logaddexp).new().isclose(8.6071076)
- assert A.reduce_scalar(agg.logaddexp2).new().isclose(9.2288187)
- assert A.reduce_scalar(agg.mean).new().isclose(47 / 12)
- assert A.reduce_scalar(agg.exists).new() == 1
+def test_reduce_agg(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15])
+ w1 = A.reduce_rowwise(agg.sum).new()
+ assert w1.isequal(result)
+ w2 = A.T.reduce_columnwise(agg.sum).new()
+ assert w2.isequal(result)
- silly = agg.Aggregator(
- "silly",
- composite=[agg.varp, agg.stdp],
- finalize=lambda x, y: binary.times(x & y),
- types=[agg.varp],
- )
- v1 = A.reduce_rowwise(agg.varp).new()
- v2 = A.reduce_rowwise(agg.stdp).new()
- assert v1.isclose(binary.times(v2 & v2).new())
- v3 = A.reduce_rowwise(silly).new()
- assert v3.isclose(binary.times(v1 & v2).new())
+ counts = A.dup(dtype=bool).reduce_rowwise(monoid.plus[int]).new()
+ w3 = A.reduce_rowwise(agg.count).new()
+ assert w3.isequal(counts)
+ w4 = A.T.reduce_columnwise(agg.count).new()
+ assert w4.isequal(counts)
+
+ Asquared = monoid.times(A & A).new()
+ squared = Asquared.reduce_rowwise(monoid.plus).new()
+ expected = unary.sqrt[float](squared).new()
+ w5 = A.reduce_rowwise(agg.hypot).new()
+ assert w5.isclose(expected)
+ w6 = A.reduce_rowwise(monoid.numpy.hypot[float]).new()
+ assert w6.isclose(expected)
+ w7 = Vector.new(w5.dtype, size=w5.size)
+ w7 << A.reduce_rowwise(agg.hypot)
+ assert w7.isclose(expected)
+
+ w8 = A.reduce_rowwise(agg.logaddexp).new()
+ expected = A.reduce_rowwise(monoid.numpy.logaddexp[float]).new()
+            assert w8.isclose(expected)
+
+ result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 9, 10, 11, 8, 4])
+ w9 = A.reduce_columnwise(agg.sum).new()
+ assert w9.isequal(result)
+ w10 = A.T.reduce_rowwise(agg.sum).new()
+ assert w10.isequal(result)
+
+ counts = A.dup(dtype=bool).reduce_columnwise(monoid.plus[int]).new()
+ w11 = A.reduce_columnwise(agg.count).new()
+ assert w11.isequal(counts)
+ w12 = A.T.reduce_rowwise(agg.count).new()
+ assert w12.isequal(counts)
+
+ w13 = A.reduce_rowwise(agg.mean).new()
+ expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [2.5, 6, 1, 3, 7, 1, 5])
+ assert w13.isequal(expected)
+ w14 = A.reduce_columnwise(agg.mean).new()
+ expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 3, 5, 5.5, 4, 4])
+ assert w14.isequal(expected)
+
+ w15 = A.reduce_rowwise(agg.exists).new()
+ w16 = A.reduce_columnwise(agg.exists).new()
+ expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 1, 1, 1, 1, 1, 1])
+ assert w15.isequal(expected)
+ assert w16.isequal(expected)
+
+ assert A.reduce_scalar(agg.sum).new() == 47
+ assert A.reduce_scalar(agg.prod).new() == 1270080
+ assert A.reduce_scalar(agg.count).new() == 12
+ assert A.reduce_scalar(agg.count_nonzero).new() == 12
+ assert A.reduce_scalar(agg.count_zero).new() == 0
+ assert A.reduce_scalar(agg.sum_of_squares).new() == 245
+ assert A.reduce_scalar(agg.hypot).new().isclose(245 ** 0.5)
+ assert A.reduce_scalar(agg.logaddexp).new().isclose(8.6071076)
+ assert A.reduce_scalar(agg.logaddexp2).new().isclose(9.2288187)
+ assert A.reduce_scalar(agg.mean).new().isclose(47 / 12)
+ assert A.reduce_scalar(agg.exists).new() == 1
+
+ silly = agg.Aggregator(
+ "silly",
+ composite=[agg.varp, agg.stdp],
+ finalize=lambda x, y: binary.times(x & y),
+ types=[agg.varp],
+ )
+ v1 = A.reduce_rowwise(agg.varp).new()
+ v2 = A.reduce_rowwise(agg.stdp).new()
+ assert v1.isclose(binary.times(v2 & v2).new())
+ v3 = A.reduce_rowwise(silly).new()
+ assert v3.isclose(binary.times(v1 & v2).new())
- s1 = A.reduce_scalar(agg.varp).new()
- s2 = A.reduce_scalar(agg.stdp).new()
- assert s1.isclose(s2.value.compute() * s2.value.compute())
- s3 = A.reduce_scalar(silly).new()
- assert s3.isclose(s1.value.compute() * s2.value.compute())
+ s1 = A.reduce_scalar(agg.varp).new()
+ s2 = A.reduce_scalar(agg.stdp).new()
+ assert s1.isclose(s2.value.compute() * s2.value.compute())
+ s3 = A.reduce_scalar(silly).new()
+ assert s3.isclose(s1.value.compute() * s2.value.compute())
@pytest.mark.xfail("'Needs investigation'", strict=True)
@@ -1798,83 +1980,83 @@ def test_reduce_agg_empty(A_chunks):
assert compute(s.value) is None
-def test_reduce_row_udf(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15])
- binop = grblas.operator.BinaryOp.register_anonymous(lambda x, y: x + y)
- with pytest.raises(NotImplementedException):
- # Although allowed by the spec, SuiteSparse doesn't like user-defined binarops here
- A.reduce_rowwise(binop).new()
- # If the user creates a monoid from the binop, then we can use the monoid instead
- monoid = grblas.operator.Monoid.register_anonymous(binop, 0)
- w = A.reduce_rowwise(binop).new()
- assert w.isequal(result)
- w2 = A.reduce_rowwise(monoid).new()
- assert w2.isequal(result)
-
-
-def test_reduce_column(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 9, 10, 11, 8, 4])
- w = A.reduce_columnwise(monoid.plus).new()
- assert w.isequal(result)
- w2 = A.reduce_columnwise(binary.plus).new()
- assert w2.isequal(result)
+def test_reduce_row_udf(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15])
+ binop = grblas.operator.BinaryOp.register_anonymous(lambda x, y: x + y)
+ with pytest.raises(NotImplementedException):
+ # Although allowed by the spec, SuiteSparse doesn't like user-defined binarops here
+ A.reduce_rowwise(binop).new()
+ # If the user creates a monoid from the binop, then we can use the monoid instead
+ monoid = grblas.operator.Monoid.register_anonymous(binop, 0)
+ w = A.reduce_rowwise(binop).new()
+ assert w.isequal(result)
+ w2 = A.reduce_rowwise(monoid).new()
+ assert w2.isequal(result)
-def test_reduce_scalar(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- s = A.reduce_scalar(monoid.plus).new()
- assert s == 47
- assert A.reduce_scalar(binary.plus).new() == 47
- with pytest.raises(TypeError, match="Expected type: Monoid"):
- A.reduce_scalar(binary.minus)
-
- # test dtype coercion
- assert A.dtype == dtypes.INT64
- s = A.reduce_scalar().new(dtype=float)
- assert s == 47.0
- assert s.dtype == dtypes.FP64
- t = Scalar.new(float)
- t << A.reduce_scalar(monoid.plus)
- assert t == 47.0
- t = Scalar.new(float)
- t() << A.reduce_scalar(monoid.plus)
- assert t == 47.0
- t(accum=binary.times) << A.reduce_scalar(monoid.plus)
- assert t == 47 * 47
- assert A.reduce_scalar(monoid.plus[dtypes.UINT64]).new() == 47
- # Make sure we accumulate as a float, not int
- t.value = 1.23
- t(accum=binary.plus) << A.reduce_scalar()
- assert t == 48.23
-
-
-def test_transpose(A, A_chunks):
+def test_reduce_column(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 9, 10, 11, 8, 4])
+ w = A.reduce_columnwise(monoid.plus).new()
+ assert w.isequal(result)
+ w2 = A.reduce_columnwise(binary.plus).new()
+ assert w2.isequal(result)
+
+
+def test_reduce_scalar(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ s = A.reduce_scalar(monoid.plus).new()
+ assert s == 47
+ assert A.reduce_scalar(binary.plus).new() == 47
+ with pytest.raises(TypeError, match="Expected type: Monoid"):
+ A.reduce_scalar(binary.minus)
+
+ # test dtype coercion
+ assert A.dtype == dtypes.INT64
+ s = A.reduce_scalar().new(dtype=float)
+ assert s == 47.0
+ assert s.dtype == dtypes.FP64
+ t = Scalar.new(float)
+ t << A.reduce_scalar(monoid.plus)
+ assert t == 47.0
+ t = Scalar.new(float)
+ t() << A.reduce_scalar(monoid.plus)
+ assert t == 47.0
+ t(accum=binary.times) << A.reduce_scalar(monoid.plus)
+ assert t == 47 * 47
+ assert A.reduce_scalar(monoid.plus[dtypes.UINT64]).new() == 47
+ # Make sure we accumulate as a float, not int
+ t.value = 1.23
+ t(accum=binary.plus) << A.reduce_scalar()
+ assert t == 48.23
+
+
+def test_transpose(As, A_chunks):
# C << A.T
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- rows, cols, vals = A.to_values()
- result = Matrix.from_values(cols, rows, vals)
- C = Matrix.new(A.dtype, A.ncols, A.nrows)
- C << A.T
- assert C.isequal(result)
- C2 = A.T.new()
- assert C2.isequal(result)
- assert A.T.T is A
- C3 = A.T.new(dtype=float)
- assert C3.isequal(result)
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ rows, cols, vals = A.to_values()
+ result = Matrix.from_values(cols, rows, vals)
+ C = Matrix.new(A.dtype, A.ncols, A.nrows)
+ C << A.T
+ assert C.isequal(result)
+ C2 = A.T.new()
+ assert C2.isequal(result)
+ assert A.T.T is A
+ C3 = A.T.new(dtype=float)
+ assert C3.isequal(result)
@pytest.mark.xfail("'Needs investigation'", strict=True)
@@ -1899,111 +2081,113 @@ def test_kronecker():
assert C.isequal(result)
-def test_simple_assignment(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- # C << A
- C = Matrix.new(A.dtype, A.nrows, A.ncols)
- C << A
- assert C.isequal(A)
+def test_simple_assignment(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ # C << A
+ C = Matrix.new(A.dtype, A.nrows, A.ncols)
+ C << A
+ assert C.isequal(A)
-def test_assign_transpose(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- C = Matrix.new(A.dtype, A.ncols, A.nrows)
- C << A.T
- assert C.isequal(A.T.new())
+def test_assign_transpose(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ C = Matrix.new(A.dtype, A.ncols, A.nrows)
+ C << A.T
+ assert C.isequal(A.T.new())
- with pytest.raises(TypeError):
- C.T << A
- with pytest.raises(TypeError, match="does not support item assignment"):
- C.T[:, :] << A
- with pytest.raises(AttributeError):
- C[:, :].T << A
+ with pytest.raises(TypeError):
+ C.T << A
+ with pytest.raises(TypeError, match="does not support item assignment"):
+ C.T[:, :] << A
+ with pytest.raises(AttributeError):
+ C[:, :].T << A
- C = Matrix.new(A.dtype, A.ncols + 1, A.nrows + 1)
- C[: A.ncols, : A.nrows] << A.T
- assert C[: A.ncols, : A.nrows].new().isequal(A.T.new())
+ nrows, ncols = A.nrows, A.ncols
+ if A.is_dOnion:
+ nrows, ncols = nrows.compute(), ncols.compute()
+ C = Matrix.new(A.dtype, ncols + 1, nrows + 1)
+ C[:ncols, :nrows] << A.T
+ assert C[:ncols, :nrows].new().isequal(A.T.new())
-def test_isequal(A, A_chunks, v):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A.isequal(A)
- with pytest.raises(TypeError, match="Matrix"):
- A.isequal(v) # equality is not type-checking
- C = Matrix.from_values([1], [1], [1])
- assert not C.isequal(A)
- D = Matrix.from_values([1], [2], [1])
- assert not C.isequal(D)
- D2 = Matrix.from_values([0], [2], [1], nrows=D.nrows, ncols=D.ncols)
- assert not D2.isequal(D)
- C2 = Matrix.from_values([1], [1], [1], nrows=7, ncols=7)
- assert not C2.isequal(A)
- C3 = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 3.0, 1.0, 7.0, 4.0],
- )
- assert not C3.isequal(A, check_dtype=True), "different datatypes are not equal"
- C4 = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [3.0, 2.0, 3.0, 1.0, 5.0, 3.000000000000000001, 7.0, 8.0, 3.0, 1 - 1e-11, 7.0, 4.0],
- )
- assert not C4.isequal(A)
+def test_isequal(As, A_chunks, v):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A.isequal(A)
+ with pytest.raises(TypeError, match="Matrix"):
+ A.isequal(v) # equality is not type-checking
+ C = Matrix.from_values([1], [1], [1])
+ assert not C.isequal(A)
+ D = Matrix.from_values([1], [2], [1])
+ assert not C.isequal(D)
+ D2 = Matrix.from_values([0], [2], [1], nrows=D.nrows, ncols=D.ncols)
+ assert not D2.isequal(D)
+ C2 = Matrix.from_values([1], [1], [1], nrows=7, ncols=7)
+ assert not C2.isequal(A)
+ C3 = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 3.0, 1.0, 7.0, 4.0],
+ )
+ assert not C3.isequal(A, check_dtype=True), "different datatypes are not equal"
+ C4 = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [3.0, 2.0, 3.0, 1.0, 5.0, 3.000000000000000001, 7.0, 8.0, 3.0, 1 - 1e-11, 7.0, 4.0],
+ )
+ assert not C4.isequal(A)
-@pytest.mark.slow
-def test_isclose(A, A_chunks, v):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A.isclose(A)
- with pytest.raises(TypeError, match="Matrix"):
- A.isclose(v) # equality is not type-checking
- C = Matrix.from_values([1], [1], [1]) # wrong size
- assert not C.isclose(A)
- D = Matrix.from_values([1], [2], [1])
- assert not C.isclose(D)
- D2 = Matrix.from_values([0], [2], [1], nrows=D.nrows, ncols=D.ncols)
- assert not D2.isclose(D)
- C2 = Matrix.from_values([1], [1], [1], nrows=7, ncols=7) # missing values
- assert not C2.isclose(A)
- C3 = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 0],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 2],
- [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 3],
- ) # extra values
- assert not C3.isclose(A)
- C4 = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 3.0, 1.0, 7.0, 4.0],
- )
- assert not C4.isclose(A, check_dtype=True), "different datatypes are not equal"
- # fmt: off
- C5 = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [3.0, 2.0, 3.0, 1.0, 5.0, 3.000000000000000001, 7.0, 8.0, 3.0, 1 - 1e-11, 7.0, 4.0],
- )
- # fmt: on
- assert C5.isclose(A)
- C6 = Matrix.from_values(
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [3.0, 2.000001, 3.0, 1.0, 5.0, 3.0, 7.0, 7.9999999, 3.0, 1.0, 7.0, 4.0],
- )
- assert C6.isclose(A, rel_tol=1e-3)
+def test_isclose(As, A_chunks, v):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A.isclose(A)
+ with pytest.raises(TypeError, match="Matrix"):
+ A.isclose(v) # equality is not type-checking
+ C = Matrix.from_values([1], [1], [1]) # wrong size
+ assert not C.isclose(A)
+ D = Matrix.from_values([1], [2], [1])
+ assert not C.isclose(D)
+ D2 = Matrix.from_values([0], [2], [1], nrows=D.nrows, ncols=D.ncols)
+ assert not D2.isclose(D)
+ C2 = Matrix.from_values([1], [1], [1], nrows=7, ncols=7) # missing values
+ assert not C2.isclose(A)
+ C3 = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 0],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 2],
+ [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 3],
+ ) # extra values
+ assert not C3.isclose(A)
+ C4 = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 3.0, 1.0, 7.0, 4.0],
+ )
+ assert not C4.isclose(A, check_dtype=True), "different datatypes are not equal"
+ # fmt: off
+ C5 = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [3.0, 2.0, 3.0, 1.0, 5.0, 3.000000000000000001, 7.0, 8.0, 3.0, 1 - 1e-11, 7.0, 4.0],
+ )
+ # fmt: on
+ assert C5.isclose(A)
+ C6 = Matrix.from_values(
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [3.0, 2.000001, 3.0, 1.0, 5.0, 3.0, 7.0, 7.9999999, 3.0, 1.0, 7.0, 4.0],
+ )
+ assert C6.isclose(A, rel_tol=1e-3)
@pytest.mark.slow
From 9bf42cd42500236c769894ad8da3e1868c9d91b4 Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Fri, 11 Mar 2022 00:36:02 +0100
Subject: [PATCH 13/18] further simplified dOnion technology
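
The rewritten tests (here and in the test_matrix.py changes above) iterate a
list of fixture matrices (`As`) over several chunkings (`A_chunks`), and a
dOnion-backed matrix exposes delayed `nrows`/`ncols` that must be computed
before they can be used as plain integers.  A minimal sketch of that pattern
(illustrative only, not part of the patch; `check` stands for any of the
per-matrix assertions used in the tests):

    def run_over_fixtures(As, A_chunks, check):
        for A_ in As:
            for chunks in A_chunks:
                A = A_.dup()  # work on a fresh copy of the fixture
                A.rechunk(chunks=chunks, inplace=True)
                nrows, ncols = A.nrows, A.ncols
                if A.is_dOnion:  # dimensions are delayed for dOnion-backed matrices
                    nrows, ncols = nrows.compute(), ncols.compute()
                check(A, nrows, ncols)
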
---
dask_grblas/base.py | 398 ++++++++++++++++--------------
dask_grblas/expr.py | 141 +++++------
dask_grblas/mask.py | 10 +-
dask_grblas/matrix.py | 17 +-
dask_grblas/scalar.py | 97 ++------
tests/from_grblas2/test_matrix.py | 44 ++--
6 files changed, 337 insertions(+), 370 deletions(-)
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 4dfa6a7..362e6aa 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -52,12 +52,10 @@ def dOnion_if(self):
def isequal(self, other, *, check_dtype=False):
from .scalar import PythonScalar
- args = [self, other]
- if np.any([type(arg._delayed) is DOnion for arg in args]):
- args = [arg._delayed if type(arg._delayed) is DOnion else arg for arg in args]
+ if any_dOnions(self, other):
meta = gb.Scalar.new(bool)
- delayed = DOnion.multiple_access(
- meta, self.__class__.isequal, *args, check_dtype=check_dtype
+ delayed = DOnion.multi_access(
+ meta, self.__class__.isequal, self, other, check_dtype=check_dtype
)
return PythonScalar(delayed, meta=meta)
@@ -78,14 +76,32 @@ def isequal(self, other, *, check_dtype=False):
adjust_chunks={i: 1 for i in range(self._delayed.ndim)},
)
"""
- delayed = da.core.elemwise(
- _isequal,
- self._delayed,
- other._delayed,
- check_dtype,
- dtype=bool,
+ ndim = (
+ self._matrix._delayed.ndim
+ if getattr(self, "_is_transposed", False)
+ else self._delayed.ndim
)
- if self._delayed.ndim > 0:
+ if ndim < 2:
+ delayed = da.core.elemwise(
+ partial(_isequal, False, False),
+ self._delayed,
+ other._delayed,
+ check_dtype,
+ dtype=bool,
+ )
+ else:
+ xt = getattr(self, "_is_transposed", False)
+ yt = getattr(other, "_is_transposed", False)
+ self_ = (self._matrix._delayed, "ji") if xt else (self._delayed, "ij")
+ other_ = (other._matrix._delayed, "ji") if yt else (other._delayed, "ij")
+ delayed = da.core.blockwise(
+ *(partial(_isequal, xt, yt), "ij"),
+ *self_,
+ *other_,
+ *(check_dtype, None),
+ dtype=bool,
+ )
+ if ndim > 0:
delayed = da.core.elemwise(
_to_scalar,
delayed.all(),
@@ -96,14 +112,13 @@ def isequal(self, other, *, check_dtype=False):
def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
from .scalar import PythonScalar
- args = [self, other]
- if np.any([type(arg._delayed) is DOnion for arg in args]):
- args = [arg._delayed if type(arg._delayed) is DOnion else arg for arg in args]
+ if any_dOnions(self, other):
meta = gb.Scalar.new(bool)
- delayed = DOnion.multiple_access(
+ delayed = DOnion.multi_access(
meta,
self.__class__.isclose,
- *args,
+ self,
+ other,
rel_tol=rel_tol,
abs_tol=abs_tol,
check_dtype=check_dtype,
@@ -114,16 +129,37 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
# raise TypeError(f'Argument of isclose must be of type {type(self).__name__}')
if not self._meta.isequal(other._meta):
return PythonScalar.from_value(False)
- delayed = da.core.elemwise(
- _isclose,
- self._delayed,
- other._delayed,
- rel_tol,
- abs_tol,
- check_dtype,
- dtype=bool,
+
+ ndim = (
+ self._matrix._delayed.ndim
+ if getattr(self, "_is_transposed", False)
+ else self._delayed.ndim
)
- if self._delayed.ndim > 0:
+ if ndim < 2:
+ delayed = da.core.elemwise(
+ partial(_isclose, False, False),
+ self._delayed,
+ other._delayed,
+ rel_tol,
+ abs_tol,
+ check_dtype,
+ dtype=bool,
+ )
+ else:
+ xt = getattr(self, "_is_transposed", False)
+ yt = getattr(other, "_is_transposed", False)
+ self_ = (self._matrix._delayed, "ji") if xt else (self._delayed, "ij")
+ other_ = (other._matrix._delayed, "ji") if yt else (other._delayed, "ij")
+ delayed = da.core.blockwise(
+ *(partial(_isclose, xt, yt), "ij"),
+ *self_,
+ *other_,
+ *(rel_tol, None),
+ *(abs_tol, None),
+ *(check_dtype, None),
+ dtype=bool,
+ )
+ if ndim > 0:
delayed = da.core.elemwise(
_to_scalar,
delayed.all(),
@@ -173,19 +209,13 @@ def clear(self):
)
def dup(self, dtype=None, *, mask=None, name=None):
- if is_DOnion(self._delayed):
- mask_meta = mask._meta if mask else None
- meta = self._meta.dup(dtype=dtype, mask=mask_meta, name=name)
- donion = DOnion.multiple_access(
- meta, self.__class__.dup, self._delayed, dtype=dtype, mask=mask, name=name
+ if any_dOnions(self, mask):
+ meta = self._meta.dup(dtype=dtype)
+ donion = DOnion.multi_access(
+ meta, self.__class__.dup, self, dtype=dtype, mask=mask, name=name
)
return self.__class__(donion, meta=meta)
- if mask and is_DOnion(mask.mask):
- meta = self._meta.dup(dtype=dtype, name=name)
- donion = DOnion.multiple_access(meta, self.dup, dtype=dtype, mask=mask.mask, name=name)
- return self.__class__(donion, meta=meta)
-
if mask is not None:
if not isinstance(mask, Mask):
self._meta.dup(dtype=dtype, mask=mask, name=name) # should raise
@@ -330,42 +360,26 @@ def update(self, expr, in_DOnion=False):
"\n\n M[:, :] = s"
)
typ = type(expr)
- if (
- self.is_dOnion
- or typ is AmbiguousAssignOrExtract
- and expr.has_dOnion
- or typ is GbDelayed
- and expr.has_dOnion
- or typ is type(self)
- and expr.is_dOnion
- or typ is TransposedMatrix
- and expr.is_dOnion
- ):
+ if any_dOnions(self, expr):
self_copy = self.__class__(self._delayed, meta=self._meta)
expr_ = expr
if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
- def update_by_aae(c, p, t, k_0, k_1):
- p = p.T if t else p
+ def update_by_aae(c, p, k_0, k_1):
keys = k_0 if k_1 is None else (k_0, k_1)
aae = AmbiguousAssignOrExtract(p, keys)
return c.update(aae, in_DOnion=True)
- aae_parent = expr_.parent.dOnion_if
- aae_parent_is_T = expr_.parent.is_dOnion and getattr(
- expr_.parent, "_is_transposed", False
- )
- if type(expr_.index) is tuple and len(expr_.index) == 2:
+ if _is_pair(expr_.index):
keys_0, keys_1 = expr_.index[0], expr_.index[1]
else:
keys_0, keys_1 = expr_.index, None
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
self._meta,
update_by_aae,
- self_copy.dOnion_if,
- aae_parent,
- aae_parent_is_T,
+ self_copy,
+ expr_.parent,
*(keys_0, keys_1),
)
self.__init__(donion, self._meta)
@@ -373,49 +387,32 @@ def update_by_aae(c, p, t, k_0, k_1):
if typ is GbDelayed and expr.has_dOnion:
- def update_by_gbd(c, t, *args, **kwargs):
- args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ def update_by_gbd(c, *args, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
return c.update(gbd, in_DOnion=True)
- gbd_parent = expr_.parent.dOnion_if
- gbd_method = expr.method_name
- gbd_args = (gbd_parent, gbd_method) + tuple(
- getattr(x, "dOnion_if", x) for x in expr.args
- )
- gbd_parent_is_T = expr_.parent.is_dOnion and getattr(
- expr_.parent, "_is_transposed", False
- )
- is_T = (gbd_parent_is_T, False) + tuple(
- getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
- for x in expr.args
- )
- gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in expr.kwargs.items()}
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
self._meta,
update_by_gbd,
- self_copy.dOnion_if,
- is_T,
- *gbd_args,
- **gbd_kwargs,
+ self_copy,
+ expr_.parent,
+ expr_.method_name,
+ *expr_.args,
+ **expr_.kwargs,
)
self.__init__(donion, self._meta)
return
- elif typ is type(self) and expr.is_dOnion:
- expr_ = expr._delayed
- elif typ is TransposedMatrix and expr.is_dOnion:
- def update_T(lhs, rhs):
- return BaseType.update(lhs, rhs.T, in_DOnion=True)
+ elif typ is TransposedMatrix and expr.is_dOnion:
- donion = DOnion.multiple_access(
- self._meta, update_T, self_copy.dOnion_if, expr_.dOnion_if
+ donion = DOnion.multi_access(
+ self._meta, BaseType.update, self_copy, expr_, in_DOnion=True
)
self.__init__(donion, self._meta)
return
- donion = DOnion.multiple_access(
- self._meta, BaseType.update, self_copy.dOnion_if, expr_, in_DOnion=True
+ donion = DOnion.multi_access(
+ self._meta, BaseType.update, self_copy, expr_, in_DOnion=True
)
self.__init__(donion, self._meta)
return
@@ -456,43 +453,27 @@ def update_T(lhs, rhs):
def _update(self, expr, *, mask=None, accum=None, replace=None, in_DOnion=False):
typ = type(expr)
- if (
- self.is_dOnion
- or mask is not None
- and mask.is_dOnion
- or typ is AmbiguousAssignOrExtract
- and expr.has_dOnion
- or typ is GbDelayed
- and expr.has_dOnion
- or typ is type(self)
- and expr.is_dOnion
- ):
+ if any_dOnions(self, expr, mask):
self_copy = self.__class__(self._delayed, meta=self._meta)
mask_ = mask.dOnion_if if mask is not None else None
expr_ = expr
if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
- def _update_by_aae(c, p, t, k_0, k_1, mask=None, accum=None, replace=None):
- p = p.T if t else p
+ def _update_by_aae(c, p, k_0, k_1, mask=None, accum=None, replace=None):
keys = k_0 if k_1 is None else (k_0, k_1)
aae = AmbiguousAssignOrExtract(p, keys)
return c.update(aae, mask=mask, accum=accum, replace=replace, in_DOnion=True)
- aae_parent = expr_.parent.dOnion_if
- aae_parent_is_T = expr_.parent.is_dOnion and getattr(
- expr_.parent, "_is_transposed", False
- )
- if type(expr_.index) is tuple and len(expr_.index) == 2:
+ if _is_pair(expr_.index):
keys_0, keys_1 = expr_.index[0], expr_.index[1]
else:
keys_0, keys_1 = expr_.index, None
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
self._meta,
_update_by_aae,
- self_copy.dOnion_if,
- aae_parent,
- aae_parent_is_T,
+ self_copy,
+ expr_.parent,
*(keys_0, keys_1),
mask=mask_,
accum=accum,
@@ -503,45 +484,29 @@ def _update_by_aae(c, p, t, k_0, k_1, mask=None, accum=None, replace=None):
if typ is GbDelayed and expr.has_dOnion:
- def _update_by_gbd(c, t, *args, mask=None, accum=None, replace=None, **kwargs):
- args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ def _update_by_gbd(c, *args, mask=None, accum=None, replace=None, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
return c._update(gbd, mask=mask, accum=accum, replace=replace, in_DOnion=True)
- gbd_parent = expr_.parent.dOnion_if
- gbd_method = expr.method_name
- gbd_args = (gbd_parent, gbd_method) + tuple(
- getattr(x, "dOnion_if", x) for x in expr.args
- )
- gbd_parent_is_T = expr_.parent.is_dOnion and getattr(
- expr_.parent, "_is_transposed", False
- )
- is_T = (gbd_parent_is_T, False) + tuple(
- getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
- for x in expr.args
- )
- gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in expr.kwargs.items()}
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
self._meta,
_update_by_gbd,
- self_copy.dOnion_if,
- is_T,
- *gbd_args,
+ self_copy,
+ expr_.parent,
+ expr_.method_name,
+ *expr_.args,
mask=mask_,
accum=accum,
replace=replace,
- **gbd_kwargs,
+ **expr_.kwargs,
)
self.__init__(donion, self._meta)
return
- elif typ is type(self) and expr.is_dOnion:
- expr_ = expr._delayed
-
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
self._meta,
BaseType._update,
- self_copy.dOnion_if,
+ self_copy,
expr_,
mask=mask_,
accum=accum,
@@ -563,41 +528,27 @@ def _update_by_gbd(c, t, *args, mask=None, accum=None, replace=None, **kwargs):
if typ is AmbiguousAssignOrExtract:
# Extract (w(mask=mask, accum=accum) << v[index])
expr_new = expr.new(dtype=self.dtype)
- if expr_new.is_dOnion:
- self_ = self.__class__(self._delayed, meta=self._meta)
- donion = DOnion.multiple_access(
- self._meta,
- BaseType._update,
- self_,
- expr_new,
- mask=mask,
- accum=accum,
- replace=replace,
- in_DOnion=True,
- )
- self.__init__(donion, meta=self._meta)
+ expr_delayed = expr_new._delayed
+ delayed = self._optional_dup()
+ self._meta(mask=get_meta(mask), accum=accum, replace=replace)
+ if mask is not None:
+ delayed_mask = mask.mask._delayed
+ grblas_mask_type = get_grblas_type(mask)
else:
- expr_delayed = expr_new._delayed
- delayed = self._optional_dup()
- self._meta(mask=get_meta(mask), accum=accum, replace=replace)
- if mask is not None:
- delayed_mask = mask.mask._delayed
- grblas_mask_type = get_grblas_type(mask)
- else:
- delayed_mask = None
- grblas_mask_type = None
- self.__init__(
- da.core.elemwise(
- _update_assign,
- delayed,
- accum,
- delayed_mask,
- grblas_mask_type,
- replace,
- expr_delayed,
- dtype=np_dtype(self._meta.dtype),
- )
+ delayed_mask = None
+ grblas_mask_type = None
+ self.__init__(
+ da.core.elemwise(
+ _update_assign,
+ delayed,
+ accum,
+ delayed_mask,
+ grblas_mask_type,
+ replace,
+ expr_delayed,
+ dtype=np_dtype(self._meta.dtype),
)
+ )
elif typ is GbDelayed:
# v(mask=mask) << left.ewise_mult(right)
# Meta check handled in Updater
@@ -767,6 +718,80 @@ def persist(self, *args, **kwargs):
else:
raise TypeError(f'Something went wrong: {self} cannot be "persisted".')
+ @classmethod
+ def multi_access(cls, out_meta, func, *args, **kwargs):
+ def adaptor(func, ts, cs, ss, vs, kwargs_desc, *args, **kwargs):
+ args_ = ()
+ for arg, t, c, s, v in zip(args, ts, cs, ss, vs):
+ if t:
+ arg = arg.T
+ if s:
+ arg = arg.S
+ if v:
+ arg = arg.V
+ if c:
+ arg = arg.__invert__()
+ args_ += (arg,)
+
+ kwargs_ = kwargs.copy()
+ for k in kwargs:
+ t, c, s, v = kwargs_desc[k]
+ if t:
+ kwargs_[k] = kwargs_[k].T
+ if s:
+ kwargs_[k] = kwargs_[k].S
+ if v:
+ kwargs_[k] = kwargs_[k].V
+ if c:
+ kwargs_[k] = kwargs_[k].__invert__()
+
+ return func(*args_, **kwargs_)
+
+ _args = [getattr(arg, "dOnion_if", arg) for arg in args]
+ ts = [
+ getattr(arg, "is_dOnion", False) and getattr(arg, "_is_transposed", False)
+ for arg in args
+ ]
+ cs = [
+ getattr(arg, "is_dOnion", False)
+ and isinstance(arg, Mask)
+ and getattr(arg, "complement", False)
+ for arg in args
+ ]
+ ss = [
+ getattr(arg, "is_dOnion", False)
+ and isinstance(arg, Mask)
+ and getattr(arg, "structure", False)
+ for arg in args
+ ]
+ vs = [
+ getattr(arg, "is_dOnion", False)
+ and isinstance(arg, Mask)
+ and getattr(arg, "value", False)
+ for arg in args
+ ]
+
+ _kwargs = {k: getattr(arg, "dOnion_if", arg) for k, arg in kwargs.items()}
+
+ kwargs_desc = {
+ k: (
+ getattr(arg, "is_dOnion", False) and getattr(arg, "_is_transposed", False),
+ getattr(arg, "is_dOnion", False)
+ and isinstance(arg, Mask)
+ and getattr(arg, "complement", False),
+ getattr(arg, "is_dOnion", False)
+ and isinstance(arg, Mask)
+ and getattr(arg, "structure", False),
+ getattr(arg, "is_dOnion", False)
+ and isinstance(arg, Mask)
+ and getattr(arg, "value", False),
+ )
+ for k, arg in kwargs.items()
+ }
+ return DOnion.multiple_access(
+ out_meta, adaptor, func, ts, cs, ss, vs, kwargs_desc, *_args, **_kwargs
+ )
+
@classmethod
def multiple_access(cls, out_meta, func, *args, **kwargs):
"""
@@ -819,19 +844,18 @@ def _getattr(cls, x, attr_name, *args, **kwargs):
is_DOnion = partial(is_type, DOnion)
-def like_DOnion(what):
- return (
- is_DOnion(what)
- or isinstance(what, BaseType)
- and is_DOnion(what._delayed)
- or hasattr(what, "_matrix")
- and is_DOnion(what._matrix)
- or hasattr(what, "parent")
- and is_DOnion(what.parent)
- or hasattr(what, "mask")
- and is_DOnion(what.mask)
- or hasattr(what, "_donion")
- and is_DOnion(what._donion)
+def like_dOnion(arg):
+ return arg is not None and (
+ is_DOnion(arg)
+ or getattr(arg, "is_dOnion", False)
+ or getattr(arg, "has_dOnion", False)
+ or type(arg) is tuple
+ )
+
+
+def any_dOnions(*args, **kwargs):
+ return np.any([like_dOnion(arg) for arg in args]) or np.any(
+ [like_dOnion(v) for _, v in kwargs.items()]
)
@@ -847,13 +871,17 @@ def _dup(x, mask, dtype, mask_type):
return wrap_inner(x.value.dup(dtype=dtype, mask=mask))
-def _isclose(x, y, rel_tol, abs_tol, check_dtype):
- val = x.value.isclose(y.value, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype)
+def _isclose(xt, yt, x, y, rel_tol, abs_tol, check_dtype):
+ x_ = x.value.T if xt else x.value
+ y_ = y.value.T if yt else y.value
+ val = x_.isclose(y_, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype)
return _reduction_value(x, val)
-def _isequal(x, y, check_dtype):
- val = x.value.isequal(y.value, check_dtype=check_dtype)
+def _isequal(xt, yt, x, y, check_dtype):
+ x_ = x.value.T if xt else x.value
+ y_ = y.value.T if yt else y.value
+ val = x_.isequal(y_, check_dtype=check_dtype)
return _reduction_value(x, val)
@@ -888,5 +916,5 @@ def _update_assign(updating, accum, mask, mask_type, replace, x):
return updating
-from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater # noqa isort: skip
+from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, _is_pair # noqa isort: skip
from .matrix import TransposedMatrix # noqa isort: skip
diff --git a/dask_grblas/expr.py b/dask_grblas/expr.py
index 1c07374..9d6c7f6 100644
--- a/dask_grblas/expr.py
+++ b/dask_grblas/expr.py
@@ -9,7 +9,7 @@
from grblas.exceptions import DimensionMismatch
from dask.base import tokenize
-from .base import BaseType, InnerBaseType, _check_mask, DOnion, is_DOnion, like_DOnion
+from .base import BaseType, InnerBaseType, _check_mask, DOnion, is_DOnion, any_dOnions
from .mask import Mask
from .utils import (
get_grblas_type,
@@ -233,22 +233,13 @@ def new(self, dtype=None, *, mask=None, name=None):
if mask is not None:
_check_mask(mask)
- if self.has_dOnion or mask is not None and mask.is_dOnion:
+ if any_dOnions(self, mask):
- def GbDelayed_new(p, pt, m, t, *args, dtype=None, mask=None, **kwargs):
- p = p.T if pt else p
- args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ def recall_GbDelayed_new(p, m, *args, dtype=None, mask=None, **kwargs):
gbd = getattr(p, m)(*args, **kwargs)
return gbd.new(dtype=dtype, mask=mask)
- gbd_args = tuple(getattr(x, "dOnion_if", x) for x in self.args)
- is_T = tuple(
- getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
- for x in self.args
- )
- gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in self.kwargs.items()}
meta_kwargs = {k: getattr(v, "_meta", v) for k, v in self.kwargs.items()}
-
if self.method_name.startswith(("reduce", "apply")):
# unary operations
a = self.parent
@@ -299,17 +290,15 @@ def GbDelayed_new(p, pt, m, t, *args, dtype=None, mask=None, **kwargs):
dtype=dtype
)
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
meta,
- GbDelayed_new,
- a.dOnion_if,
- a.is_dOnion and getattr(a, "_is_transposed", False),
+ recall_GbDelayed_new,
+ self.parent,
self.method_name,
- is_T,
- *gbd_args,
+ *self.args,
dtype=dtype,
- mask=None if mask is None else mask.dOnion_if,
- **gbd_kwargs,
+ mask=mask,
+ **self.kwargs,
)
return get_return_type(meta)(donion, meta=meta)
@@ -667,9 +656,17 @@ def __init__(self, parent, *, mask=None, accum=None, replace=False, input_mask=N
if input_mask is not None and not isinstance(input_mask, Mask):
raise TypeError(r"Mask must indicate values (M.V) or structure (M.S)")
+ self.has_dOnion = any_dOnions(parent, mask, input_mask)
self.parent = parent
+
self.mask = mask
+ if parent is getattr(mask, "mask", None):
+ self.mask = type(mask)(mask.mask.dup())
+
self.input_mask = input_mask
+ if parent is getattr(input_mask, "mask", None):
+ self.input_mask = type(input_mask)(input_mask.mask.dup())
+
self.accum = accum
if mask is None:
self.replace = None
@@ -739,7 +736,7 @@ def update(self, delayed):
if self.mask is None and self.accum is None:
return self.parent.update(delayed)
- if not (like_DOnion(self.parent) or like_DOnion(delayed)):
+ if not any_dOnions(self.parent, delayed):
self.parent._meta._update(
get_meta(delayed),
mask=get_meta(self.mask),
@@ -1377,21 +1374,18 @@ def __init__(self, parent, index, meta=None):
self.parent = parent
self.index = index
input_ndim = parent.ndim
- self.keys_0_is_dOnion = input_ndim == 1 and is_DOnion(index)
- self.keys_1_is_dOnion = (
- input_ndim == 2
- and type(index) is tuple
- and len(index) == 2
- and (is_DOnion(index[0]) or is_DOnion(index[1]))
+ keys_0_is_dOnion = input_ndim == 1 and is_DOnion(index)
+ keys_1_is_dOnion = (
+ input_ndim == 2 and _is_pair(index) and (is_DOnion(index[0]) or is_DOnion(index[1]))
)
- if parent.is_dOnion or self.keys_0_is_dOnion or self.keys_1_is_dOnion:
+ if parent.is_dOnion or keys_0_is_dOnion or keys_1_is_dOnion:
+ self.has_dOnion = True
IndexerResolver(self.parent, index, check_shape=False)
self._meta = _adjust_meta_to_index(parent._meta, index)
- self.has_dOnion = True
else:
+ self.has_dOnion = False
self.resolved_indices = IndexerResolver(parent, index)
self._meta = parent._meta[index] if meta is None else meta
- self.has_dOnion = False
# infix expression requirements:
shape = tuple(i.size for i in self.resolved_indices.indices if i.size)
self.ndim = len(shape)
@@ -1403,9 +1397,9 @@ def __init__(self, parent, index, meta=None):
self._ncols = shape[1]
def new(self, *, dtype=None, mask=None, input_mask=None, name=None):
- def getitem(parent, at, keys_0, keys_1, dtype, mask, input_mask):
+ def _recall_getitem(parent, keys_0, keys_1, dtype, mask, input_mask):
keys = keys_0 if keys_1 is None else (keys_0, keys_1)
- return AmbiguousAssignOrExtract(parent.T if at else parent, keys).new(
+ return AmbiguousAssignOrExtract(parent, keys).new(
dtype=dtype, mask=mask, input_mask=input_mask
)
@@ -1414,31 +1408,22 @@ def getitem(parent, at, keys_0, keys_1, dtype, mask, input_mask):
if input_mask is not None:
_check_mask(input_mask)
- mask_is_DOnion = mask is not None and mask.is_dOnion
- input_mask_is_DOnion = input_mask is not None and input_mask.is_dOnion
- if (
- self.parent.is_dOnion
- or self.keys_0_is_dOnion
- or self.keys_1_is_dOnion
- or mask_is_DOnion
- or input_mask_is_DOnion
- ):
+ if any_dOnions(self, mask, input_mask):
meta = self._meta.new(dtype=dtype)
- if type(self.index) is tuple and len(self.index) == 2:
+ if _is_pair(self.index):
keys_0, keys_1 = self.index[0], self.index[1]
else:
keys_0, keys_1 = self.index, None
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
meta,
- getitem,
- self.parent.dOnion_if,
- self.parent.is_dOnion and getattr(self.parent, "_is_transposed", False),
+ _recall_getitem,
+ self.parent,
*(keys_0, keys_1),
dtype=dtype,
- mask=None if mask is None else mask.dOnion_if,
- input_mask=None if input_mask is None else input_mask.dOnion_if,
+ mask=mask,
+ input_mask=input_mask,
)
return get_return_type(meta)(donion)
@@ -1630,7 +1615,7 @@ def update(self, obj):
if getattr(self.parent, "_is_transposed", False):
raise TypeError("'TransposedMatrix' object does not support item assignment")
- if is_DOnion(self.parent._delayed):
+ if self.parent.is_dOnion:
self.parent.__setitem__(self.index, obj)
return
@@ -1697,60 +1682,46 @@ def __init__(self, updater, index, subassign=False):
self.subassign = subassign
input_ndim = self.parent.ndim
- self.keys_0_is_dOnion = input_ndim == 1 and is_DOnion(index)
- self.keys_1_is_dOnion = (
- input_ndim == 2
- and type(index) is tuple
- and len(index) == 2
- and (is_DOnion(index[0]) or is_DOnion(index[1]))
- )
- if self.parent.is_dOnion or self.keys_0_is_dOnion or self.keys_1_is_dOnion:
+ keys_0_is_dOnion = input_ndim == 1 and is_DOnion(index)
+ keys_1_is_dOnion = _is_pair(index) and (is_DOnion(index[0]) or is_DOnion(index[1]))
+ if self.updater.has_dOnion or keys_0_is_dOnion or keys_1_is_dOnion:
+ self.has_dOnion = True
IndexerResolver(self.parent, index, check_shape=False)
self.index = index
else:
+ self.has_dOnion = False
self.resolved_indices = IndexerResolver(self.parent, index).indices
self.index = tuple(i.index for i in self.resolved_indices)
def update(self, obj):
- def setitem(lhs, mask, accum, replace, keys_0, keys_1, obj, ot, subassign, in_dOnion=False):
+ def _recall_update(lhs, mask, accum, replace, keys_0, keys_1, obj, subassign):
keys = (keys_0,) if keys_1 is None else (keys_0, keys_1)
updater = Updater(lhs, mask=mask, accum=accum, replace=replace)
- Assigner(updater, keys, subassign=subassign).update(obj.T if ot else obj)
- if in_dOnion:
- return lhs
-
- # check for dOnions:
- lhs = self.parent
- updater = self.updater
- if (
- lhs.is_dOnion
- or updater.mask is not None
- and updater.mask.is_dOnion
- or self.keys_0_is_dOnion
- or self.keys_1_is_dOnion
- or getattr(obj, "is_dOnion", False)
- ):
- lhs_ = lhs.__class__(lhs._delayed, meta=lhs._meta)
- mask = None if updater.mask is None else updater.mask.dOnion_if
+ Assigner(updater, keys, subassign=subassign).update(obj)
+ return lhs
- if type(self.index) is tuple and len(self.index) == 2:
+ if any_dOnions(self, obj):
+ lhs = self.parent
+ lhs_copy = lhs.__class__(lhs._delayed, meta=lhs._meta)
+
+ updater = self.updater
+
+ if _is_pair(self.index):
keys_0, keys_1 = self.index[0], self.index[1]
else:
keys_0, keys_1 = self.index, None
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
lhs._meta,
- setitem,
- lhs_.dOnion_if,
- mask,
+ _recall_update,
+ lhs_copy,
+ updater.mask,
updater.accum,
updater.replace,
keys_0,
keys_1,
- getattr(obj, "dOnion_if", obj),
- getattr(obj, "is_dOnion", False) and getattr(obj, "_is_transposed", False),
+ obj,
self.subassign,
- in_dOnion=True,
)
lhs.__init__(donion, meta=lhs._meta)
return
@@ -2335,3 +2306,7 @@ def concatenate_fragments(frag1, frag2, axis=0, base_axis=0):
return reduce(partial(concatenate_fragments, axis=axis, base_axis=base_axis), seq_)
else:
return seq[0]
+
+
+def _is_pair(arg):
+ return type(arg) is tuple and len(arg) == 2
diff --git a/dask_grblas/mask.py b/dask_grblas/mask.py
index 715ba8f..2d86cdb 100644
--- a/dask_grblas/mask.py
+++ b/dask_grblas/mask.py
@@ -7,23 +7,19 @@ class Mask:
value = False
def __init__(self, mask):
- from . import matrix, vector, base
+ from . import matrix, vector
assert type(mask) in {vector.Vector, matrix.Matrix}
self.mask = mask
self._meta = get_grblas_type(self)(mask._meta)
- if base.is_DOnion(mask._delayed):
- self.mask = mask._delayed.deep_extract(self._meta, self.__class__)
@property
def is_dOnion(self):
- from .base import is_DOnion
-
- return is_DOnion(self.mask)
+ return getattr(self.mask, "is_dOnion", False)
@property
def dOnion_if(self):
- return self.mask if self.is_dOnion else self
+ return self.mask._delayed if self.is_dOnion else self
class StructuralMask(Mask):
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index 286cb62..a6c736b 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -970,6 +970,20 @@ def nvals(self):
def __getitem__(self, index):
return AmbiguousAssignOrExtract(self, index)
+ def isequal(self, other, *, check_dtype=False):
+ other = self._expect_type(
+ other, (Matrix, TransposedMatrix), within="isequal", argname="other"
+ )
+ return BaseType.isequal(self, other, check_dtype=check_dtype)
+
+ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
+ other = self._expect_type(
+ other, (Matrix, TransposedMatrix), within="isclose", argname="other"
+ )
+ return BaseType.isclose(
+ self, other, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype
+ )
+
# Delayed methods
ewise_add = Matrix.ewise_add
ewise_mult = Matrix.ewise_mult
@@ -982,8 +996,7 @@ def __getitem__(self, index):
reduce_scalar = Matrix.reduce_scalar
# Misc.
- isequal = Matrix.isequal
- isclose = Matrix.isclose
+ _expect_type = Matrix._expect_type
__array__ = Matrix.__array__
name = Matrix.name
diff --git a/dask_grblas/scalar.py b/dask_grblas/scalar.py
index bdd3159..3157eb8 100644
--- a/dask_grblas/scalar.py
+++ b/dask_grblas/scalar.py
@@ -3,8 +3,8 @@
import numpy as np
from dask.delayed import Delayed, delayed
-from .base import BaseType, InnerBaseType, DOnion, Box
-from .expr import AmbiguousAssignOrExtract, GbDelayed
+from .base import BaseType, InnerBaseType, DOnion, Box, any_dOnions
+from .expr import AmbiguousAssignOrExtract, GbDelayed, _is_pair
from .utils import get_meta, np_dtype
@@ -78,40 +78,26 @@ def __init__(self, delayed, meta=None):
def update(self, expr, in_DOnion=False):
typ = type(expr)
- if (
- self.is_dOnion
- or typ is AmbiguousAssignOrExtract
- and expr.has_dOnion
- or typ is GbDelayed
- and expr.has_dOnion
- or typ is Scalar
- and expr.is_dOnion
- ):
+ if any_dOnions(self, expr):
self_copy = self.__class__(self._delayed, meta=self._meta)
expr_ = expr
if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
- def update_by_aae(c, p, t, k_0, k_1):
- p = p.T if t else p
+ def update_by_aae(c, p, k_0, k_1):
keys = k_0 if k_1 is None else (k_0, k_1)
aae = AmbiguousAssignOrExtract(p, keys)
return c.update(aae, in_DOnion=True)
- aae_parent = expr_.parent.dOnion_if
- aae_parent_is_T = expr_.parent.is_dOnion and getattr(
- expr_.parent, "_is_transposed", False
- )
- if type(expr_.index) is tuple and len(expr_.index) == 2:
+ if _is_pair(expr_.index):
keys_0, keys_1 = expr_.index[0], expr_.index[1]
else:
keys_0, keys_1 = expr_.index, None
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
self._meta,
update_by_aae,
- self_copy.dOnion_if,
- aae_parent,
- aae_parent_is_T,
+ self_copy,
+ expr_.parent,
*(keys_0, keys_1),
)
self.__init__(donion, self._meta)
@@ -119,39 +105,24 @@ def update_by_aae(c, p, t, k_0, k_1):
if typ is GbDelayed and expr.has_dOnion:
- def update_by_gbd(c, t, *args, **kwargs):
- args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ def update_by_gbd(c, *args, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
return c.update(gbd, in_DOnion=True)
- gbd_parent = expr_.parent.dOnion_if
- gbd_method = expr.method_name
- gbd_args = (gbd_parent, gbd_method) + tuple(
- getattr(x, "dOnion_if", x) for x in expr.args
- )
- gbd_parent_is_T = expr_.parent.is_dOnion and getattr(
- expr_.parent, "_is_transposed", False
- )
- is_T = (gbd_parent_is_T, False) + tuple(
- getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
- for x in expr.args
- )
- gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in expr.kwargs.items()}
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
self._meta,
update_by_gbd,
- self_copy.dOnion_if,
- is_T,
- *gbd_args,
- **gbd_kwargs,
+ self_copy,
+ expr_.parent,
+ expr_.method_name,
+ *expr_.args,
+ **expr_.kwargs,
)
self.__init__(donion, self._meta)
return
- elif typ is Scalar and expr.is_dOnion:
- expr_ = expr._delayed
- donion = DOnion.multiple_access(
- self._meta, Scalar.update, self_copy.dOnion_if, expr_, in_DOnion=True
+ donion = DOnion.multi_access(
+ self._meta, Scalar.update, self_copy, expr_, in_DOnion=True
)
self.__init__(donion, self._meta)
return
@@ -186,45 +157,31 @@ def _update(self, rhs, *, accum, in_DOnion=False):
assert type(rhs) is GbDelayed
- if self.is_dOnion or rhs.parent.is_dOnion:
+ if any_dOnions(self, rhs):
self_copy = self.__class__(self._delayed, meta=self._meta)
- self_ = self_copy.dOnion_if
rhs_ = rhs
if typ is GbDelayed and rhs.has_dOnion:
- def _update_by_gbd(c, t, *args, accum=None, **kwargs):
- args = tuple(a.T if xt else a for (xt, a) in zip(t, args))
+ def _update_by_gbd(c, *args, accum=None, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
return c._update(gbd, accum=accum, in_DOnion=True)
- gbd_parent = rhs_.parent.dOnion_if
- gbd_method = rhs.method_name
- gbd_args = (gbd_parent, gbd_method) + tuple(
- getattr(x, "dOnion_if", x) for x in rhs.args
- )
- gbd_parent_is_T = rhs_.parent.is_dOnion and getattr(
- rhs_.parent, "_is_transposed", False
- )
- is_T = (gbd_parent_is_T, False) + tuple(
- getattr(x, "is_dOnion", False) and getattr(x, "_is_transposed", False)
- for x in rhs.args
- )
- gbd_kwargs = {k: getattr(v, "dOnion_if", v) for k, v in rhs.kwargs.items()}
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
self._meta,
_update_by_gbd,
- self_copy.dOnion_if,
- is_T,
- *gbd_args,
+ self_copy,
+ rhs_.parent,
+ rhs_.method_name,
+ *rhs_.args,
accum=accum,
- **gbd_kwargs,
+ **rhs_.kwargs,
)
self.__init__(donion, self._meta)
return
rhs_ = rhs.parent.dOnion_if
- donion = DOnion.multiple_access(
- self._meta, Scalar._update, self_, rhs_, accum=accum, in_DOnion=True
+        donion = DOnion.multi_access(
+ self._meta, Scalar._update, self_copy, rhs_, accum=accum, in_DOnion=True
)
self.__init__(donion, self._meta)
return
diff --git a/tests/from_grblas2/test_matrix.py b/tests/from_grblas2/test_matrix.py
index ffe12a5..7926f29 100644
--- a/tests/from_grblas2/test_matrix.py
+++ b/tests/from_grblas2/test_matrix.py
@@ -23,7 +23,7 @@
from .conftest import autocompute, compute
from dask_grblas import Matrix, Scalar, Vector # isort:skip
-from dask_grblas.base import is_DOnion, like_DOnion
+from dask_grblas.base import is_DOnion, like_dOnion
@pytest.fixture
@@ -2190,28 +2190,26 @@ def test_isclose(As, A_chunks, v):
assert C6.isclose(A, rel_tol=1e-3)
-@pytest.mark.slow
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_transpose_equals(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- data = [
- [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
- [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
- [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4],
- ]
- B = Matrix.from_values(*data)
- assert A.isequal(B.T)
- assert B.isequal(A.T)
- assert A.T.isequal(B)
- assert A.T.isequal(A.T)
- assert A.isclose(A)
- assert A.isclose(B.T)
- assert B.isclose(A.T)
- assert A.T.isclose(B)
- assert A.T.isclose(A.T)
+def test_transpose_equals(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ data = [
+ [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6],
+ [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1],
+ [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4],
+ ]
+ B = Matrix.from_values(*data)
+ assert A.isequal(B.T)
+ assert B.isequal(A.T)
+ assert A.T.isequal(B)
+ assert A.T.isequal(A.T)
+ assert A.isclose(A)
+ assert A.isclose(B.T)
+ assert B.isclose(A.T)
+ assert A.T.isclose(B)
+ assert A.T.isclose(A.T)
@pytest.mark.xfail("'Needs investigation'", strict=True)
From 89216cbca1198067aa6ab167f6881512ee6c0466 Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Fri, 11 Mar 2022 10:04:06 +0100
Subject: [PATCH 14/18] further simplified dOnion technology:
chose DOnion.multi_access() over DOnion.multiple_access()
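
DOnion.multi_access() peels dOnion-wrapped arguments down to their delayed
kernels and records, per argument, whether a transpose (`.T`), a structural
mask (`.S`), a value mask (`.V`) or a complement (`~`) has to be reapplied
once the inner values become available inside the delayed call.  The
re-wrapping step can be pictured with the following sketch (illustrative
only; `rewrap` is not a function in the code base):

    def rewrap(arg, transposed=False, structure=False, value=False, complement=False):
        # reapply, in the same order as the multi_access adaptor,
        # the wrappers that were stripped off before entering the dOnion
        if transposed:
            arg = arg.T
        if structure:
            arg = arg.S
        if value:
            arg = arg.V
        if complement:
            arg = ~arg
        return arg
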
---
dask_grblas/_ss/matrix.py | 2 +-
dask_grblas/base.py | 33 ++++----
dask_grblas/matrix.py | 156 ++++++++++++++++----------------------
dask_grblas/scalar.py | 22 +++---
dask_grblas/vector.py | 40 ++++++----
5 files changed, 118 insertions(+), 135 deletions(-)
diff --git a/dask_grblas/_ss/matrix.py b/dask_grblas/_ss/matrix.py
index d312360..ed64b4e 100644
--- a/dask_grblas/_ss/matrix.py
+++ b/dask_grblas/_ss/matrix.py
@@ -42,7 +42,7 @@ def build_scalar(
nrows=None,
ncols=None,
chunks=None,
- in_DOnion=False, # not part of the API
+ in_dOnion=False, # not part of the API
):
self._parent.build(
rows,
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 362e6aa..15b6fe8 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -316,8 +316,9 @@ def compute_and_store_nvals(self):
def nvals(self):
from .scalar import PythonScalar
- if type(self._delayed) is DOnion:
- return PythonScalar(self._delayed.nvals)
+ if self.is_dOnion:
+ donion = DOnion.multi_access(self._meta.nvals, getattr, self, "nvals")
+ return PythonScalar(donion)
delayed = da.core.elemwise(
_nvals,
@@ -348,7 +349,7 @@ def _name_html(self):
return self.name
return f"{split[0]}{split[1]}"
- def update(self, expr, in_DOnion=False):
+ def update(self, expr, in_dOnion=False):
if isinstance(expr, Number):
if self.ndim == 2:
raise TypeError(
@@ -368,7 +369,7 @@ def update(self, expr, in_DOnion=False):
def update_by_aae(c, p, k_0, k_1):
keys = k_0 if k_1 is None else (k_0, k_1)
aae = AmbiguousAssignOrExtract(p, keys)
- return c.update(aae, in_DOnion=True)
+ return c.update(aae, in_dOnion=True)
if _is_pair(expr_.index):
keys_0, keys_1 = expr_.index[0], expr_.index[1]
@@ -389,7 +390,7 @@ def update_by_aae(c, p, k_0, k_1):
def update_by_gbd(c, *args, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
- return c.update(gbd, in_DOnion=True)
+ return c.update(gbd, in_dOnion=True)
donion = DOnion.multi_access(
self._meta,
@@ -406,13 +407,13 @@ def update_by_gbd(c, *args, **kwargs):
elif typ is TransposedMatrix and expr.is_dOnion:
donion = DOnion.multi_access(
- self._meta, BaseType.update, self_copy, expr_, in_DOnion=True
+ self._meta, BaseType.update, self_copy, expr_, in_dOnion=True
)
self.__init__(donion, self._meta)
return
donion = DOnion.multi_access(
- self._meta, BaseType.update, self_copy, expr_, in_DOnion=True
+ self._meta, BaseType.update, self_copy, expr_, in_dOnion=True
)
self.__init__(donion, self._meta)
return
@@ -423,7 +424,7 @@ def update_by_gbd(c, *args, **kwargs):
if isinstance(expr, Number):
Updater(self)[...] << expr
- if in_DOnion:
+ if in_dOnion:
return self.__class__(self._delayed, meta=self._meta)
return
@@ -448,10 +449,10 @@ def update_by_gbd(c, *args, **kwargs):
else:
# Anything else we need to handle?
raise TypeError()
- if in_DOnion:
+ if in_dOnion:
return self.__class__(self._delayed, meta=self._meta)
- def _update(self, expr, *, mask=None, accum=None, replace=None, in_DOnion=False):
+ def _update(self, expr, *, mask=None, accum=None, replace=None, in_dOnion=False):
typ = type(expr)
if any_dOnions(self, expr, mask):
self_copy = self.__class__(self._delayed, meta=self._meta)
@@ -462,7 +463,7 @@ def _update(self, expr, *, mask=None, accum=None, replace=None, in_DOnion=False)
def _update_by_aae(c, p, k_0, k_1, mask=None, accum=None, replace=None):
keys = k_0 if k_1 is None else (k_0, k_1)
aae = AmbiguousAssignOrExtract(p, keys)
- return c.update(aae, mask=mask, accum=accum, replace=replace, in_DOnion=True)
+ return c.update(aae, mask=mask, accum=accum, replace=replace, in_dOnion=True)
if _is_pair(expr_.index):
keys_0, keys_1 = expr_.index[0], expr_.index[1]
@@ -486,7 +487,7 @@ def _update_by_aae(c, p, k_0, k_1, mask=None, accum=None, replace=None):
def _update_by_gbd(c, *args, mask=None, accum=None, replace=None, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
- return c._update(gbd, mask=mask, accum=accum, replace=replace, in_DOnion=True)
+ return c._update(gbd, mask=mask, accum=accum, replace=replace, in_dOnion=True)
donion = DOnion.multi_access(
self._meta,
@@ -511,7 +512,7 @@ def _update_by_gbd(c, *args, mask=None, accum=None, replace=None, **kwargs):
mask=mask_,
accum=accum,
replace=replace,
- in_DOnion=True,
+ in_dOnion=True,
)
self.__init__(donion, meta=self._meta)
return
@@ -522,7 +523,7 @@ def _update_by_gbd(c, *args, mask=None, accum=None, replace=None, **kwargs):
if mask is None and accum is None:
self.update(expr)
- if in_DOnion:
+ if in_dOnion:
return self
return
if typ is AmbiguousAssignOrExtract:
@@ -578,7 +579,7 @@ def _update_by_gbd(c, *args, mask=None, accum=None, replace=None, **kwargs):
else:
raise NotImplementedError(f"{typ}")
- if in_DOnion:
+ if in_dOnion:
return self.__class__(self._delayed, meta=self._meta)
def wait(self):
@@ -723,6 +724,8 @@ def multi_access(cls, out_meta, func, *args, **kwargs):
def adaptor(func, ts, cs, ss, vs, kwargs_desc, *args, **kwargs):
args_ = ()
for arg, t, c, s, v in zip(args, ts, cs, ss, vs):
+ if type(arg) is Box:
+ arg = arg.content
if t:
arg = arg.T
if s:
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index a6c736b..5a27ea4 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -10,7 +10,7 @@
from grblas.dtypes import lookup_dtype
from grblas.exceptions import IndexOutOfBound, EmptyObject, DimensionMismatch
-from .base import BaseType, InnerBaseType, DOnion, is_DOnion, Box, skip
+from .base import BaseType, InnerBaseType, DOnion, is_DOnion, any_dOnions, Box, skip
from .base import _nvals as _nvals_in_chunk
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater
from .mask import StructuralMask, ValueMask
@@ -137,28 +137,35 @@ def from_values(
elif hasattr(values, "dtype"):
dtype = lookup_dtype(values.dtype if dtype is None else dtype)
- meta = gb.Matrix.new(dtype, nrows=nrows or 0, ncols=ncols or 0)
-
- # check for any DOnions:
- pkd_args = pack_args(rows, columns, values)
- pkd_kwargs = pack_kwargs(
- nrows=nrows, ncols=ncols, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name
+ meta = gb.Matrix.new(
+ dtype,
+ nrows=nrows if isinstance(nrows, Number) else 0,
+ ncols=ncols if isinstance(ncols, Number) else 0,
)
- donions = [True for arg in pkd_args if is_DOnion(arg)]
- donions += [True for (k, v) in pkd_kwargs.items() if is_DOnion(v)]
- if np.any(donions):
- # dive into DOnion(s):
- out_donion = DOnion.multiple_access(meta, Matrix.from_values, *pkd_args, **pkd_kwargs)
+
+ # check for any dOnions:
+ args = pack_args(rows, columns, values, nrows, ncols)
+ kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
+ if any_dOnions(*args, **kwargs):
+ # dive into dOnion(s):
+ out_donion = DOnion.multi_access(meta, Matrix.from_values, *args, **kwargs)
return Matrix(out_donion, meta=meta)
- # no DOnions
- if (
- type(rows) is da.Array
- and type(columns) is da.Array
- and (type(values) is da.Array or isinstance(values, Number))
- ):
+ # no dOnions
+ if type(rows) is da.Array or type(columns) is da.Array or type(values) is da.Array:
+ nrows_, ncols_ = nrows, ncols
+ if type(rows) in {tuple, list, np.ndarray}:
+ nrows_ = nrows or (np.max(rows) + 1)
+ rows = da.asarray(rows)
+ if type(columns) in {tuple, list, np.ndarray}:
+ ncols_ = ncols or (np.max(columns) + 1)
+ columns = da.asarray(columns)
+ if type(values) in {tuple, list, np.ndarray}:
+ values = da.asarray(values)
+
np_idtype_ = np_dtype(lookup_dtype(rows.dtype))
- if nrows is not None and ncols is not None:
+ if isinstance(nrows_, Integral) and isinstance(ncols_, Integral):
+ nrows, ncols = nrows_, ncols_
chunks = da.core.normalize_chunks(chunks, (nrows, ncols), dtype=np_idtype_)
else:
if nrows is None and rows.size == 0:
@@ -185,13 +192,16 @@ def from_values(
if columns.dtype.kind not in "ui":
raise ValueError(f"columns must be integers, not {columns.dtype}")
+ nrows = nrows_
if nrows is None:
nrows = da.max(rows) + np.asarray(1, dtype=rows.dtype)
+ ncols = ncols_
if ncols is None:
ncols = da.max(columns) + np.asarray(1, dtype=columns.dtype)
- # use the inner value of `nrows` or `ncols` to create the new Matrix:
+ # Create dOnion from `nrows` and/or `ncols`, that is,
+ # use the inner value of `nrows` and/or `ncols` to create the new Matrix:
shape = (nrows, ncols)
_shape = [skip if is_dask_collection(x) else x for x in shape]
dasks = [x for x in shape if is_dask_collection(x)]
@@ -200,6 +210,7 @@ def from_values(
donion = DOnion.sprout(dasks, meta, Matrix.from_values, *args, **kwargs)
return Matrix(donion, meta=meta)
+ # output shape `(nrows, ncols)` is completely determined
vdtype = dtype
np_vdtype_ = np_dtype(vdtype)
@@ -253,27 +264,25 @@ def build(
nrows=None,
ncols=None,
chunks=None,
- in_DOnion=False, # not part of the API
+ in_dOnion=False, # not part of the API
):
if not clear and self._nvals != 0:
raise gb.exceptions.OutputNotEmpty()
+ # TODO: delayed nrows/ncols
nrows = nrows or self._nrows
ncols = ncols or self._ncols
meta = self._meta
meta.resize(nrows, ncols)
# check for any DOnions:
- self_ = self._delayed if is_DOnion(self._delayed) else self
- pkd_args = pack_args(self_, rows, columns, values)
- pkd_kwargs = pack_kwargs(
- dup_op=dup_op, clear=clear, nrows=nrows, ncols=ncols, chunks=chunks, in_DOnion=True
+ args = pack_args(self, rows, columns, values)
+ kwargs = pack_kwargs(
+ dup_op=dup_op, clear=clear, nrows=nrows, ncols=ncols, chunks=chunks, in_dOnion=True
)
- donions = [True for arg in pkd_args if is_DOnion(arg)]
- donions += [True for (k, v) in pkd_kwargs.items() if is_DOnion(v)]
- if np.any(donions):
+ if any_dOnions(*args, **kwargs):
# dive into DOnion(s):
- out_donion = DOnion.multiple_access(meta, Matrix.build, *pkd_args, **pkd_kwargs)
+ out_donion = DOnion.multi_access(meta, Matrix.build, *args, **kwargs)
self.__init__(out_donion, meta=meta)
return
@@ -287,17 +296,17 @@ def build(
self.rechunk(inplace=True, chunks=chunks)
x = self._optional_dup()
- if type(rows) is list:
+ if type(rows) in {tuple, list, np.ndarray}:
if np.max(rows) >= self._nrows:
raise gb.exceptions.IndexOutOfBound
rows = da.core.from_array(np.array(rows), name="rows-" + tokenize(rows))
- if type(columns) is list:
+ if type(columns) in {tuple, list, np.ndarray}:
if np.max(columns) >= self._ncols:
raise gb.exceptions.IndexOutOfBound
columns = da.core.from_array(np.array(columns), name="columns-" + tokenize(columns))
- if type(values) is list:
+ if type(values) in {tuple, list, np.ndarray}:
values = da.core.from_array(np.array(values), name="values-" + tokenize(values))
if type(values) is da.Array and (rows.size != columns.size or columns.size != values.size):
@@ -350,15 +359,15 @@ def build(
dtype=np_vdtype_,
meta=meta,
)
- if in_DOnion:
+ if in_dOnion:
return Matrix(delayed)
self.__init__(delayed)
@classmethod
def new(cls, dtype, nrows=0, ncols=0, *, chunks="auto", name=None):
- if is_DOnion(nrows) or is_DOnion(ncols):
+ if any_dOnions(nrows, ncols):
meta = gb.Matrix.new(dtype)
- donion = DOnion.multiple_access(
+ donion = DOnion.multi_access(
meta, cls.new, dtype, nrows=nrows, ncols=ncols, chunks=chunks, name=name
)
return Matrix(donion, meta=meta)
@@ -440,24 +449,24 @@ def T(self):
@property
def nrows(self):
- if is_DOnion(self._delayed):
- return self._delayed.nrows
+ if self.is_dOnion:
+ return DOnion.multi_access(self._meta.nrows, getattr, self, "nrows")
return self._meta.nrows
@property
def ncols(self):
- if is_DOnion(self._delayed):
- return self._delayed.ncols
+ if self.is_dOnion:
+ return DOnion.multi_access(self._meta.ncols, getattr, self, "ncols")
return self._meta.ncols
@property
def shape(self):
- if is_DOnion(self._delayed):
- return self._delayed.shape
- return (self._meta.nrows, self._meta.ncols)
+ if self.is_dOnion:
+ return DOnion.multi_access(self._meta.shape, getattr, self, "shape")
+ return self._meta.shape
def resize(self, nrows, ncols, inplace=True, chunks="auto"):
- if is_DOnion(self._delayed):
+ if self.is_dOnion:
donion = self._delayed.getattr(
self._meta, "resize", nrows, ncols, inplace=False, chunks=chunks
)
@@ -507,7 +516,7 @@ def resize(self, nrows, ncols, inplace=True, chunks="auto"):
return Matrix(x, nvals=nvals)
def rechunk(self, inplace=False, chunks="auto"):
- if is_DOnion(self._delayed):
+ if self.is_dOnion:
meta = self._meta
donion = self._delayed.getattr(meta, "rechunk", inplace=False, chunks=chunks)
if inplace:
@@ -628,21 +637,21 @@ def _diag_old(self, k=0, dtype=None, chunks="auto"):
def __getitem__(self, index):
return AmbiguousAssignOrExtract(self, index)
- def __delitem__(self, keys, in_DOnion=False):
+ def __delitem__(self, keys, in_dOnion=False):
if is_DOnion(self._delayed):
good_keys = [x for x in keys if isinstance(x, Integral)]
if len(good_keys) != 2:
raise TypeError("Remove Element only supports scalars.")
- donion = self._delayed.getattr(self._meta, "__delitem__", keys, in_DOnion=True)
+ donion = self._delayed.getattr(self._meta, "__delitem__", keys, in_dOnion=True)
self.__init__(donion, meta=self._meta)
return
del Updater(self)[keys]
- if in_DOnion:
+ if in_dOnion:
return self
- def __setitem__(self, index, delayed, in_DOnion=False):
+ def __setitem__(self, index, delayed, in_dOnion=False):
Updater(self)[index] = delayed
def __contains__(self, index):
@@ -877,16 +886,9 @@ def dOnion_if(self):
return self._matrix._delayed if self.is_dOnion else self
def new(self, *, dtype=None, mask=None):
- mask_is_DOnion = mask is not None and mask.is_dOnion
- if self.is_dOnion or mask_is_DOnion:
-
- def T(matrix, dtype=None, mask=None):
- return TransposedMatrix(matrix).new(dtype=dtype, mask=mask)
-
- _matrix = self._matrix._delayed if self.is_dOnion else self._matrix
- mask = mask.mask if mask_is_DOnion else mask
- donion = DOnion.multiple_access(
- self._meta.new(dtype), T, _matrix, dtype=dtype, mask=mask
+ if any_dOnions(self, mask):
+ donion = DOnion.multi_access(
+ self._meta.new(dtype), self.__class__.new, self, dtype=dtype, mask=mask
)
return Matrix(donion)
@@ -933,40 +935,6 @@ def to_values(self, dtype=None, chunks="auto"):
return cols, rows, vals
# Properties
- @property
- def nrows(self):
- if self.is_dOnion:
- return DOnion.multiple_access(
- self._meta.nrows, lambda x: x.ncols, self._matrix._delayed
- )
- return self._meta.nrows
-
- @property
- def ncols(self):
- if self.is_dOnion:
- return DOnion.multiple_access(
- self._meta.ncols, lambda x: x.nrows, self._matrix._delayed
- )
- return self._meta.ncols
-
- @property
- def shape(self):
- if self.is_dOnion:
-
- def shape(matrix):
- return matrix.shape[::-1]
-
- return DOnion.multiple_access(self._meta.shape, shape, self._matrix._delayed)
- return self._meta.shape
-
- @property
- def nvals(self):
- if self.is_dOnion:
- return DOnion.multiple_access(
- self._meta.nvals, lambda x: x.nvals, self._matrix._delayed
- )
- return self._meta.nvals
-
def __getitem__(self, index):
return AmbiguousAssignOrExtract(self, index)
@@ -996,6 +964,10 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
reduce_scalar = Matrix.reduce_scalar
# Misc.
+ nrows = Matrix.nrows
+ ncols = Matrix.ncols
+ shape = Matrix.shape
+ nvals = Matrix.nvals
_expect_type = Matrix._expect_type
__array__ = Matrix.__array__
name = Matrix.name
diff --git a/dask_grblas/scalar.py b/dask_grblas/scalar.py
index 3157eb8..81be48b 100644
--- a/dask_grblas/scalar.py
+++ b/dask_grblas/scalar.py
@@ -76,7 +76,7 @@ def __init__(self, delayed, meta=None):
self._meta = meta
self.dtype = meta.dtype
- def update(self, expr, in_DOnion=False):
+ def update(self, expr, in_dOnion=False):
typ = type(expr)
if any_dOnions(self, expr):
self_copy = self.__class__(self._delayed, meta=self._meta)
@@ -86,7 +86,7 @@ def update(self, expr, in_DOnion=False):
def update_by_aae(c, p, k_0, k_1):
keys = k_0 if k_1 is None else (k_0, k_1)
aae = AmbiguousAssignOrExtract(p, keys)
- return c.update(aae, in_DOnion=True)
+ return c.update(aae, in_dOnion=True)
if _is_pair(expr_.index):
keys_0, keys_1 = expr_.index[0], expr_.index[1]
@@ -107,7 +107,7 @@ def update_by_aae(c, p, k_0, k_1):
def update_by_gbd(c, *args, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
- return c.update(gbd, in_DOnion=True)
+ return c.update(gbd, in_dOnion=True)
donion = DOnion.multi_access(
self._meta,
@@ -122,7 +122,7 @@ def update_by_gbd(c, *args, **kwargs):
return
donion = DOnion.multi_access(
- self._meta, Scalar.update, self_copy, expr_, in_DOnion=True
+ self._meta, Scalar.update, self_copy, expr_, in_dOnion=True
)
self.__init__(donion, self._meta)
return
@@ -146,10 +146,10 @@ def update_by_gbd(c, *args, **kwargs):
else:
# Try simple assignment (s << 1)
self.value = expr
- if in_DOnion:
+ if in_dOnion:
return self.__class__(self._delayed, meta=self._meta)
- def _update(self, rhs, *, accum, in_DOnion=False):
+ def _update(self, rhs, *, accum, in_dOnion=False):
# s(accum=accum) << v.reduce()
typ = type(rhs)
if typ is Box:
@@ -164,7 +164,7 @@ def _update(self, rhs, *, accum, in_DOnion=False):
def _update_by_gbd(c, *args, accum=None, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
- return c._update(gbd, accum=accum, in_DOnion=True)
+ return c._update(gbd, accum=accum, in_dOnion=True)
donion = DOnion.multi_access(
self._meta,
@@ -181,13 +181,13 @@ def _update_by_gbd(c, *args, accum=None, **kwargs):
rhs_ = rhs.parent.dOnion_if
donion = DOnion.mult_access(
- self._meta, Scalar._update, self_copy, rhs_, accum=accum, in_DOnion=True
+ self._meta, Scalar._update, self_copy, rhs_, accum=accum, in_dOnion=True
)
self.__init__(donion, self._meta)
return
rhs._update(self, accum=accum)
- if in_DOnion:
+ if in_dOnion:
return self.__class__(self._delayed, meta=self._meta)
def dup(self, dtype=None, *, name=None):
@@ -293,8 +293,8 @@ def value(self):
@value.setter
def value(self, val):
- if type(self._delayed) is DOnion:
- donion = DOnion.multiple_access(self._meta, Scalar.from_value, val)
+ if any_dOnions(self, val):
+ donion = DOnion.multi_access(self._meta, Scalar.from_value, val)
self.__init__(donion, meta=self._meta)
return
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index fdf655f..8b737f6 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -2,13 +2,14 @@
import numpy as np
import grblas as gb
+from numbers import Integral
from dask.base import tokenize
from dask.delayed import Delayed, delayed
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
from grblas.exceptions import IndexOutOfBound
-from .base import BaseType, InnerBaseType, _nvals, DOnion, is_DOnion, Box
+from .base import BaseType, InnerBaseType, _nvals, DOnion, Box, any_dOnions
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, Assigner
from .mask import StructuralMask, ValueMask
from ._ss.vector import ss
@@ -117,22 +118,28 @@ def from_values(
if hasattr(values, "dtype"):
dtype = lookup_dtype(values.dtype if dtype is None else dtype)
- meta = gb.Vector.new(dtype) if size is None else gb.Vector.new(dtype, size=size)
+ meta = gb.Vector.new(dtype, size=size if isinstance(size, Integral) else 0)
# check for any DOnions:
- pkd_args = pack_args(indices, values)
- pkd_kwargs = pack_kwargs(size=size, dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
- donions = [True for arg in pkd_args if is_DOnion(arg)]
- donions += [True for (k, v) in pkd_kwargs.items() if is_DOnion(v)]
- if np.any(donions):
+ args = pack_args(indices, values, size)
+ kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name)
+ if any_dOnions(*args, **kwargs):
# dive into DOnion(s):
- out_donion = DOnion.multiple_access(meta, Vector.from_values, *pkd_args, **pkd_kwargs)
+ out_donion = DOnion.multi_access(meta, Vector.from_values, *args, **kwargs)
return Vector(out_donion, meta=meta)
# no DOnions
- if type(indices) is da.Array and type(values) is da.Array:
+ if type(indices) is da.Array or type(values) is da.Array:
+ size_ = size
+ if type(indices) in {tuple, list, np.ndarray}:
+ size_ = size if size is not None else (np.max(indices) + 1)
+ indices = da.asarray(indices)
+ if type(values) in {tuple, list, np.ndarray}:
+ values = da.asarray(values)
+
np_idtype_ = np_dtype(lookup_dtype(indices.dtype))
- if size is not None:
+ if isinstance(size_, Integral):
+ size = size_
chunks = da.core.normalize_chunks(chunks, (size,), dtype=np_idtype_)
else:
if indices.size == 0:
@@ -143,11 +150,13 @@ def from_values(
# Note: uint + int = float which numpy cannot cast to uint. So we
# ensure the same dtype for each summand here:
- size = da.max(indices) + np.asarray(1, dtype=indices.dtype)
+ size = size_
+ if size is None:
+ size = da.max(indices) + np.asarray(1, dtype=indices.dtype)
# Here `size` is a dask 0d-array whose computed value is
# used to determine the size of the Vector to be returned.
# But since we do not want to compute anything just now,
- # we instead create a "DOnion" (dask onion) object. This
+ # we instead create a "dOnion" (dask Onion) object. This
# effectively means that we will use the inner value of
# `size` to create the new Vector:
args = pack_args(indices, values)
@@ -155,6 +164,7 @@ def from_values(
donion = DOnion.sprout(size, meta, Vector.from_values, *args, **kwargs)
return Vector(donion, meta=meta)
+ # output shape `(size,)` is completely determined
if indices.size > 0:
if indices.size != values.size:
raise ValueError("`indices` and `values` lengths must match")
@@ -195,11 +205,9 @@ def from_values(
@classmethod
def new(cls, dtype, size=0, *, chunks="auto", name=None):
- if is_DOnion(size):
+ if any_dOnions(size):
meta = gb.Vector.new(dtype)
- donion = DOnion.multiple_access(
- meta, cls.new, dtype, size=size, chunks=chunks, name=name
- )
+ donion = DOnion.multi_access(meta, cls.new, dtype, size=size, chunks=chunks, name=name)
return Vector(donion, meta=meta)
if type(size) is Box:
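A rough usage sketch of the deferred-size path in `Vector.from_values` above (not part of the patch; it assumes `Vector` is importable from `dask_grblas.vector` and that `compute()` unwraps the dOnion):

    import dask.array as da
    from dask_grblas.vector import Vector

    indices = da.from_array([0, 3, 5])
    values = da.from_array([1.0, 2.0, 3.0])

    # With `size` omitted, it is derived lazily as max(indices) + 1, so the returned
    # Vector is dOnion-wrapped; the inner Vector of size 6 only exists after compute().
    v = Vector.from_values(indices, values)
    result = v.compute()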
From 1662dd1e00fbcbe6e482e2325fabdcc2a7f77cd3 Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Fri, 11 Mar 2022 19:08:07 +0100
Subject: [PATCH 15/18] covered more tests from `from_grblas2`
---
dask_grblas/base.py | 4 +-
dask_grblas/matrix.py | 24 +++++-
dask_grblas/vector.py | 7 ++
tests/from_grblas2/test_matrix.py | 119 +++++++++++++++++-------------
4 files changed, 98 insertions(+), 56 deletions(-)
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 15b6fe8..c2bd010 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -446,9 +446,11 @@ def update_by_gbd(c, *args, **kwargs):
# "C << A.T"
C = expr.new(dtype=self.dtype)
self.__init__(C._delayed)
+ elif typ is type(None):
+ raise TypeError("Assignment value must be a valid expression")
else:
# Anything else we need to handle?
- raise TypeError()
+ raise NotImplementedError()
if in_dOnion:
return self.__class__(self._delayed, meta=self._meta)
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index 5a27ea4..9713a15 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -6,6 +6,7 @@
from dask.base import tokenize, is_dask_collection
from dask.delayed import Delayed, delayed
from dask.highlevelgraph import HighLevelGraph
+from grblas import _automethods
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
from grblas.exceptions import IndexOutOfBound, EmptyObject, DimensionMismatch
@@ -51,6 +52,13 @@ class Matrix(BaseType):
ndim = 2
_is_transposed = False
+ __eq__ = gb.Matrix.__eq__
+ __ge__ = gb.Matrix.__ge__
+ __gt__ = gb.Matrix.__gt__
+ __le__ = gb.Matrix.__le__
+ __lt__ = gb.Matrix.__lt__
+ __ne__ = gb.Matrix.__ne__
+
@classmethod
def from_delayed(cls, matrix, dtype, nrows, ncols, *, nvals=None, name=None):
if not isinstance(matrix, Delayed):
@@ -868,6 +876,13 @@ class TransposedMatrix:
ndim = 2
_is_transposed = True
+ __eq__ = gb.matrix.TransposedMatrix.__eq__
+ __ge__ = gb.matrix.TransposedMatrix.__ge__
+ __gt__ = gb.matrix.TransposedMatrix.__gt__
+ __le__ = gb.matrix.TransposedMatrix.__le__
+ __lt__ = gb.matrix.TransposedMatrix.__lt__
+ __ne__ = gb.matrix.TransposedMatrix.__ne__
+
def __init__(self, matrix, meta=None):
assert type(matrix) is Matrix
self._matrix = matrix
@@ -885,6 +900,9 @@ def is_dOnion(self):
def dOnion_if(self):
return self._matrix._delayed if self.is_dOnion else self
+ def dup(self, dtype=None, *, mask=None, name=None):
+ return self.new(dtype=dtype, mask=mask)
+
def new(self, *, dtype=None, mask=None):
if any_dOnions(self, mask):
donion = DOnion.multi_access(
@@ -897,12 +915,12 @@ def new(self, *, dtype=None, mask=None):
delayed = self._matrix._delayed
if mask is None:
- mask_ind = None
mask_type = None
+ mask_ind = None
else:
- mask = mask.mask
- mask_ind = "ji"
mask_type = get_grblas_type(mask)
+ mask = mask.mask._delayed
+ mask_ind = "ji"
delayed = da.core.blockwise(
*(_transpose, "ji"),
*(delayed, "ij"),
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 8b737f6..3c25300 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -83,6 +83,13 @@ class Vector(BaseType):
__slots__ = ("ss",)
ndim = 1
+ __eq__ = gb.Vector.__eq__
+ __ge__ = gb.Vector.__ge__
+ __gt__ = gb.Vector.__gt__
+ __le__ = gb.Vector.__le__
+ __lt__ = gb.Vector.__lt__
+ __ne__ = gb.Vector.__ne__
+
@classmethod
def from_delayed(cls, vector, dtype, size, *, nvals=None, name=None):
if not isinstance(vector, Delayed):
diff --git a/tests/from_grblas2/test_matrix.py b/tests/from_grblas2/test_matrix.py
index 7926f29..c462bf6 100644
--- a/tests/from_grblas2/test_matrix.py
+++ b/tests/from_grblas2/test_matrix.py
@@ -2212,32 +2212,43 @@ def test_transpose_equals(As, A_chunks):
assert A.T.isclose(A.T)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_transpose_exceptional():
- A = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [True, True, False, True])
- B = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [1, 2, 3, 4])
-
- with pytest.raises(TypeError, match="not callable"):
- B.T(mask=A.V) << B.ewise_mult(B, op=binary.plus)
- with pytest.raises(AttributeError):
- B(mask=A.T.V) << B.ewise_mult(B, op=binary.plus)
- with pytest.raises(AttributeError):
- B.T(mask=A.T.V) << B.ewise_mult(B, op=binary.plus)
- with pytest.raises(TypeError, match="does not support item assignment"):
- B.T[1, 0] << 10
- with pytest.raises(TypeError, match="not callable"):
- B.T[1, 0]() << 10
- with pytest.raises(TypeError, match="not callable"):
- B.T()[1, 0] << 10
- # with pytest.raises(AttributeError):
- # should use new instead--Now okay.
- assert B.T.dup().isequal(B.T.new())
- # Not exceptional, but while we're here...
- C = B.T.new(mask=A.V)
- D = B.T.new()
- D = D.dup(mask=A.V)
- assert C.isequal(D)
- assert C.isequal(Matrix.from_values([0, 0, 1], [0, 1, 1], [1, 3, 4]))
+ A0 = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [True, True, False, True])
+ B0 = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [1, 2, 3, 4])
+ A1 = Matrix.from_values(
+ da.from_array([0, 0, 1, 1]),
+ da.from_array([0, 1, 0, 1]),
+ da.from_array([True, True, False, True]),
+ )
+ B1 = Matrix.from_values(
+ da.from_array([0, 0, 1, 1]),
+ da.from_array([0, 1, 0, 1]),
+ da.from_array([1, 2, 3, 4]),
+ )
+ As, Bs = [A0, A1], [B0, B1]
+ for A in As:
+ for B in Bs:
+ with pytest.raises(TypeError, match="not callable"):
+ B.T(mask=A.V) << B.ewise_mult(B, op=binary.plus)
+ with pytest.raises(AttributeError):
+ B(mask=A.T.V) << B.ewise_mult(B, op=binary.plus)
+ with pytest.raises(AttributeError):
+ B.T(mask=A.T.V) << B.ewise_mult(B, op=binary.plus)
+ with pytest.raises(TypeError, match="does not support item assignment"):
+ B.T[1, 0] << 10
+ with pytest.raises(TypeError, match="not callable"):
+ B.T[1, 0]() << 10
+ with pytest.raises(TypeError, match="not callable"):
+ B.T()[1, 0] << 10
+ # with pytest.raises(AttributeError):
+ # should use new instead--Now okay.
+ assert B.T.dup().isequal(B.T.new())
+ # Not exceptional, but while we're here...
+ C = B.T.new(mask=A.V)
+ D = B.T.new()
+ D = D.dup(mask=A.V)
+ assert C.isequal(D)
+ assert C.isequal(Matrix.from_values([0, 0, 1], [0, 1, 1], [1, 3, 4]))
def test_nested_matrix_operations():
@@ -2253,37 +2264,41 @@ def test_bad_init():
Matrix(None, float, name="bad_matrix")
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_equals(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert (A == A).new().reduce_scalar(monoid.land)
+def test_equals(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert (A == A).new().reduce_scalar(monoid.land)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_bad_update(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- with pytest.raises(TypeError, match="Assignment value must be a valid expression"):
- A << None
+def test_bad_update(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ with pytest.raises(TypeError, match="Assignment value must be a valid expression"):
+ A << None
+ A.compute()
-def test_incompatible_shapes(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- B = A[:-1, :-1].new()
- with pytest.raises(DimensionMismatch):
- A.mxm(B)
- with pytest.raises(DimensionMismatch):
- A.ewise_add(B)
- with pytest.raises(DimensionMismatch):
- A.ewise_mult(B)
+def test_incompatible_shapes(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ B = A[:-1, :-1].new()
+ with pytest.raises(DimensionMismatch):
+ A.mxm(B)
+ A.compute()
+ A = A_.dup()
+ with pytest.raises(DimensionMismatch):
+ A.ewise_add(B)
+ A.compute()
+ A = A_.dup()
+ with pytest.raises(DimensionMismatch):
+ A.ewise_mult(B)
+ A.compute()
@pytest.mark.xfail("'Needs investigation'", strict=True)
From 80ba866abefd1244382f731cfa05b33dffe6a8c2 Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Wed, 16 Mar 2022 12:48:47 +0100
Subject: [PATCH 16/18] fixed infix/auto methods; all related tests passing for
Matrix
---
dask_grblas/_automethods.py | 21 +
dask_grblas/base.py | 69 +--
dask_grblas/expr.py | 266 ++++++++----
dask_grblas/mask.py | 4 +
dask_grblas/matrix.py | 282 ++++++++++--
dask_grblas/scalar.py | 72 +++-
dask_grblas/utils.py | 5 +
dask_grblas/vector.py | 174 +++++++-
tests/from_grblas2/conftest.py | 52 ++-
tests/from_grblas2/test_matrix.py | 693 +++++++++++++++---------------
10 files changed, 1098 insertions(+), 540 deletions(-)
create mode 100644 dask_grblas/_automethods.py
diff --git a/dask_grblas/_automethods.py b/dask_grblas/_automethods.py
new file mode 100644
index 0000000..eda04ae
--- /dev/null
+++ b/dask_grblas/_automethods.py
@@ -0,0 +1,21 @@
+from grblas import config
+
+
+def _get_value(self, attr=None, default=None):
+ if config.get("autocompute"):
+ if self._value is None:
+ self._value = self.new()
+ if getattr(self, 'is_dOnion', False):
+ self._value = self._value.strip()
+ if attr is None:
+ return self._value
+ else:
+ return getattr(self._value, attr)
+ if default is not None:
+ return default.__get__(self)
+ raise TypeError(
+ f"{attr} not enabled for objects of type {type(self)}. "
+ f"Use `.new()` to create a new {self.output_type.__name__}.\n\n"
+ "Hint: use `grblas.config.set(autocompute=True)` to enable "
+ "automatic computation of expressions."
+ )
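A minimal sketch of how the new `_get_value` hook is meant to be used (not part of the patch; it assumes `Matrix.from_values` with plain lists, as in the test updates in this series, and grblas's donfig-style `config.set`):

    import grblas as gb
    from dask_grblas.matrix import Matrix

    A = Matrix.from_values([0, 1], [0, 1], [1, 2])
    expr = A.ewise_mult(A)                 # lazy MatrixExpression

    gb.config.set(autocompute=True)        # opt in to automatic materialization
    nvals = expr._get_value("nvals")       # caches expr.new(), strips any dOnion, returns its nvals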
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index c2bd010..bdd5b6e 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -20,11 +20,12 @@ def is_type(arg_type, a):
return type(a) is arg_type
-def _check_mask(mask, output=None):
+def _check_mask(mask, output=None, ignore_None=False):
if not isinstance(mask, Mask):
if isinstance(mask, BaseType):
raise TypeError("Mask must indicate values (M.V) or structure (M.S)")
- raise TypeError(f"Invalid mask: {type(mask)}")
+ elif mask is not None or not ignore_None:
+ raise TypeError(f"Invalid mask: {type(mask)}")
if output is not None:
from .vector import Vector
@@ -49,6 +50,9 @@ def is_dOnion(self):
def dOnion_if(self):
return self._delayed if self.is_dOnion else self
+ def strip(self, *args, **kwargs):
+ return self._delayed.strip(*args, **kwargs) if self.is_dOnion else self
+
def isequal(self, other, *, check_dtype=False):
from .scalar import PythonScalar
@@ -294,6 +298,9 @@ def __call__(
__imatmul__ = gb.base.BaseType.__imatmul__
def _optional_dup(self):
+ if self.is_dOnion:
+ return DOnion.multi_access(self._meta, _dOnion_dup, self)
+
# TODO: maybe try to create an optimization pass that remove these if they are unnecessary
return da.core.elemwise(
_optional_dup,
@@ -362,7 +369,7 @@ def update(self, expr, in_dOnion=False):
)
typ = type(expr)
if any_dOnions(self, expr):
- self_copy = self.__class__(self._delayed, meta=self._meta)
+ self_copy = self.__class__(self._optional_dup(), meta=self._meta)
expr_ = expr
if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
@@ -386,7 +393,7 @@ def update_by_aae(c, p, k_0, k_1):
self.__init__(donion, self._meta)
return
- if typ is GbDelayed and expr.has_dOnion:
+ if isinstance(expr, GbDelayed) and expr.has_dOnion:
def update_by_gbd(c, *args, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
@@ -440,7 +447,7 @@ def update_by_gbd(c, *args, **kwargs):
self.__init__(expr._optional_dup())
else:
self.__init__(expr.dup(dtype=self.dtype)._delayed)
- elif typ is GbDelayed:
+ elif isinstance(expr, GbDelayed):
expr._update(self)
elif typ is TransposedMatrix:
# "C << A.T"
@@ -457,7 +464,7 @@ def update_by_gbd(c, *args, **kwargs):
def _update(self, expr, *, mask=None, accum=None, replace=None, in_dOnion=False):
typ = type(expr)
if any_dOnions(self, expr, mask):
- self_copy = self.__class__(self._delayed, meta=self._meta)
+ self_copy = self.__class__(self._optional_dup(), meta=self._meta)
mask_ = mask.dOnion_if if mask is not None else None
expr_ = expr
if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
@@ -485,7 +492,7 @@ def _update_by_aae(c, p, k_0, k_1, mask=None, accum=None, replace=None):
self.__init__(donion, self._meta)
return
- if typ is GbDelayed and expr.has_dOnion:
+ if isinstance(expr, GbDelayed) and expr.has_dOnion:
def _update_by_gbd(c, *args, mask=None, accum=None, replace=None, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
@@ -552,7 +559,7 @@ def _update_by_gbd(c, *args, mask=None, accum=None, replace=None, **kwargs):
dtype=np_dtype(self._meta.dtype),
)
)
- elif typ is GbDelayed:
+ elif isinstance(expr, GbDelayed):
# v(mask=mask) << left.ewise_mult(right)
# Meta check handled in Updater
expr._update(self, mask=mask, accum=accum, replace=replace)
@@ -591,7 +598,7 @@ def wait(self):
def compute(self, *args, **kwargs):
# kwargs['scheduler'] = 'synchronous'
val = self._delayed.compute(*args, **kwargs)
- if type(self._delayed) is DOnion:
+ if self.is_dOnion:
return val
return val.value
@@ -677,15 +684,12 @@ def sprout(cls, shroud, seed_meta, seed_func, *args, **kwargs):
def __init__(self, kernel, meta=None):
self.kernel = kernel
- # Why ._meta and .dtype? B'cos Scalar, Vector & Matrix need them
+ # Why have ._meta and .dtype attributes? Because Scalar, Vector & Matrix need them
self._meta = meta
- try:
- self.dtype = meta.dtype
- except AttributeError:
- self.dtype = type(meta)
+ self.dtype = getattr(meta, 'dtype', type(meta))
def __eq__(self, other):
- if type(other) is DOnion:
+ if like_dOnion(other):
other = other.compute()
return self.compute() == other
@@ -703,23 +707,35 @@ def compute_once(self, *args, **kwargs):
value = value.content
return value
- def persist(self, *args, **kwargs):
+ def strip(self, *args, **kwargs):
value = self.compute_once(*args, **kwargs)
- while type(value) is DOnion or (
- hasattr(value, "_delayed") and type(value._delayed) is DOnion
- ):
+ while like_dOnion(value):
if type(value) is DOnion:
value = value.compute_once(*args, **kwargs)
else:
value = value._delayed.compute_once(*args, **kwargs)
+ return value
+ def persist(self, *args, **kwargs):
+ value = self.strip(*args, **kwargs)
if hasattr(value, "persist"):
return value.persist(*args, **kwargs)
- elif hasattr(value, "_persist") and hasattr(value, "_delayed"):
+ else:
+ raise AttributeError(
+ f'Something went wrong: stripped dOnion {self} value {value} has'
+ ' no `persist()` attribute.'
+ )
+
+ def _persist(self, *args, **kwargs):
+ value = self.strip(*args, **kwargs)
+ if hasattr(value, "_persist"):
value._persist(*args, **kwargs)
return value._delayed
else:
- raise TypeError(f'Something went wrong: {self} cannot be "persisted".')
+ raise AttributeError(
+ f'Something went wrong: stripped dOnion {self} value {value} has'
+ ' no `_persist()` attribute.'
+ )
@classmethod
def multi_access(cls, out_meta, func, *args, **kwargs):
@@ -832,8 +848,10 @@ def __call__(self, *args, **kwargs):
return self.getattr(meta, "__call__", *args, **kwargs)
def __getattr__(self, item):
- # TODO: how to compute meta of attribute?!!!
- meta = getattr(self._meta, item)
+ try:
+ meta = getattr(self._meta, item) if hasattr(self._meta, item) else getattr(self.kernel, item)
+ except AttributeError:
+ raise AttributeError(f'Unable to compute meta corresponding to attribute {item}.')
_getattr = flexible_partial(getattr, skip, item)
return self.deep_extract(meta, _getattr)
@@ -854,7 +872,6 @@ def like_dOnion(arg):
is_DOnion(arg)
or getattr(arg, "is_dOnion", False)
or getattr(arg, "has_dOnion", False)
- or type(arg) is tuple
)
@@ -899,6 +916,10 @@ def _optional_dup(x):
return wrap_inner(x.value.dup())
+def _dOnion_dup(x):
+ return x.dup()
+
+
def _reduction_value(x, val):
"""Helper function used when reducing objects to scalars such as for `isclose`"""
if x.ndim == 0:
diff --git a/dask_grblas/expr.py b/dask_grblas/expr.py
index 9d6c7f6..2e936bc 100644
--- a/dask_grblas/expr.py
+++ b/dask_grblas/expr.py
@@ -8,15 +8,18 @@
from grblas.exceptions import DimensionMismatch
from dask.base import tokenize
+from dask.highlevelgraph import HighLevelGraph
from .base import BaseType, InnerBaseType, _check_mask, DOnion, is_DOnion, any_dOnions
from .mask import Mask
from .utils import (
get_grblas_type,
+ get_inner_type,
get_meta,
get_return_type,
np_dtype,
wrap_inner,
+ flatten,
build_chunk_offsets_dask_array,
build_chunk_ranges_dask_array,
build_slice_dask_array_from_chunks,
@@ -25,7 +28,7 @@
class GbDelayed:
def __init__(self, parent, method_name, *args, meta=None, **kwargs):
- self.has_dOnion = np.any([getattr(x, "is_dOnion", False) for x in (parent,) + args])
+ self.has_dOnion = any_dOnions(parent, *args)
self.parent = parent
self.method_name = method_name
self.args = args
@@ -33,13 +36,8 @@ def __init__(self, parent, method_name, *args, meta=None, **kwargs):
self._meta = meta
# InfixExpression and Aggregator requirements:
self.dtype = meta.dtype
- self.output_type = meta.output_type
- self.ndim = len(meta.shape)
- if self.ndim == 1:
- self._size = meta.size
- elif self.ndim == 2:
- self._nrows = meta.nrows
- self._ncols = meta.ncols
+ # autocompute requirements:
+ self._value = None
def _matmul(self, meta, mask=None):
left_operand = self.parent
@@ -230,8 +228,7 @@ def _aggregate(
return output
def new(self, dtype=None, *, mask=None, name=None):
- if mask is not None:
- _check_mask(mask)
+ _check_mask(mask, ignore_None=True)
if any_dOnions(self, mask):
@@ -333,15 +330,29 @@ def recall_GbDelayed_new(p, m, *args, dtype=None, mask=None, **kwargs):
)
for key in self.kwargs
}
- delayed = da.core.elemwise(
- _expr_new,
- self.method_name,
- dtype,
- grblas_mask_type,
- self_kwargs,
- self.parent._delayed,
- delayed_mask,
- *[x._delayed if isinstance(x, BaseType) else x for x in self.args],
+ pt = getattr(self.parent, '_is_transposed', False)
+ xts = [getattr(arg, '_is_transposed', False) for arg in self.args]
+ axes = 'ij' if self.parent.ndim == 2 else 'i'
+ delayed = da.core.blockwise(
+ *(partial(_expr_new, pt, xts), axes),
+ *(self.method_name, None),
+ *(dtype, None),
+ *(grblas_mask_type, None),
+ *(
+ (self.parent._matrix._delayed, axes[::-1]) if pt
+ else (self.parent._delayed, axes)
+ ),
+ *(delayed_mask, (None if mask is None else out_axes)),
+ *flatten(
+ (
+ (x._matrix._delayed, axes[::-1]) if xt
+ else (x._delayed, (None if x._is_scalar else axes))
+ )
+ if isinstance(x, BaseType) or getattr(x, '_is_transposed', False)
+ else (x, None)
+ for x, xt in zip(self.args, xts)
+ ),
+ **self_kwargs,
dtype=np_dtype(meta.dtype),
)
elif self.method_name in {"vxm", "mxv", "mxm"}:
@@ -566,7 +577,7 @@ def parse_index(self, index, typ, size, check_shape=True):
normalized = index.indices(size)
return AxisIndex(len(range(*normalized)), slice(*normalized))
else:
- return AxisIndex(None, index)
+ return AxisIndex(0, index)
elif typ in {np.ndarray, da.Array}:
if len(index.shape) != 1:
@@ -576,7 +587,7 @@ def parse_index(self, index, typ, size, check_shape=True):
return AxisIndex(index.shape[0], index)
elif is_DOnion(index):
- return AxisIndex(None, index)
+ return AxisIndex(0, index)
else:
from .scalar import Scalar
@@ -650,31 +661,37 @@ def normalize_index(cls, index, size, check_size=True):
class Updater:
+ __bool__ = gb.expr.Updater.__bool__
+ __eq__ = gb.expr.Updater.__eq__
+
def __init__(self, parent, *, mask=None, accum=None, replace=False, input_mask=None):
- if input_mask is not None and mask is not None:
+ if mask is not None and input_mask is not None:
raise TypeError("mask and input_mask arguments cannot both be given")
- if input_mask is not None and not isinstance(input_mask, Mask):
- raise TypeError(r"Mask must indicate values (M.V) or structure (M.S)")
+
+ _check_mask(mask, ignore_None=True)
+ _check_mask(input_mask, ignore_None=True)
self.has_dOnion = any_dOnions(parent, mask, input_mask)
self.parent = parent
-
self.mask = mask
+ self.input_mask = input_mask
+ self.accum = accum
+ self.replace = replace if mask is not None else None
+ self._meta = parent._meta(mask=get_meta(mask), accum=accum, replace=replace)
+
+ # copy `mask` if `parent` is the source of `mask`
if parent is getattr(mask, "mask", None):
self.mask = type(mask)(mask.mask.dup())
- self.input_mask = input_mask
+ # copy `input_mask` if `parent` is the source of `input_mask`
if parent is getattr(input_mask, "mask", None):
self.input_mask = type(input_mask)(input_mask.mask.dup())
- self.accum = accum
- if mask is None:
- self.replace = None
- else:
- self.replace = replace
- self._meta = parent._meta(mask=get_meta(mask), accum=accum, replace=replace)
# Aggregator specific attribute requirements:
- self.kwargs = {"mask": mask}
+ self.kwargs = {"mask": self.mask}
def __delitem__(self, keys):
# Occurs when user calls `del C(params)[index]`
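A short, hedged illustration of why `Updater.__init__` now duplicates a mask whose backing object is the update target itself (not part of the patch; names follow the hunks above):

    from grblas import binary
    from dask_grblas.matrix import Matrix

    C = Matrix.from_values([0, 1], [0, 1], [1, 2])

    # C.S is backed by C; the Updater copies it so the masked update does not
    # read structure from the object it is in the middle of overwriting.
    C(mask=C.S).update(C.ewise_mult(C, op=binary.times))   # usually spelled C(mask=C.S) << ...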
@@ -708,8 +725,9 @@ def update(self, delayed):
if type(delayed) is AmbiguousAssignOrExtract:
# w(input_mask) << v[index]
if self.parent is delayed.parent:
+ # replace `v` with a copy of itself if `w` is `v`
delayed.parent = delayed.parent.__class__(
- delayed.parent._delayed, delayed.parent._meta
+ delayed.parent._optional_dup(), delayed.parent._meta
)
self.parent._update(
delayed.new(mask=self.mask, input_mask=self.input_mask),
@@ -724,6 +742,7 @@ def update(self, delayed):
if isinstance(delayed, Number) or (
isinstance(delayed, BaseType) and get_meta(delayed)._is_scalar
):
+ # w(mask, accum, replace) << s
ndim = self.parent.ndim
if ndim > 0:
self.__setitem__(_squeeze((slice(None),) * ndim), delayed)
@@ -751,6 +770,11 @@ def update(self, delayed):
def _csc_chunk(row_range, col_range, indices, red_columns, track_indices=False):
+ """
+ Create a chunk of the Reduce_Assign matrix in Compressed Sparse Column (CSC) format.
+
+ (Used in `reduce_assign()`)
+ """
row_range = row_range[0]
nrows = row_range.stop - row_range.start
if type(indices[0]) is slice:
@@ -790,13 +814,14 @@ def _csc_chunk(row_range, col_range, indices, red_columns, track_indices=False):
def _fill(inner_vector, rhs):
+ # used in reduce_assign()
rhs = rhs.value if isinstance(rhs, InnerBaseType) else rhs
inner_vector.value[:] << rhs
return inner_vector
def reduce_assign(lhs, indices, rhs, dup_op="last", mask=None, accum=None, replace=False):
- # lhs(mask, accum, replace)[i] << rhs
+ # lhs(mask, accum, replace, dup_op)[i] << rhs
rhs_is_scalar = not (isinstance(rhs, BaseType) and type(rhs._meta) is gb.Vector)
if type(indices) is slice:
chunksz = "auto" if rhs_is_scalar else rhs._delayed.chunks
@@ -909,6 +934,15 @@ def _get_type_with_ndims(n):
return get_return_type(gb.Matrix.new(int))
+def _get_inner_type_with_ndims(n):
+ if n == 0:
+ return get_inner_type(gb.Scalar.new(int))
+ elif n == 1:
+ return get_inner_type(gb.Vector.new(int))
+ else:
+ return get_inner_type(gb.Matrix.new(int))
+
+
def _get_grblas_type_with_ndims(n):
if n == 0:
return gb.Scalar
@@ -1370,45 +1404,83 @@ def _adjust_meta_to_index(meta, index):
class AmbiguousAssignOrExtract:
+ __bool__ = gb.expr.AmbiguousAssignOrExtract.__bool__
+ __eq__ = gb.expr.AmbiguousAssignOrExtract.__eq__
+ __float__ = gb.expr.AmbiguousAssignOrExtract.__float__
+ __int__ = gb.expr.AmbiguousAssignOrExtract.__int__
+ __index__ = gb.expr.AmbiguousAssignOrExtract.__index__
+
def __init__(self, parent, index, meta=None):
self.parent = parent
self.index = index
input_ndim = parent.ndim
- keys_0_is_dOnion = input_ndim == 1 and is_DOnion(index)
- keys_1_is_dOnion = (
+ index_is_dOnion = input_ndim == 1 and is_DOnion(index)
+ index_is_dOnion = index_is_dOnion or (
input_ndim == 2 and _is_pair(index) and (is_DOnion(index[0]) or is_DOnion(index[1]))
)
- if parent.is_dOnion or keys_0_is_dOnion or keys_1_is_dOnion:
+ if parent.is_dOnion or index_is_dOnion:
self.has_dOnion = True
- IndexerResolver(self.parent, index, check_shape=False)
+ self.resolved_indexes = IndexerResolver(self.parent, index, check_shape=False)
self._meta = _adjust_meta_to_index(parent._meta, index)
else:
self.has_dOnion = False
- self.resolved_indices = IndexerResolver(parent, index)
+ self.resolved_indexes = IndexerResolver(parent, index)
self._meta = parent._meta[index] if meta is None else meta
- # infix expression requirements:
- shape = tuple(i.size for i in self.resolved_indices.indices if i.size)
- self.ndim = len(shape)
- self.output_type = _get_grblas_type_with_ndims(self.ndim)
- if self.ndim == 1:
- self._size = shape[0]
- elif self.ndim == 2:
- self._nrows = shape[0]
- self._ncols = shape[1]
- def new(self, *, dtype=None, mask=None, input_mask=None, name=None):
- def _recall_getitem(parent, keys_0, keys_1, dtype, mask, input_mask):
- keys = keys_0 if keys_1 is None else (keys_0, keys_1)
- return AmbiguousAssignOrExtract(parent, keys).new(
- dtype=dtype, mask=mask, input_mask=input_mask
- )
+ # infix expression requirements:
+ shape = tuple(i.size for i in self.resolved_indexes.indices if i.size)
+ self.ndim = len(shape)
+ self.output_type = _get_grblas_type_with_ndims(self.ndim)
+ if self.ndim == 1:
+ self._size = shape[0]
+ elif self.ndim == 2:
+ self._nrows = shape[0]
+ self._ncols = shape[1]
+
+ @staticmethod
+ def _extract_single_element(x, xt, T, dxn, indices, meta, dtype):
+
+ def getitem(inner, key, dtype):
+ return wrap_inner(inner.value[key].new(dtype=dtype))
+
+ name = "extract_single_element-" + tokenize(x, xt, indices)
+
+ block = ()
+ element = ()
+ for axis, i in enumerate(indices):
+ stops_ = np.cumsum(x.chunks[T[axis]])
+ starts = np.roll(stops_, 1)
+ starts[0] = 0
+
+ blockid = np.arange(x.numblocks[T[axis]])
+
+ # locate chunk containing element:
+ filter = (starts <= i) & (i < stops_)
+ (R,) = blockid[filter]
+
+ block += (R,)
+ element += (i - starts[R],)
+
+ dsk = dict()
+ dsk[(name,)] = (
+ getitem, (x.name, *block[::dxn]), _squeeze(element[::dxn]), dtype
+ )
+ graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])
+ out = da.core.Array(graph, name, (), meta=wrap_inner(meta))
+ return out
- if mask is not None:
- _check_mask(mask)
- if input_mask is not None:
- _check_mask(input_mask)
+ def new(self, *, dtype=None, mask=None, input_mask=None, name=None):
+ _check_mask(mask, ignore_None=True)
+ _check_mask(input_mask, ignore_None=True)
if any_dOnions(self, mask, input_mask):
+
+ def _recall_getitem(parent, keys_0, keys_1, dtype, mask, input_mask):
+ keys = keys_0 if keys_1 is None else (keys_0, keys_1)
+ return AmbiguousAssignOrExtract(parent, keys).new(
+ dtype=dtype, mask=mask, input_mask=input_mask
+ )
+
meta = self._meta.new(dtype=dtype)
if _is_pair(self.index):
@@ -1443,8 +1515,8 @@ def _recall_getitem(parent, keys_0, keys_1, dtype, mask, input_mask):
input_ndim = len(input_shape)
axes = tuple(range(input_ndim))
x_axes = axes[::dxn]
- indices = tuple(i.index for i in self.resolved_indices.indices)
- out_shape = tuple(i.size for i in self.resolved_indices.indices if i.size is not None)
+ indices = tuple(i.index for i in self.resolved_indexes.indices)
+ out_shape = tuple(i.size for i in self.resolved_indexes.indices if i.size is not None)
out_ndim = len(out_shape)
if mask is not None:
@@ -1497,7 +1569,7 @@ def _recall_getitem(parent, keys_0, keys_1, dtype, mask, input_mask):
elif out_ndim < input_ndim:
(rem_axis,) = [
axis
- for axis, index in enumerate(self.resolved_indices.indices)
+ for axis, index in enumerate(self.resolved_indexes.indices)
if index.size is not None
]
if out_ndim == input_mask_ndim:
@@ -1521,6 +1593,12 @@ def _recall_getitem(parent, keys_0, keys_1, dtype, mask, input_mask):
dtype = np_dtype(meta.dtype)
if input_ndim in [1, 2]:
+ if out_ndim == 0:
+ delayed = self.__class__._extract_single_element(
+ x, xt, T, dxn, indices, meta, meta.dtype
+ )
+ return get_return_type(meta)(delayed)
+
# prepare arguments for blockwise:
indices_args = []
offset_args = []
@@ -1675,6 +1753,9 @@ def _identity_func(x, axis, keepdims):
class Assigner:
+ __bool__ = gb.expr.Assigner.__bool__
+ __eq__ = gb.expr.Assigner.__eq__
+
def __init__(self, updater, index, subassign=False):
self.updater = updater
self.parent = updater.parent
@@ -1682,27 +1763,45 @@ def __init__(self, updater, index, subassign=False):
self.subassign = subassign
input_ndim = self.parent.ndim
- keys_0_is_dOnion = input_ndim == 1 and is_DOnion(index)
- keys_1_is_dOnion = _is_pair(index) and (is_DOnion(index[0]) or is_DOnion(index[1]))
- if self.updater.has_dOnion or keys_0_is_dOnion or keys_1_is_dOnion:
+ index_is_dOnion = input_ndim == 1 and is_DOnion(index)
+ index_is_dOnion = index_is_dOnion or (
+ input_ndim == 2 and _is_pair(index) and (is_DOnion(index[0]) or is_DOnion(index[1]))
+ )
+ if self.updater.has_dOnion or index_is_dOnion:
self.has_dOnion = True
IndexerResolver(self.parent, index, check_shape=False)
self.index = index
else:
self.has_dOnion = False
- self.resolved_indices = IndexerResolver(self.parent, index).indices
- self.index = tuple(i.index for i in self.resolved_indices)
+ self.resolved_indexes = IndexerResolver(self.parent, index).indices
+ self.index = tuple(i.index for i in self.resolved_indexes)
def update(self, obj):
- def _recall_update(lhs, mask, accum, replace, keys_0, keys_1, obj, subassign):
- keys = (keys_0,) if keys_1 is None else (keys_0, keys_1)
- updater = Updater(lhs, mask=mask, accum=accum, replace=replace)
- Assigner(updater, keys, subassign=subassign).update(obj)
- return lhs
-
+ if not (
+ isinstance(obj, Number)
+ or isinstance(obj, BaseType)
+ or getattr(obj, '_is_transposed', False)
+ ):
+ obj = self.parent._expect_type(
+ obj,
+ (
+ gb.Scalar,
+ gb.Vector,
+ gb.Matrix,
+ gb.matrix.TransposedMatrix,
+ ),
+ within="Assign.update",
+ )
if any_dOnions(self, obj):
+
+ def _recall_update(lhs, mask, accum, replace, keys_0, keys_1, obj, subassign):
+ keys = (keys_0,) if keys_1 is None else (keys_0, keys_1)
+ updater = Updater(lhs, mask=mask, accum=accum, replace=replace)
+ Assigner(updater, keys, subassign=subassign).update(obj)
+ return lhs
+
lhs = self.parent
- lhs_copy = lhs.__class__(lhs._delayed, meta=lhs._meta)
+ lhs_copy = lhs.__class__(lhs._optional_dup(), meta=lhs._meta)
updater = self.updater
@@ -1727,11 +1826,8 @@ def _recall_update(lhs, mask, accum, replace, keys_0, keys_1, obj, subassign):
return
# no dOnions
- if not (isinstance(obj, BaseType) or isinstance(obj, Number)):
- try:
- obj_transposed = obj._is_transposed
- except AttributeError:
- raise TypeError("Bad type for argument `obj`")
+ if getattr(obj, '_is_transposed', False):
+ obj_transposed = obj._is_transposed
obj = obj._matrix
else:
obj_transposed = False
@@ -1784,7 +1880,7 @@ def _recall_update(lhs, mask, accum, replace, keys_0, keys_1, obj, subassign):
else:
(rem_axis,) = [
axis
- for axis, index in enumerate(self.resolved_indices)
+ for axis, index in enumerate(self.resolved_indexes)
if index.size is not None
]
if parent.shape[rem_axis] != out_shape[0]:
@@ -1793,7 +1889,7 @@ def _recall_update(lhs, mask, accum, replace, keys_0, keys_1, obj, subassign):
if ndim == 2 and out_dim == 1:
(int_axis,) = [
axis
- for axis, index in enumerate(self.resolved_indices)
+ for axis, index in enumerate(self.resolved_indexes)
if index.size is None
]
indices = list(indices)
@@ -2014,14 +2110,16 @@ def __init__(self, value, compress_axis):
self.compress_axis = compress_axis
-def _expr_new(method_name, dtype, grblas_mask_type, kwargs, x, mask, *args):
+def _expr_new(xt, ats, method_name, dtype, grblas_mask_type, x, mask, *args, **kwargs):
# expr.new(...)
- args = [x.value if isinstance(x, InnerBaseType) else x for x in args]
+ args = [
+ _transpose_if(y, yt) if isinstance(y, InnerBaseType) else y for y, yt in zip(args, ats)
+ ]
kwargs = {
key: (kwargs[key].value if isinstance(kwargs[key], InnerBaseType) else kwargs[key])
for key in kwargs
}
- expr = getattr(x.value, method_name)(*args, **kwargs)
+ expr = getattr(_transpose_if(x, xt), method_name)(*args, **kwargs)
if mask is not None:
mask = grblas_mask_type(mask.value)
return wrap_inner(expr.new(dtype=dtype, mask=mask))
diff --git a/dask_grblas/mask.py b/dask_grblas/mask.py
index 2d86cdb..21a760a 100644
--- a/dask_grblas/mask.py
+++ b/dask_grblas/mask.py
@@ -1,3 +1,4 @@
+from grblas.mask import Mask as gb_Mask
from .utils import get_grblas_type
@@ -6,6 +7,9 @@ class Mask:
structure = False
value = False
+ __bool__ = gb_Mask.__bool__
+ __eq__ = gb_Mask.__eq__
+
def __init__(self, mask):
from . import matrix, vector
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index 9713a15..e5a4ef2 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -6,11 +6,11 @@
from dask.base import tokenize, is_dask_collection
from dask.delayed import Delayed, delayed
from dask.highlevelgraph import HighLevelGraph
-from grblas import _automethods
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
from grblas.exceptions import IndexOutOfBound, EmptyObject, DimensionMismatch
+from . import _automethods
from .base import BaseType, InnerBaseType, DOnion, is_DOnion, any_dOnions, Box, skip
from .base import _nvals as _nvals_in_chunk
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater
@@ -28,7 +28,6 @@
build_chunk_ranges_dask_array,
wrap_dataframe,
)
-from builtins import isinstance
class InnerMatrix(InnerBaseType):
@@ -52,12 +51,43 @@ class Matrix(BaseType):
ndim = 2
_is_transposed = False
+ __abs__ = gb.Matrix.__abs__
+ __add__ = gb.Matrix.__add__
+ __divmod__ = gb.Matrix.__divmod__
__eq__ = gb.Matrix.__eq__
+ __floordiv__ = gb.Matrix.__floordiv__
__ge__ = gb.Matrix.__ge__
__gt__ = gb.Matrix.__gt__
+ __iadd__ = gb.Matrix.__iadd__
+ __iand__ = gb.Matrix.__iand__
+ __ifloordiv__ = gb.Matrix.__ifloordiv__
+ __imod__ = gb.Matrix.__imod__
+ __imul__ = gb.Matrix.__imul__
+ __invert__ = gb.Matrix.__invert__
+ __ior__ = gb.Matrix.__ior__
+ __ipow__ = gb.Matrix.__ipow__
+ __isub__ = gb.Matrix.__isub__
+ __itruediv__ = gb.Matrix.__itruediv__
+ __ixor__ = gb.Matrix.__ixor__
__le__ = gb.Matrix.__le__
__lt__ = gb.Matrix.__lt__
+ __mod__ = gb.Matrix.__mod__
+ __mul__ = gb.Matrix.__mul__
__ne__ = gb.Matrix.__ne__
+ __neg__ = gb.Matrix.__neg__
+ __pow__ = gb.Matrix.__pow__
+ __radd__ = gb.Matrix.__radd__
+ __rdivmod__ = gb.Matrix.__rdivmod__
+ __rfloordiv__ = gb.Matrix.__rfloordiv__
+ __rmod__ = gb.Matrix.__rmod__
+ __rmul__ = gb.Matrix.__rmul__
+ __rpow__ = gb.Matrix.__rpow__
+ __rsub__ = gb.Matrix.__rsub__
+ __rtruediv__ = gb.Matrix.__rtruediv__
+ __rxor__ = gb.Matrix.__rxor__
+ __sub__ = gb.Matrix.__sub__
+ __truediv__ = gb.Matrix.__truediv__
+ __xor__ = gb.Matrix.__xor__
@classmethod
def from_delayed(cls, matrix, dtype, nrows, ncols, *, nvals=None, name=None):
@@ -281,7 +311,6 @@ def build(
nrows = nrows or self._nrows
ncols = ncols or self._ncols
meta = self._meta
- meta.resize(nrows, ncols)
# check for any DOnions:
args = pack_args(self, rows, columns, values)
@@ -436,9 +465,9 @@ def __init__(self, delayed, meta=None, nvals=None):
if meta is None:
meta = gb.Matrix.new(delayed.dtype)
self._meta = meta
- self._nrows = meta.nrows
- self._ncols = meta.ncols
self.dtype = meta.dtype
+ self._nrows = self.nrows
+ self._ncols = self.ncols
self._nvals = nvals
# Add ss extension methods
self.ss = ss(self)
@@ -470,7 +499,8 @@ def ncols(self):
@property
def shape(self):
if self.is_dOnion:
- return DOnion.multi_access(self._meta.shape, getattr, self, "shape")
+ return (self.nrows, self.ncols)
+ # return DOnion.multi_access(self._meta.shape, getattr, self, "shape")
return self._meta.shape
def resize(self, nrows, ncols, inplace=True, chunks="auto"):
@@ -565,7 +595,7 @@ def _diag(self, k=0, dtype=None, chunks="auto"):
row_blockid = np.arange(A.numblocks[0])
col_blockid = np.arange(A.numblocks[1])
- # locate first chunk containing diaagonal:
+ # locate first chunk containing diagonal:
row_filter = (row_starts <= kdiag_row_start) & (kdiag_row_start < row_stops_)
col_filter = (col_starts <= kdiag_col_start) & (kdiag_col_start < col_stops_)
(R,) = row_blockid[row_filter]
@@ -677,48 +707,73 @@ def __iter__(self):
return zip(rows.flat, columns.flat)
def ewise_add(self, other, op=monoid.plus, *, require_monoid=True):
- assert type(other) is Matrix # TODO: or TransposedMatrix
+ gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
+ other = self._expect_type(
+ other, (Matrix, TransposedMatrix) + gb_types, within="ewise_add", argname="other"
+ )
try:
meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
except DimensionMismatch:
- meta = self._meta.ewise_add(self._meta, op=op, require_monoid=require_monoid)
- return GbDelayed(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta)
+ if any_dOnions(self, other):
+ meta = self._meta.ewise_add(self._meta, op=op, require_monoid=require_monoid)
+ else:
+ raise
+
+ return MatrixExpression(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta)
def ewise_mult(self, other, op=binary.times):
- assert type(other) is Matrix
+ gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
+ other = self._expect_type(
+ other, (Matrix, TransposedMatrix) + gb_types, within="ewise_mult", argname="other"
+ )
try:
meta = self._meta.ewise_mult(other._meta, op=op)
except DimensionMismatch:
- meta = self._meta.ewise_mult(self._meta, op=op)
+ if any_dOnions(self, other):
+ meta = self._meta.ewise_mult(self._meta, op=op)
+ else:
+ raise
- return GbDelayed(self, "ewise_mult", other, op, meta=meta)
+ return MatrixExpression(self, "ewise_mult", other, op, meta=meta)
def mxv(self, other, op=semiring.plus_times):
- from .vector import Vector
+ from .vector import Vector, VectorExpression
- assert type(other) is Vector
+ other = self._expect_type(
+ other, (Vector, gb.Vector), within="mxv", argname="other"
+ )
try:
meta = self._meta.mxv(other._meta, op=op)
except DimensionMismatch:
- other_meta = gb.Vector.new(dtype=other._meta.dtype, size=self._meta.ncols)
- meta = self._meta.mxv(other_meta, op=op)
+ if any_dOnions(self, other):
+ other_meta = gb.Vector.new(dtype=other._meta.dtype, size=self._meta.ncols)
+ meta = self._meta.mxv(other_meta, op=op)
+ else:
+ raise
- return GbDelayed(self, "mxv", other, op, meta=meta)
+ return VectorExpression(self, "mxv", other, op, meta=meta, size=self.nrows)
def mxm(self, other, op=semiring.plus_times):
- assert type(other) in (Matrix, TransposedMatrix)
+ gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
+ other = self._expect_type(
+ other, (Matrix, TransposedMatrix) + gb_types, within="mxm", argname="other"
+ )
try:
meta = self._meta.mxm(other._meta, op=op)
except DimensionMismatch:
- other_meta = gb.Matrix.new(
- dtype=other._meta.dtype, nrows=self._meta.ncols, ncols=other._meta.ncols
- )
- meta = self._meta.mxm(other_meta, op=op)
- return GbDelayed(self, "mxm", other, op, meta=meta)
+ if any_dOnions(self, other):
+ other_meta = gb.Matrix.new(
+ dtype=other._meta.dtype, nrows=self._meta.ncols, ncols=other._meta.ncols
+ )
+ meta = self._meta.mxm(other_meta, op=op)
+ else:
+ raise
+
+ return MatrixExpression(self, "mxm", other, op, meta=meta, nrows=self.nrows, ncols=other.ncols)
def kronecker(self, other, op=binary.times):
assert type(other) is Matrix # TODO: or TransposedMatrix
@@ -739,35 +794,43 @@ def apply(self, op, right=None, *, left=None):
if self._meta.shape == (0,) * self.ndim:
self._meta.resize(*((1,) * self.ndim))
meta = self._meta.apply(op=op, left=left_meta, right=right_meta)
- return GbDelayed(self, "apply", op, right, meta=meta, left=left)
+ return MatrixExpression(self, "apply", op, right, meta=meta, left=left)
def reduce_rowwise(self, op=monoid.plus):
+ from .vector import VectorExpression
+
meta = self._meta.reduce_rowwise(op)
- return GbDelayed(self, "reduce_rowwise", op, meta=meta)
+ return VectorExpression(self, "reduce_rowwise", op, meta=meta, size=self.nrows)
def reduce_columnwise(self, op=monoid.plus):
+ from .vector import VectorExpression
+
meta = self._meta.reduce_columnwise(op)
- return GbDelayed(self, "reduce_columnwise", op, meta=meta)
+ return VectorExpression(self, "reduce_columnwise", op, meta=meta, size=self.ncols)
def reduce_scalar(self, op=monoid.plus):
+ from .scalar import ScalarExpression
+
meta = self._meta.reduce_scalar(op)
- return GbDelayed(self, "reduce_scalar", op, meta=meta)
+ return ScalarExpression(self, "reduce_scalar", op, meta=meta)
def to_values(self, dtype=None, chunks="auto"):
dtype = lookup_dtype(self.dtype if dtype is None else dtype)
meta_i, _, meta_v = self._meta.to_values(dtype)
- x = self._delayed
- if type(x) is DOnion:
+ if self.is_dOnion:
meta = np.array([])
- result = x.getattr(meta, "to_values", dtype=dtype, chunks=chunks)
- rows = result.getattr(meta_i, "__getitem__", 0)
- columns = result.getattr(meta_i, "__getitem__", 1)
- values = result.getattr(meta_v, "__getitem__", 2)
+ result = DOnion.multi_access(
+ meta, self.__class__.to_values, self, dtype=dtype, chunks=chunks
+ )
+ rows = DOnion.multi_access(meta_i, tuple.__getitem__, result, 0)
+ columns = DOnion.multi_access(meta_i, tuple.__getitem__, result, 1)
+ values = DOnion.multi_access(meta_v, tuple.__getitem__, result, 2)
return rows, columns, values
# first find the number of values in each chunk and return
# them as a 2D numpy array whose shape is equal to x.numblocks
+ x = self._delayed
nvals_2D = da.core.blockwise(
*(_nvals_in_chunk, "ij"),
*(x, "ij"),
@@ -835,14 +898,16 @@ def _to_values(x, starts, stops_, dtype, chunks, nnz):
return rows, cols, vals
def isequal(self, other, *, check_dtype=False):
+ gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
other = self._expect_type(
- other, (Matrix, TransposedMatrix), within="isequal", argname="other"
+ other, (Matrix, TransposedMatrix) + gb_types, within="isequal", argname="other"
)
return super().isequal(other, check_dtype=check_dtype)
def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
+ gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
other = self._expect_type(
- other, (Matrix, TransposedMatrix), within="isclose", argname="other"
+ other, (Matrix, TransposedMatrix) + gb_types, within="isclose", argname="other"
)
return super().isclose(other, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype)
@@ -874,14 +939,40 @@ def _delete_element(self, resolved_indexes):
class TransposedMatrix:
ndim = 2
+ _is_scalar = False
_is_transposed = True
+ __and__ = gb.matrix.TransposedMatrix.__and__
+ __bool__ = gb.matrix.TransposedMatrix.__bool__
+ __or__ = gb.matrix.TransposedMatrix.__or__
+
+ __abs__ = gb.matrix.TransposedMatrix.__abs__
+ __add__ = gb.matrix.TransposedMatrix.__add__
+ __divmod__ = gb.matrix.TransposedMatrix.__divmod__
__eq__ = gb.matrix.TransposedMatrix.__eq__
+ __floordiv__ = gb.matrix.TransposedMatrix.__floordiv__
__ge__ = gb.matrix.TransposedMatrix.__ge__
__gt__ = gb.matrix.TransposedMatrix.__gt__
+ __invert__ = gb.matrix.TransposedMatrix.__invert__
__le__ = gb.matrix.TransposedMatrix.__le__
__lt__ = gb.matrix.TransposedMatrix.__lt__
+ __mod__ = gb.matrix.TransposedMatrix.__mod__
+ __mul__ = gb.matrix.TransposedMatrix.__mul__
__ne__ = gb.matrix.TransposedMatrix.__ne__
+ __neg__ = gb.matrix.TransposedMatrix.__neg__
+ __pow__ = gb.matrix.TransposedMatrix.__pow__
+ __radd__ = gb.matrix.TransposedMatrix.__radd__
+ __rdivmod__ = gb.matrix.TransposedMatrix.__rdivmod__
+ __rfloordiv__ = gb.matrix.TransposedMatrix.__rfloordiv__
+ __rmod__ = gb.matrix.TransposedMatrix.__rmod__
+ __rmul__ = gb.matrix.TransposedMatrix.__rmul__
+ __rpow__ = gb.matrix.TransposedMatrix.__rpow__
+ __rsub__ = gb.matrix.TransposedMatrix.__rsub__
+ __rtruediv__ = gb.matrix.TransposedMatrix.__rtruediv__
+ __rxor__ = gb.matrix.TransposedMatrix.__rxor__
+ __sub__ = gb.matrix.TransposedMatrix.__sub__
+ __truediv__ = gb.matrix.TransposedMatrix.__truediv__
+ __xor__ = gb.matrix.TransposedMatrix.__xor__
def __init__(self, matrix, meta=None):
assert type(matrix) is Matrix
@@ -941,15 +1032,7 @@ def dtype(self):
return self._meta.dtype
def to_values(self, dtype=None, chunks="auto"):
- if self.is_dOnion:
- out_meta = np.array([])
- result = self.dOnion_if.getattr(out_meta, "to_values", dtype=dtype, chunks=chunks)
- meta_i, _, meta_v = self._meta.to_values(dtype)
- rows = result.getattr(meta_i, "__getitem__", 0)
- cols = result.getattr(meta_i, "__getitem__", 1)
- vals = result.getattr(meta_v, "__getitem__", 2)
- else:
- rows, cols, vals = self._matrix.to_values(dtype=dtype, chunks=chunks)
+ rows, cols, vals = self._matrix.to_values(dtype=dtype, chunks=chunks)
return cols, rows, vals
# Properties
@@ -971,6 +1054,7 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
)
# Delayed methods
+ __contains__ = Matrix.__contains__
ewise_add = Matrix.ewise_add
ewise_mult = Matrix.ewise_mult
mxv = Matrix.mxv
@@ -991,6 +1075,113 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
name = Matrix.name
+class MatrixExpression(GbDelayed):
+ __slots__ = ()
+ output_type = gb.Matrix
+ ndim = 2
+ _is_scalar = False
+
+ # automethods:
+ __and__ = gb.matrix.MatrixExpression.__and__
+ __bool__ = gb.matrix.MatrixExpression.__bool__
+ __or__ = gb.matrix.MatrixExpression.__or__
+ _get_value = _automethods._get_value
+ S = gb.matrix.MatrixExpression.S
+ T = gb.matrix.MatrixExpression.T
+ V = gb.matrix.MatrixExpression.V
+ apply = gb.matrix.MatrixExpression.apply
+ ewise_add = gb.matrix.MatrixExpression.ewise_add
+ ewise_mult = gb.matrix.MatrixExpression.ewise_mult
+ isclose = gb.matrix.MatrixExpression.isclose
+ isequal = gb.matrix.MatrixExpression.isequal
+ mxm = gb.matrix.MatrixExpression.mxm
+ mxv = gb.matrix.MatrixExpression.mxv
+ ncols = gb.matrix.MatrixExpression.ncols
+ nrows = gb.matrix.MatrixExpression.nrows
+ nvals = gb.matrix.MatrixExpression.nvals
+ reduce_rowwise = gb.matrix.MatrixExpression.reduce_rowwise
+ reduce_columnwise = gb.matrix.MatrixExpression.reduce_columnwise
+ reduce_scalar = gb.matrix.MatrixExpression.reduce_scalar
+ shape = gb.matrix.MatrixExpression.shape
+
+ # infix sugar:
+ __abs__ = gb.matrix.MatrixExpression.__abs__
+ __add__ = gb.matrix.MatrixExpression.__add__
+ __divmod__ = gb.matrix.MatrixExpression.__divmod__
+ __eq__ = gb.matrix.MatrixExpression.__eq__
+ __floordiv__ = gb.matrix.MatrixExpression.__floordiv__
+ __ge__ = gb.matrix.MatrixExpression.__ge__
+ __gt__ = gb.matrix.MatrixExpression.__gt__
+ __invert__ = gb.matrix.MatrixExpression.__invert__
+ __le__ = gb.matrix.MatrixExpression.__le__
+ __lt__ = gb.matrix.MatrixExpression.__lt__
+ __mod__ = gb.matrix.MatrixExpression.__mod__
+ __mul__ = gb.matrix.MatrixExpression.__mul__
+ __ne__ = gb.matrix.MatrixExpression.__ne__
+ __neg__ = gb.matrix.MatrixExpression.__neg__
+ __pow__ = gb.matrix.MatrixExpression.__pow__
+ __radd__ = gb.matrix.MatrixExpression.__radd__
+ __rdivmod__ = gb.matrix.MatrixExpression.__rdivmod__
+ __rfloordiv__ = gb.matrix.MatrixExpression.__rfloordiv__
+ __rmod__ = gb.matrix.MatrixExpression.__rmod__
+ __rmul__ = gb.matrix.MatrixExpression.__rmul__
+ __rpow__ = gb.matrix.MatrixExpression.__rpow__
+ __rsub__ = gb.matrix.MatrixExpression.__rsub__
+ __rtruediv__ = gb.matrix.MatrixExpression.__rtruediv__
+ __rxor__ = gb.matrix.MatrixExpression.__rxor__
+ __sub__ = gb.matrix.MatrixExpression.__sub__
+ __truediv__ = gb.matrix.MatrixExpression.__truediv__
+ __xor__ = gb.matrix.MatrixExpression.__xor__
+
+ # bad sugar:
+ __itruediv__ = gb.matrix.MatrixExpression.__itruediv__
+ __imul__ = gb.matrix.MatrixExpression.__imul__
+ __imatmul__ = gb.matrix.MatrixExpression.__imatmul__
+ __iadd__ = gb.matrix.MatrixExpression.__iadd__
+ __iand__ = gb.matrix.MatrixExpression.__iand__
+ __ipow__ = gb.matrix.MatrixExpression.__ipow__
+ __imod__ = gb.matrix.MatrixExpression.__imod__
+ __isub__ = gb.matrix.MatrixExpression.__isub__
+ __ixor__ = gb.matrix.MatrixExpression.__ixor__
+ __ifloordiv__ = gb.matrix.MatrixExpression.__ifloordiv__
+ __ior__ = gb.matrix.MatrixExpression.__ior__
+
+ def __init__(
+ self,
+ parent,
+ method_name,
+ *args,
+ meta=None,
+ ncols=None,
+ nrows=None,
+ **kwargs,
+ ):
+ super().__init__(
+ parent,
+ method_name,
+ *args,
+ meta=meta,
+ **kwargs,
+ )
+ if ncols is None:
+ ncols = self.parent._ncols
+ if nrows is None:
+ nrows = self.parent._nrows
+ self._ncols = ncols
+ self._nrows = nrows
+
+ # def __getattr__(self, item):
+ # return getattr(gb.matrix.MatrixExpression, item)
+
+ # def construct_output(self, dtype=None, *, name=None):
+ # if dtype is None:
+ # dtype = self.dtype
+ # nrows = 0 if self._nrows.is_dOnion else self._nrows
+ # ncols = 0 if self._ncols.is_dOnion else self._ncols
+ # return Matrix.new(dtype, nrows, ncols, name=name)
+
+
def _chunk_diag_v2(inner_matrix, k):
return wrap_inner(gb.ss.diag(inner_matrix.value, k))
@@ -1431,3 +1622,4 @@ def _concat_matrix(seq, axis=0):
gb.utils._output_types[Matrix] = gb.Matrix
gb.utils._output_types[TransposedMatrix] = gb.matrix.TransposedMatrix
+gb.utils._output_types[MatrixExpression] = gb.Matrix
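A brief sketch of the new `MatrixExpression` in use (not part of the patch; it assumes the imports shown and that `.new()` materializes the expression into a dask_grblas Matrix):

    from grblas import semiring
    from dask_grblas.matrix import Matrix

    A = Matrix.from_values([0, 0, 1], [0, 1, 1], [1, 2, 3])
    expr = A.mxm(A, op=semiring.plus_times)   # MatrixExpression carrying nrows/ncols
    C = expr.new()                            # evaluation happens here, not at mxm()

The `gb.utils._output_types[MatrixExpression] = gb.Matrix` registration presumably lets grblas's argument and type checks treat the expression as something that yields a Matrix.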
diff --git a/dask_grblas/scalar.py b/dask_grblas/scalar.py
index 81be48b..472282c 100644
--- a/dask_grblas/scalar.py
+++ b/dask_grblas/scalar.py
@@ -3,6 +3,7 @@
import numpy as np
from dask.delayed import Delayed, delayed
+from . import _automethods
from .base import BaseType, InnerBaseType, DOnion, Box, any_dOnions
from .expr import AmbiguousAssignOrExtract, GbDelayed, _is_pair
from .utils import get_meta, np_dtype
@@ -79,7 +80,7 @@ def __init__(self, delayed, meta=None):
def update(self, expr, in_dOnion=False):
typ = type(expr)
if any_dOnions(self, expr):
- self_copy = self.__class__(self._delayed, meta=self._meta)
+ self_copy = self.__class__(self._optional_dup(), meta=self._meta)
expr_ = expr
if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
@@ -103,7 +104,7 @@ def update_by_aae(c, p, k_0, k_1):
self.__init__(donion, self._meta)
return
- if typ is GbDelayed and expr.has_dOnion:
+ if isinstance(expr, GbDelayed) and expr.has_dOnion:
def update_by_gbd(c, *args, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
@@ -140,7 +141,7 @@ def update_by_gbd(c, *args, **kwargs):
elif typ is Scalar:
# Simple assignment (s << t)
self.value = expr.value
- elif typ is GbDelayed:
+ elif isinstance(expr, GbDelayed):
# s << v.reduce()
expr._update(self)
else:
@@ -149,18 +150,18 @@ def update_by_gbd(c, *args, **kwargs):
if in_dOnion:
return self.__class__(self._delayed, meta=self._meta)
- def _update(self, rhs, *, accum, in_dOnion=False):
+ def _update(self, expr, *, accum, in_dOnion=False):
# s(accum=accum) << v.reduce()
- typ = type(rhs)
+ typ = type(expr)
if typ is Box:
- rhs = rhs.content
+ expr = expr.content
- assert type(rhs) is GbDelayed
+ assert isinstance(expr, GbDelayed)
- if any_dOnions(self, rhs):
- self_copy = self.__class__(self._delayed, meta=self._meta)
- rhs_ = rhs
- if typ is GbDelayed and rhs.has_dOnion:
+ if any_dOnions(self, expr):
+ self_copy = self.__class__(self._optional_dup(), meta=self._meta)
+ expr_ = expr
+ if isinstance(expr, GbDelayed) and expr.has_dOnion:
def _update_by_gbd(c, *args, accum=None, **kwargs):
gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
@@ -170,23 +171,23 @@ def _update_by_gbd(c, *args, accum=None, **kwargs):
self._meta,
_update_by_gbd,
self_copy,
- rhs_.parent,
- rhs_.method_name,
- *rhs_.args,
+ expr_.parent,
+ expr_.method_name,
+ *expr_.args,
accum=accum,
- **rhs_.kwargs,
+ **expr_.kwargs,
)
self.__init__(donion, self._meta)
return
- rhs_ = rhs.parent.dOnion_if
+ expr_ = expr.parent.dOnion_if
donion = DOnion.mult_access(
- self._meta, Scalar._update, self_copy, rhs_, accum=accum, in_dOnion=True
+ self._meta, Scalar._update, self_copy, expr_, accum=accum, in_dOnion=True
)
self.__init__(donion, self._meta)
return
- rhs._update(self, accum=accum)
+ expr._update(self, accum=accum)
if in_dOnion:
return self.__class__(self._delayed, meta=self._meta)
@@ -205,7 +206,10 @@ def dup(self, dtype=None, *, name=None):
def _persist(self, *args, **kwargs):
"""Since scalars are small, persist them if they need to be computed"""
- self._delayed = self._delayed.persist(*args, **kwargs)
+ if self.is_dOnion:
+ self._delayed = self._delayed._persist(*args, **kwargs)
+ else:
+ self._delayed = self._delayed.persist(*args, **kwargs)
def __eq__(self, other):
return self.isequal(other).compute()
@@ -266,6 +270,10 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
@property
def is_empty(self):
+ if self.is_dOnion:
+ donion = DOnion.multi_access(gb.Scalar.new(bool), getattr, self, 'is_empty')
+ return PythonScalar(donion)
+
delayed = da.core.elemwise(
_is_empty,
self._delayed,
@@ -310,6 +318,7 @@ class PythonScalar:
__complex__ = Scalar.__complex__
__index__ = Scalar.__index__
_persist = Scalar._persist
+ is_dOnion = Scalar.is_dOnion
@classmethod
def from_delayed(cls, scalar, dtype, *, name=None):
@@ -328,12 +337,34 @@ def __eq__(self, other):
def compute(self, *args, **kwargs):
innerval = self._delayed.compute(*args, **kwargs)
- if type(self._delayed) is DOnion:
+ if self.is_dOnion:
return innerval.value if hasattr(innerval, "value") else innerval
return innerval.value.value
+class ScalarExpression(GbDelayed):
+ __slots__ = ()
+ output_type = gb.Scalar
+ ndim = 0
+ shape = ()
+ _is_scalar = True
+ __and__ = gb.scalar.ScalarExpression.__and__
+ __bool__ = gb.scalar.ScalarExpression.__bool__
+ __eq__ = gb.scalar.ScalarExpression.__eq__
+ __float__ = gb.scalar.ScalarExpression.__float__
+ __index__ = gb.scalar.ScalarExpression.__index__
+ __int__ = gb.scalar.ScalarExpression.__int__
+ __or__ = gb.scalar.ScalarExpression.__or__
+ _get_value = _automethods._get_value
+ isclose = gb.scalar.ScalarExpression.isclose
+ isequal = gb.scalar.ScalarExpression.isequal
+ value = gb.scalar.ScalarExpression.value
+
+ # def __getattr__(self, item):
+ # return getattr(gb.scalar.ScalarExpression, item)
+
+
# Dask task functions
def _scalar_dup(x, dtype):
return InnerScalar(x.value.dup(dtype=dtype))
@@ -353,3 +384,4 @@ def _invert(x):
gb.utils._output_types[Scalar] = gb.Scalar
gb.utils._output_types[PythonScalar] = gb.Scalar
+gb.utils._output_types[ScalarExpression] = gb.Scalar
diff --git a/dask_grblas/utils.py b/dask_grblas/utils.py
index 01b929d..1141f07 100644
--- a/dask_grblas/utils.py
+++ b/dask_grblas/utils.py
@@ -2,6 +2,7 @@
import pandas as pd
import dask.array as da
import dask.dataframe as dd
+from functools import reduce
from dask.base import tokenize
from dask.delayed import delayed
from .io import MMFile
@@ -39,6 +40,10 @@ def wrap_inner(val):
return _inner_types[type(val)](val)
+def flatten(lol):
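+    # concatenate a sequence of lists/tuples into one, e.g. [(1, 2), (3,)] -> (1, 2, 3)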
+ return reduce(lambda x, y: x + y, lol)
+
+
def build_block_index_dask_array(x, axis, name):
"""
Calculate block-index for each chunk of x along axis `axis`
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 3c25300..2c0cbe5 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -7,8 +7,9 @@
from dask.delayed import Delayed, delayed
from grblas import binary, monoid, semiring
from grblas.dtypes import lookup_dtype
-from grblas.exceptions import IndexOutOfBound
+from grblas.exceptions import IndexOutOfBound, DimensionMismatch
+from . import _automethods
from .base import BaseType, InnerBaseType, _nvals, DOnion, Box, any_dOnions
from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, Assigner
from .mask import StructuralMask, ValueMask
@@ -82,13 +83,43 @@ def __getitem__(self, index):
class Vector(BaseType):
__slots__ = ("ss",)
ndim = 1
-
+ __abs__ = gb.Vector.__abs__
+ __add__ = gb.Vector.__add__
+ __divmod__ = gb.Vector.__divmod__
__eq__ = gb.Vector.__eq__
+ __floordiv__ = gb.Vector.__floordiv__
__ge__ = gb.Vector.__ge__
__gt__ = gb.Vector.__gt__
+ __iadd__ = gb.Vector.__iadd__
+ __iand__ = gb.Vector.__iand__
+ __ifloordiv__ = gb.Vector.__ifloordiv__
+ __imod__ = gb.Vector.__imod__
+ __imul__ = gb.Vector.__imul__
+ __invert__ = gb.Vector.__invert__
+ __ior__ = gb.Vector.__ior__
+ __ipow__ = gb.Vector.__ipow__
+ __isub__ = gb.Vector.__isub__
+ __itruediv__ = gb.Vector.__itruediv__
+ __ixor__ = gb.Vector.__ixor__
__le__ = gb.Vector.__le__
__lt__ = gb.Vector.__lt__
+ __mod__ = gb.Vector.__mod__
+ __mul__ = gb.Vector.__mul__
__ne__ = gb.Vector.__ne__
+ __neg__ = gb.Vector.__neg__
+ __pow__ = gb.Vector.__pow__
+ __radd__ = gb.Vector.__radd__
+ __rdivmod__ = gb.Vector.__rdivmod__
+ __rfloordiv__ = gb.Vector.__rfloordiv__
+ __rmod__ = gb.Vector.__rmod__
+ __rmul__ = gb.Vector.__rmul__
+ __rpow__ = gb.Vector.__rpow__
+ __rsub__ = gb.Vector.__rsub__
+ __rtruediv__ = gb.Vector.__rtruediv__
+ __rxor__ = gb.Vector.__rxor__
+ __sub__ = gb.Vector.__sub__
+ __truediv__ = gb.Vector.__truediv__
+ __xor__ = gb.Vector.__xor__
@classmethod
def from_delayed(cls, vector, dtype, size, *, nvals=None, name=None):
@@ -256,8 +287,8 @@ def __init__(self, delayed, meta=None, nvals=None):
if meta is None:
meta = gb.Vector.new(delayed.dtype)
self._meta = meta
- self._size = meta.size
self.dtype = meta.dtype
+ self._size = self.size
self._nvals = nvals
# Add ss extension methods
self.ss = ss(self)
@@ -290,12 +321,14 @@ def V(self):
@property
def size(self):
- if type(self._delayed) is DOnion:
- return self._delayed.size
+ if self.is_dOnion:
+ return DOnion.multi_access(self._meta.size, getattr, self, "size")
return self._meta.size
@property
def shape(self):
+ if self.is_dOnion:
+ return (self.size,)
return self._meta.shape
def resize(self, size, inplace=True, chunks="auto"):
@@ -409,13 +442,13 @@ def ewise_add(self, other, op=monoid.plus, *, require_monoid=True):
assert type(other) is Vector
meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
- return GbDelayed(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta)
+ return VectorExpression(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta)
def ewise_mult(self, other, op=binary.times):
assert type(other) is Vector
meta = self._meta.ewise_mult(other._meta, op=op)
- return GbDelayed(self, "ewise_mult", other, op, meta=meta)
+ return VectorExpression(self, "ewise_mult", other, op, meta=meta)
# Unofficial methods
def inner(self, other, op=semiring.plus_times):
@@ -473,9 +506,21 @@ def outer(self, other, op=binary.times):
def vxm(self, other, op=semiring.plus_times):
from .matrix import Matrix, TransposedMatrix
- assert type(other) in (Matrix, TransposedMatrix)
- meta = self._meta.vxm(other._meta, op=op)
- return GbDelayed(self, "vxm", other, op, meta=meta)
+ gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
+ other = self._expect_type(
+ other, (Matrix, TransposedMatrix) + gb_types, within="vxm", argname="other"
+ )
+ try:
+ meta = self._meta.vxm(other._meta, op=op)
+ except DimensionMismatch:
+ if any_dOnions(self, other):
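+                # with dOnions involved the meta shapes are only placeholders,
+                # so retry with a dummy matrix meta of compatible dimensions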
+ other_meta = gb.Matrix.new(
+ dtype=other._meta.dtype, nrows=self._meta.size, ncols=other._meta.ncols
+ )
+ meta = self._meta.vxm(other_meta, op=op)
+ else:
+ raise
+ return VectorExpression(self, "vxm", other, op, meta=meta, size=other.ncols)
def apply(self, op, right=None, *, left=None):
from .scalar import Scalar
@@ -489,11 +534,11 @@ def apply(self, op, right=None, *, left=None):
right_meta = right.dtype.np_type(0)
meta = self._meta.apply(op=op, left=left_meta, right=right_meta)
- return GbDelayed(self, "apply", op, right, meta=meta, left=left)
+ return VectorExpression(self, "apply", op, right, meta=meta, left=left)
def reduce(self, op=monoid.plus):
meta = self._meta.reduce(op)
- return GbDelayed(self, "reduce", op, meta=meta)
+ return ScalarExpression(self, "reduce", op, meta=meta)
def build(self, indices, values, *, size=None, chunks=None, dup_op=None, clear=False):
if clear:
@@ -622,11 +667,15 @@ def _to_values(x, starts, stops_, dtype, chunks, nnz):
return indices, values
def isequal(self, other, *, check_dtype=False):
- other = self._expect_type(other, Vector, within="isequal", argname="other")
+ other = self._expect_type(
+ other, (Vector, gb.Vector), within="isequal", argname="other"
+ )
return super().isequal(other, check_dtype=check_dtype)
def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
- other = self._expect_type(other, Vector, within="isclose", argname="other")
+ other = self._expect_type(
+ other, (Vector, gb.Vector), within="isclose", argname="other"
+ )
return super().isclose(other, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype)
def _delete_element(self, resolved_indexes):
@@ -654,6 +703,102 @@ def _carg(self):
Vector.ss = gb.utils.class_property(Vector.ss, ss)
+class VectorExpression(GbDelayed):
+ __slots__ = ()
+ output_type = gb.Vector
+ ndim = 1
+ _is_scalar = False
+
+ # automethods:
+ __and__ = gb.vector.VectorExpression.__and__
+ __bool__ = gb.vector.VectorExpression.__bool__
+ __or__ = gb.vector.VectorExpression.__or__
+ _get_value = _automethods._get_value
+ S = gb.vector.VectorExpression.S
+ V = gb.vector.VectorExpression.V
+ apply = gb.vector.VectorExpression.apply
+ ewise_add = gb.vector.VectorExpression.ewise_add
+ ewise_mult = gb.vector.VectorExpression.ewise_mult
+ isclose = gb.vector.VectorExpression.isclose
+ isequal = gb.vector.VectorExpression.isequal
+ nvals = gb.vector.VectorExpression.nvals
+ reduce = gb.vector.VectorExpression.reduce
+ shape = gb.vector.VectorExpression.shape
+ size = gb.vector.VectorExpression.size
+ vxm = gb.vector.VectorExpression.vxm
+
+ # infix sugar:
+ __abs__ = gb.vector.VectorExpression.__abs__
+ __add__ = gb.vector.VectorExpression.__add__
+ __divmod__ = gb.vector.VectorExpression.__divmod__
+ __eq__ = gb.vector.VectorExpression.__eq__
+ __floordiv__ = gb.vector.VectorExpression.__floordiv__
+ __ge__ = gb.vector.VectorExpression.__ge__
+ __gt__ = gb.vector.VectorExpression.__gt__
+ __invert__ = gb.vector.VectorExpression.__invert__
+ __le__ = gb.vector.VectorExpression.__le__
+ __lt__ = gb.vector.VectorExpression.__lt__
+ __mod__ = gb.vector.VectorExpression.__mod__
+ __mul__ = gb.vector.VectorExpression.__mul__
+ __ne__ = gb.vector.VectorExpression.__ne__
+ __neg__ = gb.vector.VectorExpression.__neg__
+ __pow__ = gb.vector.VectorExpression.__pow__
+ __radd__ = gb.vector.VectorExpression.__radd__
+ __rdivmod__ = gb.vector.VectorExpression.__rdivmod__
+ __rfloordiv__ = gb.vector.VectorExpression.__rfloordiv__
+ __rmod__ = gb.vector.VectorExpression.__rmod__
+ __rmul__ = gb.vector.VectorExpression.__rmul__
+ __rpow__ = gb.vector.VectorExpression.__rpow__
+ __rsub__ = gb.vector.VectorExpression.__rsub__
+ __rtruediv__ = gb.vector.VectorExpression.__rtruediv__
+ __rxor__ = gb.vector.VectorExpression.__rxor__
+ __sub__ = gb.vector.VectorExpression.__sub__
+ __truediv__ = gb.vector.VectorExpression.__truediv__
+ __xor__ = gb.vector.VectorExpression.__xor__
+
+ # bad sugar:
+ __itruediv__ = gb.vector.VectorExpression.__itruediv__
+ __imul__ = gb.vector.VectorExpression.__imul__
+ __imatmul__ = gb.vector.VectorExpression.__imatmul__
+ __iadd__ = gb.vector.VectorExpression.__iadd__
+ __iand__ = gb.vector.VectorExpression.__iand__
+ __ipow__ = gb.vector.VectorExpression.__ipow__
+ __imod__ = gb.vector.VectorExpression.__imod__
+ __isub__ = gb.vector.VectorExpression.__isub__
+ __ixor__ = gb.vector.VectorExpression.__ixor__
+ __ifloordiv__ = gb.vector.VectorExpression.__ifloordiv__
+ __ior__ = gb.vector.VectorExpression.__ior__
+
+ def __init__(
+ self,
+ parent,
+ method_name,
+ *args,
+ meta=None,
+ size=None,
+ **kwargs,
+ ):
+ super().__init__(
+ parent,
+ method_name,
+ *args,
+ meta=meta,
+ **kwargs,
+ )
+ if size is None:
+ size = self.parent._size
+ self._size = size
+
+ # def __getattr__(self, item):
+ # return getattr(gb.vector.VectorExpression, item)
+
+ # def construct_output(self, dtype=None, *, name=None):
+ # if dtype is None:
+ # dtype = self.dtype
+ # size = 0 if self._size.is_dOnion else self._size
+ # return Vector.new(dtype, size, name=name)
+
+
def _chunk_diag(
inner_vector,
input_range,
@@ -891,4 +1036,5 @@ def _concat_vector(seq, axis=0):
gb.utils._output_types[Vector] = gb.Vector
+gb.utils._output_types[VectorExpression] = gb.Vector
from .matrix import InnerMatrix # noqa isort:skip
diff --git a/tests/from_grblas2/conftest.py b/tests/from_grblas2/conftest.py
index 5d0e635..a9d4632 100644
--- a/tests/from_grblas2/conftest.py
+++ b/tests/from_grblas2/conftest.py
@@ -1,5 +1,55 @@
+import atexit
+import functools
+import itertools
+
+import numpy as np
+import pytest
+
+import grblas as gb
+
+
+def pytest_configure(config):
+ backend = config.getoption("--backend", "suitesparse")
+ blocking = config.getoption("--blocking", True)
+ record = config.getoption("--record", False)
+ mapnumpy = config.getoption("--mapnumpy", None)
+ if mapnumpy is None: # pragma: no branch
+ mapnumpy = np.random.rand() < 0.5 # heh
+
+ gb.config.set(autocompute=False, mapnumpy=mapnumpy)
+
+ gb.init(backend, blocking=blocking)
+ print(
+ f'Running tests with "{backend}" backend, blocking={blocking}, '
+ f"record={record}, mapnumpy={mapnumpy}"
+ )
+ if record:
+ rec = gb.Recorder()
+ rec.start()
+
+ def save_records():
+ with open("record.txt", "w") as f: # pragma: no cover
+ f.write("\n".join(rec.data))
+
+ # I'm sure there's a `pytest` way to do this...
+ atexit.register(save_records)
+ for mod in [gb.unary, gb.binary, gb.monoid, gb.semiring, gb.op]:
+ for name in list(mod._delayed):
+ getattr(mod, name)
+
+
+def pytest_runtest_setup(item):
+ if "slow" in item.keywords and not item.config.getoption("--runslow", True): # pragma: no cover
+ pytest.skip("need --runslow option to run")
+
+
def autocompute(func):
- return func
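+    # run the wrapped test with grblas autocompute temporarily enabled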
+ @functools.wraps(func)
+ def inner(*args, **kwargs):
+ with gb.config.set(autocompute=True):
+ return func(*args, **kwargs)
+
+ return inner
def compute(val):
diff --git a/tests/from_grblas2/test_matrix.py b/tests/from_grblas2/test_matrix.py
index c462bf6..bde2745 100644
--- a/tests/from_grblas2/test_matrix.py
+++ b/tests/from_grblas2/test_matrix.py
@@ -338,7 +338,7 @@ def test_resize(As, A_chunks):
assert A.nrows == 6
assert A.ncols == 11
assert A.nvals.compute() == 9
- if type(A._delayed) is da.Array:
+ if not A.is_dOnion:
assert A._delayed.chunks == ((4, 2), (4, 4, 3))
else:
assert A._delayed.deep_extract(None, lambda x: x._delayed.chunks) == (
@@ -2269,7 +2269,7 @@ def test_equals(As, A_chunks):
for chunks in A_chunks:
A = A_.dup()
A.rechunk(chunks=chunks, inplace=True)
- assert (A == A).new().reduce_scalar(monoid.land)
+ assert (A == A).new().reduce_scalar(monoid.land).new()
def test_bad_update(As, A_chunks):
@@ -2289,16 +2289,13 @@ def test_incompatible_shapes(As, A_chunks):
A.rechunk(chunks=chunks, inplace=True)
B = A[:-1, :-1].new()
with pytest.raises(DimensionMismatch):
- A.mxm(B)
- A.compute()
+ A.mxm(B).new().compute()
A = A_.dup()
with pytest.raises(DimensionMismatch):
- A.ewise_add(B)
- A.compute()
+ A.ewise_add(B).new().compute()
A = A_.dup()
with pytest.raises(DimensionMismatch):
- A.ewise_mult(B)
- A.compute()
+ A.ewise_mult(B).new().compute()
@pytest.mark.xfail("'Needs investigation'", strict=True)
@@ -2890,104 +2887,100 @@ def import_func(**kwargs):
assert C_orig.ss.is_iso is do_iso
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_no_bool_or_eq(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- with pytest.raises(TypeError, match="not defined"):
- bool(A)
- # with pytest.raises(TypeError, match="not defined"):
- A == A
- with pytest.raises(TypeError, match="not defined"):
- bool(A.S)
- with pytest.raises(TypeError, match="not defined"):
- A.S == A.S
- expr = A.ewise_mult(A)
- with pytest.raises(TypeError, match="not defined"):
- bool(expr)
- with pytest.raises(TypeError, match="not enabled"):
- expr == expr
- assigner = A[1, 2]()
- with pytest.raises(TypeError, match="not defined"):
- bool(assigner)
- with pytest.raises(TypeError, match="not defined"):
- assigner == assigner
- updater = A()
- with pytest.raises(TypeError, match="not defined"):
- bool(updater)
- with pytest.raises(TypeError, match="not defined"):
- updater == updater
+def test_no_bool_or_eq(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ with pytest.raises(TypeError, match="not defined"):
+ bool(A)
+ # with pytest.raises(TypeError, match="not defined"):
+ A == A
+ with pytest.raises(TypeError, match="not defined"):
+ bool(A.S)
+ with pytest.raises(TypeError, match="not defined"):
+ A.S == A.S
+ expr = A.ewise_mult(A)
+ with pytest.raises(TypeError, match="not defined"):
+ bool(expr)
+ with pytest.raises(TypeError, match="not enabled"):
+ expr == expr
+ assigner = A[1, 2]()
+ with pytest.raises(TypeError, match="not defined"):
+ bool(assigner)
+ with pytest.raises(TypeError, match="not defined"):
+ assigner == assigner
+ updater = A()
+ with pytest.raises(TypeError, match="not defined"):
+ bool(updater)
+ with pytest.raises(TypeError, match="not defined"):
+ updater == updater
@autocompute
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_bool_eq_on_scalar_expressions(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- expr = A.reduce_scalar()
- assert expr == 47
- assert bool(expr)
- assert int(expr) == 47
- assert float(expr) == 47.0
- assert range(expr) == range(47)
-
- expr = A[0, 1]
- assert expr == 2
- assert bool(expr)
- assert int(expr) == 2
- assert float(expr) == 2.0
- assert range(expr) == range(2)
-
- expr = A[0, [1, 1]]
- with pytest.raises(TypeError, match="not defined"):
- expr == expr
- with pytest.raises(TypeError, match="not defined"):
- bool(expr)
- with pytest.raises(TypeError, match="not defined"):
- int(expr)
- with pytest.raises(TypeError, match="not defined"):
- float(expr)
- with pytest.raises(TypeError, match="not defined"):
- range(expr)
-
-
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_bool_eq_on_scalar_expressions_no_auto(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- expr = A.reduce_scalar()
- with pytest.raises(TypeError, match="autocompute"):
- expr == 47
- with pytest.raises(TypeError, match="autocompute"):
- bool(expr)
- with pytest.raises(TypeError, match="autocompute"):
- int(expr)
-
-
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_contains(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert (0, 1) in A
- assert (1, 0) in A.T
+def test_bool_eq_on_scalar_expressions(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ expr = A.reduce_scalar()
+ assert expr == 47
+ assert bool(expr)
+ assert int(expr) == 47
+ assert float(expr) == 47.0
+ assert range(expr) == range(47)
+
+ expr = A[0, 1]
+ assert expr == 2
+ assert bool(expr)
+ assert int(expr) == 2
+ assert float(expr) == 2.0
+ assert range(expr) == range(2)
+
+ expr = A[0, [1, 1]]
+ with pytest.raises(TypeError, match="not defined"):
+ expr == expr
+ with pytest.raises(TypeError, match="not defined"):
+ bool(expr)
+ with pytest.raises(TypeError, match="not defined"):
+ int(expr)
+ with pytest.raises(TypeError, match="not defined"):
+ float(expr)
+ with pytest.raises(TypeError, match="not defined"):
+ range(expr)
+
+
+def test_bool_eq_on_scalar_expressions_no_auto(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ expr = A.reduce_scalar()
+ with pytest.raises(TypeError, match="autocompute"):
+ expr == 47
+ with pytest.raises(TypeError, match="autocompute"):
+ bool(expr)
+ with pytest.raises(TypeError, match="autocompute"):
+ int(expr)
- assert (0, 1) not in A.T
- assert (1, 0) not in A
- with pytest.raises(TypeError):
- 1 in A
- with pytest.raises(TypeError):
- (1,) in A.T
- with pytest.raises(TypeError, match="Invalid index"):
- (1, [1, 2]) in A
+def test_contains(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert (0, 1) in A
+ assert (1, 0) in A.T
+
+ assert (0, 1) not in A.T
+ assert (1, 0) not in A
+
+ with pytest.raises(TypeError):
+ 1 in A
+ with pytest.raises(TypeError):
+ (1,) in A.T
+ with pytest.raises(TypeError, match="Invalid index"):
+ (1, [1, 2]) in A
@pytest.mark.xfail("'Needs investigation'", strict=True)
@@ -3226,111 +3219,109 @@ def test_nbytes(A, A_chunks):
@autocompute
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_auto(A, A_chunks, v):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- expected = binary.land[bool](A & A).new()
- B = A.dup(dtype=bool)
- for expr in [(B & B), binary.land[bool](A & A)]:
- assert expr.dtype == expected.dtype
- assert expr.nrows == expected.nrows
- assert expr.ncols == expected.ncols
- assert expr.shape == expected.shape
- assert expr.nvals == expected.nvals
- assert expr.isclose(expected)
- assert expected.isclose(expr)
- assert expr.isequal(expected)
- assert expected.isequal(expr)
- assert expr.mxv(v).isequal(expected.mxv(v))
- assert expected.T.mxv(v).isequal(expr.T.mxv(v))
- for method in [
- # "ewise_add",
- # "ewise_mult",
- # "mxm",
- # "__matmul__",
- "__and__",
- "__or__",
- # "kronecker",
- ]:
- val1 = getattr(expected, method)(expected).new()
- val2 = getattr(expected, method)(expr)
- val3 = getattr(expr, method)(expected)
- val4 = getattr(expr, method)(expr)
- assert val1.isequal(val2)
- assert val1.isequal(val3)
- assert val1.isequal(val4)
- for method in ["reduce_rowwise", "reduce_columnwise", "reduce_scalar"]:
- s1 = getattr(expected, method)(monoid.lor).new()
- s2 = getattr(expr, method)(monoid.lor)
- assert s1.isequal(s2.new())
- assert s1.isequal(s2)
-
- expected = binary.times(A & A).new()
- for expr in [binary.times(A & A)]:
- assert expr.dtype == expected.dtype
- assert expr.nrows == expected.nrows
- assert expr.ncols == expected.ncols
- assert expr.shape == expected.shape
- assert expr.nvals == expected.nvals
- assert expr.isclose(expected)
- assert expected.isclose(expr)
- assert expr.isequal(expected)
- assert expected.isequal(expr)
- assert expr.mxv(v).isequal(expected.mxv(v))
- assert expected.T.mxv(v).isequal(expr.T.mxv(v))
- for method in [
- "ewise_add",
- "ewise_mult",
- "mxm",
- # "__matmul__",
- # "__and__",
- # "__or__",
- "kronecker",
- ]:
- val1 = getattr(expected, method)(expected).new()
- val2 = getattr(expected, method)(expr)
- val3 = getattr(expr, method)(expected)
- val4 = getattr(expr, method)(expr)
- assert val1.isequal(val2)
- assert val1.isequal(val3)
- assert val1.isequal(val4)
- for method in ["reduce_rowwise", "reduce_columnwise", "reduce_scalar"]:
- s1 = getattr(expected, method)().new()
- s2 = getattr(expr, method)()
- assert s1.isequal(s2.new())
- assert s1.isequal(s2)
-
- expected = semiring.plus_times(A @ v).new()
- for expr in [(A @ v), (v @ A.T), semiring.plus_times(A @ v)]:
- assert expr.vxm(A).isequal(expected.vxm(A))
- assert expr.vxm(A).new(mask=expr.S).isequal(expected.vxm(A).new(mask=expected.S))
- assert expr.vxm(A).new(mask=expr.V).isequal(expected.vxm(A).new(mask=expected.V))
+def test_auto(As, A_chunks, v):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ expected = binary.land[bool](A & A).new()
+ B = A.dup(dtype=bool)
+ for expr in [(B & B), binary.land[bool](A & A)]:
+ assert expr.dtype == expected.dtype
+ assert expr.nrows == expected.nrows
+ assert expr.ncols == expected.ncols
+ assert expr.shape == expected.shape
+ assert expr.nvals == expected.nvals
+ assert expr.isclose(expected)
+ assert expected.isclose(expr)
+ assert expr.isequal(expected)
+ assert expected.isequal(expr)
+ assert expr.mxv(v).isequal(expected.mxv(v))
+ assert expected.T.mxv(v).isequal(expr.T.mxv(v))
+ for method in [
+ # "ewise_add",
+ # "ewise_mult",
+ # "mxm",
+ # "__matmul__",
+ "__and__",
+ "__or__",
+ # "kronecker",
+ ]:
+ val1 = getattr(expected, method)(expected).new()
+ val2 = getattr(expected, method)(expr)
+ val3 = getattr(expr, method)(expected)
+ val4 = getattr(expr, method)(expr)
+ assert val1.isequal(val2)
+ assert val1.isequal(val3)
+ assert val1.isequal(val4)
+ for method in ["reduce_rowwise", "reduce_columnwise", "reduce_scalar"]:
+ s1 = getattr(expected, method)(monoid.lor).new()
+ s2 = getattr(expr, method)(monoid.lor)
+ assert s1.isequal(s2.new())
+ assert s1.isequal(s2)
+
+ expected = binary.times(A & A).new()
+ for expr in [binary.times(A & A)]:
+ assert expr.dtype == expected.dtype
+ assert expr.nrows == expected.nrows
+ assert expr.ncols == expected.ncols
+ assert expr.shape == expected.shape
+ assert expr.nvals == expected.nvals
+ assert expr.isclose(expected)
+ assert expected.isclose(expr)
+ assert expr.isequal(expected)
+ assert expected.isequal(expr)
+ assert expr.mxv(v).isequal(expected.mxv(v))
+ assert expected.T.mxv(v).isequal(expr.T.mxv(v))
+ for method in [
+ "ewise_add",
+ "ewise_mult",
+ "mxm",
+ # "__matmul__",
+ # "__and__",
+ # "__or__",
+ # "kronecker",
+ ]:
+ val1 = getattr(expected, method)(expected).new()
+ val2 = getattr(expected, method)(expr)
+ val3 = getattr(expr, method)(expected)
+ val4 = getattr(expr, method)(expr)
+ assert val1.isequal(val2)
+ assert val1.isequal(val3)
+ assert val1.isequal(val4)
+ for method in ["reduce_rowwise", "reduce_columnwise", "reduce_scalar"]:
+ s1 = getattr(expected, method)().new()
+ s2 = getattr(expr, method)()
+ assert s1.isequal(s2.new())
+ assert s1.isequal(s2)
+
+ expected = semiring.plus_times(A @ v).new()
+ for expr in [(A @ v), (v @ A.T), semiring.plus_times(A @ v)]:
+ assert expr.vxm(A).isequal(expected.vxm(A))
+ assert expr.vxm(A).new(mask=expr.S).isequal(expected.vxm(A).new(mask=expected.S))
+ assert expr.vxm(A).new(mask=expr.V).isequal(expected.vxm(A).new(mask=expected.V))
@autocompute
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_auto_assign(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- expected = A.dup()
- B = A[1:4, 1:4].new(dtype=bool)
- expr = B & B
- expected[:3, :3] = expr.new()
- A[:3, :3] = expr
- assert expected.isequal(A)
- with pytest.raises(TypeError):
- # Not yet supported, but we could!
- A[:3, :3] = A[1:4, 1:4]
- v = A[2:5, 5].new(dtype=bool)
- expr = v & v
- A[:3, 4] << expr
- expected[:3, 4] << expr.new()
- assert expected.isequal(A)
+def test_auto_assign(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ expected = A.dup()
+ B = A[1:4, 1:4].new(dtype=bool)
+ expr = B & B
+ expected[:3, :3] = expr.new()
+ A[:3, :3] = expr
+ assert expected.isequal(A)
+ with pytest.raises(TypeError):
+ # Not yet supported, but we could!
+ A[:3, :3] = A[1:4, 1:4]
+ v = A[2:5, 5].new(dtype=bool)
+ expr = v & v
+ A[:3, 4] << expr
+ expected[:3, 4] << expr.new()
+ assert expected.isequal(A)
@autocompute
@@ -3437,149 +3428,147 @@ def test_flatten(A, A_chunks):
v.ss.reshape(A.shape + (1,))
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_autocompute_argument_messages(A, A_chunks, v):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- with pytest.raises(TypeError, match="autocompute"):
- A.ewise_mult(A & A)
- with pytest.raises(TypeError, match="autocompute"):
- A.mxv(A @ v)
+def test_autocompute_argument_messages(As, A_chunks, v):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ with pytest.raises(TypeError, match="autocompute"):
+ A.ewise_mult(A & A)
+ with pytest.raises(TypeError, match="autocompute"):
+ A.mxv(A @ v)
@autocompute
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_infix_sugar(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert type(A + 1) is not Matrix
- assert binary.plus(A, 1).isequal(A + 1)
- assert binary.plus(A.T, 1).isequal(A.T + 1)
- assert binary.plus(1, A).isequal(1 + A)
- assert binary.minus(A, 1).isequal(A - 1)
- assert binary.minus(1, A).isequal(1 - A)
- assert binary.times(A, 2).isequal(A * 2)
- assert binary.times(2, A).isequal(2 * A)
- assert binary.truediv(A, 2).isequal(A / 2)
- assert binary.truediv(5, A).isequal(5 / A)
- assert binary.floordiv(A, 2).isequal(A // 2)
- assert binary.floordiv(5, A).isequal(5 // A)
- assert binary.numpy.mod(A, 2).isequal(A % 2)
- assert binary.numpy.mod(5, A).isequal(5 % A)
- assert binary.pow(A, 2).isequal(A ** 2)
- assert binary.pow(2, A).isequal(2 ** A)
- assert binary.pow(A, 2).isequal(pow(A, 2))
- assert unary.ainv(A).isequal(-A)
- assert unary.ainv(A.T).isequal(-A.T)
- B = A.dup(dtype=bool)
- assert unary.lnot(B).isequal(~B)
- assert unary.lnot(B.T).isequal(~B.T)
- with pytest.raises(TypeError):
- assert unary.lnot(A).isequal(~A)
- with pytest.raises(TypeError):
- assert unary.lnot(A.T).isequal(~A.T)
- assert binary.lxor(True, B).isequal(True ^ B)
- assert binary.lxor(B, True).isequal(B ^ True)
- with pytest.raises(TypeError):
- A ^ True
- with pytest.raises(TypeError):
- A ^ B
- with pytest.raises(TypeError):
- 6 ^ B
- assert binary.lt(A, 4).isequal(A < 4)
- assert binary.le(A, 4).isequal(A <= 4)
- assert binary.gt(A, 4).isequal(A > 4)
- assert binary.ge(A, 4).isequal(A >= 4)
- assert binary.eq(A, 4).isequal(A == 4)
- assert binary.ne(A, 4).isequal(A != 4)
- x, y = divmod(A, 3)
- assert binary.floordiv(A, 3).isequal(x)
- assert binary.numpy.mod(A, 3).isequal(y)
- assert binary.fmod(A, 3).isequal(y)
- assert A.isequal(binary.plus((3 * x) & y))
- x, y = divmod(-A, 3)
- assert binary.floordiv(-A, 3).isequal(x)
- assert binary.numpy.mod(-A, 3).isequal(y)
- # assert binary.fmod(-A, 3).isequal(y) # The reason we use numpy.mod
- assert (-A).isequal(binary.plus((3 * x) & y))
- x, y = divmod(3, A)
- assert binary.floordiv(3, A).isequal(x)
- assert binary.numpy.mod(3, A).isequal(y)
- assert binary.fmod(3, A).isequal(y)
- assert binary.plus(binary.times(A & x) & y).isequal(3 * unary.one(A))
- x, y = divmod(-3, A)
- assert binary.floordiv(-3, A).isequal(x)
- assert binary.numpy.mod(-3, A).isequal(y)
- # assert binary.fmod(-3, A).isequal(y) # The reason we use numpy.mod
- assert binary.plus(binary.times(A & x) & y).isequal(-3 * unary.one(A))
-
- assert binary.eq(A & A).isequal(A == A)
- assert binary.ne(A.T & A.T).isequal(A.T != A.T)
- assert binary.lt(A & A.T).isequal(A < A.T)
- assert binary.ge(A.T & A).isequal(A.T >= A)
-
- B = A.dup()
- B += 1
- assert type(B) is Matrix
- assert binary.plus(A, 1).isequal(B)
- B = A.dup()
- B -= 1
- assert type(B) is Matrix
- assert binary.minus(A, 1).isequal(B)
- B = A.dup()
- B *= 2
- assert type(B) is Matrix
- assert binary.times(A, 2).isequal(B)
- B = A.dup(dtype=float)
- B /= 2
- assert type(B) is Matrix
- assert binary.truediv(A, 2).isequal(B)
- B = A.dup()
- B //= 2
- assert type(B) is Matrix
- assert binary.floordiv(A, 2).isequal(B)
- B = A.dup()
- B %= 2
- assert type(B) is Matrix
- assert binary.numpy.mod(A, 2).isequal(B)
- B = A.dup()
- B **= 2
- assert type(B) is Matrix
- assert binary.pow(A, 2).isequal(B)
- B = A.dup(dtype=bool)
- B ^= True
- assert type(B) is Matrix
- assert B.isequal(~A.dup(dtype=bool))
- B = A.dup(dtype=bool)
- B ^= B
- assert type(B) is Matrix
- assert not B.reduce_scalar(agg.any).new()
-
- expr = binary.plus(A & A)
- assert unary.abs(expr).isequal(abs(expr))
- assert unary.ainv(expr).isequal(-expr)
- with pytest.raises(TypeError):
- assert unary.lnot(expr).isequal(~expr)
- with pytest.raises(TypeError):
- expr += 1
- with pytest.raises(TypeError):
- expr -= 1
- with pytest.raises(TypeError):
- expr *= 1
- with pytest.raises(TypeError):
- expr /= 1
- with pytest.raises(TypeError):
- expr //= 1
- with pytest.raises(TypeError):
- expr %= 1
- with pytest.raises(TypeError):
- expr **= 1
- with pytest.raises(TypeError):
- expr ^= 1
+def test_infix_sugar(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert type(A + 1) is not Matrix
+ assert binary.plus(A, 1).isequal(A + 1)
+ assert binary.plus(A.T, 1).isequal(A.T + 1)
+ assert binary.plus(1, A).isequal(1 + A)
+ assert binary.minus(A, 1).isequal(A - 1)
+ assert binary.minus(1, A).isequal(1 - A)
+ assert binary.times(A, 2).isequal(A * 2)
+ assert binary.times(2, A).isequal(2 * A)
+ assert binary.truediv(A, 2).isequal(A / 2)
+ assert binary.truediv(5, A).isequal(5 / A)
+ assert binary.floordiv(A, 2).isequal(A // 2)
+ assert binary.floordiv(5, A).isequal(5 // A)
+ assert binary.numpy.mod(A, 2).isequal(A % 2)
+ assert binary.numpy.mod(5, A).isequal(5 % A)
+ assert binary.pow(A, 2).isequal(A ** 2)
+ assert binary.pow(2, A).isequal(2 ** A)
+ assert binary.pow(A, 2).isequal(pow(A, 2))
+ assert unary.ainv(A).isequal(-A)
+ assert unary.ainv(A.T).isequal(-A.T)
+ B = A.dup(dtype=bool)
+ assert unary.lnot(B).isequal(~B)
+ assert unary.lnot(B.T).isequal(~B.T)
+ with pytest.raises(TypeError):
+ assert unary.lnot(A).isequal(~A)
+ with pytest.raises(TypeError):
+ assert unary.lnot(A.T).isequal(~A.T)
+ assert binary.lxor(True, B).isequal(True ^ B)
+ assert binary.lxor(B, True).isequal(B ^ True)
+ with pytest.raises(TypeError):
+ A ^ True
+ with pytest.raises(TypeError):
+ A ^ B
+ with pytest.raises(TypeError):
+ 6 ^ B
+ assert binary.lt(A, 4).isequal(A < 4)
+ assert binary.le(A, 4).isequal(A <= 4)
+ assert binary.gt(A, 4).isequal(A > 4)
+ assert binary.ge(A, 4).isequal(A >= 4)
+ assert binary.eq(A, 4).isequal(A == 4)
+ assert binary.ne(A, 4).isequal(A != 4)
+ x, y = divmod(A, 3)
+ assert binary.floordiv(A, 3).isequal(x)
+ assert binary.numpy.mod(A, 3).isequal(y)
+ assert binary.fmod(A, 3).isequal(y)
+ assert A.isequal(binary.plus((3 * x) & y))
+ x, y = divmod(-A, 3)
+ assert binary.floordiv(-A, 3).isequal(x)
+ assert binary.numpy.mod(-A, 3).isequal(y)
+ # assert binary.fmod(-A, 3).isequal(y) # The reason we use numpy.mod
+ assert (-A).isequal(binary.plus((3 * x) & y))
+ x, y = divmod(3, A)
+ assert binary.floordiv(3, A).isequal(x)
+ assert binary.numpy.mod(3, A).isequal(y)
+ assert binary.fmod(3, A).isequal(y)
+ assert binary.plus(binary.times(A & x) & y).isequal(3 * unary.one(A))
+ x, y = divmod(-3, A)
+ assert binary.floordiv(-3, A).isequal(x)
+ assert binary.numpy.mod(-3, A).isequal(y)
+ # assert binary.fmod(-3, A).isequal(y) # The reason we use numpy.mod
+ assert binary.plus(binary.times(A & x) & y).isequal(-3 * unary.one(A))
+
+ assert binary.eq(A & A).isequal(A == A)
+ assert binary.ne(A.T & A.T).isequal(A.T != A.T)
+ assert binary.lt(A & A.T).isequal(A < A.T)
+ assert binary.ge(A.T & A).isequal(A.T >= A)
+
+ B = A.dup()
+ B += 1
+ assert type(B) is Matrix
+ assert binary.plus(A, 1).isequal(B)
+ B = A.dup()
+ B -= 1
+ assert type(B) is Matrix
+ assert binary.minus(A, 1).isequal(B)
+ B = A.dup()
+ B *= 2
+ assert type(B) is Matrix
+ assert binary.times(A, 2).isequal(B)
+ B = A.dup(dtype=float)
+ B /= 2
+ assert type(B) is Matrix
+ assert binary.truediv(A, 2).isequal(B)
+ B = A.dup()
+ B //= 2
+ assert type(B) is Matrix
+ assert binary.floordiv(A, 2).isequal(B)
+ B = A.dup()
+ B %= 2
+ assert type(B) is Matrix
+ assert binary.numpy.mod(A, 2).isequal(B)
+ B = A.dup()
+ B **= 2
+ assert type(B) is Matrix
+ assert binary.pow(A, 2).isequal(B)
+ B = A.dup(dtype=bool)
+ B ^= True
+ assert type(B) is Matrix
+ assert B.isequal(~A.dup(dtype=bool))
+ B = A.dup(dtype=bool)
+ B ^= B
+ assert type(B) is Matrix
+ assert not B.reduce_scalar(agg.any).new()
+
+ expr = binary.plus(A & A)
+ assert unary.abs(expr).isequal(abs(expr))
+ assert unary.ainv(expr).isequal(-expr)
+ with pytest.raises(TypeError):
+ assert unary.lnot(expr).isequal(~expr)
+ with pytest.raises(TypeError):
+ expr += 1
+ with pytest.raises(TypeError):
+ expr -= 1
+ with pytest.raises(TypeError):
+ expr *= 1
+ with pytest.raises(TypeError):
+ expr /= 1
+ with pytest.raises(TypeError):
+ expr //= 1
+ with pytest.raises(TypeError):
+ expr %= 1
+ with pytest.raises(TypeError):
+ expr **= 1
+ with pytest.raises(TypeError):
+ expr ^= 1
@pytest.mark.slow
@@ -3892,15 +3881,15 @@ def test_deprecated(A, A_chunks):
A.ss.scan_columns()
-def test_ndim(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- assert A.ndim == 2
- assert A.ewise_mult(A).ndim == 2
- assert (A & A).ndim == 2
- assert (A @ A).ndim == 2
+def test_ndim(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ assert A.ndim == 2
+ assert A.ewise_mult(A).ndim == 2
+ assert (A & A).ndim == 2
+ assert (A @ A).ndim == 2
@pytest.mark.xfail("'Needs investigation'", strict=True)
From b0f8cac6aff6ee76e67b09cce1c73b54261d238e Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Thu, 17 Mar 2022 20:07:57 +0100
Subject: [PATCH 17/18] implemented `kronecker` and optimized `rechunk`
---
dask_grblas/expr.py | 43 +++++++++++++++++++++++++++++++++++++++++++
dask_grblas/matrix.py | 20 +++++++++++++++-----
2 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/dask_grblas/expr.py b/dask_grblas/expr.py
index 2e936bc..bf0db80 100644
--- a/dask_grblas/expr.py
+++ b/dask_grblas/expr.py
@@ -227,6 +227,38 @@ def _aggregate(
op._new(updater, self)
return output
+ def _kronecker(self, a, b, op, meta):
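+        # rechunk `a` so each chunk holds a single element, then blockwise builds the
+        # 4-D collection `frag` whose block (i, j, M, N) is the kronecker product of
+        # element a[i, j] with chunk (M, N) of `b`; the graph below relabels those
+        # blocks onto the 2-D output grid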
+ a = a.rechunk(chunks=1)
+ frag = da.core.blockwise(
+ *(partial(_kronecker, a._is_transposed, b._is_transposed), "ijMN"),
+ *((a._matrix._delayed, "ji") if a._is_transposed else (a._delayed, "ij")),
+ *((b._matrix._delayed, "NM") if b._is_transposed else (b._delayed, "MN")),
+ *(op, None),
+ dtype=np_dtype(meta.dtype),
+ meta=wrap_inner(meta),
+ )
+
+ name = "kronecker-" + tokenize(a, b)
+ b_ = b._matrix._delayed if b._is_transposed else b._delayed
+
+ out_chunks = ()
+ for axis in range(2):
+ out_chunks += (b_.chunks[axis] * a.shape[axis],)
+
+ dsk = dict()
+ for i in range(a.shape[0]):
+ for j in range(a.shape[1]):
+ for M in range(b_.numblocks[0]):
+ for N in range(b_.numblocks[1]):
+
+ dsk[(name, i*b_.numblocks[0] + M, j*b_.numblocks[1] + N)] = (
+ lambda x: x, (frag.name, i, j, M, N)
+ )
+
+ graph = HighLevelGraph.from_collections(name, dsk, dependencies=[frag])
+ out = da.core.Array(graph, name, out_chunks, meta=wrap_inner(meta))
+ return out
+
def new(self, dtype=None, *, mask=None, name=None):
_check_mask(mask, ignore_None=True)
@@ -357,6 +389,8 @@ def recall_GbDelayed_new(p, m, *args, dtype=None, mask=None, **kwargs):
)
elif self.method_name in {"vxm", "mxv", "mxm"}:
delayed = self._matmul2(meta, mask=mask)
+ elif self.method_name == "kronecker":
+ delayed = self._kronecker(self.parent, self.args[0], self.args[1], meta)
else:
raise ValueError(self.method_name)
return get_return_type(meta)(delayed)
@@ -463,6 +497,9 @@ def _update(self, updating, *, mask=None, accum=None, replace=None):
delayed = self._matmul2(meta, mask=mask)
updating(mask=mask, accum=accum, replace=replace) << get_return_type(meta)(delayed)
return
+ elif self.method_name == "kronecker":
+ updating(mask=mask, accum=accum, replace=replace) << self.new()
+ return
else:
raise ValueError(self.method_name)
updating.__init__(delayed)
@@ -2250,6 +2287,12 @@ def _transpose_if(inner_x, xt):
return inner_x.value
+def _kronecker(at, bt, a, b, op):
+ a = _transpose_if(a, at)
+ b = _transpose_if(b, bt)
+ return wrap_inner(a.kronecker(b, op=op).new())
+
+
def _matmul(op, at, bt, dtype, no_mask, mask_type, *args, computing_meta=None):
if computing_meta:
return np.empty(0, dtype=dtype)
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index e5a4ef2..8d7842c 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -563,12 +563,18 @@ def rechunk(self, inplace=False, chunks="auto"):
else:
return Matrix(donion, meta=meta)
- chunks = da.core.normalize_chunks(chunks, self.shape, dtype=np.int64)
+ delayed = self._delayed.rechunk(chunks=chunks)
if inplace:
- self.resize(*self.shape, chunks=chunks)
+ self._delayed = delayed
return
else:
- return self.resize(*self.shape, chunks=chunks, inplace=False)
+ return Matrix(delayed, meta=self._meta, nvals=self._nvals)
+ # chunks = da.core.normalize_chunks(chunks, self.shape, dtype=np.int64)
+ # if inplace:
+ # self.resize(*self.shape, chunks=chunks)
+ # return
+ # else:
+ # return self.resize(*self.shape, chunks=chunks, inplace=False)
def _diag(self, k=0, dtype=None, chunks="auto"):
kdiag_row_start = max(0, -k)
@@ -776,9 +782,12 @@ def mxm(self, other, op=semiring.plus_times):
return MatrixExpression(self, "mxm", other, op, meta=meta, nrows=self.nrows, ncols=other.ncols)
def kronecker(self, other, op=binary.times):
- assert type(other) is Matrix # TODO: or TransposedMatrix
+ gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
+ other = self._expect_type(
+ other, (Matrix, TransposedMatrix) + gb_types, within="kronecker", argname="other"
+ )
meta = self._meta.kronecker(other._meta, op=op)
- return GbDelayed(self, "kronecker", other, op, meta=meta)
+ return MatrixExpression(self, "kronecker", other, op, meta=meta)
def apply(self, op, right=None, *, left=None):
from .scalar import Scalar
@@ -1094,6 +1103,7 @@ class MatrixExpression(GbDelayed):
ewise_mult = gb.matrix.MatrixExpression.ewise_mult
isclose = gb.matrix.MatrixExpression.isclose
isequal = gb.matrix.MatrixExpression.isequal
+ kronecker = gb.matrix.MatrixExpression.kronecker
mxm = gb.matrix.MatrixExpression.mxm
mxv = gb.matrix.MatrixExpression.mxv
ncols = gb.matrix.MatrixExpression.ncols
From 20e542ef333b71096088def409942402bde59b7b Mon Sep 17 00:00:00 2001
From: Particular Miner <78448465+ParticularMiner@users.noreply.github.com>
Date: Sun, 20 Mar 2022 17:28:06 +0100
Subject: [PATCH 18/18] fixed argmin/argmax Aggregators & caught up with grblas
version 2022.3.0
---
dask_grblas/_automethods.py | 2 +-
dask_grblas/base.py | 64 +++---
dask_grblas/expr.py | 321 ++++++++++++++++++++----------
dask_grblas/matrix.py | 140 +++++++++++--
dask_grblas/scalar.py | 63 +++++-
dask_grblas/vector.py | 136 +++++++++++--
tests/from_grblas2/test_matrix.py | 207 ++++++++++---------
7 files changed, 656 insertions(+), 277 deletions(-)
diff --git a/dask_grblas/_automethods.py b/dask_grblas/_automethods.py
index eda04ae..a781ad0 100644
--- a/dask_grblas/_automethods.py
+++ b/dask_grblas/_automethods.py
@@ -5,7 +5,7 @@ def _get_value(self, attr=None, default=None):
if config.get("autocompute"):
if self._value is None:
self._value = self.new()
- if getattr(self, 'is_dOnion', False):
+ if getattr(self, "is_dOnion", False):
self._value = self._value.strip()
if attr is None:
return self._value
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index bdd5b6e..c1e80fb 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -106,10 +106,12 @@ def isequal(self, other, *, check_dtype=False):
dtype=bool,
)
if ndim > 0:
- delayed = da.core.elemwise(
- _to_scalar,
- delayed.all(),
- bool,
+ delayed = da.core.blockwise(
+ *(_to_scalar, ()),
+ *(delayed.all(), None),
+ *(bool, None),
+ dtype=np.bool_,
+ meta=wrap_inner(gb.Scalar.new(bool)),
)
return PythonScalar(delayed)
@@ -164,10 +166,12 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
dtype=bool,
)
if ndim > 0:
- delayed = da.core.elemwise(
- _to_scalar,
- delayed.all(),
- bool,
+ delayed = da.core.blockwise(
+ *(_to_scalar, ()),
+ *(delayed.all(), None),
+ *(bool, None),
+ dtype=np.bool_,
+ meta=wrap_inner(gb.Scalar.new(bool)),
)
return PythonScalar(delayed)
@@ -333,10 +337,12 @@ def nvals(self):
dtype=int,
)
if self._delayed.ndim > 0:
- delayed = da.core.elemwise(
- _to_scalar,
- delayed.sum(),
- int,
+ delayed = da.core.blockwise(
+ *(_to_scalar, ()),
+ *(delayed.sum(), None),
+ *(int, None),
+ dtype=np.int_,
+ meta=wrap_inner(gb.Scalar.new(int)),
)
return PythonScalar(delayed)
@@ -371,12 +377,11 @@ def update(self, expr, in_dOnion=False):
if any_dOnions(self, expr):
self_copy = self.__class__(self._optional_dup(), meta=self._meta)
expr_ = expr
- if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
+ if isinstance(expr, AmbiguousAssignOrExtract) and expr.has_dOnion:
def update_by_aae(c, p, k_0, k_1):
keys = k_0 if k_1 is None else (k_0, k_1)
- aae = AmbiguousAssignOrExtract(p, keys)
- return c.update(aae, in_dOnion=True)
+ return c.update(p[keys], in_dOnion=True)
if _is_pair(expr_.index):
keys_0, keys_1 = expr_.index[0], expr_.index[1]
@@ -436,7 +441,7 @@ def update_by_gbd(c, *args, **kwargs):
return
self._meta.clear()
- if typ is AmbiguousAssignOrExtract:
+ if isinstance(expr, AmbiguousAssignOrExtract):
# Extract (w << v[index])
# Is it safe/reasonable to simply replace `_delayed`?
# Should we try to preserve e.g. format or partitions?
@@ -453,7 +458,7 @@ def update_by_gbd(c, *args, **kwargs):
# "C << A.T"
C = expr.new(dtype=self.dtype)
self.__init__(C._delayed)
- elif typ is type(None):
+ elif typ is type(None): # noqa
raise TypeError("Assignment value must be a valid expression")
else:
# Anything else we need to handle?
@@ -467,12 +472,13 @@ def _update(self, expr, *, mask=None, accum=None, replace=None, in_dOnion=False)
self_copy = self.__class__(self._optional_dup(), meta=self._meta)
mask_ = mask.dOnion_if if mask is not None else None
expr_ = expr
- if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
+ if isinstance(expr, AmbiguousAssignOrExtract) and expr.has_dOnion:
def _update_by_aae(c, p, k_0, k_1, mask=None, accum=None, replace=None):
keys = k_0 if k_1 is None else (k_0, k_1)
- aae = AmbiguousAssignOrExtract(p, keys)
- return c.update(aae, mask=mask, accum=accum, replace=replace, in_dOnion=True)
+ return c.update(
+ p[keys], mask=mask, accum=accum, replace=replace, in_dOnion=True
+ )
if _is_pair(expr_.index):
keys_0, keys_1 = expr_.index[0], expr_.index[1]
@@ -535,7 +541,7 @@ def _update_by_gbd(c, *args, mask=None, accum=None, replace=None, **kwargs):
if in_dOnion:
return self
return
- if typ is AmbiguousAssignOrExtract:
+ if isinstance(expr, AmbiguousAssignOrExtract):
# Extract (w(mask=mask, accum=accum) << v[index])
expr_new = expr.new(dtype=self.dtype)
expr_delayed = expr_new._delayed
@@ -686,7 +692,7 @@ def __init__(self, kernel, meta=None):
self.kernel = kernel
# Why have ._meta and .dtype attributes? B'cos Scalar, Vector & Matrix need them
self._meta = meta
- self.dtype = getattr(meta, 'dtype', type(meta))
+ self.dtype = getattr(meta, "dtype", type(meta))
def __eq__(self, other):
if like_dOnion(other):
@@ -722,8 +728,8 @@ def persist(self, *args, **kwargs):
return value.persist(*args, **kwargs)
else:
raise AttributeError(
- f'Something went wrong: stripped dOnion {self} value {value} has'
- ' no `persist()` attribute.'
+ f"Something went wrong: stripped dOnion {self} value {value} has"
+ " no `persist()` attribute."
)
def _persist(self, *args, **kwargs):
@@ -733,8 +739,8 @@ def _persist(self, *args, **kwargs):
return value._delayed
else:
raise AttributeError(
- f'Something went wrong: stripped dOnion {self} value {value} has'
- ' no `_persist()` attribute.'
+ f"Something went wrong: stripped dOnion {self} value {value} has"
+ " no `_persist()` attribute."
)
@classmethod
@@ -851,7 +857,7 @@ def __getattr__(self, item):
try:
meta = getattr(self._meta, item, getattr(self.kernel, item))
except AttributeError:
- raise AttributeError(f'Unable to compute meta corresponding to attribute {item}.')
+ raise AttributeError(f"Unable to compute meta corresponding to attribute {item}.")
_getattr = flexible_partial(getattr, skip, item)
return self.deep_extract(meta, _getattr)
@@ -869,9 +875,7 @@ def _getattr(cls, x, attr_name, *args, **kwargs):
def like_dOnion(arg):
return arg is not None and (
- is_DOnion(arg)
- or getattr(arg, "is_dOnion", False)
- or getattr(arg, "has_dOnion", False)
+ is_DOnion(arg) or getattr(arg, "is_dOnion", False) or getattr(arg, "has_dOnion", False)
)
diff --git a/dask_grblas/expr.py b/dask_grblas/expr.py
index bf0db80..d506a5d 100644
--- a/dask_grblas/expr.py
+++ b/dask_grblas/expr.py
@@ -124,39 +124,94 @@ def _matmul2(self, meta, mask=None):
op = self.args[1]
sum_meta = wrap_inner(meta)
- if mask is None:
- out = da.core.blockwise(
- partial(_matmul2, op, meta.dtype, at, bt),
- out_ind,
- a,
- lhs_ind,
- b,
- rhs_ind,
- adjust_chunks={compress_axis: 1},
- dtype=np.result_type(a, b),
- concatenate=False,
- meta=FakeInnerTensor(meta, compress_axis),
- )
+ if op.is_positional:
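+            # positional ops need each block's global offsets, so align the chunks
+            # of `a` and `b` and pass per-chunk row/column ranges to the kernel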
+ _, (a, b) = da.core.unify_chunks(a, lhs_ind, b, rhs_ind)
+ x = build_chunk_ranges_dask_array(a, 0, "row-ranges-" + tokenize(a, 0))
+ a_ranges = (x, (lhs_ind[0],))
+ if a.ndim == 2:
+ x = build_chunk_ranges_dask_array(a, 1, "col-ranges-" + tokenize(a, 1))
+ a_ranges += (x, (lhs_ind[1],))
+
+ x = build_chunk_ranges_dask_array(b, 0, "row-ranges-" + tokenize(b, 0))
+ b_ranges = (x, (rhs_ind[0],))
+ if b.ndim == 2:
+ x = build_chunk_ranges_dask_array(b, 1, "col-ranges-" + tokenize(b, 1))
+ b_ranges += (x, (rhs_ind[1],))
+
+ if mask is None:
+ matmul_pos = partial(
+ _matmul2_positional,
+ op,
+ meta.dtype,
+ at,
+ bt,
+ a.shape,
+ b.shape,
+ )
+ out = da.core.blockwise(
+ *(matmul_pos, out_ind),
+ *(a, lhs_ind),
+ *(b, rhs_ind),
+ *(a_ranges + b_ranges),
+ adjust_chunks={compress_axis: 1},
+ dtype=np.result_type(a, b),
+ concatenate=False,
+ meta=FakeInnerTensor(meta, compress_axis),
+ )
+ else:
+ m = mask.mask._delayed
+ grblas_mask_type = get_grblas_type(mask)
+ mask_ind = list(out_ind)
+ mask_ind.remove(compress_axis)
+ mask_ind = tuple(mask_ind)
+ out = da.core.blockwise(
+ partial(_matmul2_masked, op, meta.dtype, at, bt, grblas_mask_type),
+ out_ind,
+ m,
+ mask_ind,
+ a,
+ lhs_ind,
+ b,
+ rhs_ind,
+ adjust_chunks={compress_axis: 1},
+ dtype=np.result_type(a, b),
+ concatenate=False,
+ meta=FakeInnerTensor(meta, compress_axis),
+ )
else:
- m = mask.mask._delayed
- grblas_mask_type = get_grblas_type(mask)
- mask_ind = list(out_ind)
- mask_ind.remove(compress_axis)
- mask_ind = tuple(mask_ind)
- out = da.core.blockwise(
- partial(_matmul2_masked, op, meta.dtype, at, bt, grblas_mask_type),
- out_ind,
- m,
- mask_ind,
- a,
- lhs_ind,
- b,
- rhs_ind,
- adjust_chunks={compress_axis: 1},
- dtype=np.result_type(a, b),
- concatenate=False,
- meta=FakeInnerTensor(meta, compress_axis),
- )
+ if mask is None:
+ out = da.core.blockwise(
+ partial(_matmul2, op, meta.dtype, at, bt),
+ out_ind,
+ a,
+ lhs_ind,
+ b,
+ rhs_ind,
+ adjust_chunks={compress_axis: 1},
+ dtype=np.result_type(a, b),
+ concatenate=False,
+ meta=FakeInnerTensor(meta, compress_axis),
+ )
+ else:
+ m = mask.mask._delayed
+ grblas_mask_type = get_grblas_type(mask)
+ mask_ind = list(out_ind)
+ mask_ind.remove(compress_axis)
+ mask_ind = tuple(mask_ind)
+ out = da.core.blockwise(
+ partial(_matmul2_masked, op, meta.dtype, at, bt, grblas_mask_type),
+ out_ind,
+ m,
+ mask_ind,
+ a,
+ lhs_ind,
+ b,
+ rhs_ind,
+ adjust_chunks={compress_axis: 1},
+ dtype=np.result_type(a, b),
+ concatenate=False,
+ meta=FakeInnerTensor(meta, compress_axis),
+ )
# out has an extra dimension (a slab or a bar), and now reduce along it
out = sum_by_monoid(op.monoid, out, axis=compress_axis, meta=sum_meta)
@@ -179,26 +234,24 @@ def _reduce_along_axis(self, axis, dtype):
return delayed
def _reduce_scalar(self, dtype):
- assert not self.kwargs
op = self.args[0]
at = self.parent._is_transposed
delayed = self.parent._matrix._delayed if at else self.parent._delayed
delayed = da.reduction(
delayed,
- partial(_reduce_scalar, op, dtype),
- partial(_reduce_combine, op),
+ partial(_reduce_scalar, op, dtype, **self.kwargs),
+ partial(_reduce_combine, op, **self.kwargs),
concatenate=False,
dtype=np_dtype(dtype),
)
return delayed
def _reduce(self, dtype):
- assert not self.kwargs
op = self.args[0]
delayed = da.reduction(
self.parent._delayed,
- partial(_reduce, op, dtype),
- partial(_reduce_combine, op),
+ partial(_reduce, op, dtype, **self.kwargs),
+ partial(_reduce_combine, op, **self.kwargs),
concatenate=False,
dtype=np_dtype(dtype),
)
@@ -250,9 +303,10 @@ def _kronecker(self, a, b, op, meta):
for j in range(a.shape[1]):
for M in range(b_.numblocks[0]):
for N in range(b_.numblocks[1]):
-
- dsk[(name, i*b_.numblocks[0] + M, j*b_.numblocks[1] + N)] = (
- lambda x: x, (frag.name, i, j, M, N)
+
+ dsk[(name, i * b_.numblocks[0] + M, j * b_.numblocks[1] + N)] = (
+ lambda x: x,
+ (frag.name, i, j, M, N),
)
graph = HighLevelGraph.from_collections(name, dsk, dependencies=[frag])
@@ -268,14 +322,18 @@ def recall_GbDelayed_new(p, m, *args, dtype=None, mask=None, **kwargs):
gbd = getattr(p, m)(*args, **kwargs)
return gbd.new(dtype=dtype, mask=mask)
+ meta_args = list(getattr(v, "_meta", v) for v in self.args)
meta_kwargs = {k: getattr(v, "_meta", v) for k, v in self.kwargs.items()}
if self.method_name.startswith(("reduce", "apply")):
# unary operations
a = self.parent
op = self.args[0]
- args = self.args[1:]
if self.method_name == "apply":
- # grblas does not like empty Scalars!
+ # grblas `apply()` does not like empty Scalars!
+ if len(meta_args) > 1 and type(meta_args[1]) is gb.Scalar:
+ meta_args[1] = gb.Scalar.from_value(1, dtype=meta_args[1].dtype)
+ if len(meta_args) > 2 and type(meta_args[2]) is gb.Scalar:
+ meta_args[2] = gb.Scalar.from_value(1, dtype=meta_args[2].dtype)
if "left" in meta_kwargs and type(meta_kwargs["left"]) is gb.Scalar:
meta_kwargs["left"] = gb.Scalar.from_value(
1, dtype=meta_kwargs["left"].dtype
@@ -288,7 +346,9 @@ def recall_GbDelayed_new(p, m, *args, dtype=None, mask=None, **kwargs):
# grblas bug occurs when shape is (0, 0)
if a._meta.shape == (0,) * a.ndim:
a._meta.resize(*((1,) * a.ndim))
- meta = getattr(a._meta, self.method_name)(op, *args, **meta_kwargs).new(dtype=dtype)
+ meta = getattr(a._meta, self.method_name)(*meta_args, **meta_kwargs).new(
+ dtype=dtype
+ )
meta.clear()
else:
# binary operations
@@ -331,6 +391,7 @@ def recall_GbDelayed_new(p, m, *args, dtype=None, mask=None, **kwargs):
)
return get_return_type(meta)(donion, meta=meta)
+ # no dOnions
if mask is not None:
meta = self._meta.new(dtype=dtype, mask=mask._meta)
delayed_mask = mask.mask._delayed
@@ -340,6 +401,8 @@ def recall_GbDelayed_new(p, m, *args, dtype=None, mask=None, **kwargs):
delayed_mask = None
grblas_mask_type = None
+ meta.clear()
+
if self.method_name.startswith("reduce"):
op = self._meta.op
if op is not None and op.opclass == "Aggregator":
@@ -362,25 +425,27 @@ def recall_GbDelayed_new(p, m, *args, dtype=None, mask=None, **kwargs):
)
for key in self.kwargs
}
- pt = getattr(self.parent, '_is_transposed', False)
- xts = [getattr(arg, '_is_transposed', False) for arg in self.args]
- axes = 'ij' if self.parent.ndim == 2 else 'i'
+ pt = getattr(self.parent, "_is_transposed", False)
+ xts = [getattr(arg, "_is_transposed", False) for arg in self.args]
+ axes = "ij" if self.parent.ndim == 2 else "i"
delayed = da.core.blockwise(
*(partial(_expr_new, pt, xts), axes),
*(self.method_name, None),
*(dtype, None),
*(grblas_mask_type, None),
*(
- (self.parent._matrix._delayed, axes[::-1]) if pt
+ (self.parent._matrix._delayed, axes[::-1])
+ if pt
else (self.parent._delayed, axes)
),
- *(delayed_mask, (None if mask is None else out_axes)),
+ *(delayed_mask, (None if mask is None else axes)),
*flatten(
(
- (x._matrix._delayed, axes[::-1]) if xt
+ (x._matrix._delayed, axes[::-1])
+ if xt
else (x._delayed, (None if x._is_scalar else axes))
)
- if isinstance(x, BaseType) or getattr(x, '_is_transposed', False)
+ if isinstance(x, BaseType) or getattr(x, "_is_transposed", False)
else (x, None)
for x, xt in zip(self.args, xts)
),
@@ -539,7 +604,16 @@ def _new_matrix(self, dtype, nrows=0, ncols=0, *, name=None):
class IndexerResolver:
+ __slots__ = "obj", "indices", "is_dOnion", "shape"
+
def __init__(self, obj, indices, check_shape=True):
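+        # the index itself may be (or contain) a DOnion, in which case the true
+        # shape is unknown until compute time and shape checking must be skipped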
+ index_is_dOnion = obj.ndim == 1 and is_DOnion(indices)
+ index_is_dOnion = index_is_dOnion or (
+ obj.ndim == 2 and _is_pair(indices) and (is_DOnion(indices[0]) or is_DOnion(indices[1]))
+ )
+ self.is_dOnion = index_is_dOnion
+ check_shape = not (index_is_dOnion or obj.is_dOnion)
+
self.obj = obj
if indices is Ellipsis:
from .vector import Vector
@@ -547,6 +621,7 @@ def __init__(self, obj, indices, check_shape=True):
if type(obj) in {Vector, gb.Vector}:
normalized = slice(None).indices(obj._size)
self.indices = [AxisIndex(obj._size, slice(*normalized))]
+ self.shape = (obj._size,)
else:
normalized0 = slice(None).indices(obj._nrows)
normalized1 = slice(None).indices(obj._ncols)
@@ -554,12 +629,14 @@ def __init__(self, obj, indices, check_shape=True):
AxisIndex(obj._nrows, slice(*normalized0)),
AxisIndex(obj._ncols, slice(*normalized1)),
]
+ self.shape = (obj._nrows, obj._ncols)
else:
if not check_shape and hasattr(obj, "_meta"):
shape = obj._meta.shape
else:
shape = obj.shape
self.indices = self.parse_indices(indices, shape, check_shape)
+ self.shape = tuple(index.size for index in self.indices if index.size is not None)
@property
def is_single_element(self):
@@ -702,10 +779,7 @@ class Updater:
__eq__ = gb.expr.Updater.__eq__
def __init__(self, parent, *, mask=None, accum=None, replace=False, input_mask=None):
- if (
- mask is not None
- and input_mask is not None
- ):
+ if mask is not None and input_mask is not None:
raise TypeError("mask and input_mask arguments cannot both be given")
_check_mask(mask, ignore_None=True)
@@ -759,7 +833,7 @@ def __lshift__(self, delayed):
def update(self, delayed):
# Occurs when user calls C(params) << delayed
if self.input_mask is not None:
- if type(delayed) is AmbiguousAssignOrExtract:
+ if isinstance(delayed, AmbiguousAssignOrExtract):
# w(input_mask) << v[index]
if self.parent is delayed.parent:
# replace `v` with a copy of itself if `w` is `v`
@@ -809,7 +883,7 @@ def update(self, delayed):
def _csc_chunk(row_range, col_range, indices, red_columns, track_indices=False):
"""
create chunk of Reduce_Assign Matrix in Compressed Sparse Column (CSC) format
-
+
(Used in `reduce_assign()`)
"""
row_range = row_range[0]
@@ -1441,56 +1515,45 @@ def _adjust_meta_to_index(meta, index):
class AmbiguousAssignOrExtract:
- __bool__ = gb.expr.AmbiguousAssignOrExtract.__bool__
- __eq__ = gb.expr.AmbiguousAssignOrExtract.__eq__
- __float__ = gb.expr.AmbiguousAssignOrExtract.__float__
- __int__ = gb.expr.AmbiguousAssignOrExtract.__int__
- __index__ = gb.expr.AmbiguousAssignOrExtract.__index__
+ __slots__ = (
+ "has_dOnion",
+ "index",
+ "parent",
+ "resolved_indexes",
+ "_meta",
+ "_value",
+ "__weakref__",
+ )
+ _is_scalar = False
def __init__(self, parent, index, meta=None):
self.parent = parent
- self.index = index
- input_ndim = parent.ndim
- index_is_dOnion = input_ndim == 1 and is_DOnion(index)
- index_is_dOnion = index_is_dOnion or (
- input_ndim == 2 and _is_pair(index) and (is_DOnion(index[0]) or is_DOnion(index[1]))
- )
- if parent.is_dOnion or index_is_dOnion:
+ self.resolved_indexes = index
+ self.index = _squeeze(tuple(i.index for i in index.indices))
+ self._value = None
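+        # With a dOnion parent or index the true shape is unknown, so the meta is
+        # derived from the parent's meta rather than by performing the extract.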
+ if parent.is_dOnion or index.is_dOnion:
self.has_dOnion = True
- self.resolved_indexes = IndexerResolver(self.parent, index, check_shape=False)
- self._meta = _adjust_meta_to_index(parent._meta, index)
+ self._meta = _adjust_meta_to_index(parent._meta, self.index)
else:
self.has_dOnion = False
- self.resolved_indexes = IndexerResolver(parent, index)
- self._meta = parent._meta[index] if meta is None else meta
-
- # infix expression requirements:
- shape = tuple(i.size for i in self.resolved_indexes.indices if i.size)
- self.ndim = len(shape)
- self.output_type = _get_grblas_type_with_ndims(self.ndim)
- if self.ndim == 1:
- self._size = shape[0]
- elif self.ndim == 2:
- self._nrows = shape[0]
- self._ncols = shape[1]
+ self._meta = parent._meta[self.index] if meta is None else meta
@staticmethod
def _extract_single_element(x, xt, T, dxn, indices, meta, dtype):
-
def getitem(inner, key, dtype):
return wrap_inner(inner.value[key].new(dtype=dtype))
name = "extract_single_element-" + tokenize(x, xt, indices)
-
+
block = ()
element = ()
for axis, i in enumerate(indices):
stops_ = np.cumsum(x.chunks[T[axis]])
starts = np.roll(stops_, 1)
starts[0] = 0
-
+
blockid = np.arange(x.numblocks[T[axis]])
-
+
# locate chunk containing element:
filter = (starts <= i) & (i < stops_)
(R,) = blockid[filter]
@@ -1499,9 +1562,7 @@ def getitem(inner, key, dtype):
element += (i - starts[R],)
dsk = dict()
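+        # Build a single-task graph keyed by `name` that pulls the element out of
+        # the chunk located above.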
- dsk[(name,)] = (
- getitem, (x.name, *block[::dxn]), _squeeze(element[::dxn]), dtype
- )
+ dsk[(name,)] = (getitem, (x.name, *block[::dxn]), _squeeze(element[::dxn]), dtype)
graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])
out = da.core.Array(graph, name, (), meta=wrap_inner(meta))
return out
@@ -1514,9 +1575,7 @@ def new(self, *, dtype=None, mask=None, input_mask=None, name=None):
def _recall_getitem(parent, keys_0, keys_1, dtype, mask, input_mask):
keys = keys_0 if keys_1 is None else (keys_0, keys_1)
- return AmbiguousAssignOrExtract(parent, keys).new(
- dtype=dtype, mask=mask, input_mask=input_mask
- )
+ return parent[keys].new(dtype=dtype, mask=mask, input_mask=input_mask)
meta = self._meta.new(dtype=dtype)
@@ -1739,12 +1798,18 @@ def update(self, obj):
def __lshift__(self, rhs):
self.update(rhs)
+ @property
+ def dtype(self):
+ return self.parent.dtype
+
@property
def value(self):
- self._meta.value
+ self._meta.new().value
scalar = self.new()
return scalar.value
+ dup = new
+
def _uniquify(ndim, index, obj, mask=None, ot=False):
# here we follow the SuiteSparse:GraphBLAS specification for
@@ -1817,7 +1882,7 @@ def update(self, obj):
if not (
isinstance(obj, Number)
or isinstance(obj, BaseType)
- or getattr(obj, '_is_transposed', False)
+ or getattr(obj, "_is_transposed", False)
):
obj = self.parent._expect_type(
obj,
@@ -1836,7 +1901,7 @@ def _recall_update(lhs, mask, accum, replace, keys_0, keys_1, obj, subassign):
updater = Updater(lhs, mask=mask, accum=accum, replace=replace)
Assigner(updater, keys, subassign=subassign).update(obj)
return lhs
-
+
lhs = self.parent
lhs_copy = lhs.__class__(lhs._optional_dup(), meta=lhs._meta)
@@ -1863,7 +1928,7 @@ def _recall_update(lhs, mask, accum, replace, keys_0, keys_1, obj, subassign):
return
# no dOnions
- if getattr(obj, '_is_transposed', False):
+ if getattr(obj, "_is_transposed", False):
obj_transposed = obj._is_transposed
obj = obj._matrix
else:
@@ -2149,9 +2214,7 @@ def __init__(self, value, compress_axis):
def _expr_new(xt, ats, method_name, dtype, grblas_mask_type, x, mask, *args, **kwargs):
# expr.new(...)
- args = [
- _transpose_if(y, yt) if isinstance(y, InnerBaseType) else y for y, yt in zip(args, ats)
- ]
+ args = [_transpose_if(y, yt) if isinstance(y, InnerBaseType) else y for y, yt in zip(args, ats)]
kwargs = {
key: (kwargs[key].value if isinstance(kwargs[key], InnerBaseType) else kwargs[key])
for key in kwargs
@@ -2218,32 +2281,34 @@ def _add_blocks(monoid_, x, y):
return x
-def _reduce_scalar(op, gb_dtype, x, axis=None, keepdims=None, computing_meta=None, dtype=None):
+def _reduce_scalar(
+ op, gb_dtype, x, axis=None, keepdims=None, computing_meta=None, dtype=None, **kwargs
+):
"""Call reduce_scalar on each chunk"""
if computing_meta:
return np.empty(0, dtype=dtype)
- return wrap_inner(x.value.reduce_scalar(op).new(dtype=gb_dtype))
+ return wrap_inner(x.value.reduce_scalar(op, **kwargs).new(dtype=gb_dtype))
-def _reduce(op, gb_dtype, x, axis=None, keepdims=None, computing_meta=None, dtype=None):
+def _reduce(op, gb_dtype, x, axis=None, keepdims=None, computing_meta=None, dtype=None, **kwargs):
"""Call reduce on each chunk"""
if computing_meta:
return np.empty(0, dtype=dtype)
- return wrap_inner(x.value.reduce(op).new(dtype=gb_dtype))
+ return wrap_inner(x.value.reduce(op, **kwargs).new(dtype=gb_dtype))
-def _reduce_combine(op, x, axis=None, keepdims=None, computing_meta=None, dtype=None):
+def _reduce_combine(op, x, axis=None, keepdims=None, computing_meta=None, dtype=None, **kwargs):
"""Combine results from reduce or reduce_scalar on each chunk"""
if computing_meta:
return np.empty(0, dtype=dtype)
if type(x) is list:
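+        # Chunks that reduced to an empty scalar contribute None; drop them here.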
# do we need `gb_dtype` instead of `np_dtype` below?
if type(x[0]) is list:
- vals = [val.value.value for sublist in x for val in sublist]
+            vals = [
+                val.value.value for sublist in x for val in sublist if val.value.value is not None
+            ]
else:
- vals = [val.value.value for val in x]
+            vals = [val.value.value for val in x if val.value.value is not None]
values = gb.Vector.from_values(list(range(len(vals))), vals, size=len(vals), dtype=dtype)
- return wrap_inner(values.reduce(op).new())
+ return wrap_inner(values.reduce(op, **kwargs).new())
return x
@@ -2313,6 +2378,46 @@ def _matmul(op, at, bt, dtype, no_mask, mask_type, *args, computing_meta=None):
return wrap_inner(gb_obj)
+def _expand(inner, fullshape, *index_ranges):
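+    """Embed a chunk into a full-size Vector/Matrix at its global index range."""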
+ a = inner
+ if a.ndim == 1:
+ (a_index_range,) = index_ranges
+ balloon = gb.Vector.new(a.value.dtype, *fullshape)
+ balloon[a_index_range.start : a_index_range.stop] << a.value
+ else:
+ (a_row_range, a_col_range) = index_ranges
+ balloon = gb.Matrix.new(a.value.dtype, *fullshape)
+ (
+ balloon[
+ a_row_range.start : a_row_range.stop,
+ a_col_range.start : a_col_range.stop,
+ ]
+ << a.value
+ )
+
+ return wrap_inner(balloon)
+
+
+def _matmul2_positional(
+ op, dtype, at, bt, a_fullshape, b_fullshape, a, b, *args, computing_meta=None
+):
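+    """Pad each chunk to full size, multiply, then slice the result back down.
+
+    `args` holds the per-chunk index ranges supplied by blockwise: the first
+    a.ndim entries belong to `a`, the next b.ndim entries to `b`.
+    """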
+ a_ranges = (args[0][0],) if a.ndim == 1 else (args[0][0], args[1][0])
+ b_ranges = (args[a.ndim][0],) if b.ndim == 1 else (args[a.ndim][0], args[a.ndim + 1][0])
+
+ a_expanded = _expand(a, a_fullshape, *a_ranges)
+ b_expanded = _expand(b, b_fullshape, *b_ranges)
+
+ res = _matmul2(op, dtype, at, bt, a_expanded, b_expanded, computing_meta=computing_meta)
+
+ # shrink expanded result to original size:
+ indices = slice(a_ranges[0].start, a_ranges[0].stop)
+ if b.ndim == 2:
+ cols = slice(b_ranges[1].start, b_ranges[1].stop)
+ indices = cols if a.ndim == 1 else (indices, cols)
+
+ return res[indices].new()
+
+
def _matmul2(op, dtype, at, bt, a, b, computing_meta=None):
left = _transpose_if(a, at)
right = _transpose_if(b, bt)
diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py
index 8d7842c..6eb2767 100644
--- a/dask_grblas/matrix.py
+++ b/dask_grblas/matrix.py
@@ -13,7 +13,7 @@
from . import _automethods
from .base import BaseType, InnerBaseType, DOnion, is_DOnion, any_dOnions, Box, skip
from .base import _nvals as _nvals_in_chunk
-from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater
+from .expr import AmbiguousAssignOrExtract, IndexerResolver, GbDelayed, Updater
from .mask import StructuralMask, ValueMask
from ._ss.matrix import ss
from .utils import (
@@ -678,8 +678,19 @@ def _diag_old(self, k=0, dtype=None, chunks="auto"):
nvals = 0 if self._nvals == 0 else None
return get_return_type(meta)(delayed, nvals=nvals)
- def __getitem__(self, index):
- return AmbiguousAssignOrExtract(self, index)
+ def __getitem__(self, keys):
+ resolved_indexes = IndexerResolver(self, keys)
+ shape = resolved_indexes.shape
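+        # Dispatch on the resolved shape: () -> scalar, (n,) -> vector, (m, n) -> matrix.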
+ if not shape:
+ from .scalar import ScalarIndexExpr
+
+ return ScalarIndexExpr(self, resolved_indexes)
+ elif len(shape) == 1:
+ from .vector import VectorIndexExpr
+
+ return VectorIndexExpr(self, resolved_indexes, *shape)
+ else:
+ return MatrixIndexExpr(self, resolved_indexes, *shape)
def __delitem__(self, keys, in_dOnion=False):
if is_DOnion(self._delayed):
@@ -726,7 +737,9 @@ def ewise_add(self, other, op=monoid.plus, *, require_monoid=True):
else:
raise
- return MatrixExpression(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta)
+ return MatrixExpression(
+ self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta
+ )
def ewise_mult(self, other, op=binary.times):
gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
@@ -747,9 +760,7 @@ def ewise_mult(self, other, op=binary.times):
def mxv(self, other, op=semiring.plus_times):
from .vector import Vector, VectorExpression
- other = self._expect_type(
- other, (Vector, gb.Vector), within="mxv", argname="other"
- )
+ other = self._expect_type(other, (Vector, gb.Vector), within="mxv", argname="other")
try:
meta = self._meta.mxv(other._meta, op=op)
@@ -779,7 +790,9 @@ def mxm(self, other, op=semiring.plus_times):
else:
raise
- return MatrixExpression(self, "mxm", other, op, meta=meta, nrows=self.nrows, ncols=other.ncols)
+ return MatrixExpression(
+ self, "mxm", other, op, meta=meta, nrows=self.nrows, ncols=other.ncols
+ )
def kronecker(self, other, op=binary.times):
gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
@@ -790,15 +803,17 @@ def kronecker(self, other, op=binary.times):
return MatrixExpression(self, "kronecker", other, op, meta=meta)
def apply(self, op, right=None, *, left=None):
- from .scalar import Scalar
-
left_meta = left
right_meta = right
- if type(left) is Scalar:
- left_meta = left.dtype.np_type(0)
- if type(right) is Scalar:
- right_meta = right.dtype.np_type(0)
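+        # Replace empty scalar metas with zero-valued scalars before computing the meta.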
+ if isinstance(left, BaseType):
+ left_meta = left._meta
+ if left_meta._is_scalar and left_meta.is_empty:
+ left_meta = gb.Scalar.from_value(0, dtype=left_meta.dtype)
+ if isinstance(right, BaseType):
+ right_meta = right._meta
+ if right_meta._is_scalar and right_meta.is_empty:
+ right_meta = gb.Scalar.from_value(0, dtype=right_meta.dtype)
if self._meta.shape == (0,) * self.ndim:
self._meta.resize(*((1,) * self.ndim))
@@ -817,11 +832,11 @@ def reduce_columnwise(self, op=monoid.plus):
meta = self._meta.reduce_columnwise(op)
return VectorExpression(self, "reduce_columnwise", op, meta=meta, size=self.ncols)
- def reduce_scalar(self, op=monoid.plus):
+ def reduce_scalar(self, op=monoid.plus, *, allow_empty=True):
from .scalar import ScalarExpression
meta = self._meta.reduce_scalar(op)
- return ScalarExpression(self, "reduce_scalar", op, meta=meta)
+ return ScalarExpression(self, "reduce_scalar", op, meta=meta, allow_empty=allow_empty)
def to_values(self, dtype=None, chunks="auto"):
dtype = lookup_dtype(self.dtype if dtype is None else dtype)
@@ -1045,9 +1060,6 @@ def to_values(self, dtype=None, chunks="auto"):
return cols, rows, vals
# Properties
- def __getitem__(self, index):
- return AmbiguousAssignOrExtract(self, index)
-
def isequal(self, other, *, check_dtype=False):
other = self._expect_type(
other, (Matrix, TransposedMatrix), within="isequal", argname="other"
@@ -1064,6 +1076,7 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
# Delayed methods
__contains__ = Matrix.__contains__
+ __getitem__ = Matrix.__getitem__
ewise_add = Matrix.ewise_add
ewise_mult = Matrix.ewise_mult
mxv = Matrix.mxv
@@ -1192,6 +1205,94 @@ def __init__(
# return Matrix.new(dtype, nrows, ncols, name=name)
+class MatrixIndexExpr(AmbiguousAssignOrExtract):
+ __slots__ = "_ncols", "_nrows"
+ ndim = 2
+ output_type = gb.Matrix
+ _is_transposed = False
+
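+    # Delayed extract expression `A[rows, cols]`; materialized by .new() or autocompute.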
+ def __init__(self, parent, resolved_indexes, nrows, ncols):
+ super().__init__(parent, resolved_indexes)
+ self._nrows = nrows
+ self._ncols = ncols
+
+ @property
+ def ncols(self):
+ return self._ncols
+
+ @property
+ def nrows(self):
+ return self._nrows
+
+ @property
+ def shape(self):
+ return (self._nrows, self._ncols)
+
+ # Begin auto-generated code: Matrix
+ __and__ = gb.matrix.MatrixIndexExpr.__and__
+ __bool__ = gb.matrix.MatrixIndexExpr.__bool__
+ __or__ = gb.matrix.MatrixIndexExpr.__or__
+ _get_value = _automethods._get_value
+ S = gb.matrix.MatrixIndexExpr.S
+ T = gb.matrix.MatrixIndexExpr.T
+ V = gb.matrix.MatrixIndexExpr.V
+ apply = gb.matrix.MatrixIndexExpr.apply
+ ewise_add = gb.matrix.MatrixIndexExpr.ewise_add
+ ewise_mult = gb.matrix.MatrixIndexExpr.ewise_mult
+ isclose = gb.matrix.MatrixIndexExpr.isclose
+ isequal = gb.matrix.MatrixIndexExpr.isequal
+ kronecker = gb.matrix.MatrixIndexExpr.kronecker
+ mxm = gb.matrix.MatrixIndexExpr.mxm
+ mxv = gb.matrix.MatrixIndexExpr.mxv
+ nvals = gb.matrix.MatrixIndexExpr.nvals
+ reduce_rowwise = gb.matrix.MatrixIndexExpr.reduce_rowwise
+ reduce_columnwise = gb.matrix.MatrixIndexExpr.reduce_columnwise
+ reduce_scalar = gb.matrix.MatrixIndexExpr.reduce_scalar
+
+ # infix sugar:
+ __abs__ = gb.matrix.MatrixIndexExpr.__abs__
+ __add__ = gb.matrix.MatrixIndexExpr.__add__
+ __divmod__ = gb.matrix.MatrixIndexExpr.__divmod__
+ __eq__ = gb.matrix.MatrixIndexExpr.__eq__
+ __floordiv__ = gb.matrix.MatrixIndexExpr.__floordiv__
+ __ge__ = gb.matrix.MatrixIndexExpr.__ge__
+ __gt__ = gb.matrix.MatrixIndexExpr.__gt__
+ __invert__ = gb.matrix.MatrixIndexExpr.__invert__
+ __le__ = gb.matrix.MatrixIndexExpr.__le__
+ __lt__ = gb.matrix.MatrixIndexExpr.__lt__
+ __mod__ = gb.matrix.MatrixIndexExpr.__mod__
+ __mul__ = gb.matrix.MatrixIndexExpr.__mul__
+ __ne__ = gb.matrix.MatrixIndexExpr.__ne__
+ __neg__ = gb.matrix.MatrixIndexExpr.__neg__
+ __pow__ = gb.matrix.MatrixIndexExpr.__pow__
+ __radd__ = gb.matrix.MatrixIndexExpr.__radd__
+ __rdivmod__ = gb.matrix.MatrixIndexExpr.__rdivmod__
+ __rfloordiv__ = gb.matrix.MatrixIndexExpr.__rfloordiv__
+ __rmod__ = gb.matrix.MatrixIndexExpr.__rmod__
+ __rmul__ = gb.matrix.MatrixIndexExpr.__rmul__
+ __rpow__ = gb.matrix.MatrixIndexExpr.__rpow__
+ __rsub__ = gb.matrix.MatrixIndexExpr.__rsub__
+ __rtruediv__ = gb.matrix.MatrixIndexExpr.__rtruediv__
+ __rxor__ = gb.matrix.MatrixIndexExpr.__rxor__
+ __sub__ = gb.matrix.MatrixIndexExpr.__sub__
+ __truediv__ = gb.matrix.MatrixIndexExpr.__truediv__
+ __xor__ = gb.matrix.MatrixIndexExpr.__xor__
+
+ # bad sugar:
+ __itruediv__ = gb.matrix.MatrixIndexExpr.__itruediv__
+ __imul__ = gb.matrix.MatrixIndexExpr.__imul__
+ __imatmul__ = gb.matrix.MatrixIndexExpr.__imatmul__
+ __iadd__ = gb.matrix.MatrixIndexExpr.__iadd__
+ __iand__ = gb.matrix.MatrixIndexExpr.__iand__
+ __ipow__ = gb.matrix.MatrixIndexExpr.__ipow__
+ __imod__ = gb.matrix.MatrixIndexExpr.__imod__
+ __isub__ = gb.matrix.MatrixIndexExpr.__isub__
+ __ixor__ = gb.matrix.MatrixIndexExpr.__ixor__
+ __ifloordiv__ = gb.matrix.MatrixIndexExpr.__ifloordiv__
+ __ior__ = gb.matrix.MatrixIndexExpr.__ior__
+
+
def _chunk_diag_v2(inner_matrix, k):
return wrap_inner(gb.ss.diag(inner_matrix.value, k))
@@ -1633,3 +1734,4 @@ def _concat_matrix(seq, axis=0):
gb.utils._output_types[Matrix] = gb.Matrix
gb.utils._output_types[TransposedMatrix] = gb.matrix.TransposedMatrix
gb.utils._output_types[MatrixExpression] = gb.Matrix
+gb.utils._output_types[MatrixIndexExpr] = gb.Matrix
diff --git a/dask_grblas/scalar.py b/dask_grblas/scalar.py
index 472282c..2266504 100644
--- a/dask_grblas/scalar.py
+++ b/dask_grblas/scalar.py
@@ -74,6 +74,7 @@ def __init__(self, delayed, meta=None):
assert delayed.ndim == 0
if meta is None:
meta = gb.Scalar.new(delayed.dtype)
+ # meta = gb.Scalar.from_value(1, dtype=delayed.dtype)
self._meta = meta
self.dtype = meta.dtype
@@ -82,12 +83,11 @@ def update(self, expr, in_dOnion=False):
if any_dOnions(self, expr):
self_copy = self.__class__(self._optional_dup(), meta=self._meta)
expr_ = expr
- if typ is AmbiguousAssignOrExtract and expr.has_dOnion:
+ if isinstance(expr, AmbiguousAssignOrExtract) and expr.has_dOnion:
def update_by_aae(c, p, k_0, k_1):
keys = k_0 if k_1 is None else (k_0, k_1)
- aae = AmbiguousAssignOrExtract(p, keys)
- return c.update(aae, in_dOnion=True)
+ return c.update(p[keys], in_dOnion=True)
if _is_pair(expr_.index):
keys_0, keys_1 = expr_.index[0], expr_.index[1]
@@ -134,7 +134,7 @@ def update_by_gbd(c, *args, **kwargs):
self._meta.update(get_meta(expr))
self._meta.clear()
- if typ is AmbiguousAssignOrExtract:
+ if isinstance(expr, AmbiguousAssignOrExtract):
# Extract (s << v[index])
expr_new = expr.new(dtype=self.dtype)
self.value = expr_new.value
@@ -254,10 +254,25 @@ def isequal(self, other, *, check_dtype=False):
if type(other) is Box:
other = other.content
if type(other) is not Scalar:
+ if other is None:
+ return self.is_empty
self._meta.isequal(get_meta(other))
- other = Scalar.from_value(other)
+ try:
+ other = Scalar.from_value(other)
+ except TypeError:
+ other = self._expect_type(
+ other,
+ (Scalar, gb.Scalar),
+ within="isequal",
+ argname="other",
+ extra_message="Literal scalars also accepted.",
+ )
+ # Don't check dtype if we had to infer dtype of `other`
check_dtype = False
- return super().isequal(other, check_dtype=check_dtype)
+ if check_dtype and self.dtype != other.dtype:
+ return False
+ else:
+ return super().isequal(other, check_dtype=check_dtype)
def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
if other is None:
@@ -271,7 +286,7 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
@property
def is_empty(self):
if self.is_dOnion:
- donion = DOnion.multi_access(gb.Scalar.new(bool), getattr, self, 'is_empty')
+ donion = DOnion.multi_access(gb.Scalar.new(bool), getattr, self, "is_empty")
return PythonScalar(donion)
delayed = da.core.elemwise(
@@ -349,6 +364,7 @@ class ScalarExpression(GbDelayed):
ndim = 0
shape = ()
_is_scalar = True
+ _is_cscalar = False
__and__ = gb.scalar.ScalarExpression.__and__
__bool__ = gb.scalar.ScalarExpression.__bool__
__eq__ = gb.scalar.ScalarExpression.__eq__
@@ -365,6 +381,38 @@ class ScalarExpression(GbDelayed):
# return getattr(gb.scalar.ScalarExpression, item)
+class ScalarIndexExpr(AmbiguousAssignOrExtract):
+ output_type = gb.Scalar
+ ndim = 0
+ shape = ()
+ _is_scalar = True
+ _is_cscalar = False
+
+ @property
+ def is_cscalar(self):
+ return self._is_cscalar
+
+ @property
+ def is_grbscalar(self):
+ return not self._is_cscalar
+
+ # Begin auto-generated code: Scalar
+ __and__ = gb.scalar.ScalarIndexExpr.__and__
+ __bool__ = gb.scalar.ScalarIndexExpr.__bool__
+ __eq__ = gb.scalar.ScalarIndexExpr.__eq__
+ __float__ = gb.scalar.ScalarIndexExpr.__float__
+ __index__ = gb.scalar.ScalarIndexExpr.__index__
+ __int__ = gb.scalar.ScalarIndexExpr.__int__
+ __or__ = gb.scalar.ScalarIndexExpr.__or__
+ _get_value = _automethods._get_value
+ isclose = gb.scalar.ScalarIndexExpr.isclose
+ isequal = gb.scalar.ScalarIndexExpr.isequal
+ value = gb.scalar.ScalarIndexExpr.value
+ # End auto-generated code: Scalar
+
+
# Dask task functions
def _scalar_dup(x, dtype):
return InnerScalar(x.value.dup(dtype=dtype))
@@ -385,3 +433,4 @@ def _invert(x):
gb.utils._output_types[Scalar] = gb.Scalar
gb.utils._output_types[PythonScalar] = gb.Scalar
gb.utils._output_types[ScalarExpression] = gb.Scalar
+gb.utils._output_types[ScalarIndexExpr] = gb.Scalar
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 2c0cbe5..deb654e 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -11,7 +11,7 @@
from . import _automethods
from .base import BaseType, InnerBaseType, _nvals, DOnion, Box, any_dOnions
-from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, Assigner
+from .expr import AmbiguousAssignOrExtract, IndexerResolver, GbDelayed, Updater, Assigner
from .mask import StructuralMask, ValueMask
from ._ss.vector import ss
from .utils import (
@@ -367,6 +367,9 @@ def resize(self, size, inplace=True, chunks="auto"):
else:
return Vector(x, nvals=nvals)
+ def diag(self, k=0, dtype=None, chunks="auto"):
+ return self._diag(k=k, dtype=dtype, chunks=chunks)
+
def _diag(self, k=0, dtype=None, chunks="auto"):
nrows = self.size + abs(k)
kdiag_col_start = max(0, k)
@@ -415,8 +418,15 @@ def rechunk(self, inplace=False, chunks="auto"):
else:
return self.resize(*self.shape, chunks=chunks, inplace=False)
- def __getitem__(self, index):
- return AmbiguousAssignOrExtract(self, index)
+ def __getitem__(self, keys):
+ resolved_indexes = IndexerResolver(self, keys)
+ shape = resolved_indexes.shape
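+        # An empty resolved shape means a single element was addressed.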
+ if not shape:
+ from .scalar import ScalarIndexExpr
+
+ return ScalarIndexExpr(self, resolved_indexes)
+ else:
+ return VectorIndexExpr(self, resolved_indexes, *shape)
def __delitem__(self, keys):
del Updater(self)[keys]
@@ -439,13 +449,32 @@ def __iter__(self):
return indices.flat
def ewise_add(self, other, op=monoid.plus, *, require_monoid=True):
- assert type(other) is Vector
+ gb_types = (gb.Vector,)
+ other = self._expect_type(other, (Vector,) + gb_types, within="ewise_add", argname="other")
+
+ try:
+ meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
+ except DimensionMismatch:
+ if any_dOnions(self, other):
+ meta = self._meta.ewise_add(self._meta, op=op, require_monoid=require_monoid)
+ else:
+ raise
- meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid)
- return VectorExpression(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta)
+ return VectorExpression(
+ self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta
+ )
def ewise_mult(self, other, op=binary.times):
- assert type(other) is Vector
+ gb_types = (gb.Vector,)
+ other = self._expect_type(other, (Vector,) + gb_types, within="ewise_mult", argname="other")
+
+ try:
+ meta = self._meta.ewise_mult(other._meta, op=op)
+ except DimensionMismatch:
+ if any_dOnions(self, other):
+                meta = self._meta.ewise_mult(self._meta, op=op)
+ else:
+ raise
-        meta = self._meta.ewise_mult(other._meta, op=op)
return VectorExpression(self, "ewise_mult", other, op, meta=meta)
@@ -536,9 +565,11 @@ def apply(self, op, right=None, *, left=None):
meta = self._meta.apply(op=op, left=left_meta, right=right_meta)
return VectorExpression(self, "apply", op, right, meta=meta, left=left)
- def reduce(self, op=monoid.plus):
+ def reduce(self, op=monoid.plus, *, allow_empty=True):
+ from .scalar import ScalarExpression
+
meta = self._meta.reduce(op)
- return ScalarExpression(self, "reduce", op, meta=meta)
+ return ScalarExpression(self, "reduce", op, meta=meta, allow_empty=allow_empty)
def build(self, indices, values, *, size=None, chunks=None, dup_op=None, clear=False):
if clear:
@@ -667,15 +698,11 @@ def _to_values(x, starts, stops_, dtype, chunks, nnz):
return indices, values
def isequal(self, other, *, check_dtype=False):
- other = self._expect_type(
- other, (Vector, gb.Vector), within="isequal", argname="other"
- )
+ other = self._expect_type(other, (Vector, gb.Vector), within="isequal", argname="other")
return super().isequal(other, check_dtype=check_dtype)
def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
- other = self._expect_type(
- other, (Vector, gb.Vector), within="isclose", argname="other"
- )
+ other = self._expect_type(other, (Vector, gb.Vector), within="isclose", argname="other")
return super().isclose(other, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype)
def _delete_element(self, resolved_indexes):
@@ -799,6 +826,84 @@ def __init__(
# return Vector.new(dtype, size, name=name)
+class VectorIndexExpr(AmbiguousAssignOrExtract):
+ __slots__ = "_size"
+ ndim = 1
+ output_type = gb.Vector
+
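+    # Delayed extract expression `v[index]`; materialized by .new() or autocompute.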
+ def __init__(self, parent, resolved_indexes, size):
+ super().__init__(parent, resolved_indexes)
+ self._size = size
+
+ @property
+ def size(self):
+ return self._size
+
+ @property
+ def shape(self):
+ return (self._size,)
+
+ # Begin auto-generated code: Vector
+ _get_value = _automethods._get_value
+ S = gb.vector.VectorIndexExpr.S
+ V = gb.vector.VectorIndexExpr.V
+ __and__ = gb.vector.VectorIndexExpr.__and__
+ __contains__ = gb.vector.VectorIndexExpr.__contains__
+ __or__ = gb.vector.VectorIndexExpr.__or__
+ apply = gb.vector.VectorIndexExpr.apply
+ ewise_add = gb.vector.VectorIndexExpr.ewise_add
+ ewise_mult = gb.vector.VectorIndexExpr.ewise_mult
+ isclose = gb.vector.VectorIndexExpr.isclose
+ isequal = gb.vector.VectorIndexExpr.isequal
+ nvals = gb.vector.VectorIndexExpr.nvals
+ reduce = gb.vector.VectorIndexExpr.reduce
+ vxm = gb.vector.VectorIndexExpr.vxm
+
+ # infix sugar:
+ __abs__ = gb.vector.VectorIndexExpr.__abs__
+ __add__ = gb.vector.VectorIndexExpr.__add__
+ __divmod__ = gb.vector.VectorIndexExpr.__divmod__
+ __eq__ = gb.vector.VectorIndexExpr.__eq__
+ __floordiv__ = gb.vector.VectorIndexExpr.__floordiv__
+ __ge__ = gb.vector.VectorIndexExpr.__ge__
+ __gt__ = gb.vector.VectorIndexExpr.__gt__
+ __invert__ = gb.vector.VectorIndexExpr.__invert__
+ __le__ = gb.vector.VectorIndexExpr.__le__
+ __lt__ = gb.vector.VectorIndexExpr.__lt__
+ __mod__ = gb.vector.VectorIndexExpr.__mod__
+ __mul__ = gb.vector.VectorIndexExpr.__mul__
+ __ne__ = gb.vector.VectorIndexExpr.__ne__
+ __neg__ = gb.vector.VectorIndexExpr.__neg__
+ __pow__ = gb.vector.VectorIndexExpr.__pow__
+ __radd__ = gb.vector.VectorIndexExpr.__radd__
+ __rdivmod__ = gb.vector.VectorIndexExpr.__rdivmod__
+ __rfloordiv__ = gb.vector.VectorIndexExpr.__rfloordiv__
+ __rmod__ = gb.vector.VectorIndexExpr.__rmod__
+ __rmul__ = gb.vector.VectorIndexExpr.__rmul__
+ __rpow__ = gb.vector.VectorIndexExpr.__rpow__
+ __rsub__ = gb.vector.VectorIndexExpr.__rsub__
+ __rtruediv__ = gb.vector.VectorIndexExpr.__rtruediv__
+ __rxor__ = gb.vector.VectorIndexExpr.__rxor__
+ __sub__ = gb.vector.VectorIndexExpr.__sub__
+ __truediv__ = gb.vector.VectorIndexExpr.__truediv__
+ __xor__ = gb.vector.VectorIndexExpr.__xor__
+
+ # bad sugar:
+ __array__ = gb.vector.VectorIndexExpr.__array__
+ __bool__ = gb.vector.VectorIndexExpr.__bool__
+ __iadd__ = gb.vector.VectorIndexExpr.__iadd__
+ __iand__ = gb.vector.VectorIndexExpr.__iand__
+ __ifloordiv__ = gb.vector.VectorIndexExpr.__ifloordiv__
+ __imatmul__ = gb.vector.VectorIndexExpr.__imatmul__
+ __imod__ = gb.vector.VectorIndexExpr.__imod__
+ __imul__ = gb.vector.VectorIndexExpr.__imul__
+ __ior__ = gb.vector.VectorIndexExpr.__ior__
+ __ipow__ = gb.vector.VectorIndexExpr.__ipow__
+ __isub__ = gb.vector.VectorIndexExpr.__isub__
+ __itruediv__ = gb.vector.VectorIndexExpr.__itruediv__
+ __ixor__ = gb.vector.VectorIndexExpr.__ixor__
+
+
def _chunk_diag(
inner_vector,
input_range,
@@ -1037,4 +1142,5 @@ def _concat_vector(seq, axis=0):
gb.utils._output_types[Vector] = gb.Vector
gb.utils._output_types[VectorExpression] = gb.Vector
+gb.utils._output_types[VectorIndexExpr] = gb.Vector
from .matrix import InnerMatrix # noqa isort:skip
diff --git a/tests/from_grblas2/test_matrix.py b/tests/from_grblas2/test_matrix.py
index bde2745..1e5ba55 100644
--- a/tests/from_grblas2/test_matrix.py
+++ b/tests/from_grblas2/test_matrix.py
@@ -112,8 +112,8 @@ def test_dup(As, A_chunks):
assert C.ncols == A.ncols
# Ensure they are not the same backend object
A[0, 0] = 1000
- assert A[0, 0].value == 1000
- assert C[0, 0].value != 1000
+ assert A[0, 0].new() == 1000
+ assert C[0, 0].new() != 1000
# extended functionality
Ds = [Matrix.from_values([0, 1], [0, 1], [0, 2.5], dtype=dtypes.FP64)]
@@ -156,7 +156,7 @@ def test_from_values():
assert C3.ncols == 3
assert C3.nvals == 2 # duplicates were combined
assert C3.dtype == int
- assert C3[1, 1].value == 6 # 2*3
+ assert C3[1, 1].new() == 6 # 2*3
C3monoid = Matrix.from_values([0, 1, 1], [2, 1, 1], [1, 2, 3], nrows=10, dup_op=monoid.times)
assert C3.isequal(C3monoid)
@@ -217,7 +217,7 @@ def test_from_values_dask():
assert C3.ncols == 3
assert C3.nvals == 2 # duplicates were combined
assert C3.dtype == int
- assert C3[1, 1].value == 6 # 2*3
+ assert C3[1, 1].new() == 6 # 2*3
C3monoid = Matrix.from_values(rows, cols, vals, nrows=10, dup_op=monoid.times)
assert C3.isequal(C3monoid)
@@ -323,21 +323,21 @@ def test_resize(As, A_chunks):
A.resize(10, 11)
assert A.nrows == 10
assert A.ncols == 11
- assert A.nvals.compute() == 12
- assert compute(A[9, 9].value) is None
+ assert A.nvals == 12
+ assert compute(A[9, 9].new().value) is None
A.resize(4, 1)
assert A.nrows == 4
assert A.ncols == 1
- assert A.nvals.compute() == 1
+ assert A.nvals == 1
A = A_.dup()
assert A.nrows == 7
assert A.ncols == 7
- assert A.nvals.compute() == 12
+ assert A.nvals == 12
A.resize(6, 11, chunks=4)
assert A.nrows == 6
assert A.ncols == 11
- assert A.nvals.compute() == 9
+ assert A.nvals == 9
if not A.is_dOnion:
assert A._delayed.chunks == ((4, 2), (4, 4, 3))
else:
@@ -345,14 +345,14 @@ def test_resize(As, A_chunks):
(4, 2),
(4, 4, 3),
)
- assert compute(A[3, 2].value) == 3
- assert compute(A[5, 7].value) is None
+ assert compute(A[3, 2].new().value) == 3
+ assert compute(A[5, 7].new().value) is None
A = A_.dup()
A.resize(11, 3, chunks=4)
assert A.nrows == 11
assert A.ncols == 3
- assert A.nvals.compute() == 5
+ assert A.nvals == 5
if type(A._delayed) is da.Array:
assert A._delayed.chunks == ((4, 4, 3), (3,))
else:
@@ -360,8 +360,8 @@ def test_resize(As, A_chunks):
(4, 4, 3),
(3,),
)
- assert compute(A[3, 2].value) == 3
- assert compute(A[7, 2].value) is None
+ assert compute(A[3, 2].new().value) == 3
+ assert compute(A[7, 2].new().value) is None
def test_rechunk(As, A_chunks):
@@ -489,8 +489,9 @@ def test_extract_element(As, A_chunks):
A.rechunk(chunks=chunks, inplace=True)
assert A[3, 0].new() == 3
assert A[1, 6].new() == 4
- assert A[1, 6].value == 4
- assert A.T[6, 1].value == 4
+ with pytest.raises(TypeError, match="enable automatic"):
+ A[1, 6].value
+ assert A.T[6, 1].new() == 4
s = A[0, 0].new()
assert compute(s.value) is None
assert s.dtype == "INT64"
@@ -504,11 +505,11 @@ def test_set_element(As, A_chunks):
for chunks in A_chunks:
A = A_.dup()
A.rechunk(chunks=chunks, inplace=True)
- assert compute(A[1, 1].value) is None
- assert A[3, 0].value == 3
+ assert compute(A[1, 1].new().value) is None
+ assert A[3, 0].new() == 3
A[1, 1].update(21)
A[3, 0] << -5
- assert A[1, 1].value == 21
+ assert A[1, 1].new() == 21
assert A[3, 0].new() == -5
@@ -517,10 +518,10 @@ def test_remove_element(As, A_chunks):
for chunks in A_chunks:
A = A_.dup()
A.rechunk(chunks=chunks, inplace=True)
- assert A[3, 0].value == 3
+ assert A[3, 0].new() == 3
del A[3, 0]
- assert compute(A[3, 0].value) is None
- assert A[6, 3].value == 7
+ assert compute(A[3, 0].new().value) is None
+ assert A[6, 3].new() == 7
with pytest.raises(TypeError, match="Remove Element only supports"):
del A[3:5, 3]
@@ -566,7 +567,7 @@ def test_mxm_nonsquare():
B = Matrix.from_values([0, 2, 4], [0, 0, 0], [10, 20, 30], nrows=5, ncols=1)
C = Matrix.new(A.dtype, nrows=1, ncols=1)
C << A.mxm(B, semiring.max_plus)
- assert C[0, 0].value == 33
+ assert C[0, 0].new() == 33
C1 = A.mxm(B, semiring.max_plus).new()
assert C1.isequal(C)
C2 = A.T.mxm(B.T, semiring.max_plus).new()
@@ -583,7 +584,7 @@ def test_mxm_nonsquare():
B = Matrix.from_values([0, 2, 4], [0, 0, 0], [10, 20, 30], nrows=5, ncols=1)
C = Matrix.new(A.dtype, nrows=1, ncols=1)
C << A.mxm(B, semiring.max_plus)
- assert C[0, 0].value == 33
+ assert C[0, 0].new() == 33
C1 = A.mxm(B, semiring.max_plus).new()
assert C1.isequal(C)
C2 = A.T.mxm(B.T, semiring.max_plus).new()
@@ -1791,60 +1792,59 @@ def test_reduce_agg(As, A_chunks):
assert s3.isclose(s1.value.compute() * s2.value.compute())
-@pytest.mark.xfail("'Needs investigation'", strict=True)
-def test_reduce_agg_argminmax(A, A_chunks):
- A_ = A
- for chunks in A_chunks:
- A = A_.dup()
- A.rechunk(chunks=chunks, inplace=True)
- # reduce_rowwise
- expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 6, 5, 0, 5, 2, 4])
- w1b = A.reduce_rowwise(agg.argmin).new()
- assert w1b.isequal(expected)
- w1c = A.T.reduce_columnwise(agg.argmin).new()
- assert w1c.isequal(expected)
- expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 4, 5, 0, 5, 2, 3])
- w2b = A.reduce_rowwise(agg.argmax).new()
- assert w2b.isequal(expected)
- w2c = A.T.reduce_columnwise(agg.argmax).new()
- assert w2c.isequal(expected)
-
- # reduce_cols
- expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 5, 0, 6, 2, 1])
- w7b = A.reduce_columnwise(agg.argmin).new()
- assert w7b.isequal(expected)
- w7c = A.T.reduce_rowwise(agg.argmin).new()
- assert w7c.isequal(expected)
- expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 6, 6, 1, 4, 1])
- w8b = A.reduce_columnwise(agg.argmax).new()
- assert w8b.isequal(expected)
- w8c = A.T.reduce_rowwise(agg.argmax).new()
- assert w8c.isequal(expected)
+def test_reduce_agg_argminmax(As, A_chunks):
+ for A_ in As:
+ for chunks in A_chunks:
+ A = A_.dup()
+ A.rechunk(chunks=chunks, inplace=True)
+ # reduce_rowwise
+ expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 6, 5, 0, 5, 2, 4])
+ w1b = A.reduce_rowwise(agg.argmin).new()
+ assert w1b.isequal(expected)
+ w1c = A.T.reduce_columnwise(agg.argmin).new()
+ assert w1c.isequal(expected)
+ expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 4, 5, 0, 5, 2, 3])
+ w2b = A.reduce_rowwise(agg.argmax).new()
+ assert w2b.isequal(expected)
+ w2c = A.T.reduce_columnwise(agg.argmax).new()
+ assert w2c.isequal(expected)
+
+ # reduce_cols
+ expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 5, 0, 6, 2, 1])
+ w7b = A.reduce_columnwise(agg.argmin).new()
+ assert w7b.isequal(expected)
+ w7c = A.T.reduce_rowwise(agg.argmin).new()
+ assert w7c.isequal(expected)
+ expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 6, 6, 1, 4, 1])
+ w8b = A.reduce_columnwise(agg.argmax).new()
+ assert w8b.isequal(expected)
+ w8c = A.T.reduce_rowwise(agg.argmax).new()
+ assert w8c.isequal(expected)
+
+ # reduce_scalar
+ with pytest.raises(
+ ValueError, match="Aggregator argmin may not be used with Matrix.reduce_scalar"
+ ):
+ A.reduce_scalar(agg.argmin)
- # reduce_scalar
- with pytest.raises(
- ValueError, match="Aggregator argmin may not be used with Matrix.reduce_scalar"
- ):
- A.reduce_scalar(agg.argmin)
+ silly = agg.Aggregator(
+ "silly",
+ composite=[agg.argmin, agg.argmax],
+ finalize=lambda x, y: binary.plus(x & y),
+ types=[agg.argmin],
+ )
+ v1 = A.reduce_rowwise(agg.argmin).new()
+ v2 = A.reduce_rowwise(agg.argmax).new()
+ v3 = A.reduce_rowwise(silly).new()
+ assert v3.isequal(binary.plus(v1 & v2).new())
- silly = agg.Aggregator(
- "silly",
- composite=[agg.argmin, agg.argmax],
- finalize=lambda x, y: binary.plus(x & y),
- types=[agg.argmin],
- )
- v1 = A.reduce_rowwise(agg.argmin).new()
- v2 = A.reduce_rowwise(agg.argmax).new()
- v3 = A.reduce_rowwise(silly).new()
- assert v3.isequal(binary.plus(v1 & v2).new())
+ v1 = A.reduce_columnwise(agg.argmin).new()
+ v2 = A.reduce_columnwise(agg.argmax).new()
+ v3 = A.reduce_columnwise(silly).new()
+ assert v3.isequal(binary.plus(v1 & v2).new())
- v1 = A.reduce_columnwise(agg.argmin).new()
- v2 = A.reduce_columnwise(agg.argmax).new()
- v3 = A.reduce_columnwise(silly).new()
- assert v3.isequal(binary.plus(v1 & v2).new())
-
- with pytest.raises(ValueError, match="Aggregator"):
- A.reduce_scalar(silly).new()
+ with pytest.raises(ValueError, match="Aggregator"):
+ A.reduce_scalar(silly).new()
@pytest.mark.xfail("'Needs investigation'", strict=True)
@@ -2059,7 +2059,6 @@ def test_transpose(As, A_chunks):
assert C3.isequal(result)
-@pytest.mark.xfail("'Needs investigation'", strict=True)
def test_kronecker():
# A 0 1 B 0 1 2
# 0 [1 -] 0 [- 2 3]
@@ -2070,15 +2069,29 @@ def test_kronecker():
# 1 [8 - 4 - - - ]
# 2 [- 4 6 - 6 9 ]
# 3 [16 - 8 24 - 12]
- A = Matrix.from_values([0, 1, 1], [0, 0, 1], [1, 2, 3])
- B = Matrix.from_values([0, 0, 1, 1], [1, 2, 0, 2], [2, 3, 8, 4])
+ A0 = Matrix.from_values([0, 1, 1], [0, 0, 1], [1, 2, 3])
+ A1 = Matrix.from_values(
+ da.from_array([0, 1, 1]),
+ da.from_array([0, 0, 1]),
+ da.from_array([1, 2, 3]),
+ )
+ As = [A0, A1]
+ B0 = Matrix.from_values([0, 0, 1, 1], [1, 2, 0, 2], [2, 3, 8, 4])
+ B1 = Matrix.from_values(
+ da.from_array([0, 0, 1, 1]),
+ da.from_array([1, 2, 0, 2]),
+ da.from_array([2, 3, 8, 4]),
+ )
+ Bs = [B0, B1]
result = Matrix.from_values(
[0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3],
[1, 2, 0, 2, 1, 2, 4, 5, 0, 2, 3, 5],
[2, 3, 8, 4, 4, 6, 6, 9, 16, 8, 24, 12],
)
- C = A.kronecker(B, binary.times).new()
- assert C.isequal(result)
+ for A in As:
+ for B in Bs:
+ C = A.kronecker(B, binary.times).new()
+ assert C.isequal(result)
def test_simple_assignment(As, A_chunks):
@@ -2105,7 +2118,7 @@ def test_assign_transpose(As, A_chunks):
C.T << A
with pytest.raises(TypeError, match="does not support item assignment"):
C.T[:, :] << A
- with pytest.raises(AttributeError):
+ with pytest.raises(TypeError, match="autocompute"):
C[:, :].T << A
nrows, ncols = A.nrows, A.ncols
@@ -2938,15 +2951,15 @@ def test_bool_eq_on_scalar_expressions(As, A_chunks):
assert range(expr) == range(2)
expr = A[0, [1, 1]]
- with pytest.raises(TypeError, match="not defined"):
- expr == expr
+ # with pytest.raises(TypeError, match="not defined"):
+ expr == expr # Now okay
with pytest.raises(TypeError, match="not defined"):
bool(expr)
- with pytest.raises(TypeError, match="not defined"):
+ with pytest.raises(TypeError):
int(expr)
- with pytest.raises(TypeError, match="not defined"):
+ with pytest.raises(TypeError):
float(expr)
- with pytest.raises(TypeError, match="not defined"):
+ with pytest.raises(TypeError):
range(expr)
@@ -2971,10 +2984,10 @@ def test_contains(As, A_chunks):
A.rechunk(chunks=chunks, inplace=True)
assert (0, 1) in A
assert (1, 0) in A.T
-
+
assert (0, 1) not in A.T
assert (1, 0) not in A
-
+
with pytest.raises(TypeError):
1 in A
with pytest.raises(TypeError):
@@ -3245,7 +3258,7 @@ def test_auto(As, A_chunks, v):
# "__matmul__",
"__and__",
"__or__",
- # "kronecker",
+ "kronecker",
]:
val1 = getattr(expected, method)(expected).new()
val2 = getattr(expected, method)(expr)
@@ -3259,7 +3272,7 @@ def test_auto(As, A_chunks, v):
s2 = getattr(expr, method)(monoid.lor)
assert s1.isequal(s2.new())
assert s1.isequal(s2)
-
+
expected = binary.times(A & A).new()
for expr in [binary.times(A & A)]:
assert expr.dtype == expected.dtype
@@ -3294,7 +3307,7 @@ def test_auto(As, A_chunks, v):
s2 = getattr(expr, method)()
assert s1.isequal(s2.new())
assert s1.isequal(s2)
-
+
expected = semiring.plus_times(A @ v).new()
for expr in [(A @ v), (v @ A.T), semiring.plus_times(A @ v)]:
assert expr.vxm(A).isequal(expected.vxm(A))
@@ -3314,14 +3327,14 @@ def test_auto_assign(As, A_chunks):
expected[:3, :3] = expr.new()
A[:3, :3] = expr
assert expected.isequal(A)
- with pytest.raises(TypeError):
- # Not yet supported, but we could!
- A[:3, :3] = A[1:4, 1:4]
v = A[2:5, 5].new(dtype=bool)
expr = v & v
A[:3, 4] << expr
expected[:3, 4] << expr.new()
assert expected.isequal(A)
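+    # Assigning from an extract expression is now supported: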
+ C = A[1:4, 1:4].new()
+ A[:3, :3] = A[1:4, 1:4]
+ assert A[:3, :3].isequal(C)
@autocompute
@@ -3505,12 +3518,12 @@ def test_infix_sugar(As, A_chunks):
assert binary.numpy.mod(-3, A).isequal(y)
# assert binary.fmod(-3, A).isequal(y) # The reason we use numpy.mod
assert binary.plus(binary.times(A & x) & y).isequal(-3 * unary.one(A))
-
+
assert binary.eq(A & A).isequal(A == A)
assert binary.ne(A.T & A.T).isequal(A.T != A.T)
assert binary.lt(A & A.T).isequal(A < A.T)
assert binary.ge(A.T & A).isequal(A.T >= A)
-
+
B = A.dup()
B += 1
assert type(B) is Matrix
@@ -3547,7 +3560,7 @@ def test_infix_sugar(As, A_chunks):
B ^= B
assert type(B) is Matrix
assert not B.reduce_scalar(agg.any).new()
-
+
expr = binary.plus(A & A)
assert unary.abs(expr).isequal(abs(expr))
assert unary.ainv(expr).isequal(-expr)