diff --git a/dask_grblas/_automethods.py b/dask_grblas/_automethods.py
new file mode 100644
index 0000000..a781ad0
--- /dev/null
+++ b/dask_grblas/_automethods.py
@@ -0,0 +1,21 @@
+from grblas import config
+
+
+def _get_value(self, attr=None, default=None):
+    if config.get("autocompute"):
+        if self._value is None:
+            self._value = self.new()
+            if getattr(self, "is_dOnion", False):
+                self._value = self._value.strip()
+        if attr is None:
+            return self._value
+        else:
+            return getattr(self._value, attr)
+    if default is not None:
+        return default.__get__(self)
+    raise TypeError(
+        f"{attr} not enabled for objects of type {type(self)}. "
+        f"Use `.new()` to create a new {self.output_type.__name__}.\n\n"
+        "Hint: use `grblas.config.set(autocompute=True)` to enable "
+        "automatic computation of expressions."
+    )
diff --git a/dask_grblas/_ss/matrix.py b/dask_grblas/_ss/matrix.py
index 8aad87a..ed64b4e 100644
--- a/dask_grblas/_ss/matrix.py
+++ b/dask_grblas/_ss/matrix.py
@@ -30,3 +30,27 @@ def diag(self, vector, k=0, chunks="auto", dtype=None):
         vector = self._parent._expect_type(vector, dgb.Vector, within="ss.diag", argname="vector")
         rv = vector._diag(k, chunks=chunks, dtype=dtype)
         self._parent.__init__(rv._delayed, nvals=rv._nvals)
+
+    def build_scalar(
+        self,
+        rows,
+        columns,
+        values,
+        *,
+        dup_op=None,
+        clear=False,
+        nrows=None,
+        ncols=None,
+        chunks=None,
+        in_dOnion=False,  # not part of the API
+    ):
+        self._parent.build(
+            rows,
+            columns,
+            values,
+            dup_op=dup_op,
+            clear=clear,
+            nrows=nrows,
+            ncols=ncols,
+            chunks=chunks,
+        )
diff --git a/dask_grblas/base.py b/dask_grblas/base.py
index 6febd0b..c1e80fb 100644
--- a/dask_grblas/base.py
+++ b/dask_grblas/base.py
@@ -1,4 +1,7 @@
 from numbers import Number
+from collections.abc import Iterable
+from tlz import compose
+from functools import partial
 import dask.array as da
 import grblas as gb
 import numpy as np
@@ -6,16 +9,23 @@
 from . import replace as replace_singleton
 from .mask import Mask
+from .functools import flexible_partial, skip
 from .utils import get_grblas_type, get_meta, np_dtype, wrap_inner
+from dask.base import is_dask_collection
 
 _expect_type = gb.base._expect_type
 
 
-def _check_mask(mask, output=None):
+def is_type(arg_type, a):
+    return type(a) is arg_type
+
+
+def _check_mask(mask, output=None, ignore_None=False):
     if not isinstance(mask, Mask):
         if isinstance(mask, BaseType):
            raise TypeError("Mask must indicate values (M.V) or structure (M.S)")
-        raise TypeError(f"Invalid mask: {type(mask)}")
+        # any non-Mask is invalid, except None when ignore_None is set
+        elif mask is not None or not ignore_None:
+            raise TypeError(f"Invalid mask: {type(mask)}")
     if output is not None:
         from .vector import Vector
 
@@ -32,9 +42,27 @@ class BaseType:
     _expect_type = _expect_type
     _is_scalar = False
 
+    @property
+    def is_dOnion(self):
+        return is_DOnion(self._delayed)
+
+    @property
+    def dOnion_if(self):
+        return self._delayed if self.is_dOnion else self
+
+    def strip(self, *args, **kwargs):
+        return self._delayed.strip(*args, **kwargs) if self.is_dOnion else self
+
     def isequal(self, other, *, check_dtype=False):
         from .scalar import PythonScalar
 
+        if any_dOnions(self, other):
+            meta = gb.Scalar.new(bool)
+            delayed = DOnion.multi_access(
+                meta, self.__class__.isequal, self, other, check_dtype=check_dtype
+            )
+            return PythonScalar(delayed, meta=meta)
+
         # if type(other) is not type(self):
         #     raise TypeError(f'Argument of isequal must be of type {type(self).__name__}')
         if not self._meta.isequal(other._meta):
@@ -52,46 +80,125 @@ def isequal(self, other, *, check_dtype=False):
             adjust_chunks={i: 1 for i in range(self._delayed.ndim)},
         )
         """
-        delayed = da.core.elemwise(
-            _isequal,
-            self._delayed,
-            other._delayed,
-            check_dtype,
-            dtype=bool,
+        ndim = (
+            self._matrix._delayed.ndim
+            if getattr(self, "_is_transposed", False)
+            else self._delayed.ndim
         )
-        if self._delayed.ndim > 0:
+        if ndim < 2:
             delayed = da.core.elemwise(
-                _to_scalar,
-                delayed.all(),
-                bool,
+                partial(_isequal, False, False),
+                self._delayed,
+                other._delayed,
+                check_dtype,
+                dtype=bool,
+            )
+        else:
+            xt = getattr(self, "_is_transposed", False)
+            yt = getattr(other, "_is_transposed", False)
+            self_ = (self._matrix._delayed, "ji") if xt else (self._delayed, "ij")
+            other_ = (other._matrix._delayed, "ji") if yt else (other._delayed, "ij")
+            delayed = da.core.blockwise(
+                *(partial(_isequal, xt, yt), "ij"),
+                *self_,
+                *other_,
+                *(check_dtype, None),
+                dtype=bool,
+            )
+        if ndim > 0:
+            delayed = da.core.blockwise(
+                *(_to_scalar, ()),
+                *(delayed.all(), None),
+                *(bool, None),
+                dtype=np.bool_,
+                meta=wrap_inner(gb.Scalar.new(bool)),
             )
         return PythonScalar(delayed)
 
     def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False):
         from .scalar import PythonScalar
 
+        if any_dOnions(self, other):
+            meta = gb.Scalar.new(bool)
+            delayed = DOnion.multi_access(
+                meta,
+                self.__class__.isclose,
+                self,
+                other,
+                rel_tol=rel_tol,
+                abs_tol=abs_tol,
+                check_dtype=check_dtype,
+            )
+            return PythonScalar(delayed, meta=meta)
+
         # if type(other) is not type(self):
         #     raise TypeError(f'Argument of isclose must be of type {type(self).__name__}')
         if not self._meta.isequal(other._meta):
             return PythonScalar.from_value(False)
-        delayed = da.core.elemwise(
-            _isclose,
-            self._delayed,
-            other._delayed,
-            rel_tol,
-            abs_tol,
-            check_dtype,
-            dtype=bool,
+
+        ndim = (
+            self._matrix._delayed.ndim
+            if getattr(self, "_is_transposed", False)
+            else self._delayed.ndim
         )
-        if self._delayed.ndim > 0:
+        if ndim < 2:
             delayed = da.core.elemwise(
_to_scalar, - delayed.all(), - bool, + partial(_isclose, False, False), + self._delayed, + other._delayed, + rel_tol, + abs_tol, + check_dtype, + dtype=bool, + ) + else: + xt = getattr(self, "_is_transposed", False) + yt = getattr(other, "_is_transposed", False) + self_ = (self._matrix._delayed, "ji") if xt else (self._delayed, "ij") + other_ = (other._matrix._delayed, "ji") if yt else (other._delayed, "ij") + delayed = da.core.blockwise( + *(partial(_isclose, xt, yt), "ij"), + *self_, + *other_, + *(rel_tol, None), + *(abs_tol, None), + *(check_dtype, None), + dtype=bool, + ) + if ndim > 0: + delayed = da.core.blockwise( + *(_to_scalar, ()), + *(delayed.all(), None), + *(bool, None), + dtype=np.bool_, + meta=wrap_inner(gb.Scalar.new(bool)), ) return PythonScalar(delayed) + def _clear(self): + delayed = self._optional_dup() + # for a function like this, what's the difference between `map_blocks` and `elemwise`? + if self.ndim == 0: + return self.__class__( + delayed.map_blocks( + _clear, + dtype=np_dtype(self.dtype), + ) + ) + else: + return self.__class__( + delayed.map_blocks( + _clear, + dtype=np_dtype(self.dtype), + ), + nvals=0, + ) + def clear(self): + if is_DOnion(self._delayed): + self.__init__(self._delayed.getattr(self._meta, "_clear"), meta=self._meta, nvals=0) + return + # Should we copy and mutate or simply create new chunks? delayed = self._optional_dup() # for a function like this, what's the difference between `map_blocks` and `elemwise`? @@ -110,6 +217,13 @@ def clear(self): ) def dup(self, dtype=None, *, mask=None, name=None): + if any_dOnions(self, mask): + meta = self._meta.dup(dtype=dtype) + donion = DOnion.multi_access( + meta, self.__class__.dup, self, dtype=dtype, mask=mask, name=name + ) + return self.__class__(donion, meta=meta) + if mask is not None: if not isinstance(mask, Mask): self._meta.dup(dtype=dtype, mask=mask, name=name) # should raise @@ -188,6 +302,9 @@ def __call__( __imatmul__ = gb.base.BaseType.__imatmul__ def _optional_dup(self): + if self.is_dOnion: + return DOnion.multi_access(self._meta, _dOnion_dup, self) + # TODO: maybe try to create an optimization pass that remove these if they are unnecessary return da.core.elemwise( _optional_dup, @@ -210,16 +327,22 @@ def compute_and_store_nvals(self): def nvals(self): from .scalar import PythonScalar + if self.is_dOnion: + donion = DOnion.multi_access(self._meta.nvals, getattr, self, "nvals") + return PythonScalar(donion) + delayed = da.core.elemwise( _nvals, self._delayed, dtype=int, ) if self._delayed.ndim > 0: - delayed = da.core.elemwise( - _to_scalar, - delayed.sum(), - int, + delayed = da.core.blockwise( + *(_to_scalar, ()), + *(delayed.sum(), None), + *(int, None), + dtype=np.int_, + meta=wrap_inner(gb.Scalar.new(int)), ) return PythonScalar(delayed) @@ -239,7 +362,7 @@ def _name_html(self): return self.name return f"{split[0]}{split[1]}" - def update(self, expr): + def update(self, expr, in_dOnion=False): if isinstance(expr, Number): if self.ndim == 2: raise TypeError( @@ -250,12 +373,75 @@ def update(self, expr): "If you do wish to make a dense matrix, then please be explicit:" "\n\n M[:, :] = s" ) + typ = type(expr) + if any_dOnions(self, expr): + self_copy = self.__class__(self._optional_dup(), meta=self._meta) + expr_ = expr + if isinstance(expr, AmbiguousAssignOrExtract) and expr.has_dOnion: + + def update_by_aae(c, p, k_0, k_1): + keys = k_0 if k_1 is None else (k_0, k_1) + return c.update(p[keys], in_dOnion=True) + + if _is_pair(expr_.index): + keys_0, keys_1 = expr_.index[0], 
expr_.index[1] + else: + keys_0, keys_1 = expr_.index, None + + donion = DOnion.multi_access( + self._meta, + update_by_aae, + self_copy, + expr_.parent, + *(keys_0, keys_1), + ) + self.__init__(donion, self._meta) + return + + if isinstance(expr, GbDelayed) and expr.has_dOnion: + + def update_by_gbd(c, *args, **kwargs): + gbd = getattr(args[0], args[1])(*args[2:], **kwargs) + return c.update(gbd, in_dOnion=True) + + donion = DOnion.multi_access( + self._meta, + update_by_gbd, + self_copy, + expr_.parent, + expr_.method_name, + *expr_.args, + **expr_.kwargs, + ) + self.__init__(donion, self._meta) + return + + elif typ is TransposedMatrix and expr.is_dOnion: + + donion = DOnion.multi_access( + self._meta, BaseType.update, self_copy, expr_, in_dOnion=True + ) + self.__init__(donion, self._meta) + return + + donion = DOnion.multi_access( + self._meta, BaseType.update, self_copy, expr_, in_dOnion=True + ) + self.__init__(donion, self._meta) + return + + if typ is Box: + expr = expr.content + typ = type(expr) + + if isinstance(expr, Number): Updater(self)[...] << expr + if in_dOnion: + return self.__class__(self._delayed, meta=self._meta) return - self._meta.update(expr._meta) + self._meta.clear() - typ = type(expr) - if typ is AmbiguousAssignOrExtract: + if isinstance(expr, AmbiguousAssignOrExtract): # Extract (w << v[index]) # Is it safe/reasonable to simply replace `_delayed`? # Should we try to preserve e.g. format or partitions? @@ -266,25 +452,100 @@ def update(self, expr): self.__init__(expr._optional_dup()) else: self.__init__(expr.dup(dtype=self.dtype)._delayed) - elif typ is GbDelayed: + elif isinstance(expr, GbDelayed): expr._update(self) elif typ is TransposedMatrix: # "C << A.T" - C = expr.new() + C = expr.new(dtype=self.dtype) self.__init__(C._delayed) + elif typ is type(None): # noqa + raise TypeError("Assignment value must be a valid expression") else: # Anything else we need to handle? 
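+            # e.g. a raw grblas object or a plain list/ndarray lands here;
+            # failing loudly seems safer than guessing an interpretation.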
- raise TypeError() + raise NotImplementedError() + if in_dOnion: + return self.__class__(self._delayed, meta=self._meta) + + def _update(self, expr, *, mask=None, accum=None, replace=None, in_dOnion=False): + typ = type(expr) + if any_dOnions(self, expr, mask): + self_copy = self.__class__(self._optional_dup(), meta=self._meta) + mask_ = mask.dOnion_if if mask is not None else None + expr_ = expr + if isinstance(expr, AmbiguousAssignOrExtract) and expr.has_dOnion: + + def _update_by_aae(c, p, k_0, k_1, mask=None, accum=None, replace=None): + keys = k_0 if k_1 is None else (k_0, k_1) + return c.update( + p[keys], mask=mask, accum=accum, replace=replace, in_dOnion=True + ) + + if _is_pair(expr_.index): + keys_0, keys_1 = expr_.index[0], expr_.index[1] + else: + keys_0, keys_1 = expr_.index, None + + donion = DOnion.multi_access( + self._meta, + _update_by_aae, + self_copy, + expr_.parent, + *(keys_0, keys_1), + mask=mask_, + accum=accum, + replace=replace, + ) + self.__init__(donion, self._meta) + return + + if isinstance(expr, GbDelayed) and expr.has_dOnion: + + def _update_by_gbd(c, *args, mask=None, accum=None, replace=None, **kwargs): + gbd = getattr(args[0], args[1])(*args[2:], **kwargs) + return c._update(gbd, mask=mask, accum=accum, replace=replace, in_dOnion=True) + + donion = DOnion.multi_access( + self._meta, + _update_by_gbd, + self_copy, + expr_.parent, + expr_.method_name, + *expr_.args, + mask=mask_, + accum=accum, + replace=replace, + **expr_.kwargs, + ) + self.__init__(donion, self._meta) + return + + donion = DOnion.multi_access( + self._meta, + BaseType._update, + self_copy, + expr_, + mask=mask_, + accum=accum, + replace=replace, + in_dOnion=True, + ) + self.__init__(donion, meta=self._meta) + return + + if typ is Box: + expr = expr.content + typ = type(expr) - def _update(self, expr, *, mask=None, accum=None, replace=None): if mask is None and accum is None: self.update(expr) + if in_dOnion: + return self return - typ = type(expr) - if typ is AmbiguousAssignOrExtract: + if isinstance(expr, AmbiguousAssignOrExtract): # Extract (w(mask=mask, accum=accum) << v[index]) + expr_new = expr.new(dtype=self.dtype) + expr_delayed = expr_new._delayed delayed = self._optional_dup() - expr_delayed = expr.new(dtype=self.dtype)._delayed self._meta(mask=get_meta(mask), accum=accum, replace=replace) if mask is not None: delayed_mask = mask.mask._delayed @@ -304,7 +565,7 @@ def _update(self, expr, *, mask=None, accum=None, replace=None): dtype=np_dtype(self._meta.dtype), ) ) - elif typ is GbDelayed: + elif isinstance(expr, GbDelayed): # v(mask=mask) << left.ewise_mult(right) # Meta check handled in Updater expr._update(self, mask=mask, accum=accum, replace=replace) @@ -333,6 +594,9 @@ def _update(self, expr, *, mask=None, accum=None, replace=None): else: raise NotImplementedError(f"{typ}") + if in_dOnion: + return self.__class__(self._delayed, meta=self._meta) + def wait(self): # TODO: What should this do? 
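+        # grblas' wait() blocks until pending work on an object has finished;
+        # the dask analogue would presumably be persist()/compute(), so for
+        # now only the meta object is waited on.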
         self._meta.wait()
 
@@ -340,6 +604,8 @@ def wait(self):
     def compute(self, *args, **kwargs):
         # kwargs['scheduler'] = 'synchronous'
         val = self._delayed.compute(*args, **kwargs)
+        if self.is_dOnion:
+            return val
         return val.value
 
     def persist(self, *args, **kwargs):
@@ -349,6 +615,276 @@ def visualize(self, *args, **kwargs):
         return self._delayed.visualize(*args, **kwargs)
 
 
+class Box:
+    """
+    A wrapper around the inner values of an Array object that
+    prevents dask from post-processing the Array at the end of
+    compute()
+    """
+
+    def __init__(self, content):
+        self.content = content
+
+    def __getattr__(self, item):
+        return getattr(self.content, item)
+
+
+const_obj = object()
+_const0_DOnion = {"dtype": np.object_, "meta": np.array(const_obj, dtype=np.object_)}
+
+
+class DOnion:
+    """
+    Dask (or Delayed) Onion (DOnion):
+
+    Encapsulates a dask array whose inner value is also a dask array.
+    Intended to be used in cases where the size of the inner dask
+    array (the seed) depends on the inner value of another dask array
+    (the shroud).
+    """
+
+    is_dOnion = True
+
+    @classmethod
+    def sprout(cls, shroud, seed_meta, seed_func, *args, **kwargs):
+        """
+        Develop a DOnion from the dask arrays listed in `shroud`, using the
+        function `seed_func`.
+
+        Return dask.array.map_blocks(seed_func, shroud) as a DOnion.
+
+        :shroud: a dask array; or an iterable of such dask arrays; or a tuple (x, y)
+            where x and y are respectively a list of dask arrays and a dict of named
+            dask arrays.  The inner values of these arrays determine the seed dask
+            array (in particular, its size).
+        :seed_meta: an empty instance of the inner-value type of the seed
+        :seed_func: the function that takes as input the inner value(s) of `shroud`
+            and returns another dask array (the seed)
+        :args: tuple of arguments to `seed_func`.  May contain one or more `skip`
+            sentinels denoting vacant positions to be taken up by the inner values
+            of the dask arrays in `shroud`.
+        :kwargs: dict of keyword arguments to `seed_func`
+        """
+        named_shrouds = {}
+        if is_dask_collection(shroud):
+            shroud = [shroud]
+        else:
+            if isinstance(shroud, Iterable):
+                if len(shroud) > 0:
+                    if (
+                        len(shroud) == 2
+                        and isinstance(shroud[0], Iterable)
+                        and isinstance(shroud[1], dict)
+                    ):
+                        # unpack the (x, y) form: grab the dict before rebinding `shroud`
+                        named_shrouds = shroud[1]
+                        shroud = shroud[0]
+                else:
+                    raise ValueError("`shroud` must contain at least one dask array!")
+            else:
+                raise ValueError(
+                    "`shroud` must be a dask array; a list x of dask arrays or "
+                    "a dict y of named dask arrays; or a tuple of both: (x, y)"
+                )
+
+        seed_func = flexible_partial(seed_func, *args, **kwargs)
+        kernel = da.map_blocks(seed_func, *shroud, **named_shrouds, **_const0_DOnion)
+        return DOnion(kernel, meta=seed_meta)
+
+    def __init__(self, kernel, meta=None):
+        self.kernel = kernel
+        # Why have ._meta and .dtype attributes?  Because Scalar, Vector & Matrix need them.
+        self._meta = meta
+        self.dtype = getattr(meta, "dtype", type(meta))
+
+    def __eq__(self, other):
+        if like_dOnion(other):
+            other = other.compute()
+        return self.compute() == other
+
+    def compute(self, *args, **kwargs):
+        value = self.kernel.compute(*args, **kwargs)
+        while hasattr(value, "compute"):
+            value = value.compute(*args, **kwargs)
+        if type(value) is Box:
+            value = value.content
+        return value
+
+    def compute_once(self, *args, **kwargs):
+        value = self.kernel.compute(*args, **kwargs)
+        if type(value) is Box:
+            value = value.content
+        return value
+
+    def strip(self, *args, **kwargs):
+        value = self.compute_once(*args, **kwargs)
+        while like_dOnion(value):
+            if type(value) is DOnion:
+                value = value.compute_once(*args, **kwargs)
+            else:
+                value = value._delayed.compute_once(*args, **kwargs)
+        return value
+
+    def persist(self, *args, **kwargs):
+        value = self.strip(*args, **kwargs)
+        if hasattr(value, "persist"):
+            return value.persist(*args, **kwargs)
+        else:
+            raise AttributeError(
+                f"Something went wrong: stripped dOnion {self} value {value} has"
+                " no `persist()` attribute."
+            )
+
+    def _persist(self, *args, **kwargs):
+        value = self.strip(*args, **kwargs)
+        if hasattr(value, "_persist"):
+            value._persist(*args, **kwargs)
+            return value._delayed
+        else:
+            raise AttributeError(
+                f"Something went wrong: stripped dOnion {self} value {value} has"
+                " no `_persist()` attribute."
+            )
+
+    @classmethod
+    def multi_access(cls, out_meta, func, *args, **kwargs):
+        def adaptor(func, ts, cs, ss, vs, kwargs_desc, *args, **kwargs):
+            args_ = ()
+            for arg, t, c, s, v in zip(args, ts, cs, ss, vs):
+                if type(arg) is Box:
+                    arg = arg.content
+                if t:
+                    arg = arg.T
+                if s:
+                    arg = arg.S
+                if v:
+                    arg = arg.V
+                if c:
+                    arg = arg.__invert__()
+                args_ += (arg,)
+
+            kwargs_ = kwargs.copy()
+            for k in kwargs:
+                t, c, s, v = kwargs_desc[k]
+                if t:
+                    kwargs_[k] = kwargs_[k].T
+                if s:
+                    kwargs_[k] = kwargs_[k].S
+                if v:
+                    kwargs_[k] = kwargs_[k].V
+                if c:
+                    kwargs_[k] = kwargs_[k].__invert__()
+
+            return func(*args_, **kwargs_)
+
+        _args = [getattr(arg, "dOnion_if", arg) for arg in args]
+        ts = [
+            getattr(arg, "is_dOnion", False) and getattr(arg, "_is_transposed", False)
+            for arg in args
+        ]
+        cs = [
+            getattr(arg, "is_dOnion", False)
+            and isinstance(arg, Mask)
+            and getattr(arg, "complement", False)
+            for arg in args
+        ]
+        ss = [
+            getattr(arg, "is_dOnion", False)
+            and isinstance(arg, Mask)
+            and getattr(arg, "structure", False)
+            for arg in args
+        ]
+        vs = [
+            getattr(arg, "is_dOnion", False)
+            and isinstance(arg, Mask)
+            and getattr(arg, "value", False)
+            for arg in args
+        ]
+
+        _kwargs = {k: getattr(arg, "dOnion_if", arg) for k, arg in kwargs.items()}
+
+        kwargs_desc = {
+            k: (
+                getattr(arg, "is_dOnion", False) and getattr(arg, "_is_transposed", False),
+                getattr(arg, "is_dOnion", False)
+                and isinstance(arg, Mask)
+                and getattr(arg, "complement", False),
+                getattr(arg, "is_dOnion", False)
+                and isinstance(arg, Mask)
+                and getattr(arg, "structure", False),
+                getattr(arg, "is_dOnion", False)
+                and isinstance(arg, Mask)
+                and getattr(arg, "value", False),
+            )
+            for k, arg in kwargs.items()
+        }
+        return DOnion.multiple_access(
+            out_meta, adaptor, func, ts, cs, ss, vs, kwargs_desc, *_args, **_kwargs
+        )
+
+    @classmethod
+    def multiple_access(cls, out_meta, func, *args, **kwargs):
+        """
+        Pass inner values of any DOnions in `args` and/or `kwargs` into `func`.
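+
+        Non-DOnion arguments are bound to `func` up front (via `flexible_partial`);
+        the kernels of any DOnion arguments are handed to `da.map_blocks`, so that
+        `func` is applied only once their inner values exist.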
+
+        :func: Callable that can accept the contents of `args` and/or `kwargs`
+            as parameters
+        :args: a list of positional arguments to `func`
+        :kwargs: a dict of named arguments to `func`
+        """
+        # First, pass non-DOnion args and kwargs to func:
+        skip_Donions = [arg if not is_DOnion(arg) else skip for arg in args]
+        non_DOnion_kwargs = {k: v for (k, v) in kwargs.items() if not is_DOnion(v)}
+        func = flexible_partial(func, *skip_Donions, **non_DOnion_kwargs)
+
+        # Next, pass func and DOnion args and kwargs to map_blocks:
+        donion_args = tuple(arg.kernel for arg in args if is_DOnion(arg))
+        donion_kwargs = {k: v.kernel for (k, v) in kwargs.items() if is_DOnion(v)}
+        kernel = da.map_blocks(func, *donion_args, **donion_kwargs, **_const0_DOnion)
+        return DOnion(kernel, meta=out_meta)
+
+    def deep_extract(self, out_meta, func, *args, **kwargs):
+        func = flexible_partial(func, *args, **kwargs)
+        if not isinstance(
+            out_meta, (np.ndarray, gb.base.BaseType, gb.mask.Mask, gb.matrix.TransposedMatrix)
+        ):
+            func = compose(Box, func)
+        kernel = self.kernel.map_blocks(func, **_const0_DOnion)
+        return DOnion(kernel, meta=out_meta)
+
+    def __call__(self, *args, **kwargs):
+        meta = self._meta(*args, **kwargs)
+        return self.getattr(meta, "__call__", *args, **kwargs)
+
+    def __getattr__(self, item):
+        try:
+            meta = getattr(self._meta, item, getattr(self.kernel, item))
+        except AttributeError:
+            raise AttributeError(f"Unable to compute meta corresponding to attribute {item}.")
+        _getattr = flexible_partial(getattr, skip, item)
+        return self.deep_extract(meta, _getattr)
+
+    def getattr(self, meta, attr_name, *args, **kwargs):
+        _getattr = flexible_partial(DOnion._getattr, skip, attr_name, *args, **kwargs)
+        return self.deep_extract(meta, _getattr)
+
+    @classmethod
+    def _getattr(cls, x, attr_name, *args, **kwargs):
+        return getattr(x, attr_name)(*args, **kwargs)
+
+
+is_DOnion = partial(is_type, DOnion)
+
+
+def like_dOnion(arg):
+    return arg is not None and (
+        is_DOnion(arg) or getattr(arg, "is_dOnion", False) or getattr(arg, "has_dOnion", False)
+    )
+
+
+def any_dOnions(*args, **kwargs):
+    return np.any([like_dOnion(arg) for arg in args]) or np.any(
+        [like_dOnion(v) for _, v in kwargs.items()]
+    )
+
+
 # Dask task functions
 def _clear(x):
     x.value.clear()
@@ -361,13 +897,17 @@ def _dup(x, mask, dtype, mask_type):
     return wrap_inner(x.value.dup(dtype=dtype, mask=mask))
 
 
-def _isclose(x, y, rel_tol, abs_tol, check_dtype):
-    val = x.value.isclose(y.value, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype)
+def _isclose(xt, yt, x, y, rel_tol, abs_tol, check_dtype):
+    x_ = x.value.T if xt else x.value
+    y_ = y.value.T if yt else y.value
+    val = x_.isclose(y_, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype)
     return _reduction_value(x, val)
 
 
-def _isequal(x, y, check_dtype):
-    val = x.value.isequal(y.value, check_dtype=check_dtype)
+def _isequal(xt, yt, x, y, check_dtype):
+    x_ = x.value.T if xt else x.value
+    y_ = y.value.T if yt else y.value
+    val = x_.isequal(y_, check_dtype=check_dtype)
     return _reduction_value(x, val)
 
 
@@ -380,6 +920,10 @@ def _optional_dup(x):
     return wrap_inner(x.value.dup())
 
 
+def _dOnion_dup(x):
+    return x.dup()
+
+
def _reduction_value(x, val):
    """Helper function used when reducing objects to scalars such as for `isclose`"""
    if x.ndim == 0:
@@ -402,5 +946,5 @@ def _update_assign(updating, accum, mask, mask_type, replace, x):
     return updating
 
 
-from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater  # noqa isort: skip
+from .expr import AmbiguousAssignOrExtract, 
GbDelayed, Updater, _is_pair # noqa isort: skip from .matrix import TransposedMatrix # noqa isort: skip diff --git a/dask_grblas/expr.py b/dask_grblas/expr.py index e226ef0..7d59c70 100644 --- a/dask_grblas/expr.py +++ b/dask_grblas/expr.py @@ -8,15 +8,18 @@ from grblas.exceptions import DimensionMismatch from dask.base import tokenize +from dask.highlevelgraph import HighLevelGraph -from .base import BaseType, InnerBaseType, _check_mask +from .base import BaseType, InnerBaseType, _check_mask, DOnion, is_DOnion, any_dOnions from .mask import Mask from .utils import ( get_grblas_type, + get_inner_type, get_meta, get_return_type, np_dtype, wrap_inner, + flatten, build_chunk_offsets_dask_array, build_chunk_ranges_dask_array, build_slice_dask_array_from_chunks, @@ -24,7 +27,8 @@ class GbDelayed: - def __init__(self, parent, method_name, *args, meta, **kwargs): + def __init__(self, parent, method_name, *args, meta=None, **kwargs): + self.has_dOnion = any_dOnions(parent, *args) self.parent = parent self.method_name = method_name self.args = args @@ -32,13 +36,8 @@ def __init__(self, parent, method_name, *args, meta, **kwargs): self._meta = meta # InfixExpression and Aggregator requirements: self.dtype = meta.dtype - self.output_type = meta.output_type - self.ndim = len(meta.shape) - if self.ndim == 1: - self._size = meta.size - elif self.ndim == 2: - self._nrows = meta.nrows - self._ncols = meta.ncols + # autocompute requirements: + self._value = None def _matmul(self, meta, mask=None): left_operand = self.parent @@ -125,39 +124,94 @@ def _matmul2(self, meta, mask=None): op = self.args[1] sum_meta = wrap_inner(meta) - if mask is None: - out = da.core.blockwise( - partial(_matmul2, op, meta.dtype, at, bt), - out_ind, - a, - lhs_ind, - b, - rhs_ind, - adjust_chunks={compress_axis: 1}, - dtype=np.result_type(a, b), - concatenate=False, - meta=FakeInnerTensor(meta, compress_axis), - ) + if op.is_positional: + _, (a, b) = da.core.unify_chunks(a, lhs_ind, b, rhs_ind) + x = build_chunk_ranges_dask_array(a, 0, "row-ranges-" + tokenize(a, 0)) + a_ranges = (x, (lhs_ind[0],)) + if a.ndim == 2: + x = build_chunk_ranges_dask_array(a, 1, "col-ranges-" + tokenize(a, 1)) + a_ranges += (x, (lhs_ind[1],)) + + x = build_chunk_ranges_dask_array(b, 0, "row-ranges-" + tokenize(b, 0)) + b_ranges = (x, (rhs_ind[0],)) + if b.ndim == 2: + x = build_chunk_ranges_dask_array(b, 1, "col-ranges-" + tokenize(b, 1)) + b_ranges += (x, (rhs_ind[1],)) + + if mask is None: + matmul_pos = partial( + _matmul2_positional, + op, + meta.dtype, + at, + bt, + a.shape, + b.shape, + ) + out = da.core.blockwise( + *(matmul_pos, out_ind), + *(a, lhs_ind), + *(b, rhs_ind), + *(a_ranges + b_ranges), + adjust_chunks={compress_axis: 1}, + dtype=np.result_type(a, b), + concatenate=False, + meta=FakeInnerTensor(meta, compress_axis), + ) + else: + m = mask.mask._delayed + grblas_mask_type = get_grblas_type(mask) + mask_ind = list(out_ind) + mask_ind.remove(compress_axis) + mask_ind = tuple(mask_ind) + out = da.core.blockwise( + partial(_matmul2_masked, op, meta.dtype, at, bt, grblas_mask_type), + out_ind, + m, + mask_ind, + a, + lhs_ind, + b, + rhs_ind, + adjust_chunks={compress_axis: 1}, + dtype=np.result_type(a, b), + concatenate=False, + meta=FakeInnerTensor(meta, compress_axis), + ) else: - m = mask.mask._delayed - grblas_mask_type = get_grblas_type(mask) - mask_ind = list(out_ind) - mask_ind.remove(compress_axis) - mask_ind = tuple(mask_ind) - out = da.core.blockwise( - partial(_matmul2_masked, op, meta.dtype, at, bt, grblas_mask_type), - 
out_ind, - m, - mask_ind, - a, - lhs_ind, - b, - rhs_ind, - adjust_chunks={compress_axis: 1}, - dtype=np.result_type(a, b), - concatenate=False, - meta=FakeInnerTensor(meta, compress_axis), - ) + if mask is None: + out = da.core.blockwise( + partial(_matmul2, op, meta.dtype, at, bt), + out_ind, + a, + lhs_ind, + b, + rhs_ind, + adjust_chunks={compress_axis: 1}, + dtype=np.result_type(a, b), + concatenate=False, + meta=FakeInnerTensor(meta, compress_axis), + ) + else: + m = mask.mask._delayed + grblas_mask_type = get_grblas_type(mask) + mask_ind = list(out_ind) + mask_ind.remove(compress_axis) + mask_ind = tuple(mask_ind) + out = da.core.blockwise( + partial(_matmul2_masked, op, meta.dtype, at, bt, grblas_mask_type), + out_ind, + m, + mask_ind, + a, + lhs_ind, + b, + rhs_ind, + adjust_chunks={compress_axis: 1}, + dtype=np.result_type(a, b), + concatenate=False, + meta=FakeInnerTensor(meta, compress_axis), + ) # out has an extra dimension (a slab or a bar), and now reduce along it out = sum_by_monoid(op.monoid, out, axis=compress_axis, meta=sum_meta) @@ -180,26 +234,24 @@ def _reduce_along_axis(self, axis, dtype): return delayed def _reduce_scalar(self, dtype): - assert not self.kwargs op = self.args[0] at = self.parent._is_transposed delayed = self.parent._matrix._delayed if at else self.parent._delayed delayed = da.reduction( delayed, - partial(_reduce_scalar, op, dtype), - partial(_reduce_combine, op), + partial(_reduce_scalar, op, dtype, **self.kwargs), + partial(_reduce_combine, op, **self.kwargs), concatenate=False, dtype=np_dtype(dtype), ) return delayed def _reduce(self, dtype): - assert not self.kwargs op = self.args[0] delayed = da.reduction( self.parent._delayed, - partial(_reduce, op, dtype), - partial(_reduce_combine, op), + partial(_reduce, op, dtype, **self.kwargs), + partial(_reduce_combine, op, **self.kwargs), concatenate=False, dtype=np_dtype(dtype), ) @@ -228,17 +280,128 @@ def _aggregate( op._new(updater, self) return output + def _kronecker(self, a, b, op, meta): + a = a.rechunk(chunks=1) + frag = da.core.blockwise( + *(partial(_kronecker, a._is_transposed, b._is_transposed), "ijMN"), + *((a._matrix._delayed, "ji") if a._is_transposed else (a._delayed, "ij")), + *((b._matrix._delayed, "NM") if b._is_transposed else (b._delayed, "MN")), + *(op, None), + dtype=np_dtype(meta.dtype), + meta=wrap_inner(meta), + ) + + name = "kronecker-" + tokenize(a, b) + b_ = b._matrix._delayed if b._is_transposed else b._delayed + + out_chunks = () + for axis in range(2): + out_chunks += (b_.chunks[axis] * a.shape[axis],) + + dsk = dict() + for i in range(a.shape[0]): + for j in range(a.shape[1]): + for M in range(b_.numblocks[0]): + for N in range(b_.numblocks[1]): + + dsk[(name, i * b_.numblocks[0] + M, j * b_.numblocks[1] + N)] = ( + lambda x: x, + (frag.name, i, j, M, N), + ) + + graph = HighLevelGraph.from_collections(name, dsk, dependencies=[frag]) + out = da.core.Array(graph, name, out_chunks, meta=wrap_inner(meta)) + return out + def new(self, dtype=None, *, mask=None, name=None): + _check_mask(mask, ignore_None=True) + + if any_dOnions(self, mask): + + def recall_GbDelayed_new(p, m, *args, dtype=None, mask=None, **kwargs): + gbd = getattr(p, m)(*args, **kwargs) + return gbd.new(dtype=dtype, mask=mask) + + meta_args = list(getattr(v, "_meta", v) for v in self.args) + meta_kwargs = {k: getattr(v, "_meta", v) for k, v in self.kwargs.items()} + if self.method_name.startswith(("reduce", "apply")): + # unary operations + a = self.parent + op = self.args[0] + if self.method_name == 
"apply": + # grblas `apply()` does not like empty Scalars! + if len(meta_args) > 1 and type(meta_args[1]) is gb.Scalar: + meta_args[1] = gb.Scalar.from_value(1, dtype=meta_args[1].dtype) + if len(meta_args) > 2 and type(meta_args[2]) is gb.Scalar: + meta_args[2] = gb.Scalar.from_value(1, dtype=meta_args[2].dtype) + if "left" in meta_kwargs and type(meta_kwargs["left"]) is gb.Scalar: + meta_kwargs["left"] = gb.Scalar.from_value( + 1, dtype=meta_kwargs["left"].dtype + ) + if "right" in meta_kwargs and type(meta_kwargs["right"]) is gb.Scalar: + meta_kwargs["right"] = gb.Scalar.from_value( + 1, dtype=meta_kwargs["right"].dtype + ) + elif self.method_name.startswith("reduce"): + # grblas bug occurs when shape is (0, 0) + if a._meta.shape == (0,) * a.ndim: + a._meta.resize(*((1,) * a.ndim)) + meta = getattr(a._meta, self.method_name)(*meta_args, **meta_kwargs).new( + dtype=dtype + ) + meta.clear() + else: + # binary operations + a = self.parent + b = self.args[0] + op = self.args[1] + + try: + meta = getattr(a._meta, self.method_name)(b._meta, op=op, **meta_kwargs).new( + dtype=dtype + ) + except DimensionMismatch: + if self.method_name == "mxm": + b_meta = gb.Matrix.new( + dtype=b._meta.dtype, nrows=a._meta.ncols, ncols=b._meta.ncols + ) + elif self.method_name == "vxm": + b_meta = gb.Matrix.new( + dtype=b._meta.dtype, nrows=a._meta.size, ncols=b._meta.ncols + ) + elif self.method_name == "mxv": + b_meta = gb.Vector.new(dtype=b._meta.dtype, size=a._meta.ncols) + + elif self.method_name in ("ewise_add", "ewise_mult"): + b_meta = a._meta.dup(dtype=b._meta.dtype) + + meta = getattr(a._meta, self.method_name)(b_meta, op=op, **meta_kwargs).new( + dtype=dtype + ) + + donion = DOnion.multi_access( + meta, + recall_GbDelayed_new, + self.parent, + self.method_name, + *self.args, + dtype=dtype, + mask=mask, + **self.kwargs, + ) + return get_return_type(meta)(donion, meta=meta) + + # no dOnions + meta = self._meta.new(dtype=dtype) if mask is not None: - _check_mask(mask) - meta = self._meta.new(dtype=dtype, mask=mask._meta) delayed_mask = mask.mask._delayed grblas_mask_type = get_grblas_type(mask) else: - meta = self._meta.new(dtype=dtype) delayed_mask = None grblas_mask_type = None + meta.clear() + if self.method_name.startswith("reduce"): op = self._meta.op if op is not None and op.opclass == "Aggregator": @@ -261,20 +424,37 @@ def new(self, dtype=None, *, mask=None, name=None): ) for key in self.kwargs } - delayed = da.core.elemwise( - _expr_new, - self.method_name, - dtype, - grblas_mask_type, - self_kwargs, - self.parent._delayed, - delayed_mask, - *[x._delayed if isinstance(x, BaseType) else x for x in self.args], + pt = getattr(self.parent, "_is_transposed", False) + xts = [getattr(arg, "_is_transposed", False) for arg in self.args] + axes = "ij" if self.parent.ndim == 2 else "i" + delayed = da.core.blockwise( + *(partial(_expr_new, pt, xts), axes), + *(self.method_name, None), + *(dtype, None), + *(grblas_mask_type, None), + *( + (self.parent._matrix._delayed, axes[::-1]) + if pt + else (self.parent._delayed, axes) + ), + *(delayed_mask, (None if mask is None else axes)), + *flatten( + ( + (x._matrix._delayed, axes[::-1]) + if xt + else (x._delayed, (None if x._is_scalar else axes)) + ) + if isinstance(x, BaseType) or getattr(x, "_is_transposed", False) + else (x, None) + for x, xt in zip(self.args, xts) + ), + **self_kwargs, dtype=np_dtype(meta.dtype), ) elif self.method_name in {"vxm", "mxv", "mxm"}: - # TODO: handle dtype and mask delayed = self._matmul2(meta, mask=mask) + elif self.method_name 
== "kronecker": + delayed = self._kronecker(self.parent, self.args[0], self.args[1], meta) else: raise ValueError(self.method_name) return get_return_type(meta)(delayed) @@ -381,6 +561,9 @@ def _update(self, updating, *, mask=None, accum=None, replace=None): delayed = self._matmul2(meta, mask=mask) updating(mask=mask, accum=accum, replace=replace) << get_return_type(meta)(delayed) return + elif self.method_name == "kronecker": + updating(mask=mask, accum=accum, replace=replace) << self.new() + return else: raise ValueError(self.method_name) updating.__init__(delayed) @@ -420,14 +603,24 @@ def _new_matrix(self, dtype, nrows=0, ncols=0, *, name=None): class IndexerResolver: - def __init__(self, obj, indices): + __slots__ = "obj", "indices", "is_dOnion", "shape" + + def __init__(self, obj, indices, check_shape=True): + index_is_dOnion = obj.ndim == 1 and is_DOnion(indices) + index_is_dOnion = index_is_dOnion or ( + obj.ndim == 2 and _is_pair(indices) and (is_DOnion(indices[0]) or is_DOnion(indices[1])) + ) + self.is_dOnion = index_is_dOnion + check_shape = not (index_is_dOnion or obj.is_dOnion) + self.obj = obj if indices is Ellipsis: from .vector import Vector - if type(obj) is Vector: + if type(obj) in {Vector, gb.Vector}: normalized = slice(None).indices(obj._size) self.indices = [AxisIndex(obj._size, slice(*normalized))] + self.shape = (obj._size,) else: normalized0 = slice(None).indices(obj._nrows) normalized1 = slice(None).indices(obj._ncols) @@ -435,8 +628,14 @@ def __init__(self, obj, indices): AxisIndex(obj._nrows, slice(*normalized0)), AxisIndex(obj._ncols, slice(*normalized1)), ] + self.shape = (obj._nrows, obj._ncols) else: - self.indices = self.parse_indices(indices, obj.shape) + if not check_shape and hasattr(obj, "_meta"): + shape = obj._meta.shape + else: + shape = obj.shape + self.indices = self.parse_indices(indices, shape, check_shape) + self.shape = tuple(index.size for index in self.indices if index.size is not None) @property def is_single_element(self): @@ -445,7 +644,7 @@ def is_single_element(self): return False return True - def parse_indices(self, indices, shape): + def parse_indices(self, indices, shape, check_shape=True): """ Returns [(rows, rowsize), (cols, colsize)] for Matrix @@ -469,24 +668,41 @@ def parse_indices(self, indices, shape): raise TypeError( f"Index in position {i} cannot be a tuple; must use slice or list or int" ) - out.append(self.parse_index(idx, typ, shape[i])) + out.append(self.parse_index(idx, typ, shape[i], check_shape)) return out - def parse_index(self, index, typ, size): + def parse_index(self, index, typ, size, check_shape=True): if np.issubdtype(typ, np.integer): if index >= size: - raise IndexError(f"Index out of range: index={index}, size={size}") + if check_shape: + raise IndexError(f"Index out of range: index={index}, size={size}") if index < 0: index += size if index < 0: - raise IndexError(f"Index out of range: index={index - size}, size={size}") - return AxisIndex(None, IndexerResolver.normalize_index(index, size)) + if check_shape: + raise IndexError(f"Index out of range: index={index - size}, size={size}") + return AxisIndex(None, IndexerResolver.normalize_index(index, size, check_shape)) + + def compute_scalar(index): + from .scalar import Scalar, PythonScalar + + if type(index) is Scalar: + return index.value.compute() + if type(index) is PythonScalar: + return index.compute() + return index + if typ is list: - index = [IndexerResolver.normalize_index(i, size) for i in index] + index = [ + 
IndexerResolver.normalize_index(compute_scalar(i), size, check_shape) for i in index + ] return AxisIndex(len(index), index) elif typ is slice: - normalized = index.indices(size) - return AxisIndex(len(range(*normalized)), slice(*normalized)) + if check_shape: + normalized = index.indices(size) + return AxisIndex(len(range(*normalized)), slice(*normalized)) + else: + return AxisIndex(0, index) elif typ in {np.ndarray, da.Array}: if len(index.shape) != 1: @@ -494,13 +710,17 @@ def parse_index(self, index, typ, size): if not np.issubdtype(index.dtype, np.integer): raise TypeError(f"Invalid dtype for index: {index.dtype}") return AxisIndex(index.shape[0], index) + + elif is_DOnion(index): + return AxisIndex(0, index) + else: - from .scalar import Scalar + from .scalar import Scalar, PythonScalar - if typ is Scalar: + if typ in {Scalar, PythonScalar}: if index.dtype.name.startswith("F"): raise TypeError(f"An integer is required for indexing. Got: {index.dtype}") - index = index.value.compute() + index = index.value.compute() if typ is Scalar else index.compute() return AxisIndex(None, IndexerResolver.normalize_index(index, size)) from .matrix import Matrix, TransposedMatrix @@ -530,7 +750,7 @@ def parse_index(self, index, typ, size): f"`x(mask={index.name}) << value`." ) raise TypeError(f"Invalid type for index: {typ}; unable to convert to list") - index = [IndexerResolver.normalize_index(i, size) for i in index] + index = [IndexerResolver.normalize_index(i, size, check_shape) for i in index] return AxisIndex(len(index), index) def get_index(self, dim): @@ -548,39 +768,52 @@ def validate_types(cls, indices): return @classmethod - def normalize_index(cls, index, size): + def normalize_index(cls, index, size, check_size=True): if type(index) is get_return_type(gb.Scalar.new(int)): # This branch needs a second look: How to work with the lazy index? 
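+            # grblas needs a concrete Python int here, so a dask-backed Scalar
+            # index is computed eagerly; staying lazy would mean routing the
+            # whole expression through a DOnion instead.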
index = index.value.compute() if not isinstance(index, Integral): raise TypeError("An integer is required for indexing") if index >= size: - raise IndexError(f"Index out of range: index={index}, size={size}") + if check_size: + raise IndexError(f"Index out of range: index={index}, size={size}") if index < 0: index += size if index < 0: - raise IndexError(f"Index out of range: index={index - size}, size={size}") + if check_size: + raise IndexError(f"Index out of range: index={index - size}, size={size}") return int(index) class Updater: + __bool__ = gb.expr.Updater.__bool__ + __eq__ = gb.expr.Updater.__eq__ + def __init__(self, parent, *, mask=None, accum=None, replace=False, input_mask=None): - if input_mask is not None and mask is not None: + if mask is not None and input_mask is not None: raise TypeError("mask and input_mask arguments cannot both be given") - if input_mask is not None and not isinstance(input_mask, Mask): - raise TypeError(r"Mask must indicate values (M.V) or structure (M.S)") + _check_mask(mask, ignore_None=True) + _check_mask(input_mask, ignore_None=True) + + self.has_dOnion = any_dOnions(parent, mask, input_mask) self.parent = parent self.mask = mask self.input_mask = input_mask self.accum = accum - if mask is None: - self.replace = None - else: - self.replace = replace + self.replace = replace if mask is not None else None self._meta = parent._meta(mask=get_meta(mask), accum=accum, replace=replace) + + # copy `mask` if `parent` is the source of `mask` + if parent is getattr(mask, "mask", None): + self.mask = type(mask)(mask.mask.dup()) + + # copy `input_mask` if `parent` is the source of `input_mask` + if parent is getattr(input_mask, "mask", None): + self.input_mask = type(input_mask)(input_mask.mask.dup()) + # Aggregator specific attribute requirements: - self.kwargs = {"mask": mask} + self.kwargs = {"mask": self.mask} def __delitem__(self, keys): # Occurs when user calls `del C(params)[index]` @@ -611,8 +844,13 @@ def __lshift__(self, delayed): def update(self, delayed): # Occurs when user calls C(params) << delayed if self.input_mask is not None: - if type(delayed) is AmbiguousAssignOrExtract: + if isinstance(delayed, AmbiguousAssignOrExtract): # w(input_mask) << v[index] + if self.parent is delayed.parent: + # replace `v` with a copy of itself if `w` is `v` + delayed.parent = delayed.parent.__class__( + delayed.parent._optional_dup(), delayed.parent._meta + ) self.parent._update( delayed.new(mask=self.mask, input_mask=self.input_mask), accum=self.accum, @@ -626,7 +864,8 @@ def update(self, delayed): if isinstance(delayed, Number) or ( isinstance(delayed, BaseType) and get_meta(delayed)._is_scalar ): - ndim = len(self.parent.shape) + # w(mask, accum, replace) << s + ndim = self.parent.ndim if ndim > 0: self.__setitem__(_squeeze((slice(None),) * ndim), delayed) elif self.accum is not None: @@ -637,12 +876,15 @@ def update(self, delayed): if self.mask is None and self.accum is None: return self.parent.update(delayed) - self.parent._meta._update( - get_meta(delayed), - mask=get_meta(self.mask), - accum=self.accum, - replace=self.replace, - ) + + if not any_dOnions(self.parent, delayed): + self.parent._meta._update( + get_meta(delayed), + mask=get_meta(self.mask), + accum=self.accum, + replace=self.replace, + ) + if self.parent._meta._is_scalar: self.parent._update(delayed, accum=self.accum) else: @@ -650,6 +892,11 @@ def update(self, delayed): def _csc_chunk(row_range, col_range, indices, red_columns, track_indices=False): + """ + create chunk of Reduce_Assign 
Matrix in Compressed Sparse Column (CSC) format + + (Used in `reduce_assign()`) + """ row_range = row_range[0] nrows = row_range.stop - row_range.start if type(indices[0]) is slice: @@ -689,13 +936,14 @@ def _csc_chunk(row_range, col_range, indices, red_columns, track_indices=False): def _fill(inner_vector, rhs): + # used in reduce_assign() rhs = rhs.value if isinstance(rhs, InnerBaseType) else rhs inner_vector.value[:] << rhs return inner_vector def reduce_assign(lhs, indices, rhs, dup_op="last", mask=None, accum=None, replace=False): - # lhs(mask, accum, replace)[i] << rhs + # lhs(mask, accum, replace, dup_op)[i] << rhs rhs_is_scalar = not (isinstance(rhs, BaseType) and type(rhs._meta) is gb.Vector) if type(indices) is slice: chunksz = "auto" if rhs_is_scalar else rhs._delayed.chunks @@ -808,6 +1056,15 @@ def _get_type_with_ndims(n): return get_return_type(gb.Matrix.new(int)) +def _get_inner_type_with_ndims(n): + if n == 0: + return get_inner_type(gb.Scalar.new(int)) + elif n == 1: + return get_inner_type(gb.Vector.new(int)) + else: + return get_inner_type(gb.Matrix.new(int)) + + def _get_grblas_type_with_ndims(n): if n == 0: return gb.Scalar @@ -1245,24 +1502,111 @@ def _defrag_to_index_chunk(*args, x_chunks, dtype=None): return wrap_inner(fused_fragments[index_tuple].new()) +def _adjust_meta_to_index(meta, index): + from .scalar import Scalar, PythonScalar + + # Since grblas does not support indices that are dask arrays + # this complicates meta deduction. We therefore substitute + # any non-Integral type indices with `slice(None)` + index = index if type(index) is tuple else (index,) + # Next, we resize `meta` to accept any Integral-type indices: + numbers = [x for x in index if isinstance(x, (Integral, Scalar, PythonScalar))] + max_index = np.max(numbers) if numbers else None + meta = meta.dup() + if max_index is not None: + if len(index) == 1: + meta.resize(max_index + 1) + else: + meta.resize(max_index + 1, max_index + 1) + + meta_index = tuple( + x if isinstance(x, (Integral, Scalar, PythonScalar)) else slice(None) for x in index + ) + return meta[_squeeze(meta_index)] + + class AmbiguousAssignOrExtract: - def __init__(self, parent, index): - self.resolved_indices = IndexerResolver(parent, index) + __slots__ = ( + "has_dOnion", + "index", + "parent", + "resolved_indexes", + "_meta", + "_value", + "__weakref__", + ) + _is_scalar = False + + def __init__(self, parent, index, meta=None): self.parent = parent - self.index = index - # IndexerResolver.validate_types(self.index) - self._meta = parent._meta[index] - # infix expression requirements: - shape = tuple(i.size for i in self.resolved_indices.indices if i.size) - self.ndim = len(shape) - self.output_type = _get_grblas_type_with_ndims(self.ndim) - if self.ndim == 1: - self._size = shape[0] - elif self.ndim == 2: - self._nrows = shape[0] - self._ncols = shape[1] + self.resolved_indexes = index + self.index = _squeeze(tuple(i.index for i in index.indices)) + self._value = None + if parent.is_dOnion or index.is_dOnion: + self.has_dOnion = True + self._meta = _adjust_meta_to_index(parent._meta, self.index) + else: + self.has_dOnion = False + self._meta = parent._meta[self.index] if meta is None else meta + + @staticmethod + def _extract_single_element(x, xt, T, dxn, indices, meta, dtype): + def getitem(inner, key, dtype): + return wrap_inner(inner.value[key].new(dtype=dtype)) + + name = "extract_single_element-" + tokenize(x, xt, indices) + + block = () + element = () + for axis, i in enumerate(indices): + stops_ = 
np.cumsum(x.chunks[T[axis]]) + starts = np.roll(stops_, 1) + starts[0] = 0 + + blockid = np.arange(x.numblocks[T[axis]]) + + # locate chunk containing element: + filter = (starts <= i) & (i < stops_) + (R,) = blockid[filter] + + block += (R,) + element += (i - starts[R],) + + dsk = dict() + dsk[(name,)] = (getitem, (x.name, *block[::dxn]), _squeeze(element[::dxn]), dtype) + graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x]) + out = da.core.Array(graph, name, (), meta=wrap_inner(meta)) + return out def new(self, *, dtype=None, mask=None, input_mask=None, name=None): + _check_mask(mask, ignore_None=True) + _check_mask(input_mask, ignore_None=True) + + if any_dOnions(self, mask, input_mask): + + def _recall_getitem(parent, keys_0, keys_1, dtype, mask, input_mask): + keys = keys_0 if keys_1 is None else (keys_0, keys_1) + return parent[keys].new(dtype=dtype, mask=mask, input_mask=input_mask) + + meta = self._meta.new(dtype=dtype) + + if _is_pair(self.index): + keys_0, keys_1 = self.index[0], self.index[1] + else: + keys_0, keys_1 = self.index, None + + donion = DOnion.multi_access( + meta, + _recall_getitem, + self.parent, + *(keys_0, keys_1), + dtype=dtype, + mask=mask, + input_mask=input_mask, + ) + return get_return_type(meta)(donion) + + # no dOnions parent = self.parent xt = False # xt = parent._is_transposed dxn = 1 # dxn = -1 if xt else 1 @@ -1278,8 +1622,8 @@ def new(self, *, dtype=None, mask=None, input_mask=None, name=None): input_ndim = len(input_shape) axes = tuple(range(input_ndim)) x_axes = axes[::dxn] - indices = tuple(i.index for i in self.resolved_indices.indices) - out_shape = tuple(i.size for i in self.resolved_indices.indices if i.size is not None) + indices = tuple(i.index for i in self.resolved_indexes.indices) + out_shape = tuple(i.size for i in self.resolved_indexes.indices if i.size is not None) out_ndim = len(out_shape) if mask is not None: @@ -1332,7 +1676,7 @@ def new(self, *, dtype=None, mask=None, input_mask=None, name=None): elif out_ndim < input_ndim: (rem_axis,) = [ axis - for axis, index in enumerate(self.resolved_indices.indices) + for axis, index in enumerate(self.resolved_indexes.indices) if index.size is not None ] if out_ndim == input_mask_ndim: @@ -1356,6 +1700,12 @@ def new(self, *, dtype=None, mask=None, input_mask=None, name=None): dtype = np_dtype(meta.dtype) if input_ndim in [1, 2]: + if out_ndim == 0: + delayed = self.__class__._extract_single_element( + x, xt, T, dxn, indices, meta, meta.dtype + ) + return get_return_type(meta)(delayed) + # prepare arguments for blockwise: indices_args = [] offset_args = [] @@ -1449,15 +1799,27 @@ def __call__(self, *args, **kwargs): def update(self, obj): if getattr(self.parent, "_is_transposed", False): raise TypeError("'TransposedMatrix' object does not support item assignment") + + if self.parent.is_dOnion: + self.parent.__setitem__(self.index, obj) + return + Assigner(Updater(self.parent), self.index).update(obj) def __lshift__(self, rhs): self.update(rhs) + @property + def dtype(self): + return self.parent.dtype + @property def value(self): - self._meta.value - return self.new().value + self._meta.new().value + scalar = self.new() + return scalar.value + + dup = new def _uniquify(ndim, index, obj, mask=None, ot=False): @@ -1504,20 +1866,81 @@ def _identity_func(x, axis, keepdims): class Assigner: + __bool__ = gb.expr.Assigner.__bool__ + __eq__ = gb.expr.Assigner.__eq__ + def __init__(self, updater, index, subassign=False): self.updater = updater self.parent = updater.parent - 
self.resolved_indices = IndexerResolver(self.parent, index).indices - self.index = tuple(i.index for i in self.resolved_indices) self._meta = updater.parent._meta self.subassign = subassign + input_ndim = self.parent.ndim + index_is_dOnion = input_ndim == 1 and is_DOnion(index) + index_is_dOnion = index_is_dOnion or ( + input_ndim == 2 and _is_pair(index) and (is_DOnion(index[0]) or is_DOnion(index[1])) + ) + if self.updater.has_dOnion or index_is_dOnion: + self.has_dOnion = True + IndexerResolver(self.parent, index, check_shape=False) + self.index = index + else: + self.has_dOnion = False + self.resolved_indexes = IndexerResolver(self.parent, index).indices + self.index = tuple(i.index for i in self.resolved_indexes) + def update(self, obj): - if not (isinstance(obj, BaseType) or isinstance(obj, Number)): - try: - obj_transposed = obj._is_transposed - except AttributeError: - raise TypeError("Bad type for argument `obj`") + if not ( + isinstance(obj, Number) + or isinstance(obj, BaseType) + or getattr(obj, "_is_transposed", False) + ): + obj = self.parent._expect_type( + obj, + ( + gb.Scalar, + gb.Vector, + gb.Matrix, + gb.matrix.TransposedMatrix, + ), + within="Assign.update", + ) + if any_dOnions(self, obj): + + def _recall_update(lhs, mask, accum, replace, keys_0, keys_1, obj, subassign): + keys = (keys_0,) if keys_1 is None else (keys_0, keys_1) + updater = Updater(lhs, mask=mask, accum=accum, replace=replace) + Assigner(updater, keys, subassign=subassign).update(obj) + return lhs + + lhs = self.parent + lhs_copy = lhs.__class__(lhs._optional_dup(), meta=lhs._meta) + + updater = self.updater + + if _is_pair(self.index): + keys_0, keys_1 = self.index[0], self.index[1] + else: + keys_0, keys_1 = self.index, None + + donion = DOnion.multi_access( + lhs._meta, + _recall_update, + lhs_copy, + updater.mask, + updater.accum, + updater.replace, + keys_0, + keys_1, + obj, + self.subassign, + ) + lhs.__init__(donion, meta=lhs._meta) + return + + # no dOnions + if getattr(obj, "_is_transposed", False): + obj_transposed = obj._is_transposed obj = obj._matrix else: obj_transposed = False @@ -1570,7 +1993,7 @@ def update(self, obj): else: (rem_axis,) = [ axis - for axis, index in enumerate(self.resolved_indices) + for axis, index in enumerate(self.resolved_indexes) if index.size is not None ] if parent.shape[rem_axis] != out_shape[0]: @@ -1579,7 +2002,7 @@ def update(self, obj): if ndim == 2 and out_dim == 1: (int_axis,) = [ axis - for axis, index in enumerate(self.resolved_indices) + for axis, index in enumerate(self.resolved_indexes) if index.size is None ] indices = list(indices) @@ -1800,14 +2223,14 @@ def __init__(self, value, compress_axis): self.compress_axis = compress_axis -def _expr_new(method_name, dtype, grblas_mask_type, kwargs, x, mask, *args): +def _expr_new(xt, ats, method_name, dtype, grblas_mask_type, x, mask, *args, **kwargs): # expr.new(...) 
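+    # `xt` and `ats` flag whether `x` and each entry of `args` were passed as
+    # the underlying matrix of a TransposedMatrix (the lazy transpose itself
+    # cannot be shipped to chunks), so each chunk re-applies the transpose
+    # via _transpose_if before calling the method.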
- args = [x.value if isinstance(x, InnerBaseType) else x for x in args] + args = [_transpose_if(y, yt) if isinstance(y, InnerBaseType) else y for y, yt in zip(args, ats)] kwargs = { key: (kwargs[key].value if isinstance(kwargs[key], InnerBaseType) else kwargs[key]) for key in kwargs } - expr = getattr(x.value, method_name)(*args, **kwargs) + expr = getattr(_transpose_if(x, xt), method_name)(*args, **kwargs) if mask is not None: mask = grblas_mask_type(mask.value) return wrap_inner(expr.new(dtype=dtype, mask=mask)) @@ -1869,32 +2292,34 @@ def _add_blocks(monoid_, x, y): return x -def _reduce_scalar(op, gb_dtype, x, axis=None, keepdims=None, computing_meta=None, dtype=None): +def _reduce_scalar( + op, gb_dtype, x, axis=None, keepdims=None, computing_meta=None, dtype=None, **kwargs +): """Call reduce_scalar on each chunk""" if computing_meta: return np.empty(0, dtype=dtype) - return wrap_inner(x.value.reduce_scalar(op).new(dtype=gb_dtype)) + return wrap_inner(x.value.reduce_scalar(op, **kwargs).new(dtype=gb_dtype)) -def _reduce(op, gb_dtype, x, axis=None, keepdims=None, computing_meta=None, dtype=None): +def _reduce(op, gb_dtype, x, axis=None, keepdims=None, computing_meta=None, dtype=None, **kwargs): """Call reduce on each chunk""" if computing_meta: return np.empty(0, dtype=dtype) - return wrap_inner(x.value.reduce(op).new(dtype=gb_dtype)) + return wrap_inner(x.value.reduce(op, **kwargs).new(dtype=gb_dtype)) -def _reduce_combine(op, x, axis=None, keepdims=None, computing_meta=None, dtype=None): +def _reduce_combine(op, x, axis=None, keepdims=None, computing_meta=None, dtype=None, **kwargs): """Combine results from reduce or reduce_scalar on each chunk""" if computing_meta: return np.empty(0, dtype=dtype) if type(x) is list: # do we need `gb_dtype` instead of `np_dtype` below? 
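+        # Chunks whose reduction produced an empty Scalar carry a .value of None;
+        # those entries are filtered out below so that Vector.from_values only
+        # sees filled values.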
         if type(x[0]) is list:
-            vals = [val.value.value for sublist in x for val in sublist]
+            vals = [val.value.value for sublist in x for val in sublist if val.value.value is not None]
         else:
-            vals = [val.value.value for val in x]
+            vals = [val.value.value for val in x if val.value.value is not None]
         values = gb.Vector.from_values(list(range(len(vals))), vals, size=len(vals), dtype=dtype)
-        return wrap_inner(values.reduce(op).new())
+        return wrap_inner(values.reduce(op, **kwargs).new())
     return x
 
 
@@ -1938,6 +2363,12 @@ def _transpose_if(inner_x, xt):
     return inner_x.value
 
 
+def _kronecker(at, bt, a, b, op):
+    a = _transpose_if(a, at)
+    b = _transpose_if(b, bt)
+    return wrap_inner(a.kronecker(b, op=op).new())
+
+
 def _matmul(op, at, bt, dtype, no_mask, mask_type, *args, computing_meta=None):
     if computing_meta:
         return np.empty(0, dtype=dtype)
@@ -1958,6 +2389,57 @@ def _matmul(op, at, bt, dtype, no_mask, mask_type, *args, computing_meta=None):
     return wrap_inner(gb_obj)
 
 
+def _expand(inner, fullshape, *index_ranges):
+    a = inner
+    if a.ndim == 1:
+        (a_index_range,) = index_ranges
+        balloon = gb.Vector.new(a.value.dtype, *fullshape)
+        balloon[a_index_range.start : a_index_range.stop] << a.value
+    else:
+        (a_row_range, a_col_range) = index_ranges
+        balloon = gb.Matrix.new(a.value.dtype, *fullshape)
+        (
+            balloon[
+                a_row_range.start : a_row_range.stop,
+                a_col_range.start : a_col_range.stop,
+            ]
+            << a.value
+        )
+
+    return wrap_inner(balloon)
+
+
+def _matmul2_positional(
+    op, dtype, at, bt, a_fullshape, b_fullshape, a, b, *args, computing_meta=None
+):
+    a_ranges = (args[0][0],) if a.ndim == 1 else (args[0][0], args[1][0])
+    b_ranges = (args[a.ndim][0],) if b.ndim == 1 else (args[a.ndim][0], args[a.ndim + 1][0])
+
+    a_expanded = _expand(a, a_fullshape, *a_ranges)
+    b_expanded = _expand(b, b_fullshape, *b_ranges)
+
+    res = _matmul2(op, dtype, at, bt, a_expanded, b_expanded, computing_meta=computing_meta)
+
+    if a.ndim == 1 and b.ndim == 1:
+        return res
+
+    # shrink expanded result to original size:
+    indices = (
+        slice(a_ranges[1].start, a_ranges[1].stop)
+        if at
+        else slice(a_ranges[0].start, a_ranges[0].stop)
+    )
+    if b.ndim == 2:
+        cols = (
+            slice(b_ranges[0].start, b_ranges[0].stop)
+            if bt
+            else slice(b_ranges[1].start, b_ranges[1].stop)
+        )
+        indices = cols if a.ndim == 1 else (indices, cols)
+
+    return res[indices].new()
+
+
 def _matmul2(op, dtype, at, bt, a, b, computing_meta=None):
     left = _transpose_if(a, at)
     right = _transpose_if(b, bt)
@@ -2092,3 +2574,7 @@ def concatenate_fragments(frag1, frag2, axis=0, base_axis=0):
         return reduce(partial(concatenate_fragments, axis=axis, base_axis=base_axis), seq_)
     else:
         return seq[0]
+
+
+def _is_pair(arg):
+    return type(arg) is tuple and len(arg) == 2
diff --git a/dask_grblas/functools.py b/dask_grblas/functools.py
new file mode 100644
index 0000000..257d5ec
--- /dev/null
+++ b/dask_grblas/functools.py
@@ -0,0 +1,112 @@
+from reprlib import recursive_repr
+
+
+class skip:
+    def __repr__(self):
+        return "skip"
+
+    __str__ = __repr__
+    # __reduce__ returning the bare name makes pickle restore the
+    # module-level `skip` singleton instead of building a new instance.
+    __reduce__ = __repr__  # This makes it pickle well!
+
+
+skip = skip()
+
+
+class flexible_partial:
+    """New function with flexible partial application of the given
+    arguments and keywords.  Any argument slot of the given function
+    may be occupied (not just the leading slots).  Use the sentinel
+    `skip` to denote vacant argument slots.
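+
+    For example (an illustrative sketch; `sub` is a toy function, not part
+    of the library):
+
+        >>> def sub(a, b):
+        ...     return a - b
+        >>> from_right = flexible_partial(sub, skip, 10)  # slot 0 stays vacant
+        >>> from_right(3)  # the call fills the vacant slot: sub(3, 10)
+        -7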
+ """ + + __slots__ = "base_func", "args", "keywords", "__dict__", "__weakref__" + + def __new__(cls, func, /, *args, **keywords): + if not callable(func): + raise TypeError("the first argument must be callable") + + if hasattr(func, "base_func"): + func_ = func.base_func + func_is_partial = True + elif hasattr(func, "func"): + func_ = func.func + func_is_partial = True + else: + func_is_partial = False + + if func_is_partial: + old_arg, new_arg = iter(func.args), iter(args) + exhausted = False + args = () + for arg in func.args: + if arg is skip: + try: + args += (next(new_arg),) + except StopIteration: + exhausted = True + break + else: + args += arg + next(old_arg) + + args += tuple(old_arg if exhausted else new_arg) + keywords = {**func.keywords, **keywords} + func = func_ + + self = super(flexible_partial, cls).__new__(cls) + + self.base_func = func + self.args = args + self.keywords = keywords + return self + + def __call__(self, /, *args, **keywords): + new_arg = iter(args) + args = (next(new_arg) if arg is skip else arg for arg in self.args) + + keywords = {**self.keywords, **keywords} + return self.base_func(*args, *new_arg, **keywords) + + @recursive_repr() + def __repr__(self): + qualname = type(self).__qualname__ + args = [repr(self.base_func)] + args.extend(repr(x) for x in self.args) + args.extend(f"{k}={v!r}" for (k, v) in self.keywords.items()) + if type(self).__module__ == "functools": + return f"functools.{qualname}({', '.join(args)})" + return f"{qualname}({', '.join(args)})" + + def __reduce__(self): + return ( + type(self), + (self.base_func,), + (self.base_func, self.args, self.keywords or None, self.__dict__ or None), + ) + + def __setstate__(self, state): + if not isinstance(state, tuple): + raise TypeError("argument to __setstate__ must be a tuple") + if len(state) != 4: + raise TypeError(f"expected 4 items in state, got {len(state)}") + func, args, kwds, namespace = state + if ( + not callable(func) + or not isinstance(args, tuple) + or (kwds is not None and not isinstance(kwds, dict)) + or (namespace is not None and not isinstance(namespace, dict)) + ): + raise TypeError("invalid partial state") + + args = tuple(args) # just in case it's a subclass + if kwds is None: + kwds = {} + elif type(kwds) is not dict: # XXX does it need to be *exactly* dict? + kwds = dict(kwds) + if namespace is None: + namespace = {} + + self.__dict__ = namespace + self.base_func = func + self.args = args + self.keywords = kwds diff --git a/dask_grblas/io.py b/dask_grblas/io.py index d3c462e..6f183cb 100644 --- a/dask_grblas/io.py +++ b/dask_grblas/io.py @@ -2,7 +2,6 @@ from math import floor, sqrt from numpy import asarray, conj, zeros, concatenate, ones, empty -from scipy.io import mmio # noqa def symm_I_J(pos, n): @@ -97,40 +96,238 @@ def home(stream, search_window_size=8): # ----------------------------------------------------------------------------- +def asstr(s): + if isinstance(s, bytes): + return s.decode("latin1") + return str(s) -def mmread(source, *, dup_op=None, name=None, row_begin=0, row_end=None, col_begin=0, col_end=None): - """ - Read the contents of a Matrix Market filename or file into a new Matrix. +# ----------------------------------------------------------------------------- - This uses `scipy.io.mmread`: - https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.mmread.html - For more information on the Matrix Market format, see: - https://math.nist.gov/MatrixMarket/formats.html - """ - from . 
import Matrix - - try: - from scipy.sparse import coo_matrix # noqa - except ImportError: # pragma: no cover - raise ImportError("scipy is required to read Matrix Market files") - array = MMFile().read( - source, row_begin=row_begin, row_end=row_end, col_begin=col_begin, col_end=col_end +class MMFile: + __slots__ = ("_rows", "_cols", "_entries", "_format", "_field", "_symmetry") + + @property + def rows(self): + return self._rows + + @property + def cols(self): + return self._cols + + @property + def entries(self): + return self._entries + + @property + def format(self): + return self._format + + @property + def field(self): + return self._field + + @property + def symmetry(self): + return self._symmetry + + @property + def has_symmetry(self): + return self._symmetry in ( + self.SYMMETRY_SYMMETRIC, + self.SYMMETRY_SKEW_SYMMETRIC, + self.SYMMETRY_HERMITIAN, + ) + + # format values + FORMAT_COORDINATE = "coordinate" + FORMAT_ARRAY = "array" + + # field values + FIELD_INTEGER = "integer" + FIELD_UNSIGNED = "unsigned-integer" + FIELD_REAL = "real" + FIELD_COMPLEX = "complex" + FIELD_PATTERN = "pattern" + FIELD_VALUES = (FIELD_INTEGER, FIELD_UNSIGNED, FIELD_REAL, FIELD_COMPLEX, FIELD_PATTERN) + + # symmetry values + SYMMETRY_GENERAL = "general" + SYMMETRY_SYMMETRIC = "symmetric" + SYMMETRY_SKEW_SYMMETRIC = "skew-symmetric" + SYMMETRY_HERMITIAN = "hermitian" + SYMMETRY_VALUES = ( + SYMMETRY_GENERAL, + SYMMETRY_SYMMETRIC, + SYMMETRY_SKEW_SYMMETRIC, + SYMMETRY_HERMITIAN, ) - if isinstance(array, coo_matrix): - nrows, ncols = array.shape - return Matrix.from_values( - array.row, array.col, array.data, nrows=nrows, ncols=ncols, dup_op=dup_op, name=name + + @classmethod + def info(self, source): + """ + Return size, storage parameters from Matrix Market file-like 'source'. + + Parameters + ---------- + source : str or file-like + Matrix Market filename (extension .mtx) or open file-like object + + Returns + ------- + rows : int + Number of matrix rows. + cols : int + Number of matrix columns. + entries : int + Number of non-zero entries of a sparse matrix + or rows*cols for a dense matrix. + format : str + Either 'coordinate' or 'array'. + field : str + Either 'real', 'complex', 'pattern', or 'integer'. + symmetry : str + Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'. 
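+        Illustrative example (an editorial addition for clarity): for a
+        source whose header and size lines read
+
+            %%MatrixMarket matrix coordinate real general
+            3 3 5
+
+        this returns ``(3, 3, 5, 'coordinate', 'real', 'general')``.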
+ """ + + stream, close_it = self._open(source) + + try: + + # read and validate header line + line = stream.readline() + mmid, matrix, format, field, symmetry = [asstr(part.strip()) for part in line.split()] + if not mmid.startswith("%%MatrixMarket"): + raise ValueError("source is not in Matrix Market format") + if not matrix.lower() == "matrix": + raise ValueError("Problem reading file header: " + line) + + # http://math.nist.gov/MatrixMarket/formats.html + if format.lower() == "array": + format = self.FORMAT_ARRAY + elif format.lower() == "coordinate": + format = self.FORMAT_COORDINATE + + # skip comments + # line.startswith('%') + while line and line[0] in ["%", 37]: + line = stream.readline() + + # skip empty lines + while not line.strip(): + line = stream.readline() + + split_line = line.split() + if format == self.FORMAT_ARRAY: + if not len(split_line) == 2: + raise ValueError("Header line not of length 2: " + line.decode("ascii")) + rows, cols = map(int, split_line) + entries = rows * cols + else: + if not len(split_line) == 3: + raise ValueError("Header line not of length 3: " + line.decode("ascii")) + rows, cols, entries = map(int, split_line) + + return (rows, cols, entries, format, field.lower(), symmetry.lower()) + + finally: + if close_it: + stream.close() + + @staticmethod + def _open(filespec, mode="rb"): + """Return an open file stream for reading based on source. + + If source is a file name, open it (after trying to find it with mtx and + gzipped mtx extensions). Otherwise, just return source. + + Parameters + ---------- + filespec : str or file-like + String giving file name or file-like object + mode : str, optional + Mode with which to open file, if `filespec` is a file name. + + Returns + ------- + fobj : file-like + Open file-like object. + close_it : bool + True if the calling function should close this file when done, + false otherwise. + """ + # If 'filespec' is path-like (str, pathlib.Path, os.DirEntry, other class + # implementing a '__fspath__' method), try to convert it to str. If this + # fails by throwing a 'TypeError', assume it's an open file handle and + # return it as-is. 
+ try: + filespec = os.fspath(filespec) + except TypeError: + return filespec, False + + # 'filespec' is definitely a str now + + # open for reading + if mode[0] == "r": + + # determine filename plus extension + if not os.path.isfile(filespec): + if os.path.isfile(filespec + ".mtx"): + filespec = filespec + ".mtx" + elif os.path.isfile(filespec + ".mtx.gz"): + filespec = filespec + ".mtx.gz" + elif os.path.isfile(filespec + ".mtx.bz2"): + filespec = filespec + ".mtx.bz2" + # open filename + if filespec.endswith(".gz"): + import gzip + + stream = gzip.open(filespec, mode) + elif filespec.endswith(".bz2"): + import bz2 + + stream = bz2.BZ2File(filespec, "rb") + else: + stream = open(filespec, mode) + + # open for writing + else: + if filespec[-4:] != ".mtx": + filespec = filespec + ".mtx" + stream = open(filespec, mode) + + return stream, True + + # ------------------------------------------------------------------------- + def _parse_header(self, stream): + rows, cols, entries, format, field, symmetry = self.__class__.info(stream) + self._init_attrs( + rows=rows, cols=cols, entries=entries, format=format, field=field, symmetry=symmetry ) - # SS, SuiteSparse-specific: import_full - return Matrix.ss.import_fullr(values=array, take_ownership=True, name=name) + # ------------------------------------------------------------------------- + def _init_attrs(self, **kwargs): + """ + Initialize each attributes with the corresponding keyword arg value + or a default of None + """ -# ----------------------------------------------------------------------------- + attrs = self.__class__.__slots__ + public_attrs = [attr[1:] for attr in attrs] + invalid_keys = set(kwargs.keys()) - set(public_attrs) + + if invalid_keys: + raise ValueError( + """found %s invalid keyword arguments, please only + use %s""" + % (tuple(invalid_keys), public_attrs) + ) + for attr in attrs: + setattr(self, attr, kwargs.get(attr[1:], None)) -class MMFile(mmio.MMFile): + # ------------------------------------------------------------------------- def get_data_begin(self, source): """ Reads the contents of a Matrix Market file-like 'source' into a matrix. @@ -157,11 +354,13 @@ def get_data_begin(self, source): stream.close() # ------------------------------------------------------------------------- + def _get_data_begin(self, stream): _ = self.__class__.info(stream) return stream.tell() # ----------------------------------------------------------------------------- + def read_part(self, source, line_start=None, line_stop=None, read_begin=None, read_end=None): """ Reads the contents of a Matrix Market file-like 'source' into a matrix. diff --git a/dask_grblas/mask.py b/dask_grblas/mask.py index 314781a..21a760a 100644 --- a/dask_grblas/mask.py +++ b/dask_grblas/mask.py @@ -1,3 +1,4 @@ +from grblas.mask import Mask as gb_Mask from .utils import get_grblas_type @@ -6,6 +7,9 @@ class Mask: structure = False value = False + __bool__ = gb_Mask.__bool__ + __eq__ = gb_Mask.__eq__ + def __init__(self, mask): from . 
import matrix, vector @@ -13,6 +17,14 @@ def __init__(self, mask): self.mask = mask self._meta = get_grblas_type(self)(mask._meta) + @property + def is_dOnion(self): + return getattr(self.mask, "is_dOnion", False) + + @property + def dOnion_if(self): + return self.mask._delayed if self.is_dOnion else self + class StructuralMask(Mask): complement = False diff --git a/dask_grblas/matrix.py b/dask_grblas/matrix.py index 922965e..52a1296 100644 --- a/dask_grblas/matrix.py +++ b/dask_grblas/matrix.py @@ -1,18 +1,27 @@ import dask.array as da import numpy as np import grblas as gb -from dask.base import tokenize + +from numbers import Integral, Number +from tlz import compose + +from dask.base import tokenize, is_dask_collection from dask.delayed import Delayed, delayed from dask.highlevelgraph import HighLevelGraph from grblas import binary, monoid, semiring from grblas.dtypes import lookup_dtype +from grblas.exceptions import IndexOutOfBound, EmptyObject, DimensionMismatch -from .base import BaseType, InnerBaseType +from . import _automethods +from .base import BaseType, InnerBaseType, DOnion, is_DOnion, any_dOnions, Box, skip from .base import _nvals as _nvals_in_chunk -from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater +from .base import _dup as chunk_dup +from .expr import AmbiguousAssignOrExtract, IndexerResolver, GbDelayed, Updater from .mask import StructuralMask, ValueMask from ._ss.matrix import ss from .utils import ( + pack_args, + pack_kwargs, np_dtype, get_return_type, get_grblas_type, @@ -45,6 +54,44 @@ class Matrix(BaseType): ndim = 2 _is_transposed = False + __abs__ = gb.Matrix.__abs__ + __add__ = gb.Matrix.__add__ + __divmod__ = gb.Matrix.__divmod__ + __eq__ = gb.Matrix.__eq__ + __floordiv__ = gb.Matrix.__floordiv__ + __ge__ = gb.Matrix.__ge__ + __gt__ = gb.Matrix.__gt__ + __iadd__ = gb.Matrix.__iadd__ + __iand__ = gb.Matrix.__iand__ + __ifloordiv__ = gb.Matrix.__ifloordiv__ + __imod__ = gb.Matrix.__imod__ + __imul__ = gb.Matrix.__imul__ + __invert__ = gb.Matrix.__invert__ + __ior__ = gb.Matrix.__ior__ + __ipow__ = gb.Matrix.__ipow__ + __isub__ = gb.Matrix.__isub__ + __itruediv__ = gb.Matrix.__itruediv__ + __ixor__ = gb.Matrix.__ixor__ + __le__ = gb.Matrix.__le__ + __lt__ = gb.Matrix.__lt__ + __mod__ = gb.Matrix.__mod__ + __mul__ = gb.Matrix.__mul__ + __ne__ = gb.Matrix.__ne__ + __neg__ = gb.Matrix.__neg__ + __pow__ = gb.Matrix.__pow__ + __radd__ = gb.Matrix.__radd__ + __rdivmod__ = gb.Matrix.__rdivmod__ + __rfloordiv__ = gb.Matrix.__rfloordiv__ + __rmod__ = gb.Matrix.__rmod__ + __rmul__ = gb.Matrix.__rmul__ + __rpow__ = gb.Matrix.__rpow__ + __rsub__ = gb.Matrix.__rsub__ + __rtruediv__ = gb.Matrix.__rtruediv__ + __rxor__ = gb.Matrix.__rxor__ + __sub__ = gb.Matrix.__sub__ + __truediv__ = gb.Matrix.__truediv__ + __xor__ = gb.Matrix.__xor__ + @classmethod def from_delayed(cls, matrix, dtype, nrows, ncols, *, nvals=None, name=None): if not isinstance(matrix, Delayed): @@ -121,40 +168,92 @@ def from_values( nrows=None, ncols=None, *, - trust_shape=False, dup_op=None, dtype=None, chunks="auto", name=None, ): - # Note: `trust_shape` is a bool parameter that, when True, - # can be used to avoid expensive computation of max(rows) - # and max(columns) which are used to verify that `nrows` - # and `ncols` are indeed large enough to hold all the given - # tuples. 
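# --- illustrative aside (not part of the patch) ------------------------------
# The rewritten `from_values` below accepts plain sequences, numpy arrays or
# dask arrays for the index/value inputs, and defers to a DOnion (a doubly
# lazy collection) when `nrows`/`ncols` must be inferred from dask-array
# indices. A sketch of the fully specified, eager path:
#
#     import dask.array as da
#     import numpy as np
#     import dask_grblas as dgb
#
#     rows = da.from_array(np.array([0, 1, 1]), chunks=2)
#     cols = da.from_array(np.array([1, 0, 1]), chunks=2)
#     vals = da.from_array(np.array([10, 20, 30]), chunks=2)
#     A = dgb.Matrix.from_values(rows, cols, vals, nrows=2, ncols=2)
# ------------------------------------------------------------------------------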
- if ( - dup_op is None - and type(rows) is da.Array - and type(columns) is da.Array - and type(values) is da.Array - ): - if not trust_shape or nrows is None or ncols is None: - # this branch is an expensive operation: - implied_nrows = 1 + da.max(rows).compute() - implied_ncols = 1 + da.max(columns).compute() - if nrows is not None and implied_nrows > nrows: - raise Exception() - if ncols is not None and implied_ncols > ncols: - raise Exception() - nrows = implied_nrows if nrows is None else nrows - ncols = implied_ncols if ncols is None else ncols - - idtype = gb.Matrix.new(rows.dtype).dtype - np_idtype_ = np_dtype(idtype) - vdtype = gb.Matrix.new(values.dtype).dtype - np_vdtype_ = np_dtype(vdtype) + if isinstance(values, Number): + dtype = lookup_dtype(type(values) if dtype is None else dtype) + elif hasattr(values, "dtype"): + dtype = lookup_dtype(values.dtype if dtype is None else dtype) + + meta = gb.Matrix.new( + dtype, + nrows=nrows if isinstance(nrows, Number) else 0, + ncols=ncols if isinstance(ncols, Number) else 0, + ) - chunks = da.core.normalize_chunks(chunks, (nrows, ncols), dtype=np_idtype_) + # check for any dOnions: + args = pack_args(rows, columns, values, nrows, ncols) + kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name) + if any_dOnions(*args, **kwargs): + # dive into dOnion(s): + out_donion = DOnion.multi_access(meta, Matrix.from_values, *args, **kwargs) + return Matrix(out_donion, meta=meta) + + # no dOnions + if type(rows) is da.Array or type(columns) is da.Array or type(values) is da.Array: + nrows_, ncols_ = nrows, ncols + if type(rows) in {tuple, list, np.ndarray}: + nrows_ = nrows or (np.max(rows) + 1) + rows = da.asarray(rows) + if type(columns) in {tuple, list, np.ndarray}: + ncols_ = ncols or (np.max(columns) + 1) + columns = da.asarray(columns) + if type(values) in {tuple, list, np.ndarray}: + values = da.asarray(values) + + np_idtype_ = np_dtype(lookup_dtype(rows.dtype)) + if isinstance(nrows_, Integral) and isinstance(ncols_, Integral): + nrows, ncols = nrows_, ncols_ + chunks = da.core.normalize_chunks(chunks, (nrows, ncols), dtype=np_idtype_) + else: + if nrows is None and rows.size == 0: + raise ValueError("No row indices provided. Unable to infer nrows.") + + if ncols is None and columns.size == 0: + raise ValueError("No column indices provided. 
Unable to infer ncols.") + + if type(values) is da.Array and ( + rows.size != columns.size or columns.size != values.size + ): + raise ValueError( + "`rows` and `columns` and `values` lengths must match: " + f"{rows.size}, {columns.size}, {values.size}" + ) + elif rows.size != columns.size: + raise ValueError( + f"`rows` and `columns` lengths must match: {rows.size}, {columns.size}" + ) + + if rows.dtype.kind not in "ui": + raise ValueError(f"rows must be integers, not {rows.dtype}") + + if columns.dtype.kind not in "ui": + raise ValueError(f"columns must be integers, not {columns.dtype}") + + nrows = nrows_ + if nrows is None: + nrows = da.max(rows) + np.asarray(1, dtype=rows.dtype) + + ncols = ncols_ + if ncols is None: + ncols = da.max(columns) + np.asarray(1, dtype=columns.dtype) + + # Create dOnion from `nrows` and/or `ncols`, that is, + # use the inner value of `nrows` and/or `ncols` to create the new Matrix: + shape = (nrows, ncols) + _shape = [skip if is_dask_collection(x) else x for x in shape] + dasks = [x for x in shape if is_dask_collection(x)] + args = pack_args(rows, columns, values, *_shape) + kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name) + donion = DOnion.sprout(dasks, meta, Matrix.from_values, *args, **kwargs) + return Matrix(donion, meta=meta) + + # output shape `(nrows, ncols)` is completely determined + vdtype = dtype + np_vdtype_ = np_dtype(vdtype) name_ = name name = str(name) if name else "" @@ -166,19 +265,22 @@ def from_values( *(_pick2D, "ijk"), *(rows, "k"), *(columns, "k"), - *(values, "k"), + *(values, "k" if type(values) is da.Array else None), *(row_ranges, "i"), *(col_ranges, "j"), + shape=(nrows, ncols), dtype=np_idtype_, meta=np.array([]), ) - meta = InnerMatrix(gb.Matrix.new(vdtype)) + meta = InnerMatrix(gb.Matrix.new(vdtype, nrows=nrows, ncols=ncols)) delayed = da.core.blockwise( *(_from_values2D, "ij"), + *(values if isinstance(values, Number) else None, None), *(fragments, "ijk"), *(row_ranges, "i"), *(col_ranges, "j"), concatenate=False, + dup_op=dup_op, gb_dtype=vdtype, dtype=np_vdtype_, meta=meta, @@ -192,8 +294,130 @@ def from_values( ) return cls.from_matrix(matrix, chunks=chunks, name=name) + def build( + self, + rows, + columns, + values, + *, + dup_op=None, + clear=False, + nrows=None, + ncols=None, + chunks=None, + in_dOnion=False, # not part of the API + ): + if not clear and self._nvals != 0: + raise gb.exceptions.OutputNotEmpty() + + # TODO: delayed nrows/ncols + nrows = nrows or self._nrows + ncols = ncols or self._ncols + meta = self._meta + + # check for any DOnions: + args = pack_args(self, rows, columns, values) + kwargs = pack_kwargs( + dup_op=dup_op, clear=clear, nrows=nrows, ncols=ncols, chunks=chunks, in_dOnion=True + ) + if any_dOnions(*args, **kwargs): + # dive into DOnion(s): + out_donion = DOnion.multi_access(meta, Matrix.build, *args, **kwargs) + self.__init__(out_donion, meta=meta) + return + + # no DOnions + if clear: + self.clear() + + self.resize(nrows, ncols) + + if chunks is not None: + self.rechunk(inplace=True, chunks=chunks) + + x = self._optional_dup() + if type(rows) in {tuple, list, np.ndarray}: + if np.max(rows) >= self._nrows: + raise gb.exceptions.IndexOutOfBound + rows = da.core.from_array(np.array(rows), name="rows-" + tokenize(rows)) + + if type(columns) in {tuple, list, np.ndarray}: + if np.max(columns) >= self._ncols: + raise gb.exceptions.IndexOutOfBound + columns = da.core.from_array(np.array(columns), name="columns-" + tokenize(columns)) + + if type(values) in {tuple, list, 
np.ndarray}: + values = da.core.from_array(np.array(values), name="values-" + tokenize(values)) + + if type(values) is da.Array and (rows.size != columns.size or columns.size != values.size): + raise ValueError( + "`rows` and `columns` and `values` lengths must match: " + f"{rows.size}, {columns.size}, {values.size}" + ) + elif rows.size != columns.size: + raise ValueError( + f"`rows` and `columns` lengths must match: {rows.size}, {columns.size}" + ) + elif values is None: + raise EmptyObject() + + idtype = gb.Matrix.new(rows.dtype).dtype + np_idtype_ = np_dtype(idtype) + vdtype = ( + lookup_dtype(type(values)) + if isinstance(values, Number) + else gb.Matrix.new(values.dtype).dtype + ) + np_vdtype_ = np_dtype(vdtype) + + rname = "-row-ranges" + tokenize(x, x.chunks[0]) + cname = "-col-ranges" + tokenize(x, x.chunks[1]) + row_ranges = build_chunk_ranges_dask_array(x, 0, rname) + col_ranges = build_chunk_ranges_dask_array(x, 1, cname) + fragments = da.core.blockwise( + *(_pick2D, "ijk"), + *(rows, "k"), + *(columns, "k"), + *(values, None if isinstance(values, Number) else "k"), + *(row_ranges, "i"), + *(col_ranges, "j"), + shape=(nrows, ncols), + dtype=np_idtype_, + meta=np.array([]), + ) + meta = InnerMatrix(gb.Matrix.new(vdtype)) + delayed = da.core.blockwise( + *(_build_2D_chunk, "ij"), + *(x, "ij"), + *(row_ranges, "i"), + *(col_ranges, "j"), + *(fragments, "ijk"), + values=values if isinstance(values, Number) else None, + dup_op=dup_op, + clear=False, + concatenate=False, + dtype=np_vdtype_, + meta=meta, + ) + if in_dOnion: + return Matrix(delayed) + self.__init__(delayed) + @classmethod def new(cls, dtype, nrows=0, ncols=0, *, chunks="auto", name=None): + if any_dOnions(nrows, ncols): + meta = gb.Matrix.new(dtype) + donion = DOnion.multi_access( + meta, cls.new, dtype, nrows=nrows, ncols=ncols, chunks=chunks, name=name + ) + return Matrix(donion, meta=meta) + + if type(nrows) is Box: + nrows = nrows.content + + if type(ncols) is Box: + ncols = ncols.content + dtype = dtype.lower() if isinstance(dtype, str) else dtype if nrows == 0 and ncols == 0: matrix = gb.Matrix.new(dtype, nrows, ncols) @@ -234,15 +458,19 @@ def __init__(self, delayed, meta=None, nvals=None): # if it is already known at the time of initialization of # this Matrix, otherwise its value should be left as None # (the default) - assert type(delayed) is da.Array - assert delayed.ndim == 2 + assert type(delayed) in {da.Array, DOnion} self._delayed = delayed - if meta is None: - meta = gb.Matrix.new(delayed.dtype, *delayed.shape) + if type(delayed) is da.Array: + assert delayed.ndim == 2 + if meta is None: + meta = gb.Matrix.new(delayed.dtype, *delayed.shape) + else: + if meta is None: + meta = gb.Matrix.new(delayed.dtype) self._meta = meta - self._nrows = meta.nrows - self._ncols = meta.ncols self.dtype = meta.dtype + self._nrows = self.nrows + self._ncols = self.ncols self._nvals = nvals # Add ss extension methods self.ss = ss(self) @@ -261,17 +489,165 @@ def T(self): @property def nrows(self): + if self.is_dOnion: + return DOnion.multi_access(self._meta.nrows, getattr, self, "nrows") return self._meta.nrows @property def ncols(self): + if self.is_dOnion: + return DOnion.multi_access(self._meta.ncols, getattr, self, "ncols") return self._meta.ncols @property def shape(self): - return (self._meta.nrows, self._meta.ncols) + if self.is_dOnion: + return (self.nrows, self.ncols) + # return DOnion.multi_access(self._meta.shape, getattr, self, "shape") + return self._meta.shape + + def _head(self, delayed, shape): + """ + Take 
the leading portion of shape `shape` from `delayed` + """ + def _slice(inner, slc_x, slc_y): + return InnerMatrix(inner.value[slc_x, slc_y].new()) + + x = delayed + numblocks = () + heads = () + new_chunks = () + for axis in range(2): + stops_ = np.cumsum(x.chunks[axis]) + starts = np.roll(stops_, 1) + starts[0] = 0 + + M = x.numblocks[axis] + blockid = np.arange(M) + + # locate chunk containing last element on axis: + i = min(self.shape[axis], shape[axis]) - 1 + filter = (starts <= i) & (i < stops_) + (last_block,) = blockid[filter] + tail_sz = i - starts[last_block] + 1 + + numblocks += (last_block + 1,) + heads += (tail_sz,) + new_chunks += (x.chunks[axis][:last_block] + (tail_sz,), ) + + name = "Matrix.resize-" + tokenize(x) + dtype = self.dtype + dsk = dict() + for i in range(numblocks[0]): + x_cut = (i == numblocks[0] - 1) + for j in range(numblocks[1]): + y_cut = (j == numblocks[1] - 1) + if x_cut or y_cut: + dsk[(name, i, j)] = ( + _slice, + (x.name, i, j), + slice(heads[0]) if x_cut else slice(None), + slice(heads[1]) if y_cut else slice(None), + ) + else: + dsk[(name, i, j)] = (chunk_dup, (x.name, i, j), None, dtype, None) + + return name, dsk, new_chunks, numblocks + + def _add_tail(self, axis, size, name, dsk, chunks, numblocks): + """ + Append dask graph `dsk` with empty chunks on axis `axis` up to size `size` + """ + rem = size - self.shape[axis] + if rem > 0: + j = numblocks[axis] + other = 0 if axis else 1 + new_chunks = chunks[axis] + (rem,) + new_chunks = (chunks[0], new_chunks) if axis else (new_chunks, chunks[1]) + + for i, sz_i in enumerate(chunks[other]): + if axis: + dsk[(name, i, j)] = (compose(InnerMatrix, gb.Matrix.new), self.dtype, sz_i, rem) + else: + dsk[(name, j, i)] = (compose(InnerMatrix, gb.Matrix.new), self.dtype, rem, sz_i) + + return name, dsk, new_chunks, (len(new_chunks[0]), len(new_chunks[1])) + + else: + return name, dsk, chunks, numblocks def resize(self, nrows, ncols, inplace=True, chunks="auto"): + if any_dOnions(self, nrows, ncols): + donion = DOnion.multi_access( + self._meta, Matrix.resize, self, nrows, ncols, inplace=False, chunks=chunks + ) + if inplace: + self.__init__(donion, meta=self._meta) + return + else: + return Matrix(donion, meta=self._meta) + + name, dsk, new_chunks, num_blocks = self._head(self._delayed, (nrows, ncols)) + name, dsk, new_chunks, num_blocks = self._add_tail(0, nrows, name, dsk, new_chunks, num_blocks) + name, dsk, new_chunks, num_blocks = self._add_tail(1, ncols, name, dsk, new_chunks, num_blocks) + + graph = HighLevelGraph.from_collections(name, dsk, dependencies=[self._delayed]) + x = da.core.Array(graph, name, new_chunks, meta=wrap_inner(self._meta)) + x = x.rechunk(chunks=chunks) + + if nrows >= self.nrows and ncols >= self.ncols: + nvals = self.nvals + else: + nvals = None + + if inplace: + self.__init__(x, nvals=nvals) + else: + return Matrix(x, nvals=nvals) + + def _resize_old(self, nrows, ncols, inplace=True, chunks="auto"): + if self.is_dOnion: + donion = self._delayed.getattr( + self._meta, "resize", nrows, ncols, inplace=False, chunks=chunks + ) + if inplace: + self.__init__(donion, meta=self._meta) + return + else: + return Matrix(donion, meta=self._meta) + + if nrows >= self.nrows and ncols >= self.ncols: + new_matrix = Matrix.new(self.dtype, nrows, ncols, chunks=chunks) + rows, cols = slice(0, self.nrows), slice(0, self.ncols) + new_matrix[rows, cols] << self + nvals = self._nvals + elif nrows < self.nrows and ncols < self.ncols: + rows, cols = slice(0, nrows), slice(0, ncols) + new_matrix = self[rows, 
cols].new() + new_matrix.rechunk(chunks=chunks) + nvals = None + else: + new_matrix = Matrix.new(self.dtype, nrows, ncols, chunks=chunks) + rows, cols = slice(0, min(nrows, self.nrows)), slice(0, min(ncols, self.ncols)) + new_matrix[rows, cols] << self[rows, cols].new() + nvals = None + + if inplace: + self.__init__(new_matrix._delayed, nvals=nvals) + else: + return new_matrix + + def _resize_old2(self, nrows, ncols, inplace=True, chunks="auto"): + if self.is_dOnion: + donion = self._delayed.getattr( + self._meta, "resize", nrows, ncols, inplace=False, chunks=chunks + ) + if inplace: + self.__init__(donion, meta=self._meta) + return + else: + return Matrix(donion, meta=self._meta) + chunks = da.core.normalize_chunks(chunks, (nrows, ncols), dtype=np.int64) output_row_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_row_ranges-") output_col_ranges = build_ranges_dask_array_from_chunks(chunks[1], "output_col_ranges-") @@ -311,7 +687,40 @@ def resize(self, nrows, ncols, inplace=True, chunks="auto"): else: return Matrix(x, nvals=nvals) + def rechunk(self, inplace=False, chunks="auto"): + if self.is_dOnion: + meta = self._meta + donion = self._delayed.getattr(meta, "rechunk", inplace=False, chunks=chunks) + if inplace: + self.__init__(donion, meta=meta) + return + else: + return Matrix(donion, meta=meta) + + delayed = self._delayed.rechunk(chunks=chunks) + if inplace: + self._delayed = delayed + return + else: + return Matrix(delayed, meta=self._meta, nvals=self._nvals) + # chunks = da.core.normalize_chunks(chunks, self.shape, dtype=np.int64) + # if inplace: + # self.resize(*self.shape, chunks=chunks) + # return + # else: + # return self.resize(*self.shape, chunks=chunks, inplace=False) + + def diag(self, k=0, dtype=None, chunks="auto"): + return self._diag(k=k, dtype=dtype, chunks=chunks) + def _diag(self, k=0, dtype=None, chunks="auto"): + if self.is_dOnion: + meta = self._meta.diag(k=k, dtype=dtype) + donion = DOnion.multi_access( + meta, self.__class__._diag, self, k=k, dtype=dtype, chunks=chunks + ) + return get_return_type(meta)(donion, meta=meta) + kdiag_row_start = max(0, -k) kdiag_col_start = max(0, k) kdiag_row_stop = min(self.nrows, self.ncols - k) @@ -336,7 +745,7 @@ def _diag(self, k=0, dtype=None, chunks="auto"): row_blockid = np.arange(A.numblocks[0]) col_blockid = np.arange(A.numblocks[1]) - # locate first chunk containing diaagonal: + # locate first chunk containing diagonal: row_filter = (row_starts <= kdiag_row_start) & (kdiag_row_start < row_stops_) col_filter = (col_starts <= kdiag_col_start) & (kdiag_col_start < col_stops_) (R,) = row_blockid[row_filter] @@ -413,13 +822,35 @@ def _diag_old(self, k=0, dtype=None, chunks="auto"): nvals = 0 if self._nvals == 0 else None return get_return_type(meta)(delayed, nvals=nvals) - def __getitem__(self, index): - return AmbiguousAssignOrExtract(self, index) + def __getitem__(self, keys): + resolved_indexes = IndexerResolver(self, keys) + shape = resolved_indexes.shape + if not shape: + from .scalar import ScalarIndexExpr + + return ScalarIndexExpr(self, resolved_indexes) + elif len(shape) == 1: + from .vector import VectorIndexExpr + + return VectorIndexExpr(self, resolved_indexes, *shape) + else: + return MatrixIndexExpr(self, resolved_indexes, *shape) + + def __delitem__(self, keys, in_dOnion=False): + if is_DOnion(self._delayed): + good_keys = [x for x in keys if isinstance(x, Integral)] + if len(good_keys) != 2: + raise TypeError("Remove Element only supports scalars.") + + donion = self._delayed.getattr(self._meta, 
"__delitem__", keys, in_dOnion=True) + self.__init__(donion, meta=self._meta) + return - def __delitem__(self, keys): del Updater(self)[keys] + if in_dOnion: + return self - def __setitem__(self, index, delayed): + def __setitem__(self, index, delayed, in_dOnion=False): Updater(self)[index] = delayed def __contains__(self, index): @@ -437,219 +868,214 @@ def __iter__(self): return zip(rows.flat, columns.flat) def ewise_add(self, other, op=monoid.plus, *, require_monoid=True): - assert type(other) is Matrix # TODO: or TransposedMatrix - meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid) - return GbDelayed(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta) + gb_types = (gb.Matrix, gb.matrix.TransposedMatrix) + other = self._expect_type( + other, (Matrix, TransposedMatrix) + gb_types, within="ewise_add", argname="other" + ) + + try: + meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid) + except DimensionMismatch: + if any_dOnions(self, other): + meta = self._meta.ewise_add(self._meta, op=op, require_monoid=require_monoid) + else: + raise + + return MatrixExpression( + self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta + ) def ewise_mult(self, other, op=binary.times): - assert type(other) is Matrix # TODO: or TransposedMatrix - meta = self._meta.ewise_mult(other._meta, op=op) - return GbDelayed(self, "ewise_mult", other, op, meta=meta) + gb_types = (gb.Matrix, gb.matrix.TransposedMatrix) + other = self._expect_type( + other, (Matrix, TransposedMatrix) + gb_types, within="ewise_mult", argname="other" + ) + + try: + meta = self._meta.ewise_mult(other._meta, op=op) + except DimensionMismatch: + if any_dOnions(self, other): + meta = self._meta.ewise_mult(self._meta, op=op) + else: + raise + + return MatrixExpression(self, "ewise_mult", other, op, meta=meta) def mxv(self, other, op=semiring.plus_times): - from .vector import Vector + from .vector import Vector, VectorExpression - assert type(other) is Vector - meta = self._meta.mxv(other._meta, op=op) - return GbDelayed(self, "mxv", other, op, meta=meta) + other = self._expect_type(other, (Vector, gb.Vector), within="mxv", argname="other") + + try: + meta = self._meta.mxv(other._meta, op=op) + except DimensionMismatch: + if any_dOnions(self, other): + other_meta = gb.Vector.new(dtype=other._meta.dtype, size=self._meta.ncols) + meta = self._meta.mxv(other_meta, op=op) + else: + raise + + return VectorExpression(self, "mxv", other, op, meta=meta, size=self.nrows) def mxm(self, other, op=semiring.plus_times): - assert type(other) in (Matrix, TransposedMatrix) - meta = self._meta.mxm(other._meta, op=op) - return GbDelayed(self, "mxm", other, op, meta=meta) + gb_types = (gb.Matrix, gb.matrix.TransposedMatrix) + other = self._expect_type( + other, (Matrix, TransposedMatrix) + gb_types, within="mxm", argname="other" + ) + + try: + meta = self._meta.mxm(other._meta, op=op) + except DimensionMismatch: + if any_dOnions(self, other): + other_meta = gb.Matrix.new( + dtype=other._meta.dtype, nrows=self._meta.ncols, ncols=other._meta.ncols + ) + meta = self._meta.mxm(other_meta, op=op) + else: + raise + + return MatrixExpression( + self, "mxm", other, op, meta=meta, nrows=self.nrows, ncols=other.ncols + ) def kronecker(self, other, op=binary.times): - assert type(other) is Matrix # TODO: or TransposedMatrix + gb_types = (gb.Matrix, gb.matrix.TransposedMatrix) + other = self._expect_type( + other, (Matrix, TransposedMatrix) + gb_types, within="kronecker", 
argname="other" + ) meta = self._meta.kronecker(other._meta, op=op) - return GbDelayed(self, "kronecker", other, op, meta=meta) + return MatrixExpression(self, "kronecker", other, op, meta=meta) def apply(self, op, right=None, *, left=None): - from .scalar import Scalar - left_meta = left right_meta = right - if type(left) is Scalar: - left_meta = left.dtype.np_type(0) - if type(right) is Scalar: - right_meta = right.dtype.np_type(0) - + if isinstance(left, BaseType): + left_meta = left._meta + if left_meta._is_scalar and left_meta.is_empty: + left_meta = gb.Scalar.from_value(0, dtype=left_meta.dtype) + if isinstance(right, BaseType): + right_meta = right._meta + if right_meta._is_scalar and right_meta.is_empty: + right_meta = gb.Scalar.from_value(0, dtype=right_meta.dtype) + + if self._meta.shape == (0,) * self.ndim: + self._meta.resize(*((1,) * self.ndim)) meta = self._meta.apply(op=op, left=left_meta, right=right_meta) - return GbDelayed(self, "apply", op, right, meta=meta, left=left) + return MatrixExpression(self, "apply", op, right, meta=meta, left=left) def reduce_rowwise(self, op=monoid.plus): + from .vector import VectorExpression + meta = self._meta.reduce_rowwise(op) - return GbDelayed(self, "reduce_rowwise", op, meta=meta) + return VectorExpression(self, "reduce_rowwise", op, meta=meta, size=self.nrows) def reduce_columnwise(self, op=monoid.plus): - meta = self._meta.reduce_columnwise(op) - return GbDelayed(self, "reduce_columnwise", op, meta=meta) - - def reduce_scalar(self, op=monoid.plus): - meta = self._meta.reduce_scalar(op) - return GbDelayed(self, "reduce_scalar", op, meta=meta) - - def build( - self, - rows, - columns, - values, - *, - dup_op=None, - clear=False, - nrows=None, - ncols=None, - chunks=None, - ): - if clear: - self.clear() - elif self.nvals.compute() > 0: - raise gb.exceptions.OutputNotEmpty + from .vector import VectorExpression - if nrows is not None or ncols is not None: - if nrows is None: - nrows = self._nrows - if ncols is None: - ncols = self._ncols - self.resize(nrows, ncols) + meta = self._meta.reduce_columnwise(op) + return VectorExpression(self, "reduce_columnwise", op, meta=meta, size=self.ncols) - if chunks is not None: - self.rechunk(inplace=True, chunks=chunks) + def reduce_scalar(self, op=monoid.plus, *, allow_empty=True): + from .scalar import ScalarExpression - x = self._optional_dup() - if type(rows) is list: - if np.max(rows) >= self._nrows: - raise gb.exceptions.IndexOutOfBound - rows = da.core.from_array(np.array(rows), name="rows-" + tokenize(rows)) - else: - if da.max(rows).compute() >= self._nrows: - raise gb.exceptions.IndexOutOfBound - if type(columns) is list: - if np.max(columns) >= self._ncols: - raise gb.exceptions.IndexOutOfBound - columns = da.core.from_array(np.array(columns), name="columns-" + tokenize(columns)) - else: - if da.max(columns).compute() >= self._ncols: - raise gb.exceptions.IndexOutOfBound - if type(values) is list: - values = da.core.from_array(np.array(values), name="values-" + tokenize(values)) + meta = self._meta.reduce_scalar(op) + return ScalarExpression(self, "reduce_scalar", op, meta=meta, allow_empty=allow_empty) - idtype = gb.Matrix.new(rows.dtype).dtype - np_idtype_ = np_dtype(idtype) - vdtype = gb.Matrix.new(values.dtype).dtype - np_vdtype_ = np_dtype(vdtype) + def to_values(self, dtype=None, chunks="auto"): + dtype = lookup_dtype(self.dtype if dtype is None else dtype) + meta_i, _, meta_v = self._meta.to_values(dtype) - rname = "-row-ranges" + tokenize(x, x.chunks[0]) - cname = "-col-ranges" + 
tokenize(x, x.chunks[1]) - row_ranges = build_chunk_ranges_dask_array(x, 0, rname) - col_ranges = build_chunk_ranges_dask_array(x, 1, cname) - fragments = da.core.blockwise( - *(_pick2D, "ijk"), - *(rows, "k"), - *(columns, "k"), - *(values, "k"), - *(row_ranges, "i"), - *(col_ranges, "j"), - dtype=np_idtype_, - meta=np.array([]), - ) - meta = InnerMatrix(gb.Matrix.new(vdtype)) - delayed = da.core.blockwise( - *(_build_2D_chunk, "ij"), - *(x, "ij"), - *(row_ranges, "i"), - *(col_ranges, "j"), - *(fragments, "ijk"), - dup_op=dup_op, - concatenate=False, - dtype=np_vdtype_, - meta=meta, - ) - self.__init__(delayed) + if self.is_dOnion: + meta = np.array([]) + result = DOnion.multi_access( + meta, self.__class__.to_values, self, dtype=dtype, chunks=chunks + ) + rows = DOnion.multi_access(meta_i, tuple.__getitem__, result, 0) + columns = DOnion.multi_access(meta_i, tuple.__getitem__, result, 1) + values = DOnion.multi_access(meta_v, tuple.__getitem__, result, 2) + return rows, columns, values - def to_values(self, dtype=None, chunks="auto"): - x = self._delayed # first find the number of values in each chunk and return # them as a 2D numpy array whose shape is equal to x.numblocks + x = self._delayed nvals_2D = da.core.blockwise( *(_nvals_in_chunk, "ij"), *(x, "ij"), adjust_chunks={"i": 1, "j": 1}, dtype=np.int64, meta=np.array([[]]), - ).compute() + ) # use the above array to determine the output tuples' array - # bounds (`starts` and `stops`) for each chunk of this + # bounds (`starts` and `stops_`) for each chunk of this # Matrix (self) - nvals_1D = nvals_2D.flatten() - - stops = np.cumsum(nvals_1D) - starts = np.roll(stops, 1) + stops_ = da.cumsum(nvals_2D) # BEWARE: this function rechunks! + starts = da.roll(stops_, 1) + starts = starts.copy() if starts.size == 1 else starts # bug!! starts[0] = 0 - nnz = stops[-1] - - # convert numpy 2D-arrays (`starts` and `stops`) to 2D dask Arrays - # of ranges. Don't forget to fix their `chunks` in oder to enable - # them to align with x - starts = starts.reshape(nvals_2D.shape) - starts = da.from_array(starts, chunks=1, name="starts" + tokenize(starts)) - starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta) - - stops = stops.reshape(nvals_2D.shape) - stops = da.from_array(stops, chunks=1, name="stops" + tokenize(stops)) - stops = da.core.Array(stops.dask, stops.name, x.chunks, stops.dtype, meta=x._meta) - - chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64) - output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-") + nnz = stops_[-1] + starts = starts.reshape(nvals_2D.shape).rechunk(1) + stops_ = stops_.reshape(nvals_2D.shape).rechunk(1) + + def _to_values(x, starts, stops_, dtype, chunks, nnz): + # the following changes the `.chunks` attribute of `starts` and `stops_` so that + # `blockwise()` can align them with `x` + starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta) + stops_ = da.core.Array(stops_.dask, stops_.name, x.chunks, stops_.dtype, meta=x._meta) + + chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64) + output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-") + + gb_dtype = lookup_dtype(dtype) + dtype_ = np_dtype(gb_dtype) + # Compute row/col offsets as dask arrays that can align with this + # Matrix's (self's) chunks to convert chunk row/col indices to + # full dask-grblas Matrix indices. 
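+            # Illustrative aside (editorial): chunk-local -> global index
+            # translation. For rows chunked as (2, 3) the offsets are (0, 2),
+            # so an entry at local row r in block i sits at global row
+            # r + offsets[i]:
+            #
+            #     chunks0 = (2, 3)
+            #     offsets = np.roll(np.cumsum(chunks0), 1)
+            #     offsets[0] = 0
+            #     assert offsets.tolist() == [0, 2]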
+ row_offsets = build_chunk_offsets_dask_array(x, 0, "row_offset-") + col_offsets = build_chunk_offsets_dask_array(x, 1, "col_offset-") + x = da.core.blockwise( + *(MatrixTupleExtractor, "ijk"), + *(output_ranges, "k"), + *(x, "ij"), + *(row_offsets, "i"), + *(col_offsets, "j"), + *(starts, "ij"), + *(stops_, "ij"), + gb_dtype=dtype, + dtype=dtype_, + meta=np.array([[[]]]), + ) + x = da.reduction( + x, _identity, _flatten, axis=1, concatenate=False, dtype=dtype_, meta=np.array([[]]) + ) + return da.reduction( + x, _identity, _flatten, axis=0, concatenate=False, dtype=dtype_, meta=np.array([]) + ) - dtype_ = np_dtype(self.dtype) - # Compute row/col offsets as dask arrays that can align with this - # Matrix's (self's) chunks to convert chunk row/col indices to - # full dask-grblas Matrix indices. - row_offsets = build_chunk_offsets_dask_array(x, 0, "row_offset-") - col_offsets = build_chunk_offsets_dask_array(x, 1, "col_offset-") - x = da.core.blockwise( - *(MatrixTupleExtractor, "ijk"), - *(output_ranges, "k"), - *(x, "ij"), - *(row_offsets, "i"), - *(col_offsets, "j"), - *(starts, "ij"), - *(stops, "ij"), - gb_dtype=dtype, - dtype=dtype_, - meta=np.array([[[]]]), - ) - x = da.reduction( - x, _identity, _flatten, axis=1, concatenate=False, dtype=dtype_, meta=np.array([[]]) - ) - x = da.reduction( - x, _identity, _flatten, axis=0, concatenate=False, dtype=dtype_, meta=np.array([]) - ) + # since the size of the output (rows, columns, values) depends on nnz, a delayed quantity, + # we need to return the output as DOnions (twice-delayed dask-arrays) + meta = np.array([]) + rcv_donion = DOnion.sprout(nnz, meta, _to_values, x, starts, stops_, dtype, chunks) - meta_i, meta_j, meta_v = self._meta.to_values(dtype) - rows = da.map_blocks(_get_rows, x, dtype=meta_i.dtype, meta=meta_i) - cols = da.map_blocks(_get_cols, x, dtype=meta_j.dtype, meta=meta_j) - vals = da.map_blocks(_get_vals, x, dtype=meta_v.dtype, meta=meta_v) + dtype_i = np_dtype(lookup_dtype(meta_i.dtype)) + rows = rcv_donion.deep_extract(meta_i, da.map_blocks, _get_rows, dtype=dtype_i, meta=meta_i) + cols = rcv_donion.deep_extract(meta_i, da.map_blocks, _get_cols, dtype=dtype_i, meta=meta_i) + dtype_v = np_dtype(lookup_dtype(meta_v.dtype)) + vals = rcv_donion.deep_extract(meta_v, da.map_blocks, _get_vals, dtype=dtype_v, meta=meta_v) return rows, cols, vals - def rechunk(self, inplace=False, chunks="auto"): - chunks = da.core.normalize_chunks(chunks, self.shape, dtype=np.int64) - if inplace: - self.resize(*self.shape, chunks=chunks) - else: - return self.resize(*self.shape, chunks=chunks, inplace=False) - def isequal(self, other, *, check_dtype=False): + gb_types = (gb.Matrix, gb.matrix.TransposedMatrix) other = self._expect_type( - other, (Matrix, TransposedMatrix), within="isequal", argname="other" + other, (Matrix, TransposedMatrix) + gb_types, within="isequal", argname="other" ) return super().isequal(other, check_dtype=check_dtype) def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): + gb_types = (gb.Matrix, gb.matrix.TransposedMatrix) other = self._expect_type( - other, (Matrix, TransposedMatrix), within="isclose", argname="other" + other, (Matrix, TransposedMatrix) + gb_types, within="isclose", argname="other" ) return super().isclose(other, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype) @@ -681,29 +1107,79 @@ def _delete_element(self, resolved_indexes): class TransposedMatrix: ndim = 2 + _is_scalar = False _is_transposed = True - def __init__(self, matrix): + __and__ = 
gb.matrix.TransposedMatrix.__and__ + __bool__ = gb.matrix.TransposedMatrix.__bool__ + __or__ = gb.matrix.TransposedMatrix.__or__ + + __abs__ = gb.matrix.TransposedMatrix.__abs__ + __add__ = gb.matrix.TransposedMatrix.__add__ + __divmod__ = gb.matrix.TransposedMatrix.__divmod__ + __eq__ = gb.matrix.TransposedMatrix.__eq__ + __floordiv__ = gb.matrix.TransposedMatrix.__floordiv__ + __ge__ = gb.matrix.TransposedMatrix.__ge__ + __gt__ = gb.matrix.TransposedMatrix.__gt__ + __invert__ = gb.matrix.TransposedMatrix.__invert__ + __le__ = gb.matrix.TransposedMatrix.__le__ + __lt__ = gb.matrix.TransposedMatrix.__lt__ + __mod__ = gb.matrix.TransposedMatrix.__mod__ + __mul__ = gb.matrix.TransposedMatrix.__mul__ + __ne__ = gb.matrix.TransposedMatrix.__ne__ + __neg__ = gb.matrix.TransposedMatrix.__neg__ + __pow__ = gb.matrix.TransposedMatrix.__pow__ + __radd__ = gb.matrix.TransposedMatrix.__radd__ + __rdivmod__ = gb.matrix.TransposedMatrix.__rdivmod__ + __rfloordiv__ = gb.matrix.TransposedMatrix.__rfloordiv__ + __rmod__ = gb.matrix.TransposedMatrix.__rmod__ + __rmul__ = gb.matrix.TransposedMatrix.__rmul__ + __rpow__ = gb.matrix.TransposedMatrix.__rpow__ + __rsub__ = gb.matrix.TransposedMatrix.__rsub__ + __rtruediv__ = gb.matrix.TransposedMatrix.__rtruediv__ + __rxor__ = gb.matrix.TransposedMatrix.__rxor__ + __sub__ = gb.matrix.TransposedMatrix.__sub__ + __truediv__ = gb.matrix.TransposedMatrix.__truediv__ + __xor__ = gb.matrix.TransposedMatrix.__xor__ + + def __init__(self, matrix, meta=None): assert type(matrix) is Matrix self._matrix = matrix - self._meta = matrix._meta.T + self._meta = matrix._meta.T if meta is None else meta # Aggregator-specific requirements: - self._nrows = self.nrows - self._ncols = self.ncols + self._nrows = self._meta.nrows + self._ncols = self._meta.ncols + + @property + def is_dOnion(self): + return is_DOnion(self._matrix._delayed) + + @property + def dOnion_if(self): + return self._matrix._delayed if self.is_dOnion else self + + def dup(self, dtype=None, *, mask=None, name=None): + return self.new(dtype=dtype, mask=mask) def new(self, *, dtype=None, mask=None): + if any_dOnions(self, mask): + donion = DOnion.multi_access( + self._meta.new(dtype), self.__class__.new, self, dtype=dtype, mask=mask + ) + return Matrix(donion) + gb_dtype = self._matrix.dtype if dtype is None else lookup_dtype(dtype) dtype = np_dtype(gb_dtype) delayed = self._matrix._delayed if mask is None: - mask_ind = None mask_type = None + mask_ind = None else: - mask = mask.mask - mask_ind = "ji" mask_type = get_grblas_type(mask) + mask = mask.mask._delayed + mask_ind = "ji" delayed = da.core.blockwise( *(_transpose, "ji"), *(delayed, "ij"), @@ -724,17 +1200,27 @@ def dtype(self): return self._meta.dtype def to_values(self, dtype=None, chunks="auto"): - # TODO: make this lazy; can we do something smart with this? 
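# --- illustrative aside (not part of the patch) ------------------------------
# In coordinate form a transpose is only a relabeling, which is why the new
# `to_values` below can stay lazy: it swaps the row and column index arrays
# instead of moving any data, e.g.
#
#     import numpy as np
#
#     rows, cols = np.array([0, 1]), np.array([2, 3])
#     t_rows, t_cols = cols, rows  # COO transpose
# ------------------------------------------------------------------------------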
rows, cols, vals = self._matrix.to_values(dtype=dtype, chunks=chunks) return cols, rows, vals # Properties - nrows = Matrix.nrows - ncols = Matrix.ncols - shape = Matrix.shape - nvals = Matrix.nvals + def isequal(self, other, *, check_dtype=False): + other = self._expect_type( + other, (Matrix, TransposedMatrix), within="isequal", argname="other" + ) + return BaseType.isequal(self, other, check_dtype=check_dtype) + + def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): + other = self._expect_type( + other, (Matrix, TransposedMatrix), within="isclose", argname="other" + ) + return BaseType.isclose( + self, other, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype + ) # Delayed methods + __contains__ = Matrix.__contains__ + __getitem__ = Matrix.__getitem__ ewise_add = Matrix.ewise_add ewise_mult = Matrix.ewise_mult mxv = Matrix.mxv @@ -746,13 +1232,211 @@ def to_values(self, dtype=None, chunks="auto"): reduce_scalar = Matrix.reduce_scalar # Misc. - isequal = Matrix.isequal - isclose = Matrix.isclose - __getitem__ = Matrix.__getitem__ + nrows = Matrix.nrows + ncols = Matrix.ncols + shape = Matrix.shape + nvals = Matrix.nvals + _expect_type = Matrix._expect_type __array__ = Matrix.__array__ name = Matrix.name +class MatrixExpression(GbDelayed): + __slots__ = () + output_type = gb.Matrix + ndim = 2 + _is_scalar = False + + # automethods: + __and__ = gb.matrix.MatrixExpression.__and__ + __bool__ = gb.matrix.MatrixExpression.__bool__ + __or__ = gb.matrix.MatrixExpression.__or__ + _get_value = _automethods._get_value + S = gb.matrix.MatrixExpression.S + T = gb.matrix.MatrixExpression.T + V = gb.matrix.MatrixExpression.V + apply = gb.matrix.MatrixExpression.apply + ewise_add = gb.matrix.MatrixExpression.ewise_add + ewise_mult = gb.matrix.MatrixExpression.ewise_mult + isclose = gb.matrix.MatrixExpression.isclose + isequal = gb.matrix.MatrixExpression.isequal + kronecker = gb.matrix.MatrixExpression.kronecker + mxm = gb.matrix.MatrixExpression.mxm + mxv = gb.matrix.MatrixExpression.mxv + ncols = gb.matrix.MatrixExpression.ncols + nrows = gb.matrix.MatrixExpression.nrows + nvals = gb.matrix.MatrixExpression.nvals + reduce_rowwise = gb.matrix.MatrixExpression.reduce_rowwise + reduce_columnwise = gb.matrix.MatrixExpression.reduce_columnwise + reduce_scalar = gb.matrix.MatrixExpression.reduce_scalar + shape = gb.matrix.MatrixExpression.shape + nvals = gb.matrix.MatrixExpression.nvals + + # infix sugar: + __abs__ = gb.matrix.MatrixExpression.__abs__ + __add__ = gb.matrix.MatrixExpression.__add__ + __divmod__ = gb.matrix.MatrixExpression.__divmod__ + __eq__ = gb.matrix.MatrixExpression.__eq__ + __floordiv__ = gb.matrix.MatrixExpression.__floordiv__ + __ge__ = gb.matrix.MatrixExpression.__ge__ + __gt__ = gb.matrix.MatrixExpression.__gt__ + __invert__ = gb.matrix.MatrixExpression.__invert__ + __le__ = gb.matrix.MatrixExpression.__le__ + __lt__ = gb.matrix.MatrixExpression.__lt__ + __mod__ = gb.matrix.MatrixExpression.__mod__ + __mul__ = gb.matrix.MatrixExpression.__mul__ + __ne__ = gb.matrix.MatrixExpression.__ne__ + __neg__ = gb.matrix.MatrixExpression.__neg__ + __pow__ = gb.matrix.MatrixExpression.__pow__ + __radd__ = gb.matrix.MatrixExpression.__radd__ + __rdivmod__ = gb.matrix.MatrixExpression.__rdivmod__ + __rfloordiv__ = gb.matrix.MatrixExpression.__rfloordiv__ + __rmod__ = gb.matrix.MatrixExpression.__rmod__ + __rmul__ = gb.matrix.MatrixExpression.__rmul__ + __rpow__ = gb.matrix.MatrixExpression.__rpow__ + __rsub__ = gb.matrix.MatrixExpression.__rsub__ + __rtruediv__ = 
gb.matrix.MatrixExpression.__rtruediv__ + __rxor__ = gb.matrix.MatrixExpression.__rxor__ + __sub__ = gb.matrix.MatrixExpression.__sub__ + __truediv__ = gb.matrix.MatrixExpression.__truediv__ + __xor__ = gb.matrix.MatrixExpression.__xor__ + + # bad sugar: + __itruediv__ = gb.matrix.MatrixExpression.__itruediv__ + __imul__ = gb.matrix.MatrixExpression.__imul__ + __imatmul__ = gb.matrix.MatrixExpression.__imatmul__ + __iadd__ = gb.matrix.MatrixExpression.__iadd__ + __iand__ = gb.matrix.MatrixExpression.__iand__ + __ipow__ = gb.matrix.MatrixExpression.__ipow__ + __imod__ = gb.matrix.MatrixExpression.__imod__ + __isub__ = gb.matrix.MatrixExpression.__isub__ + __ixor__ = gb.matrix.MatrixExpression.__ixor__ + __ifloordiv__ = gb.matrix.MatrixExpression.__ifloordiv__ + __ior__ = gb.matrix.MatrixExpression.__ior__ + + def __init__( + self, + parent, + method_name, + *args, + meta=None, + ncols=None, + nrows=None, + **kwargs, + ): + super().__init__( + parent, + method_name, + *args, + meta=meta, + **kwargs, + ) + if ncols is None: + ncols = self.parent._ncols + if nrows is None: + nrows = self.parent._nrows + self._ncols = ncols + self._nrows = nrows + + # def __getattr__(self, item): + # return getattr(gb.matrix.MatrixExpression, item) + + # def construct_output(self, dtype=None, *, name=None): + # if dtype is None: + # dtype = self.dtype + # nrows = 0 if self._nrows.is_dOnion else self._nrows + # ncols = 0 if self._ncols.is_dOnion else self._ncols + # return Matrix.new(dtype, nrows, ncols, name=name) + + +class MatrixIndexExpr(AmbiguousAssignOrExtract): + __slots__ = "_ncols", "_nrows" + ndim = 2 + output_type = gb.Matrix + _is_transposed = False + + def __init__(self, parent, resolved_indexes, nrows, ncols): + super().__init__(parent, resolved_indexes) + self._nrows = nrows + self._ncols = ncols + + @property + def ncols(self): + return self._ncols + + @property + def nrows(self): + return self._nrows + + @property + def shape(self): + return (self._nrows, self._ncols) + + # Begin auto-generated code: Matrix + __and__ = gb.matrix.MatrixIndexExpr.__and__ + __bool__ = gb.matrix.MatrixIndexExpr.__bool__ + __or__ = gb.matrix.MatrixIndexExpr.__or__ + _get_value = _automethods._get_value + S = gb.matrix.MatrixIndexExpr.S + T = gb.matrix.MatrixIndexExpr.T + V = gb.matrix.MatrixIndexExpr.V + apply = gb.matrix.MatrixIndexExpr.apply + ewise_add = gb.matrix.MatrixIndexExpr.ewise_add + ewise_mult = gb.matrix.MatrixIndexExpr.ewise_mult + isclose = gb.matrix.MatrixIndexExpr.isclose + isequal = gb.matrix.MatrixIndexExpr.isequal + kronecker = gb.matrix.MatrixIndexExpr.kronecker + mxm = gb.matrix.MatrixIndexExpr.mxm + mxv = gb.matrix.MatrixIndexExpr.mxv + nvals = gb.matrix.MatrixIndexExpr.nvals + reduce_rowwise = gb.matrix.MatrixIndexExpr.reduce_rowwise + reduce_columnwise = gb.matrix.MatrixIndexExpr.reduce_columnwise + reduce_scalar = gb.matrix.MatrixIndexExpr.reduce_scalar + nvals = gb.matrix.MatrixIndexExpr.nvals + + # infix sugar: + __abs__ = gb.matrix.MatrixIndexExpr.__abs__ + __add__ = gb.matrix.MatrixIndexExpr.__add__ + __divmod__ = gb.matrix.MatrixIndexExpr.__divmod__ + __eq__ = gb.matrix.MatrixIndexExpr.__eq__ + __floordiv__ = gb.matrix.MatrixIndexExpr.__floordiv__ + __ge__ = gb.matrix.MatrixIndexExpr.__ge__ + __gt__ = gb.matrix.MatrixIndexExpr.__gt__ + __invert__ = gb.matrix.MatrixIndexExpr.__invert__ + __le__ = gb.matrix.MatrixIndexExpr.__le__ + __lt__ = gb.matrix.MatrixIndexExpr.__lt__ + __mod__ = gb.matrix.MatrixIndexExpr.__mod__ + __mul__ = gb.matrix.MatrixIndexExpr.__mul__ + __ne__ = 
gb.matrix.MatrixIndexExpr.__ne__ + __neg__ = gb.matrix.MatrixIndexExpr.__neg__ + __pow__ = gb.matrix.MatrixIndexExpr.__pow__ + __radd__ = gb.matrix.MatrixIndexExpr.__radd__ + __rdivmod__ = gb.matrix.MatrixIndexExpr.__rdivmod__ + __rfloordiv__ = gb.matrix.MatrixIndexExpr.__rfloordiv__ + __rmod__ = gb.matrix.MatrixIndexExpr.__rmod__ + __rmul__ = gb.matrix.MatrixIndexExpr.__rmul__ + __rpow__ = gb.matrix.MatrixIndexExpr.__rpow__ + __rsub__ = gb.matrix.MatrixIndexExpr.__rsub__ + __rtruediv__ = gb.matrix.MatrixIndexExpr.__rtruediv__ + __rxor__ = gb.matrix.MatrixIndexExpr.__rxor__ + __sub__ = gb.matrix.MatrixIndexExpr.__sub__ + __truediv__ = gb.matrix.MatrixIndexExpr.__truediv__ + __xor__ = gb.matrix.MatrixIndexExpr.__xor__ + + # bad sugar: + __itruediv__ = gb.matrix.MatrixIndexExpr.__itruediv__ + __imul__ = gb.matrix.MatrixIndexExpr.__imul__ + __imatmul__ = gb.matrix.MatrixIndexExpr.__imatmul__ + __iadd__ = gb.matrix.MatrixIndexExpr.__iadd__ + __iand__ = gb.matrix.MatrixIndexExpr.__iand__ + __ipow__ = gb.matrix.MatrixIndexExpr.__ipow__ + __imod__ = gb.matrix.MatrixIndexExpr.__imod__ + __isub__ = gb.matrix.MatrixIndexExpr.__isub__ + __ixor__ = gb.matrix.MatrixIndexExpr.__ixor__ + __ifloordiv__ = gb.matrix.MatrixIndexExpr.__ifloordiv__ + __ior__ = gb.matrix.MatrixIndexExpr.__ior__ + + def _chunk_diag_v2(inner_matrix, k): return wrap_inner(gb.ss.diag(inner_matrix.value, k)) @@ -911,7 +1595,9 @@ def _build_2D_chunk( out_row_range, out_col_range, fragments, + values, dup_op=None, + clear=False, ): """ Reassembles filtered tuples (row, col, val) in the list `fragments` @@ -921,17 +1607,25 @@ def _build_2D_chunk( """ rows = np.concatenate([rows for (rows, _, _) in fragments]) cols = np.concatenate([cols for (_, cols, _) in fragments]) - vals = np.concatenate([vals for (_, _, vals) in fragments]) nrows = out_row_range[0].stop - out_row_range[0].start ncols = out_col_range[0].stop - out_col_range[0].start - inner_matrix.value.build( - rows, - cols, - vals, - nrows=nrows, - ncols=ncols, - dup_op=dup_op, - ) + if not clear and inner_matrix.value.nvals > 0: + raise gb.exceptions.OutputNotEmpty() + + if values is None: + vals = np.concatenate([vals for (_, _, vals) in fragments]) + inner_matrix.value.build( + rows, + cols, + vals, + nrows=nrows, + ncols=ncols, + dup_op=dup_op, + clear=clear, + ) + else: + vals = values + inner_matrix.value.ss.build_scalar(rows, cols, vals) return InnerMatrix(inner_matrix.value) @@ -944,7 +1638,7 @@ def _new_Matrix_chunk(out_row_range, out_col_range, gb_dtype=None): return InnerMatrix(gb.Matrix.new(gb_dtype, nrows=nrows, ncols=ncols)) -def _from_values2D(fragments, out_row_range, out_col_range, gb_dtype=None): +def _from_values2D(values, fragments, out_row_range, out_col_range, dup_op=None, gb_dtype=None): """ Reassembles filtered tuples (row, col, val) in the list `fragments` obtained from _pick2D() for the chunk within the given row and column @@ -953,26 +1647,46 @@ def _from_values2D(fragments, out_row_range, out_col_range, gb_dtype=None): """ rows = np.concatenate([rows for (rows, _, _) in fragments]) cols = np.concatenate([cols for (_, cols, _) in fragments]) - vals = np.concatenate([vals for (_, _, vals) in fragments]) + if values is None: + vals = np.concatenate([vals for (_, _, vals) in fragments]) + else: + vals = values nrows = out_row_range[0].stop - out_row_range[0].start ncols = out_col_range[0].stop - out_col_range[0].start + if rows.size == 0 or cols.size == 0: + return InnerMatrix(gb.Matrix.new(gb_dtype, nrows=nrows, ncols=ncols)) return InnerMatrix( - 
gb.Matrix.from_values(rows, cols, vals, nrows=nrows, ncols=ncols, dtype=gb_dtype) + gb.Matrix.from_values( + rows, cols, vals, nrows=nrows, ncols=ncols, dup_op=dup_op, dtype=gb_dtype + ) ) -def _pick2D(rows, cols, values, row_range, col_range): +def _pick2D(rows, cols, values, row_range, col_range, shape): """ Filters out only those tuples (row, col, val) that lie within the given row and column ranges. Indices are also offset appropriately. """ + # validate indices: + rows = np.where(rows < 0, rows + shape[0], rows) + bad_indices = (rows < 0) | (shape[0] <= rows) + if np.any(bad_indices): + raise IndexOutOfBound + + cols = np.where(cols < 0, cols + shape[1], cols) + bad_indices = (cols < 0) | (shape[1] <= cols) + if np.any(bad_indices): + raise IndexOutOfBound + + # filter into chunk: row_range, col_range = row_range[0], col_range[0] rows_in = (row_range.start <= rows) & (rows < row_range.stop) cols_in = (col_range.start <= cols) & (cols < col_range.stop) rows = rows[rows_in & cols_in] - row_range.start cols = cols[rows_in & cols_in] - col_range.start - values = values[rows_in & cols_in] + if isinstance(values, np.ndarray): + values = values[rows_in & cols_in] return (rows, cols, values) @@ -1005,7 +1719,8 @@ def _identity(chunk, keepdims=None, axis=None): def _concatenate_files(chunk_files, keepdims=None, axis=None): import os import shutil - from scipy.io.mmio import MMFile, mminfo + from .io import MMFile + from scipy.io import mminfo chunk_files = chunk_files if type(chunk_files) is list else [chunk_files] first_chunk_file, _, row_range_first, col_range_first = chunk_files[0] @@ -1162,3 +1877,5 @@ def _concat_matrix(seq, axis=0): gb.utils._output_types[Matrix] = gb.Matrix gb.utils._output_types[TransposedMatrix] = gb.matrix.TransposedMatrix +gb.utils._output_types[MatrixExpression] = gb.Matrix +gb.utils._output_types[MatrixIndexExpr] = gb.Matrix diff --git a/dask_grblas/scalar.py b/dask_grblas/scalar.py index 89c74e7..2266504 100644 --- a/dask_grblas/scalar.py +++ b/dask_grblas/scalar.py @@ -3,8 +3,9 @@ import numpy as np from dask.delayed import Delayed, delayed -from .base import BaseType, InnerBaseType -from .expr import AmbiguousAssignOrExtract, GbDelayed +from . 
import _automethods
+from .base import BaseType, InnerBaseType, DOnion, Box, any_dOnions
+from .expr import AmbiguousAssignOrExtract, GbDelayed, _is_pair
 from .utils import get_meta, np_dtype
@@ -67,35 +68,128 @@ def new(cls, dtype, *, name=None):
         return new(cls, dtype, name=name)

     def __init__(self, delayed, meta=None):
-        assert type(delayed) is da.Array, type(delayed)
-        assert delayed.ndim == 0
+        assert type(delayed) in {da.Array, DOnion}, type(delayed)
         self._delayed = delayed
+        if type(delayed) is da.Array:
+            assert delayed.ndim == 0
         if meta is None:
             meta = gb.Scalar.new(delayed.dtype)
+            # meta = gb.Scalar.from_value(1, dtype=delayed.dtype)
         self._meta = meta
         self.dtype = meta.dtype

-    def update(self, expr):
+    def update(self, expr, in_dOnion=False):
+        typ = type(expr)
+        if any_dOnions(self, expr):
+            self_copy = self.__class__(self._optional_dup(), meta=self._meta)
+            expr_ = expr
+            if isinstance(expr, AmbiguousAssignOrExtract) and expr.has_dOnion:
+
+                def update_by_aae(c, p, k_0, k_1):
+                    keys = k_0 if k_1 is None else (k_0, k_1)
+                    return c.update(p[keys], in_dOnion=True)
+
+                if _is_pair(expr_.index):
+                    keys_0, keys_1 = expr_.index[0], expr_.index[1]
+                else:
+                    keys_0, keys_1 = expr_.index, None
+
+                donion = DOnion.multi_access(
+                    self._meta,
+                    update_by_aae,
+                    self_copy,
+                    expr_.parent,
+                    *(keys_0, keys_1),
+                )
+                self.__init__(donion, self._meta)
+                return
+
+            if isinstance(expr, GbDelayed) and expr.has_dOnion:
+
+                def update_by_gbd(c, *args, **kwargs):
+                    gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
+                    return c.update(gbd, in_dOnion=True)
+
+                donion = DOnion.multi_access(
+                    self._meta,
+                    update_by_gbd,
+                    self_copy,
+                    expr_.parent,
+                    expr_.method_name,
+                    *expr_.args,
+                    **expr_.kwargs,
+                )
+                self.__init__(donion, self._meta)
+                return
+
+            donion = DOnion.multi_access(
+                self._meta, Scalar.update, self_copy, expr_, in_dOnion=True
+            )
+            self.__init__(donion, self._meta)
+            return
+
+        if typ is Box:
+            expr = expr.content
+            typ = type(expr)
+
         self._meta.update(get_meta(expr))
         self._meta.clear()
-        typ = type(expr)
-        if typ is AmbiguousAssignOrExtract:
+        if isinstance(expr, AmbiguousAssignOrExtract):
             # Extract (s << v[index])
-            self.value = expr.new(dtype=self.dtype).value
+            expr_new = expr.new(dtype=self.dtype)
+            self.value = expr_new.value
         elif typ is Scalar:
             # Simple assignment (s << t)
             self.value = expr.value
-        elif typ is GbDelayed:
+        elif isinstance(expr, GbDelayed):
             # s << v.reduce()
             expr._update(self)
         else:
             # Try simple assignment (s << 1)
             self.value = expr
+        if in_dOnion:
+            return self.__class__(self._delayed, meta=self._meta)

-    def _update(self, delayed, *, accum):
+    def _update(self, expr, *, accum, in_dOnion=False):
         # s(accum=accum) << v.reduce()
-        assert type(delayed) is GbDelayed
-        delayed._update(self, accum=accum)
+        typ = type(expr)
+        if typ is Box:
+            expr = expr.content
+
+        assert isinstance(expr, GbDelayed)
+
+        if any_dOnions(self, expr):
+            self_copy = self.__class__(self._optional_dup(), meta=self._meta)
+            expr_ = expr
+            if isinstance(expr, GbDelayed) and expr.has_dOnion:
+
+                def _update_by_gbd(c, *args, accum=None, **kwargs):
+                    gbd = getattr(args[0], args[1])(*args[2:], **kwargs)
+                    return c._update(gbd, accum=accum, in_dOnion=True)
+
+                donion = DOnion.multi_access(
+                    self._meta,
+                    _update_by_gbd,
+                    self_copy,
+                    expr_.parent,
+                    expr_.method_name,
+                    *expr_.args,
+                    accum=accum,
+                    **expr_.kwargs,
+                )
+                self.__init__(donion, self._meta)
+                return
+
+            expr_ = expr.parent.dOnion_if
+            donion = DOnion.multi_access(
+                self._meta, Scalar._update, self_copy, expr_, accum=accum, 
in_dOnion=True + ) + self.__init__(donion, self._meta) + return + + expr._update(self, accum=accum) + if in_dOnion: + return self.__class__(self._delayed, meta=self._meta) def dup(self, dtype=None, *, name=None): if dtype is None: @@ -112,7 +206,10 @@ def dup(self, dtype=None, *, name=None): def _persist(self, *args, **kwargs): """Since scalars are small, persist them if they need to be computed""" - self._delayed = self._delayed.persist(*args, **kwargs) + if self.is_dOnion: + self._delayed = self._delayed._persist(*args, **kwargs) + else: + self._delayed = self._delayed.persist(*args, **kwargs) def __eq__(self, other): return self.isequal(other).compute() @@ -154,11 +251,28 @@ def __array__(self, dtype=None): def isequal(self, other, *, check_dtype=False): if other is None: return self.is_empty + if type(other) is Box: + other = other.content if type(other) is not Scalar: + if other is None: + return self.is_empty self._meta.isequal(get_meta(other)) - other = Scalar.from_value(other) + try: + other = Scalar.from_value(other) + except TypeError: + other = self._expect_type( + other, + (Scalar, gb.Scalar), + within="isequal", + argname="other", + extra_message="Literal scalars also accepted.", + ) + # Don't check dtype if we had to infer dtype of `other` check_dtype = False - return super().isequal(other, check_dtype=check_dtype) + if check_dtype and self.dtype != other.dtype: + return False + else: + return super().isequal(other, check_dtype=check_dtype) def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): if other is None: @@ -171,6 +285,10 @@ def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): @property def is_empty(self): + if self.is_dOnion: + donion = DOnion.multi_access(gb.Scalar.new(bool), getattr, self, "is_empty") + return PythonScalar(donion) + delayed = da.core.elemwise( _is_empty, self._delayed, @@ -198,6 +316,11 @@ def value(self): @value.setter def value(self, val): + if any_dOnions(self, val): + donion = DOnion.multi_access(self._meta, Scalar.from_value, val) + self.__init__(donion, meta=self._meta) + return + scalar = Scalar.from_value(val, dtype=self.dtype) self._delayed = scalar._delayed @@ -210,6 +333,7 @@ class PythonScalar: __complex__ = Scalar.__complex__ __index__ = Scalar.__index__ _persist = Scalar._persist + is_dOnion = Scalar.is_dOnion @classmethod def from_delayed(cls, scalar, dtype, *, name=None): @@ -228,9 +352,67 @@ def __eq__(self, other): def compute(self, *args, **kwargs): innerval = self._delayed.compute(*args, **kwargs) + if self.is_dOnion: + return innerval.value if hasattr(innerval, "value") else innerval + return innerval.value.value +class ScalarExpression(GbDelayed): + __slots__ = () + output_type = gb.Scalar + ndim = 0 + shape = () + _is_scalar = True + _is_cscalar = False + __and__ = gb.scalar.ScalarExpression.__and__ + __bool__ = gb.scalar.ScalarExpression.__bool__ + __eq__ = gb.scalar.ScalarExpression.__eq__ + __float__ = gb.scalar.ScalarExpression.__float__ + __index__ = gb.scalar.ScalarExpression.__index__ + __int__ = gb.scalar.ScalarExpression.__int__ + __or__ = gb.scalar.ScalarExpression.__or__ + _get_value = _automethods._get_value + isclose = gb.scalar.ScalarExpression.isclose + isequal = gb.scalar.ScalarExpression.isequal + value = gb.scalar.ScalarExpression.value + + # def __getattr__(self, item): + # return getattr(gb.scalar.ScalarExpression, item) + + +class ScalarIndexExpr(AmbiguousAssignOrExtract): + output_type = gb.Scalar + ndim = 0 + shape = () + _is_scalar = True + _is_cscalar = False 
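+    # Note: like the other *IndexExpr classes, this defers extraction until
+    # `.new()` is called or `autocompute` resolves it through
+    # `_automethods._get_value`.  A hypothetical session (sketch only):
+    #
+    #     s = v[6]              # ScalarIndexExpr; nothing is computed yet
+    #     s.new()               # extracts and materializes the scalar
+    #     with gb.config.set(autocompute=True):
+    #         s == 0            # _get_value() computes on demand
+    #
+    # Without autocompute, touching `.value` raises a TypeError pointing
+    # users at `.new()`.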
+
+    dup = AmbiguousAssignOrExtract.new  # bind the inherited ``new`` explicitly; a bare ``new`` is undefined in this class body
+
+    @property
+    def is_cscalar(self):
+        return self._is_cscalar
+
+    @property
+    def is_grbscalar(self):
+        return not self._is_cscalar
+
+    # Begin auto-generated code: Scalar
+    __and__ = gb.scalar.ScalarIndexExpr.__and__
+    __bool__ = gb.scalar.ScalarIndexExpr.__bool__
+    __eq__ = gb.scalar.ScalarIndexExpr.__eq__
+    __float__ = gb.scalar.ScalarIndexExpr.__float__
+    __index__ = gb.scalar.ScalarIndexExpr.__index__
+    __int__ = gb.scalar.ScalarIndexExpr.__int__
+    __or__ = gb.scalar.ScalarIndexExpr.__or__
+    _get_value = _automethods._get_value
+    isclose = gb.scalar.ScalarIndexExpr.isclose
+    isequal = gb.scalar.ScalarIndexExpr.isequal
+    value = gb.scalar.ScalarIndexExpr.value
+    # End auto-generated code: Scalar
+
+
 # Dask task functions
 def _scalar_dup(x, dtype):
     return InnerScalar(x.value.dup(dtype=dtype))
@@ -250,3 +432,5 @@ def _invert(x):

 gb.utils._output_types[Scalar] = gb.Scalar
 gb.utils._output_types[PythonScalar] = gb.Scalar
+gb.utils._output_types[ScalarExpression] = gb.Scalar
+gb.utils._output_types[ScalarIndexExpr] = gb.Scalar
diff --git a/dask_grblas/utils.py b/dask_grblas/utils.py
index b07e64d..1141f07 100644
--- a/dask_grblas/utils.py
+++ b/dask_grblas/utils.py
@@ -2,11 +2,20 @@ import pandas as pd
 import dask.array as da
 import dask.dataframe as dd
+from functools import reduce
 from dask.base import tokenize
 from dask.delayed import delayed
 from .io import MMFile


+def pack_args(*args):
+    return args
+
+
+def pack_kwargs(**kwargs):
+    return kwargs
+
+
 def np_dtype(dtype):
     return np.dtype(dtype.numba_type.name)
@@ -31,6 +40,10 @@ def wrap_inner(val):
     return _inner_types[type(val)](val)


+def flatten(lol):
+    return reduce(lambda x, y: x + y, lol)
+
+
 def build_block_index_dask_array(x, axis, name):
     """
     Calculate block-index for each chunk of x along axis `axis`
diff --git a/dask_grblas/vector.py b/dask_grblas/vector.py
index 282dcea..16caed1 100644
--- a/dask_grblas/vector.py
+++ b/dask_grblas/vector.py
@@ -1,16 +1,26 @@ import dask.array as da
 import numpy as np
 import grblas as gb
+
+from numbers import Integral
+from tlz import compose
+
 from dask.base import tokenize
+from dask.highlevelgraph import HighLevelGraph
 from dask.delayed import Delayed, delayed
 from grblas import binary, monoid, semiring
 from grblas.dtypes import lookup_dtype
+from grblas.exceptions import IndexOutOfBound, DimensionMismatch
-from .base import BaseType, InnerBaseType, _nvals
-from .expr import AmbiguousAssignOrExtract, GbDelayed, Updater, Assigner
+from . 
import _automethods +from .base import BaseType, InnerBaseType, _nvals, DOnion, Box, any_dOnions +from .base import _dup as chunk_dup +from .expr import AmbiguousAssignOrExtract, IndexerResolver, GbDelayed, Updater, Assigner from .mask import StructuralMask, ValueMask from ._ss.vector import ss from .utils import ( + pack_args, + pack_kwargs, np_dtype, get_return_type, wrap_inner, @@ -77,6 +87,43 @@ def __getitem__(self, index): class Vector(BaseType): __slots__ = ("ss",) ndim = 1 + __abs__ = gb.Vector.__abs__ + __add__ = gb.Vector.__add__ + __divmod__ = gb.Vector.__divmod__ + __eq__ = gb.Vector.__eq__ + __floordiv__ = gb.Vector.__floordiv__ + __ge__ = gb.Vector.__ge__ + __gt__ = gb.Vector.__gt__ + __iadd__ = gb.Vector.__iadd__ + __iand__ = gb.Vector.__iand__ + __ifloordiv__ = gb.Vector.__ifloordiv__ + __imod__ = gb.Vector.__imod__ + __imul__ = gb.Vector.__imul__ + __invert__ = gb.Vector.__invert__ + __ior__ = gb.Vector.__ior__ + __ipow__ = gb.Vector.__ipow__ + __isub__ = gb.Vector.__isub__ + __itruediv__ = gb.Vector.__itruediv__ + __ixor__ = gb.Vector.__ixor__ + __le__ = gb.Vector.__le__ + __lt__ = gb.Vector.__lt__ + __mod__ = gb.Vector.__mod__ + __mul__ = gb.Vector.__mul__ + __ne__ = gb.Vector.__ne__ + __neg__ = gb.Vector.__neg__ + __pow__ = gb.Vector.__pow__ + __radd__ = gb.Vector.__radd__ + __rdivmod__ = gb.Vector.__rdivmod__ + __rfloordiv__ = gb.Vector.__rfloordiv__ + __rmod__ = gb.Vector.__rmod__ + __rmul__ = gb.Vector.__rmul__ + __rpow__ = gb.Vector.__rpow__ + __rsub__ = gb.Vector.__rsub__ + __rtruediv__ = gb.Vector.__rtruediv__ + __rxor__ = gb.Vector.__rxor__ + __sub__ = gb.Vector.__sub__ + __truediv__ = gb.Vector.__truediv__ + __xor__ = gb.Vector.__xor__ @classmethod def from_delayed(cls, vector, dtype, size, *, nvals=None, name=None): @@ -105,31 +152,68 @@ def from_values( /, size=None, *, - trust_size=False, dup_op=None, dtype=None, chunks="auto", name=None, ): - # Note: `trust_size` is a bool parameter that, when True, - # can be used to avoid expensive computation of max(indices) - # which is used to verify that `size` is indeed large enough - # to hold all the given tuples. - # TODO: - # dup_op support for dask_array indices/values (use reduce_assign?) 
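+        # Sketch (hypothetical values): with the rewrite below, an unknown
+        # `size` no longer forces an eager `da.max(indices).compute()`; the
+        # lazy maximum is wrapped in a DOnion instead:
+        #
+        #     idx = da.from_array(np.array([0, 3, 5]))
+        #     vals = da.from_array(np.array([1, 2, 3]))
+        #     v = Vector.from_values(idx, vals)  # size unknown -> DOnion
+        #     assert v.size == 6                 # resolved only on demand
+        #
+        # Passing `size=` up front keeps the result an ordinary dask-backed
+        # Vector and skips the DOnion wrapper.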
- if dup_op is None and type(indices) is da.Array and type(values) is da.Array: - if not trust_size or size is None: - # this branch is an expensive operation: - implied_size = 1 + da.max(indices).compute() - if size is not None and implied_size > size: - raise Exception() - size = implied_size if size is None else size - - idtype = gb.Vector.new(indices.dtype).dtype - np_idtype_ = np_dtype(idtype) - vdtype = gb.Vector.new(values.dtype).dtype + if hasattr(values, "dtype"): + dtype = lookup_dtype(values.dtype if dtype is None else dtype) + + meta = gb.Vector.new(dtype, size=size if isinstance(size, Integral) else 0) + + # check for any DOnions: + args = pack_args(indices, values, size) + kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name) + if any_dOnions(*args, **kwargs): + # dive into DOnion(s): + out_donion = DOnion.multi_access(meta, Vector.from_values, *args, **kwargs) + return Vector(out_donion, meta=meta) + + # no DOnions + if type(indices) is da.Array or type(values) is da.Array: + size_ = size + if type(indices) in {tuple, list, np.ndarray}: + size_ = size or (np.max(indices) + 1) + indices = da.asarray(indices) + if type(values) in {tuple, list, np.ndarray}: + values = da.asarray(values) + + np_idtype_ = np_dtype(lookup_dtype(indices.dtype)) + if isinstance(size_, Integral): + size = size_ + chunks = da.core.normalize_chunks(chunks, (size,), dtype=np_idtype_) + else: + if indices.size == 0: + raise ValueError("No indices provided. Unable to infer size.") + + if indices.dtype.kind not in "ui": + raise ValueError(f"indices must be integers, not {indices.dtype}") + + # Note: uint + int = float which numpy cannot cast to uint. So we + # ensure the same dtype for each summand here: + size = size_ + if size is None: + size = da.max(indices) + np.asarray(1, dtype=indices.dtype) + # Here `size` is a dask 0d-array whose computed value is + # used to determine the size of the Vector to be returned. + # But since we do not want to compute anything just now, + # we instead create a "dOnion" (dask Onion) object. 
This + # effectively means that we will use the inner value of + # `size` to create the new Vector: + args = pack_args(indices, values) + kwargs = pack_kwargs(dup_op=dup_op, dtype=dtype, chunks=chunks, name=name) + donion = DOnion.sprout(size, meta, Vector.from_values, *args, **kwargs) + return Vector(donion, meta=meta) + + # output shape `(size,)` is completely determined + if indices.size > 0: + if indices.size != values.size: + raise ValueError("`indices` and `values` lengths must match") + + vdtype = dtype np_vdtype_ = np_dtype(vdtype) - chunks = da.core.normalize_chunks(chunks, (size,), dtype=np_idtype_) + name_ = name name = str(name) if name else "" name = name + "-index-ranges" + tokenize(cls, chunks[0]) @@ -139,15 +223,17 @@ def from_values( *(indices, "j"), *(values, "j"), *(index_ranges, "i"), + size=size, dtype=np_vdtype_, meta=np.array([]), ) - meta = InnerVector(gb.Vector.new(vdtype)) + meta = InnerVector(gb.Vector.new(vdtype, size=size)) delayed = da.core.blockwise( *(_from_values1D, "i"), *(fragments, "ij"), *(index_ranges, "i"), concatenate=False, + dup_op=dup_op, gb_dtype=dtype, dtype=np_vdtype_, meta=meta, @@ -161,6 +247,14 @@ def from_values( @classmethod def new(cls, dtype, size=0, *, chunks="auto", name=None): + if any_dOnions(size): + meta = gb.Vector.new(dtype) + donion = DOnion.multi_access(meta, cls.new, dtype, size=size, chunks=chunks, name=name) + return Vector(donion, meta=meta) + + if type(size) is Box: + size = size.content + if size > 0: chunks = da.core.normalize_chunks(chunks, (size,), dtype=int) meta = gb.Vector.new(dtype) @@ -187,14 +281,18 @@ def __init__(self, delayed, meta=None, nvals=None): # if it is already known at the time of initialization of # this Vector, otherwise its value should be left as None # (the default) - assert type(delayed) is da.Array - assert delayed.ndim == 1 + assert type(delayed) in {da.Array, DOnion} self._delayed = delayed - if meta is None: - meta = gb.Vector.new(delayed.dtype, delayed.shape[0]) + if type(delayed) is da.Array: + assert delayed.ndim == 1 + if meta is None: + meta = gb.Vector.new(delayed.dtype, delayed.shape[0]) + else: + if meta is None: + meta = gb.Vector.new(delayed.dtype) self._meta = meta - self._size = meta.size self.dtype = meta.dtype + self._size = self.size self._nvals = nvals # Add ss extension methods self.ss = ss(self) @@ -227,13 +325,104 @@ def V(self): @property def size(self): + if self.is_dOnion: + return DOnion.multi_access(self._meta.size, getattr, self, "size") return self._meta.size @property def shape(self): + if self.is_dOnion: + return (self.size,) return self._meta.shape + def _head(self, delayed, shape): + """ + Take the leading portion of shape `shape` from `delayed` + """ + def _slice(inner, slc_x): + return InnerVector(inner.value[slc_x].new()) + + x = delayed + + stops_ = np.cumsum(x.chunks[0]) + starts = np.roll(stops_, 1) + starts[0] = 0 + + M = x.numblocks[0] + blockid = np.arange(M) + + # locate chunk containing last element: + i = min(self.shape[0], shape[0]) - 1 + filter = (starts <= i) & (i < stops_) + (last_block,) = blockid[filter] + tail_sz = i - starts[last_block] + 1 + + numblocks = (last_block + 1,) + heads = (tail_sz,) + new_chunks = (x.chunks[0][:last_block] + (tail_sz,),) + + name = "Vector.resize-" + tokenize(x) + dtype = self.dtype + dsk = dict() + for i in range(numblocks[0]): + x_cut = (i == numblocks[0] - 1) + if x_cut: + dsk[(name, i)] = ( + _slice, + (x.name, i), + slice(heads[0]) if x_cut else slice(None), + ) + else: + dsk[(name, i)] = (chunk_dup, (x.name, 
i), None, dtype, None) + + return name, dsk, new_chunks, numblocks + + def _add_tail(self, axis, size, name, dsk, chunks, numblocks): + """ + Append dask graph `dsk` with empty chunks on axis `axis` up to size `size` + """ + rem = size - self.shape[axis] + if rem > 0: + j = numblocks[axis] + new_chunks = chunks[axis] + (rem,) + new_chunks = (new_chunks,) + + dsk[(name, j)] = (compose(InnerVector, gb.Vector.new), self.dtype, rem) + + return name, dsk, new_chunks, (len(new_chunks[0]),) + + else: + return name, dsk, chunks, numblocks + def resize(self, size, inplace=True, chunks="auto"): + if any_dOnions(self, size): + donion = DOnion.multi_access( + self._meta, Vector.resize, self, size, inplace=False, chunks=chunks + ) + if inplace: + self.__init__(donion, meta=self._meta) + return + else: + return Vector(donion, meta=self._meta) + + name, dsk, new_chunks, num_blocks = self._head(self._delayed, (size,)) + name, dsk, new_chunks, num_blocks = self._add_tail(0, size, name, dsk, new_chunks, num_blocks) + + graph = HighLevelGraph.from_collections(name, dsk, dependencies=[self._delayed]) + x = da.core.Array(graph, name, new_chunks, meta=wrap_inner(self._meta)) + x = x.rechunk(chunks=chunks) + + if size >= self.size: + nvals = self.nvals + else: + nvals = None + + if inplace: + self.__init__(x, nvals=nvals) + else: + return Vector(x, nvals=nvals) + + def _resize_old(self, size, inplace=True, chunks="auto"): chunks = da.core.normalize_chunks(chunks, (size,), dtype=np.int64) output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-") @@ -269,6 +458,9 @@ def resize(self, size, inplace=True, chunks="auto"): else: return Vector(x, nvals=nvals) + def diag(self, k=0, dtype=None, chunks="auto"): + return self._diag(k=k, dtype=dtype, chunks=chunks) + def _diag(self, k=0, dtype=None, chunks="auto"): nrows = self.size + abs(k) kdiag_col_start = max(0, k) @@ -316,30 +508,20 @@ def rechunk(self, inplace=False, chunks="auto"): self.resize(*self.shape, chunks=chunks) else: return self.resize(*self.shape, chunks=chunks, inplace=False) - # chunks = da.core.normalize_chunks(chunks, self.shape, dtype=np.int64) - # id = self.to_values() - # new = Vector.from_values(*id, *self.shape, trust_size=True, chunks=chunks) - # if inplace: - # self.__init__(new._delayed) - # else: - # return new - def __getitem__(self, index): - return AmbiguousAssignOrExtract(self, index) + def __getitem__(self, keys): + resolved_indexes = IndexerResolver(self, keys) + shape = resolved_indexes.shape + if not shape: + from .scalar import ScalarIndexExpr + + return ScalarIndexExpr(self, resolved_indexes) + else: + return VectorIndexExpr(self, resolved_indexes, *shape) def __delitem__(self, keys): del Updater(self)[keys] - # del self._meta[index] - # delayed = self._optional_dup() - # TODO: normalize index - # delayed = delayed.map_blocks( - # _delitem, - # index, - # dtype=np_dtype(self.dtype), - # ) - # raise NotImplementedError() - def __setitem__(self, index, delayed): Assigner(Updater(self), index).update(delayed) @@ -358,14 +540,35 @@ def __iter__(self): return indices.flat def ewise_add(self, other, op=monoid.plus, *, require_monoid=True): - assert type(other) is Vector - meta = self._meta.ewise_add(other._meta, op=op, require_monoid=require_monoid) - return GbDelayed(self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta) + gb_types = (gb.Vector,) + other = self._expect_type(other, (Vector,) + gb_types, within="ewise_add", argname="other") + + try: + meta = self._meta.ewise_add(other._meta, op=op, 
require_monoid=require_monoid)
+        except DimensionMismatch:
+            if any_dOnions(self, other):
+                meta = self._meta.ewise_add(self._meta, op=op, require_monoid=require_monoid)
+            else:
+                raise
+
+        return VectorExpression(
+            self, "ewise_add", other, op, require_monoid=require_monoid, meta=meta
+        )

     def ewise_mult(self, other, op=binary.times):
-        assert type(other) is Vector
+        gb_types = (gb.Vector,)
+        other = self._expect_type(other, (Vector,) + gb_types, within="ewise_mult", argname="other")
+
+        try:
+            meta = self._meta.ewise_mult(other._meta, op=op)
+        except DimensionMismatch:
+            if any_dOnions(self, other):
+                meta = self._meta.ewise_mult(self._meta, op=op)
+            else:
+                raise
-        meta = self._meta.ewise_mult(other._meta, op=op)
-        return GbDelayed(self, "ewise_mult", other, op, meta=meta)
+        return VectorExpression(self, "ewise_mult", other, op, meta=meta)

     # Unofficial methods
     def inner(self, other, op=semiring.plus_times):
@@ -423,9 +626,21 @@ def outer(self, other, op=binary.times):
     def vxm(self, other, op=semiring.plus_times):
         from .matrix import Matrix, TransposedMatrix

-        assert type(other) in (Matrix, TransposedMatrix)
-        meta = self._meta.vxm(other._meta, op=op)
-        return GbDelayed(self, "vxm", other, op, meta=meta)
+        gb_types = (gb.Matrix, gb.matrix.TransposedMatrix)
+        other = self._expect_type(
+            other, (Matrix, TransposedMatrix) + gb_types, within="vxm", argname="other"
+        )
+        try:
+            meta = self._meta.vxm(other._meta, op=op)
+        except DimensionMismatch:
+            if any_dOnions(self, other):
+                other_meta = gb.Matrix.new(
+                    dtype=other._meta.dtype, nrows=self._meta.size, ncols=other._meta.ncols
+                )
+                meta = self._meta.vxm(other_meta, op=op)
+            else:
+                raise
+        return VectorExpression(self, "vxm", other, op, meta=meta, size=other.ncols)

     def apply(self, op, right=None, *, left=None):
         from .scalar import Scalar
@@ -439,11 +654,13 @@ def apply(self, op, right=None, *, left=None):
             right_meta = right.dtype.np_type(0)
         meta = self._meta.apply(op=op, left=left_meta, right=right_meta)
-        return GbDelayed(self, "apply", op, right, meta=meta, left=left)
+        return VectorExpression(self, "apply", op, right, meta=meta, left=left)
+
+    def reduce(self, op=monoid.plus, *, allow_empty=True):
+        from .scalar import ScalarExpression

-    def reduce(self, op=monoid.plus):
         meta = self._meta.reduce(op)
-        return GbDelayed(self, "reduce", op, meta=meta)
+        return ScalarExpression(self, "reduce", op, meta=meta, allow_empty=allow_empty)

     def build(self, indices, values, *, size=None, chunks=None, dup_op=None, clear=False):
         if clear:
@@ -461,11 +678,11 @@ def build(self, indices, values, *, size=None, chunks=None, dup_op=None, clear=F
         x = self._optional_dup()
         if type(indices) is list:
             if np.max(indices) >= self._size:
-                raise gb.exceptions.IndexOutOfBound
+                raise IndexOutOfBound
             indices = da.core.from_array(np.array(indices), name="indices-" + tokenize(indices))
         else:
             if da.max(indices).compute() >= self._size:
-                raise gb.exceptions.IndexOutOfBound
+                raise IndexOutOfBound

         if type(values) is list:
             values = da.core.from_array(np.array(values), name="values-" + tokenize(values))
@@ -504,67 +721,79 @@ def build(self, indices, values, *, size=None, chunks=None, dup_op=None, clear=F
         #     self.__init__(Vector.from_vector(vector)._delayed)

     def to_values(self, dtype=None, chunks="auto"):
+        dtype = lookup_dtype(self.dtype if dtype is None else dtype)
+        meta_i, meta_v = self._meta.to_values(dtype)
+        x = self._delayed
+        if type(x) is DOnion:
+            meta = np.array([])
+            result = x.getattr(meta, "to_values", dtype=dtype, chunks=chunks)
+            indices = 
result.getattr(meta_i, "__getitem__", 0) + values = result.getattr(meta_v, "__getitem__", 1) + return indices, values + + # get dask array of nvals for each chunk: nvals_array = da.core.blockwise( *(_nvals, "i"), *(x, "i"), adjust_chunks={"i": 1}, dtype=np.int64, meta=np.array([]) - ).compute() + ) - stops = np.cumsum(nvals_array) - starts = np.roll(stops, 1) + # accumulate dask array to get index-ranges of the output (indices, values) + stops_ = da.cumsum(nvals_array) # BEWARE: this function rechunks! + starts = da.roll(stops_, 1) + starts = starts.copy() if starts.size == 1 else starts # bug!! starts[0] = 0 - nnz = stops[-1] - - starts = starts.reshape(nvals_array.shape) - starts = da.from_array(starts, chunks=1, name="starts" + tokenize(starts)) - starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta) + nnz = stops_[-1] + starts = starts.rechunk(1) + stops_ = stops_.rechunk(1) + + def _to_values(x, starts, stops_, dtype, chunks, nnz): + # the following changes the `.chunks` attribute of `starts` and `stops_` so that + # `blockwise()` can align them with `x` + starts = da.core.Array(starts.dask, starts.name, x.chunks, starts.dtype, meta=x._meta) + stops_ = da.core.Array(stops_.dask, stops_.name, x.chunks, stops_.dtype, meta=x._meta) + + chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64) + output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-") + + gb_dtype = lookup_dtype(dtype) + dtype_ = np_dtype(gb_dtype) + index_offsets = build_chunk_offsets_dask_array(x, 0, "index_offset-") + x = da.core.blockwise( + *(VectorTupleExtractor, "ij"), + *(output_ranges, "j"), + *(x, "i"), + *(index_offsets, "i"), + *(starts, "i"), + *(stops_, "i"), + gb_dtype=gb_dtype, + dtype=dtype_, + meta=np.array([[]]), + ) + return da.reduction( + x, _identity, _flatten, axis=0, concatenate=False, dtype=dtype_, meta=np.array([]) + ) - stops = stops.reshape(nvals_array.shape) - stops = da.from_array(stops, chunks=1, name="stops" + tokenize(stops)) - stops = da.core.Array(stops.dask, stops.name, x.chunks, stops.dtype, meta=x._meta) + # since the size of the output (indices, values) depends on nnz, a delayed quantity, + # we need to return (indices, values) as DOnions (twice-delayed dask-arrays) + meta = np.array([]) + iv_donion = DOnion.sprout(nnz, meta, _to_values, x, starts, stops_, dtype, chunks) - chunks = da.core.normalize_chunks(chunks, (nnz,), dtype=np.int64) - output_ranges = build_ranges_dask_array_from_chunks(chunks[0], "output_ranges-") - - dtype_ = np_dtype(self.dtype) - index_offsets = build_chunk_offsets_dask_array(x, 0, "index_offset-") - x = da.core.blockwise( - *(VectorTupleExtractor, "ij"), - *(output_ranges, "j"), - *(x, "i"), - *(index_offsets, "i"), - *(starts, "i"), - *(stops, "i"), - gb_dtype=dtype, - dtype=dtype_, - meta=np.array([[]]), + dtype_i = np_dtype(lookup_dtype(meta_i.dtype)) + indices = iv_donion.deep_extract( + meta_i, da.map_blocks, _get_indices, dtype=dtype_i, meta=meta_i ) - x = da.reduction( - x, _identity, _flatten, axis=0, concatenate=False, dtype=dtype_, meta=np.array([]) + dtype_v = np_dtype(lookup_dtype(meta_v.dtype)) + values = iv_donion.deep_extract( + meta_v, da.map_blocks, _get_values, dtype=dtype_v, meta=meta_v ) - - meta_i, meta_v = self._meta.to_values(dtype) - indices = da.map_blocks(_get_indices, x, dtype=meta_i.dtype, meta=meta_i) - values = da.map_blocks(_get_values, x, dtype=meta_v.dtype, meta=meta_v) return indices, values - # delayed = self._delayed - # dtype_ = np_dtype(self.dtype) - # 
meta_i, meta_v = self._meta.to_values(dtype) - # meta = np.array([]) - # offsets = build_chunk_offsets_dask_array(delayed, 0, "index_offset-") - # x = da.map_blocks( - # TupleExtractor, delayed, offsets, gb_dtype=dtype, dtype=dtype_, meta=meta - # ) - # indices = da.map_blocks(_get_indices, x, dtype=meta_i.dtype, meta=meta) - # values = da.map_blocks(_get_values, x, dtype=meta_v.dtype, meta=meta) - # return indices, values - def isequal(self, other, *, check_dtype=False): - other = self._expect_type(other, Vector, within="isequal", argname="other") + other = self._expect_type(other, (Vector, gb.Vector), within="isequal", argname="other") return super().isequal(other, check_dtype=check_dtype) def isclose(self, other, *, rel_tol=1e-7, abs_tol=0.0, check_dtype=False): - other = self._expect_type(other, Vector, within="isclose", argname="other") + other = self._expect_type(other, (Vector, gb.Vector), within="isclose", argname="other") return super().isclose(other, rel_tol=rel_tol, abs_tol=abs_tol, check_dtype=check_dtype) def _delete_element(self, resolved_indexes): @@ -592,6 +821,180 @@ def _carg(self): Vector.ss = gb.utils.class_property(Vector.ss, ss) +class VectorExpression(GbDelayed): + __slots__ = () + output_type = gb.Vector + ndim = 1 + _is_scalar = False + + # automethods: + __and__ = gb.vector.VectorExpression.__and__ + __bool__ = gb.vector.VectorExpression.__bool__ + __or__ = gb.vector.VectorExpression.__or__ + _get_value = _automethods._get_value + S = gb.vector.VectorExpression.S + V = gb.vector.VectorExpression.V + apply = gb.vector.VectorExpression.apply + ewise_add = gb.vector.VectorExpression.ewise_add + ewise_mult = gb.vector.VectorExpression.ewise_mult + isclose = gb.vector.VectorExpression.isclose + isequal = gb.vector.VectorExpression.isequal + nvals = gb.vector.VectorExpression.nvals + reduce = gb.vector.VectorExpression.reduce + shape = gb.vector.VectorExpression.shape + size = gb.vector.VectorExpression.size + vxm = gb.vector.VectorExpression.vxm + + # infix sugar: + __abs__ = gb.vector.VectorExpression.__abs__ + __add__ = gb.vector.VectorExpression.__add__ + __divmod__ = gb.vector.VectorExpression.__divmod__ + __eq__ = gb.vector.VectorExpression.__eq__ + __floordiv__ = gb.vector.VectorExpression.__floordiv__ + __ge__ = gb.vector.VectorExpression.__ge__ + __gt__ = gb.vector.VectorExpression.__gt__ + __invert__ = gb.vector.VectorExpression.__invert__ + __le__ = gb.vector.VectorExpression.__le__ + __lt__ = gb.vector.VectorExpression.__lt__ + __mod__ = gb.vector.VectorExpression.__mod__ + __mul__ = gb.vector.VectorExpression.__mul__ + __ne__ = gb.vector.VectorExpression.__ne__ + __neg__ = gb.vector.VectorExpression.__neg__ + __pow__ = gb.vector.VectorExpression.__pow__ + __radd__ = gb.vector.VectorExpression.__radd__ + __rdivmod__ = gb.vector.VectorExpression.__rdivmod__ + __rfloordiv__ = gb.vector.VectorExpression.__rfloordiv__ + __rmod__ = gb.vector.VectorExpression.__rmod__ + __rmul__ = gb.vector.VectorExpression.__rmul__ + __rpow__ = gb.vector.VectorExpression.__rpow__ + __rsub__ = gb.vector.VectorExpression.__rsub__ + __rtruediv__ = gb.vector.VectorExpression.__rtruediv__ + __rxor__ = gb.vector.VectorExpression.__rxor__ + __sub__ = gb.vector.VectorExpression.__sub__ + __truediv__ = gb.vector.VectorExpression.__truediv__ + __xor__ = gb.vector.VectorExpression.__xor__ + + # bad sugar: + __itruediv__ = gb.vector.VectorExpression.__itruediv__ + __imul__ = gb.vector.VectorExpression.__imul__ + __imatmul__ = gb.vector.VectorExpression.__imatmul__ + __iadd__ = 
gb.vector.VectorExpression.__iadd__ + __iand__ = gb.vector.VectorExpression.__iand__ + __ipow__ = gb.vector.VectorExpression.__ipow__ + __imod__ = gb.vector.VectorExpression.__imod__ + __isub__ = gb.vector.VectorExpression.__isub__ + __ixor__ = gb.vector.VectorExpression.__ixor__ + __ifloordiv__ = gb.vector.VectorExpression.__ifloordiv__ + __ior__ = gb.vector.VectorExpression.__ior__ + + def __init__( + self, + parent, + method_name, + *args, + meta=None, + size=None, + **kwargs, + ): + super().__init__( + parent, + method_name, + *args, + meta=meta, + **kwargs, + ) + if size is None: + size = self.parent._size + self._size = size + + # def __getattr__(self, item): + # return getattr(gb.vector.VectorExpression, item) + + # def construct_output(self, dtype=None, *, name=None): + # if dtype is None: + # dtype = self.dtype + # size = 0 if self._size.is_dOnion else self._size + # return Vector.new(dtype, size, name=name) + + +class VectorIndexExpr(AmbiguousAssignOrExtract): + __slots__ = "_size" + ndim = 1 + output_type = gb.Vector + + def __init__(self, parent, resolved_indexes, size): + super().__init__(parent, resolved_indexes) + self._size = size + + @property + def size(self): + return self._size + + @property + def shape(self): + return (self._size,) + + # Begin auto-generated code: Vector + _get_value = _automethods._get_value + S = gb.vector.VectorIndexExpr.S + V = gb.vector.VectorIndexExpr.V + __and__ = gb.vector.VectorIndexExpr.__and__ + __contains__ = gb.vector.VectorIndexExpr.__contains__ + __or__ = gb.vector.VectorIndexExpr.__or__ + apply = gb.vector.VectorIndexExpr.apply + ewise_add = gb.vector.VectorIndexExpr.ewise_add + ewise_mult = gb.vector.VectorIndexExpr.ewise_mult + isclose = gb.vector.VectorIndexExpr.isclose + isequal = gb.vector.VectorIndexExpr.isequal + nvals = gb.vector.VectorIndexExpr.nvals + reduce = gb.vector.VectorIndexExpr.reduce + vxm = gb.vector.VectorIndexExpr.vxm + + # infix sugar: + __abs__ = gb.vector.VectorIndexExpr.__abs__ + __add__ = gb.vector.VectorIndexExpr.__add__ + __divmod__ = gb.vector.VectorIndexExpr.__divmod__ + __eq__ = gb.vector.VectorIndexExpr.__eq__ + __floordiv__ = gb.vector.VectorIndexExpr.__floordiv__ + __ge__ = gb.vector.VectorIndexExpr.__ge__ + __gt__ = gb.vector.VectorIndexExpr.__gt__ + __invert__ = gb.vector.VectorIndexExpr.__invert__ + __le__ = gb.vector.VectorIndexExpr.__le__ + __lt__ = gb.vector.VectorIndexExpr.__lt__ + __mod__ = gb.vector.VectorIndexExpr.__mod__ + __mul__ = gb.vector.VectorIndexExpr.__mul__ + __ne__ = gb.vector.VectorIndexExpr.__ne__ + __neg__ = gb.vector.VectorIndexExpr.__neg__ + __pow__ = gb.vector.VectorIndexExpr.__pow__ + __radd__ = gb.vector.VectorIndexExpr.__radd__ + __rdivmod__ = gb.vector.VectorIndexExpr.__rdivmod__ + __rfloordiv__ = gb.vector.VectorIndexExpr.__rfloordiv__ + __rmod__ = gb.vector.VectorIndexExpr.__rmod__ + __rmul__ = gb.vector.VectorIndexExpr.__rmul__ + __rpow__ = gb.vector.VectorIndexExpr.__rpow__ + __rsub__ = gb.vector.VectorIndexExpr.__rsub__ + __rtruediv__ = gb.vector.VectorIndexExpr.__rtruediv__ + __rxor__ = gb.vector.VectorIndexExpr.__rxor__ + __sub__ = gb.vector.VectorIndexExpr.__sub__ + __truediv__ = gb.vector.VectorIndexExpr.__truediv__ + __xor__ = gb.vector.VectorIndexExpr.__xor__ + + # bad sugar: + __array__ = gb.vector.VectorIndexExpr.__array__ + __bool__ = gb.vector.VectorIndexExpr.__bool__ + __iadd__ = gb.vector.VectorIndexExpr.__iadd__ + __iand__ = gb.vector.VectorIndexExpr.__iand__ + __ifloordiv__ = gb.vector.VectorIndexExpr.__ifloordiv__ + __imatmul__ = 
gb.vector.VectorIndexExpr.__imatmul__ + __imod__ = gb.vector.VectorIndexExpr.__imod__ + __imul__ = gb.vector.VectorIndexExpr.__imul__ + __ior__ = gb.vector.VectorIndexExpr.__ior__ + __ipow__ = gb.vector.VectorIndexExpr.__ipow__ + __isub__ = gb.vector.VectorIndexExpr.__isub__ + __itruediv__ = gb.vector.VectorIndexExpr.__itruediv__ + __ixor__ = gb.vector.VectorIndexExpr.__ixor__ + + def _chunk_diag( inner_vector, input_range, @@ -611,9 +1014,6 @@ def _chunk_diag( The returned matrix is either empty or contains a piece of the k-diagonal given by inner_vector """ - # This function creates a new matrix chunk with dimensions determined - # by the input k-diagonal vector chunk. The matrix chunk may or may - # not include the k-diagonal chunk vector = inner_vector.value vec_chunk = input_range[0] rows = row_range[0] @@ -747,14 +1147,23 @@ def _build_1D_chunk(inner_vector, out_index_range, fragments, dup_op=None): return InnerVector(inner_vector.value) -def _from_values1D(fragments, index_range, gb_dtype=None): +def _from_values1D(fragments, index_range, dup_op=None, gb_dtype=None): inds = np.concatenate([inds for (inds, _) in fragments]) vals = np.concatenate([vals for (_, vals) in fragments]) size = index_range[0].stop - index_range[0].start - return InnerVector(gb.Vector.from_values(inds, vals, size=size, dtype=gb_dtype)) + if inds.size == 0: + return InnerVector(gb.Vector.new(gb_dtype, size=size)) + return InnerVector(gb.Vector.from_values(inds, vals, size=size, dup_op=dup_op, dtype=gb_dtype)) + +def _pick1D(indices, values, index_range, size): + # validate indices: + indices = np.where(indices < 0, indices + size, indices) + bad_indices = (indices < 0) | (size <= indices) + if np.any(bad_indices): + raise IndexOutOfBound -def _pick1D(indices, values, index_range): + # filter into chunk: index_range = index_range[0] indices_in = (index_range.start <= indices) & (indices < index_range.stop) indices = indices[indices_in] - index_range.start @@ -823,4 +1232,6 @@ def _concat_vector(seq, axis=0): gb.utils._output_types[Vector] = gb.Vector +gb.utils._output_types[VectorExpression] = gb.Vector +gb.utils._output_types[VectorIndexExpr] = gb.Vector from .matrix import InnerMatrix # noqa isort:skip diff --git a/tests/from_grblas2/conftest.py b/tests/from_grblas2/conftest.py index 5d0e635..a9d4632 100644 --- a/tests/from_grblas2/conftest.py +++ b/tests/from_grblas2/conftest.py @@ -1,5 +1,55 @@ +import atexit +import functools +import itertools + +import numpy as np +import pytest + +import grblas as gb + + +def pytest_configure(config): + backend = config.getoption("--backend", "suitesparse") + blocking = config.getoption("--blocking", True) + record = config.getoption("--record", False) + mapnumpy = config.getoption("--mapnumpy", None) + if mapnumpy is None: # pragma: no branch + mapnumpy = np.random.rand() < 0.5 # heh + + gb.config.set(autocompute=False, mapnumpy=mapnumpy) + + gb.init(backend, blocking=blocking) + print( + f'Running tests with "{backend}" backend, blocking={blocking}, ' + f"record={record}, mapnumpy={mapnumpy}" + ) + if record: + rec = gb.Recorder() + rec.start() + + def save_records(): + with open("record.txt", "w") as f: # pragma: no cover + f.write("\n".join(rec.data)) + + # I'm sure there's a `pytest` way to do this... 
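+        # One "pytest way" (sketch, untested assumption) would be:
+        #
+        #     config.add_cleanup(save_records)
+        #
+        # which runs at the end of the test session instead of at interpreter exit.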
+ atexit.register(save_records) + for mod in [gb.unary, gb.binary, gb.monoid, gb.semiring, gb.op]: + for name in list(mod._delayed): + getattr(mod, name) + + +def pytest_runtest_setup(item): + if "slow" in item.keywords and not item.config.getoption("--runslow", True): # pragma: no cover + pytest.skip("need --runslow option to run") + + def autocompute(func): - return func + @functools.wraps(func) + def inner(*args, **kwargs): + with gb.config.set(autocompute=True): + return func(*args, **kwargs) + + return inner def compute(val): diff --git a/tests/from_grblas2/test_matrix.py b/tests/from_grblas2/test_matrix.py index 9d40744..d37b69a 100644 --- a/tests/from_grblas2/test_matrix.py +++ b/tests/from_grblas2/test_matrix.py @@ -4,6 +4,7 @@ import sys import weakref +import dask.array as da import dask_grblas import grblas import numpy as np @@ -22,6 +23,7 @@ from .conftest import autocompute, compute from dask_grblas import Matrix, Scalar, Vector # isort:skip +from dask_grblas.base import is_DOnion, like_dOnion @pytest.fixture @@ -42,9 +44,33 @@ def A(): return Matrix.from_values(*data) +@pytest.fixture +def A_dask(): + # 0 1 2 3 4 5 6 + # 0 [- 2 - 3 - - -] + # 1 [- - - - 8 - 4] + # 2 [- - - - - 1 -] + # 3 [3 - 3 - - - -] + # 4 [- - - - - 7 -] + # 5 [- - 1 - - - -] + # 6 [- - 5 7 3 - -] + data = [ + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4], + ] + data = [da.from_array(np.array(a, dtype=np.int64)) for a in data] + return Matrix.from_values(*data) + + +@pytest.fixture +def As(A, A_dask): + return [A, A_dask] + + @pytest.fixture def A_chunks(): - return [7, 4, 3] + return [7, 3] @pytest.fixture @@ -53,6 +79,18 @@ def v(): return Vector.from_values(*data) +@pytest.fixture +def v_dask(): + data = [[1, 3, 4, 6], [1, 1, 2, 0]] + data = [da.from_array(a) for a in data] + return Vector.from_values(*data) + + +@pytest.fixture +def vs(v, v_dask): + return [v, v_dask] + + def test_new(): C = Matrix.new(dtypes.INT8, 17, 12) assert C.dtype == "INT8" @@ -61,30 +99,45 @@ def test_new(): assert C.ncols == 12 -def test_dup(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - C = A.dup() - assert C is not A - assert C.dtype == A.dtype - assert C.nvals == A.nvals - assert C.nrows == A.nrows - assert C.ncols == A.ncols - # Ensure they are not the same backend object - A[0, 0] = 1000 - assert C[0, 0].value != 1000 - # extended functionality - D = Matrix.from_values([0, 1], [0, 1], [0, 2.5], dtype=dtypes.FP64) - E = D.dup(dtype=dtypes.INT64) - assert E.isequal( - Matrix.from_values([0, 1], [0, 1], [0, 2], dtype=dtypes.INT64), check_dtype=True +def test_dup(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + C = A.dup() + assert C is not A + assert C.dtype == A.dtype + assert C.nvals == A.nvals + assert C.nrows == A.nrows + assert C.ncols == A.ncols + # Ensure they are not the same backend object + A[0, 0] = 1000 + assert A[0, 0].new() == 1000 + assert C[0, 0].new() != 1000 + + # extended functionality + Ds = [Matrix.from_values([0, 1], [0, 1], [0, 2.5], dtype=dtypes.FP64)] + Ds.append( + Matrix.from_values( + da.from_array([0, 1]), da.from_array([0, 1]), da.from_array([0, 2.5]), dtype=dtypes.FP64 ) - E = D.dup(mask=D.V) - assert E.isequal(Matrix.from_values([1], [1], [2.5], dtype=dtypes.FP64), check_dtype=True) - E = D.dup(dtype=dtypes.INT64, mask=D.V) - assert E.isequal(Matrix.from_values([1], [1], [2], dtype=dtypes.INT64), 
check_dtype=True) + ) + for D_ in Ds: + for chunks in A_chunks: + D = D_.dup() + D.rechunk(chunks=chunks, inplace=True) + E = D.dup(dtype=dtypes.INT64) + assert E.isequal( + Matrix.from_values([0, 1], [0, 1], [0, 2], dtype=dtypes.INT64), check_dtype=True + ) + E = D.dup(mask=D.V) + assert E.isequal( + Matrix.from_values([1], [1], [2.5], dtype=dtypes.FP64), check_dtype=True + ) + E = D.dup(dtype=dtypes.INT64, mask=D.V) + assert E.isequal( + Matrix.from_values([1], [1], [2], dtype=dtypes.INT64), check_dtype=True + ) def test_from_values(): @@ -103,7 +156,7 @@ def test_from_values(): assert C3.ncols == 3 assert C3.nvals == 2 # duplicates were combined assert C3.dtype == int - assert C3[1, 1].value == 6 # 2*3 + assert C3[1, 1].new() == 6 # 2*3 C3monoid = Matrix.from_values([0, 1, 1], [2, 1, 1], [1, 2, 3], nrows=10, dup_op=monoid.times) assert C3.isequal(C3monoid) @@ -139,139 +192,244 @@ def test_from_values(): Matrix.from_values([0], [1, 2], [0]) -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_from_values_scalar(): - C = Matrix.from_values([0, 1, 3], [1, 1, 2], 7) +def test_from_values_dask(): + rows = da.from_array(np.array([0, 1, 3])) + cols = da.from_array(np.array([1, 1, 2])) + vals = da.from_array(np.array([True, False, True])) + C = Matrix.from_values(rows, cols, vals) assert C.nrows == 4 assert C.ncols == 3 assert C.nvals == 3 - assert C.dtype == dtypes.INT64 - assert C.ss.is_iso - assert C.reduce_scalar(monoid.any).new() == 7 + assert C.dtype == bool + + vals = da.from_array(np.array([12.3, 12.4, 12.5])) + C2 = Matrix.from_values(rows, cols, vals, nrows=17, ncols=3) + assert C2.nrows == 17 + assert C2.ncols == 3 + assert C2.nvals == 3 + assert C2.dtype == float + + rows = da.from_array(np.array([0, 1, 1])) + cols = da.from_array(np.array([2, 1, 1])) + vals = da.from_array(np.array([1, 2, 3], dtype=np.int64)) + C3 = Matrix.from_values(rows, cols, vals, nrows=10, dup_op=binary.times) + assert C3.nrows == 10 + assert C3.ncols == 3 + assert C3.nvals == 2 # duplicates were combined + assert C3.dtype == int + assert C3[1, 1].new() == 6 # 2*3 + C3monoid = Matrix.from_values(rows, cols, vals, nrows=10, dup_op=monoid.times) + assert C3.isequal(C3monoid) + + vals = da.from_array(np.array([True, True, True])) + with pytest.raises(ValueError, match="Duplicate indices found"): + # Duplicate indices requires a dup_op + Matrix.from_values(rows, cols, vals).compute() + + rows = da.from_array(np.array([0, 1, 3])) + cols = da.from_array(np.array([1, 1, 2])) + vals = da.from_array(np.array([12.3, 12.4, 12.5])) + with pytest.raises(IndexOutOfBound): + # Specified ncols can't hold provided indexes + Matrix.from_values(rows, cols, vals, nrows=17, ncols=2).compute() + + empty_da = da.from_array(np.array([])) + with pytest.raises(ValueError, match="No row indices provided. Unable to infer nrows."): + Matrix.from_values(empty_da, empty_da, empty_da) + + # Changed: Assume empty value is float64 (like numpy) + # with pytest.raises(ValueError, match="No vals provided. 
Unable to determine type"):
+    empty1 = Matrix.from_values(empty_da, empty_da, empty_da, nrows=3, ncols=4)
+    assert empty1.dtype == dtypes.FP64
+    assert empty1.nrows == 3
+    assert empty1.ncols == 4
+    assert empty1.nvals == 0
+
+    with pytest.raises(ValueError, match="Unable to infer"):
+        Matrix.from_values(empty_da, empty_da, empty_da, dtype=dtypes.INT64)
+
+    zero_da = da.from_array(np.array([0]))
+    with pytest.raises(ValueError, match="Unable to infer"):
+        # could also raise b/c rows and columns are different sizes
+        Matrix.from_values(zero_da, empty_da, zero_da, dtype=dtypes.INT64)
+
+    C4 = Matrix.from_values(empty_da, empty_da, empty_da, nrows=3, ncols=4, dtype=dtypes.INT64)
+    C5 = Matrix.new(dtypes.INT64, nrows=3, ncols=4)
+    assert C4.isequal(C5, check_dtype=True)
+
+    cols = da.from_array(np.array([1, 2]))
+    with pytest.raises(
+        ValueError, match="`rows` and `columns` and `values` lengths must match: 1, 2, 1"
+    ):
+        Matrix.from_values(zero_da, cols, zero_da)
+
+
+def test_from_values_scalar():
+    Cs = [Matrix.from_values([0, 1, 3], [1, 1, 2], 7)]
+    Cs.append(
+        Matrix.from_values(
+            da.from_array([0, 1, 3]),
+            da.from_array([1, 1, 2]),
+            7,
+        )
+    )
+    for C in Cs:
+        assert C.nrows == 4
+        assert C.ncols == 3
+        assert C.nvals == 3
+        assert C.dtype == dtypes.INT64
+        # assert C.ss.is_iso
+        assert C.reduce_scalar(monoid.any).new() == 7

     # iso trumps duplicates
-    C = Matrix.from_values([0, 1, 3, 0], [1, 1, 2, 1], 7)
-    assert C.nrows == 4
-    assert C.ncols == 3
-    assert C.nvals == 3
-    assert C.dtype == dtypes.INT64
-    assert C.ss.is_iso
-    assert C.reduce_scalar(monoid.any).new() == 7
-    with pytest.raises(ValueError, match="dup_op must be None"):
-        Matrix.from_values([0, 1, 3, 0], [1, 1, 2, 1], 7, dup_op=binary.plus)
+    Cs = [Matrix.from_values([0, 1, 3, 0], [1, 1, 2, 1], 7)]
+    Cs.append(
+        Matrix.from_values(
+            da.from_array([0, 1, 3, 0]),
+            da.from_array([1, 1, 2, 1]),
+            7,
+        )
+    )
+    for C in Cs:
+        assert C.nrows == 4
+        assert C.ncols == 3
+        assert C.nvals == 3
+        assert C.dtype == dtypes.INT64
+        # assert C.ss.is_iso
+        assert C.reduce_scalar(monoid.any).new() == 7
+        with pytest.raises(ValueError, match="dup_op must be None"):
+            Matrix.from_values([0, 1, 3, 0], [1, 1, 2, 1], 7, dup_op=binary.plus)
+
+
+def test_clear(As, A_chunks):
+    for A_ in As:
+        for chunks in A_chunks:
+            A = A_.dup()
+            A.rechunk(chunks=chunks, inplace=True)
+            A.clear()
+            assert A.nvals == 0
+            assert A.nrows == 7
+            assert A.ncols == 7
-def test_clear(A, A_chunks):
-    A_ = A
-    for chunks in A_chunks:
-        A = A_.dup()
-        A.rechunk(chunks=chunks, inplace=True)
-        A.clear()
-        assert A.nvals == 0
-        assert A.nrows == 7
-        assert A.ncols == 7
+def test_resize(As, A_chunks):
+    for A_ in As:
+        for chunks in A_chunks:
+            A = A_.dup()
+            A.rechunk(chunks=chunks, inplace=True)
+            assert A.nrows == 7
+            assert A.ncols == 7
+            assert A.nvals.compute() == 12
+            A.resize(10, 11)
+            assert A.nrows == 10
+            assert A.ncols == 11
+            assert A.nvals == 12
+            assert compute(A[9, 9].new().value) is None
+            A.resize(4, 1)
+            assert A.nrows == 4
+            assert A.ncols == 1
+            assert A.nvals == 1
+
+        A = A_.dup()
+        assert A.nrows == 7
+        assert A.ncols == 7
+        assert A.nvals == 12
+        A.resize(6, 11, chunks=4)
+        assert A.nrows == 6
+        assert A.ncols == 11
+        assert A.nvals == 9
+        if not A.is_dOnion:
+            assert A._delayed.chunks == ((4, 2), (4, 4, 3))
+        else:
+            assert A._delayed.deep_extract(None, lambda x: x._delayed.chunks) == (
+                (4, 2),
+                (4, 4, 3),
+            )
+        assert compute(A[3, 2].new().value) == 3
+        assert compute(A[5, 7].new().value) is None
-def test_resize(A, A_chunks):
-    A_ = A
-    for chunks in A_chunks:
-        A = A_.dup()
-        A.rechunk(chunks=chunks, 
inplace=True) - assert A.nrows == 7 - assert A.ncols == 7 - assert A.nvals.compute() == 12 - A.resize(10, 11) - assert A.nrows == 10 - assert A.ncols == 11 - assert A.nvals.compute() == 12 - assert compute(A[9, 9].value) is None - A.resize(4, 1) - assert A.nrows == 4 - assert A.ncols == 1 - assert A.nvals.compute() == 1 + A = A_.dup() + A.resize(11, 3, chunks=4) + assert A.nrows == 11 + assert A.ncols == 3 + assert A.nvals == 5 + if type(A._delayed) is da.Array: + assert A._delayed.chunks == ((4, 4, 3), (3,)) + else: + assert A._delayed.deep_extract(None, lambda x: x._delayed.chunks) == ( + (4, 4, 3), + (3,), + ) + assert compute(A[3, 2].new().value) == 3 + assert compute(A[7, 2].new().value) is None - A = A_.dup() - assert A.nrows == 7 - assert A.ncols == 7 - assert A.nvals.compute() == 12 - A.resize(6, 11, chunks=4) - assert A.nrows == 6 - assert A.ncols == 11 - assert A.nvals.compute() == 9 - assert A._delayed.chunks == ((4, 2), (4, 4, 3)) - assert compute(A[3, 2].value) == 3 - assert compute(A[5, 7].value) is None +def test_rechunk(As, A_chunks): + for A_ in As: A = A_.dup() - A.resize(11, 3, chunks=4) - assert A.nrows == 11 - assert A.ncols == 3 - assert A.nvals.compute() == 5 - assert A._delayed.chunks == ((4, 4, 3), (3,)) - assert compute(A[3, 2].value) == 3 - assert compute(A[7, 2].value) is None - + for chunks in A_chunks + A_chunks[::-1]: + A.rechunk(chunks=chunks, inplace=True) + assert A.nrows == 7 + assert A.ncols == 7 + assert A.nvals == 12 -def test_rechunk(A, A_chunks): - A_ = A.dup() - for chunks in A_chunks + A_chunks[::-1]: - A_.rechunk(chunks=chunks, inplace=True) - assert A_.nrows == 7 - assert A_.ncols == 7 - assert A_.nvals.compute() == 12 +def test_nrows(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert A.nrows == 7 -def test_nrows(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert A.nrows == 7 +def test_ncols(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert A.ncols == 7 -def test_ncols(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert A.ncols == 7 +def test_nvals(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert A.nvals == 12 -def test_nvals(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert A.nvals == 12 +def test_build(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert A.nvals == 12 + A.clear() + A.build([0, 6], [0, 1], [1, 2]) + assert A.nvals == 2 + with pytest.raises(OutputNotEmpty): + A.build([1, 5], [2, 3], [3, 4]) + assert A.nvals == 2 # nothing should be modified + # We can clear though + A.build([1, 2, 5], [1, 2, 3], [2, 3, 4], clear=True) + assert A.nvals == 3 + A.clear() + if is_DOnion(A._delayed): + A.build([0, 11], [0, 0], [1, 1]) + with pytest.raises(IndexOutOfBound): + A.compute() + else: + with pytest.raises(IndexOutOfBound): + A.build([0, 11], [0, 0], [1, 1]) -def test_build(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert A.nvals == 12 - A.clear() - A.build([0, 6], [0, 1], [1, 2]) - assert A.nvals == 2 - with pytest.raises(OutputNotEmpty): - A.build([1, 5], [2, 3], [3, 4]) - assert A.nvals == 2 # nothing should be 
modified - # We can clear though - A.build([1, 2, 5], [1, 2, 3], [2, 3, 4], clear=True) - assert A.nvals == 3 - A.clear() - with pytest.raises(IndexOutOfBound): - A.build([0, 11], [0, 0], [1, 1]) - B = Matrix.new(int, nrows=2, ncols=2) - B.build([0, 11], [0, 0], [1, 1], nrows=12) - assert B.isequal(Matrix.from_values([0, 11], [0, 0], [1, 1], ncols=2)) - C = Matrix.new(int, nrows=2, ncols=2) - C.build([0, 0], [0, 11], [1, 1], ncols=12) - assert C.isequal(Matrix.from_values([0, 0], [0, 11], [1, 1], nrows=2)) + B = Matrix.new(int, nrows=2, ncols=2) + B.build([0, 11], [0, 0], [1, 1], nrows=12) + assert B.isequal(Matrix.from_values([0, 11], [0, 0], [1, 1], ncols=2)) + C = Matrix.new(int, nrows=2, ncols=2) + C.build([0, 0], [0, 11], [1, 1], ncols=12) + assert C.isequal(Matrix.from_values([0, 0], [0, 11], [1, 1], nrows=2)) -@pytest.mark.xfail("'Needs investigation'", strict=True) def test_build_scalar(A, A_chunks): A_ = A for chunks in A_chunks: @@ -283,7 +441,7 @@ def test_build_scalar(A, A_chunks): A.clear() A.ss.build_scalar([0, 6], [0, 1], 1) assert A.nvals == 2 - assert A.ss.is_iso + # assert A.ss.is_iso A.clear() with pytest.raises(ValueError, match="lengths must match"): A.ss.build_scalar([0, 6], [0, 1, 2], 1) @@ -291,116 +449,117 @@ def test_build_scalar(A, A_chunks): A.ss.build_scalar([0, 5], [0, 1], None) -def test_extract_values(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - rows, cols, vals = A.to_values(dtype=int) - rcv = set( - zip( - rows.compute(), - cols.compute(), - vals.compute(), +def test_extract_values(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + rows, cols, vals = A.to_values(dtype=int) + rcv = set( + zip( + rows.compute(), + cols.compute(), + vals.compute(), + ) ) - ) - expected = set( - zip( - (0, 0, 1, 1, 2, 3, 3, 4, 5, 6, 6, 6), - (1, 3, 4, 6, 5, 0, 2, 5, 2, 2, 3, 4), - (2, 3, 8, 4, 1, 3, 3, 7, 1, 5, 7, 3), + expected = set( + zip( + (0, 0, 1, 1, 2, 3, 3, 4, 5, 6, 6, 6), + (1, 3, 4, 6, 5, 0, 2, 5, 2, 2, 3, 4), + (2, 3, 8, 4, 1, 3, 3, 7, 1, 5, 7, 3), + ) ) - ) - assert rcv == expected - assert rows.dtype == np.uint64 - assert cols.dtype == np.uint64 - assert vals.dtype == np.int64 - Trows, Tcols, Tvals = A.T.to_values(dtype=float) - np.testing.assert_array_equal(rows, Tcols) - np.testing.assert_array_equal(cols, Trows) - np.testing.assert_array_equal(vals, Tvals) - assert Trows.dtype == np.uint64 - assert Tcols.dtype == np.uint64 - assert Tvals.dtype == np.float64 - - -def test_extract_element(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert A[3, 0].new() == 3 - assert A[1, 6].new() == 4 - assert A[1, 6].value == 4 - assert A.T[6, 1].value == 4 - s = A[0, 0].new() - assert compute(s.value) is None - assert s.dtype == "INT64" - s = A[1, 6].new(dtype=float) - assert s.value == 4.0 - assert s.dtype == "FP64" - - -def test_set_element(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert compute(A[1, 1].value) is None - assert A[3, 0].value == 3 - A[1, 1].update(21) - A[3, 0] << -5 - assert A[1, 1].value == 21 - assert A[3, 0].new() == -5 - - -def test_remove_element(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert A[3, 0].value == 3 - del A[3, 0] - assert compute(A[3, 0].value) is None - assert A[6, 3].value == 7 - with pytest.raises(TypeError, match="Remove 
Element only supports"): - del A[3:5, 3] - - -def test_mxm(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - C = A.mxm(A, semiring.plus_times).new() - result = Matrix.from_values( - [0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 6], - [0, 2, 4, 6, 2, 3, 4, 5, 2, 1, 3, 5, 2, 5, 0, 2, 5], - [9, 9, 16, 8, 20, 28, 12, 56, 1, 6, 9, 3, 7, 1, 21, 21, 26], - ) - assert C.isequal(result) + assert rcv == expected + assert rows.dtype == np.uint64 + assert cols.dtype == np.uint64 + assert vals.dtype == np.int64 + Trows, Tcols, Tvals = A.T.to_values(dtype=float) + np.testing.assert_array_equal(rows.compute(), Tcols.compute()) + np.testing.assert_array_equal(cols.compute(), Trows.compute()) + np.testing.assert_array_equal(vals.compute(), Tvals.compute()) + assert Trows.dtype == np.uint64 + assert Tcols.dtype == np.uint64 + assert Tvals.dtype == np.float64 + + +def test_extract_element(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert A[3, 0].new() == 3 + assert A[1, 6].new() == 4 + with pytest.raises(TypeError, match="enable automatic"): + A[1, 6].value + assert A.T[6, 1].new() == 4 + s = A[0, 0].new() + assert compute(s.value) is None + assert s.dtype == "INT64" + s = A[1, 6].new(dtype=float) + assert s.value == 4.0 + assert s.dtype == "FP64" + + +def test_set_element(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert compute(A[1, 1].new().value) is None + assert A[3, 0].new() == 3 + A[1, 1].update(21) + A[3, 0] << -5 + assert A[1, 1].new() == 21 + assert A[3, 0].new() == -5 + + +def test_remove_element(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert A[3, 0].new() == 3 + del A[3, 0] + assert compute(A[3, 0].new().value) is None + assert A[6, 3].new() == 7 + with pytest.raises(TypeError, match="Remove Element only supports"): + del A[3:5, 3] + + +def test_mxm(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + C = A.mxm(A, semiring.plus_times).new() + result = Matrix.from_values( + [0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 3, 4, 5, 6, 6, 6], + [0, 2, 4, 6, 2, 3, 4, 5, 2, 1, 3, 5, 2, 5, 0, 2, 5], + [9, 9, 16, 8, 20, 28, 12, 56, 1, 6, 9, 3, 7, 1, 21, 21, 26], + ) + assert C.isequal(result) -def test_mxm_transpose(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - C = A.dup() - C << A.mxm(A.T, semiring.plus_times) - result = Matrix.from_values( - [0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6], - [0, 6, 1, 6, 2, 4, 3, 5, 6, 2, 4, 3, 5, 6, 0, 1, 3, 5, 6], - [13, 21, 80, 24, 1, 7, 18, 3, 15, 7, 49, 3, 1, 5, 21, 24, 15, 5, 83], - ) - assert C.isequal(result) - C << A.T.mxm(A, semiring.plus_times) - result2 = Matrix.from_values( - [0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 6], - [0, 2, 1, 3, 0, 2, 3, 4, 1, 2, 3, 4, 2, 3, 4, 6, 5, 4, 6], - [9, 9, 4, 6, 9, 35, 35, 15, 6, 35, 58, 21, 15, 21, 73, 32, 50, 32, 16], - ) - assert C.isequal(result2) +def test_mxm_transpose(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + C = A.dup() + C << A.mxm(A.T, semiring.plus_times) + result = Matrix.from_values( + [0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6], + [0, 6, 1, 6, 2, 4, 3, 5, 6, 2, 4, 3, 5, 6, 0, 1, 3, 5, 6], + [13, 21, 80, 24, 1, 7, 18, 3, 
15, 7, 49, 3, 1, 5, 21, 24, 15, 5, 83], + ) + assert C.isequal(result) + C << A.T.mxm(A, semiring.plus_times) + result2 = Matrix.from_values( + [0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 6], + [0, 2, 1, 3, 0, 2, 3, 4, 1, 2, 3, 4, 2, 3, 4, 6, 5, 4, 6], + [9, 9, 4, 6, 9, 35, 35, 15, 6, 35, 58, 21, 15, 21, 73, 32, 50, 32, 16], + ) + assert C.isequal(result2) def test_mxm_nonsquare(): @@ -408,431 +567,629 @@ def test_mxm_nonsquare(): B = Matrix.from_values([0, 2, 4], [0, 0, 0], [10, 20, 30], nrows=5, ncols=1) C = Matrix.new(A.dtype, nrows=1, ncols=1) C << A.mxm(B, semiring.max_plus) - assert C[0, 0].value == 33 + assert C[0, 0].new() == 33 C1 = A.mxm(B, semiring.max_plus).new() assert C1.isequal(C) C2 = A.T.mxm(B.T, semiring.max_plus).new() assert C2.nrows == 5 assert C2.ncols == 5 - -def test_mxm_mask(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - val_mask = Matrix.from_values([0, 3, 4], [2, 3, 2], [True, True, True], nrows=7, ncols=7) - struct_mask = Matrix.from_values([0, 3, 4], [2, 3, 2], [1, 0, 0], nrows=7, ncols=7) - C = A.dup() - C(val_mask.V) << A.mxm(A, semiring.plus_times) - result = Matrix.from_values( - [0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 6, 6, 6], - [1, 2, 3, 4, 6, 5, 0, 2, 3, 2, 5, 2, 2, 3, 4], - [2, 9, 3, 8, 4, 1, 3, 3, 9, 7, 7, 1, 5, 7, 3], - ) - assert C.isequal(result) - C = A.dup() - C(~val_mask.V) << A.mxm(A, semiring.plus_times) - result2 = Matrix.from_values( - [0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 5, 6, 6, 6], - [0, 4, 6, 2, 3, 4, 5, 2, 1, 5, 5, 0, 2, 5], - [9, 16, 8, 20, 28, 12, 56, 1, 6, 3, 1, 21, 21, 26], - ) - assert C.isequal(result2) - C = A.dup() - C(struct_mask.S, replace=True).update(A.mxm(A, semiring.plus_times)) - result3 = Matrix.from_values([0, 3, 4], [2, 3, 2], [9, 9, 7], nrows=7, ncols=7) - assert C.isequal(result3) - C2 = A.mxm(A, semiring.plus_times).new(mask=struct_mask.S) - assert C2.isequal(result3) - with pytest.raises(TypeError, match="Mask must indicate"): - A.mxm(A).new(mask=struct_mask) - - -def test_mxm_accum(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - A(binary.plus) << A.mxm(A, semiring.plus_times) - # fmt: off - result = Matrix.from_values( - [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6], - [0, 1, 2, 3, 4, 6, 2, 3, 4, 5, 6, 2, 5, 0, 1, 2, 3, 5, 2, 5, 2, 5, 0, 2, 3, 4, 5], - [9, 2, 9, 3, 16, 8, 20, 28, 20, 56, 4, 1, 1, 3, 6, 3, 9, 3, 7, 7, 1, 1, 21, 26, 7, 3, 26], - ) - # fmt: on - assert A.isequal(result) - - -def test_mxv(A, A_chunks, v): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - w = A.mxv(v, semiring.plus_times).new() - result = Vector.from_values([0, 1, 6], [5, 16, 13]) - assert w.isequal(result) + A = Matrix.from_values( + da.from_array([0, 0, 0]), + da.from_array([0, 2, 4]), + da.from_array([1, 2, 3]), + nrows=1, + ncols=5, + ) + B = Matrix.from_values([0, 2, 4], [0, 0, 0], [10, 20, 30], nrows=5, ncols=1) + C = Matrix.new(A.dtype, nrows=1, ncols=1) + C << A.mxm(B, semiring.max_plus) + assert C[0, 0].new() == 33 + C1 = A.mxm(B, semiring.max_plus).new() + assert C1.isequal(C) + C2 = A.T.mxm(B.T, semiring.max_plus).new() + assert C2.nrows == 5 + assert C2.ncols == 5 -def test_ewise_mult(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - # Binary, Monoid, and Semiring - B = Matrix.from_values([0, 0, 5], [1, 2, 2], [5, 4, 8], nrows=7, ncols=7) - result = Matrix.from_values([0, 
5], [1, 2], [10, 8], nrows=7, ncols=7) - C = A.ewise_mult(B, binary.times).new() - assert C.isequal(result) - C() << A.ewise_mult(B, monoid.times) - assert C.isequal(result) - with pytest.raises(TypeError, match="Expected type: BinaryOp, Monoid"): - A.ewise_mult(B, semiring.plus_times) - - -def test_ewise_add(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - # Binary, Monoid, and Semiring - B = Matrix.from_values([0, 0, 5], [1, 2, 2], [5, 4, 8], nrows=7, ncols=7) - result = Matrix.from_values( - [0, 3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [2, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [4, 3, 5, 3, 8, 5, 3, 7, 8, 3, 1, 7, 4], - ) - with pytest.raises(TypeError, match="require_monoid"): - A.ewise_add(B, binary.second) - # surprising that SECOND(x, empty) == x, which is why user - # must opt-in to using binary ops in ewise_add - C = A.ewise_add(B, binary.second, require_monoid=False).new() - assert C.isequal(result) - C << A.ewise_add(B, monoid.max) - assert C.isequal(result) - C << A.ewise_add(B, binary.max) - assert C.isequal(result) - with pytest.raises(TypeError, match="Expected type: Monoid"): - A.ewise_add(B, semiring.max_minus) - - -def test_extract(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - C = Matrix.new(A.dtype, 3, 4) - result = Matrix.from_values( - [0, 0, 1, 2, 2, 2], [0, 2, 1, 1, 2, 3], [2, 3, 3, 5, 7, 3], nrows=3, ncols=4 - ) - C << A[[0, 3, 6], [1, 2, 3, 4]] - assert C.isequal(result) - C << A[0::3, 1:5] - assert C.isequal(result) - C << A[[0, 3, 6], 1:5:1] - assert C.isequal(result) - C2 = A[[0, 3, 6], [1, 2, 3, 4]].new() - assert C2.isequal(result) +def test_mxm_mask(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + val_mask = Matrix.from_values( + [0, 3, 4], [2, 3, 2], [True, True, True], nrows=7, ncols=7 + ) + struct_mask = Matrix.from_values([0, 3, 4], [2, 3, 2], [1, 0, 0], nrows=7, ncols=7) + C = A.dup() + C(val_mask.V) << A.mxm(A, semiring.plus_times) + result = Matrix.from_values( + [0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 5, 6, 6, 6], + [1, 2, 3, 4, 6, 5, 0, 2, 3, 2, 5, 2, 2, 3, 4], + [2, 9, 3, 8, 4, 1, 3, 3, 9, 7, 7, 1, 5, 7, 3], + ) + assert C.isequal(result) + C = A.dup() + C(~val_mask.V) << A.mxm(A, semiring.plus_times) + result2 = Matrix.from_values( + [0, 0, 0, 1, 1, 1, 1, 2, 3, 3, 5, 6, 6, 6], + [0, 4, 6, 2, 3, 4, 5, 2, 1, 5, 5, 0, 2, 5], + [9, 16, 8, 20, 28, 12, 56, 1, 6, 3, 1, 21, 21, 26], + ) + assert C.isequal(result2) + C = A.dup() + C(struct_mask.S, replace=True).update(A.mxm(A, semiring.plus_times)) + result3 = Matrix.from_values([0, 3, 4], [2, 3, 2], [9, 9, 7], nrows=7, ncols=7) + assert C.isequal(result3) + C2 = A.mxm(A, semiring.plus_times).new(mask=struct_mask.S) + assert C2.isequal(result3) + with pytest.raises(TypeError, match="Mask must indicate"): + A.mxm(A).new(mask=struct_mask) + + +def test_mxm_accum(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + A(binary.plus) << A.mxm(A, semiring.plus_times) + # fmt: off + result = Matrix.from_values( + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6], + [0, 1, 2, 3, 4, 6, 2, 3, 4, 5, 6, 2, 5, 0, 1, 2, 3, 5, 2, 5, 2, 5, 0, 2, 3, 4, 5], + [9, 2, 9, 3, 16, 8, 20, 28, 20, 56, 4, 1, 1, 3, 6, 3, 9, 3, 7, 7, 1, 1, 21, 26, 7, 3, 26], + ) + # fmt: on + assert A.isequal(result) -def test_extract_row(A, A_chunks): - A_ = A - for chunks in A_chunks: 
- A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - w = Vector.new(A.dtype, 3) - result = Vector.from_values([1, 2], [5, 3], size=3) - w << A[6, [0, 2, 4]] - assert w.isequal(result) - w << A[6, :5:2] - assert w.isequal(result) - w << A.T[[0, 2, 4], 6] - assert w.isequal(result) - w2 = A[6, [0, 2, 4]].new() - assert w2.isequal(result) - with pytest.raises(TypeError): - # Should be list, not tuple (although tuple isn't so bad) - A[6, (0, 2, 4)] - w3 = A[6, np.array([0, 2, 4])].new() - assert w3.isequal(result) - with pytest.raises(TypeError, match="Invalid dtype"): - A[6, np.array([0, 2, 4], dtype=float)] - with pytest.raises(TypeError, match="Invalid number of dimensions"): - A[6, np.array([[0, 2, 4]])] +def test_mxv(As, A_chunks, v): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + w = A.mxv(v, semiring.plus_times).new() + result = Vector.from_values([0, 1, 6], [5, 16, 13]) + assert w.isequal(result) -def test_extract_column(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - w = Vector.new(A.dtype, 3) - result = Vector.from_values([1, 2], [3, 1], size=3) - w << A[[1, 3, 5], 2] - assert w.isequal(result) - w << A[1:6:2, 2] - assert w.isequal(result) - w << A.T[2, [1, 3, 5]] - assert w.isequal(result) - w2 = A[1:6:2, 2].new() - assert w2.isequal(result) +def test_ewise_mult(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + # Binary, Monoid, and Semiring + B = Matrix.from_values([0, 0, 5], [1, 2, 2], [5, 4, 8], nrows=7, ncols=7) + result = Matrix.from_values([0, 5], [1, 2], [10, 8], nrows=7, ncols=7) + C = A.ewise_mult(B, binary.times).new() + assert C.isequal(result) + C() << A.ewise_mult(B, monoid.times) + assert C.isequal(result) + with pytest.raises(TypeError, match="Expected type: BinaryOp, Monoid"): + A.ewise_mult(B, semiring.plus_times) + + +def test_ewise_add(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + # Binary, Monoid, and Semiring + B = Matrix.from_values([0, 0, 5], [1, 2, 2], [5, 4, 8], nrows=7, ncols=7) + result = Matrix.from_values( + [0, 3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [2, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [4, 3, 5, 3, 8, 5, 3, 7, 8, 3, 1, 7, 4], + ) + with pytest.raises(TypeError, match="require_monoid"): + A.ewise_add(B, binary.second) + # surprising that SECOND(x, empty) == x, which is why user + # must opt-in to using binary ops in ewise_add + C = A.ewise_add(B, binary.second, require_monoid=False).new() + assert C.isequal(result) + C << A.ewise_add(B, monoid.max) + assert C.isequal(result) + C << A.ewise_add(B, binary.max) + assert C.isequal(result) + with pytest.raises(TypeError, match="Expected type: Monoid"): + A.ewise_add(B, semiring.max_minus) + + +def test_extract(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + C = Matrix.new(A.dtype, 3, 4) + result = Matrix.from_values( + [0, 0, 1, 2, 2, 2], [0, 2, 1, 1, 2, 3], [2, 3, 3, 5, 7, 3], nrows=3, ncols=4 + ) + C << A[[0, 3, 6], [1, 2, 3, 4]] + assert C.isequal(result) + C << A[0::3, 1:5] + assert C.isequal(result) + C << A[[0, 3, 6], 1:5:1] + assert C.isequal(result) + C2 = A[[0, 3, 6], [1, 2, 3, 4]].new() + assert C2.isequal(result) + + +def test_extract_row(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + w = Vector.new(A.dtype, 3) + 
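# row 6 of the 7x7 fixture holds 5 and 3 at columns 2 and 4, so extracting
+            # columns [0, 2, 4] leaves a size-3 vector with entries at positions 1 and 2
+            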
result = Vector.from_values([1, 2], [5, 3], size=3) + w << A[6, [0, 2, 4]] + assert w.isequal(result) + w << A[6, :5:2] + assert w.isequal(result) + w << A.T[[0, 2, 4], 6] + assert w.isequal(result) + w2 = A[6, [0, 2, 4]].new() + assert w2.isequal(result) + with pytest.raises(TypeError): + # Should be list, not tuple (although tuple isn't so bad) + A[6, (0, 2, 4)] + w3 = A[6, np.array([0, 2, 4])].new() + assert w3.isequal(result) + with pytest.raises(TypeError, match="Invalid dtype"): + A[6, np.array([0, 2, 4], dtype=float)] + with pytest.raises(TypeError, match="Invalid number of dimensions"): + A[6, np.array([[0, 2, 4]])] + + +def test_extract_column(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + w = Vector.new(A.dtype, 3) + result = Vector.from_values([1, 2], [3, 1], size=3) + w << A[[1, 3, 5], 2] + assert w.isequal(result) + w << A[1:6:2, 2] + assert w.isequal(result) + w << A.T[2, [1, 3, 5]] + assert w.isequal(result) + w2 = A[1:6:2, 2].new() + assert w2.isequal(result) def test_extract_input_mask(): # A M # 0 1 2 _ 0 1 # 3 4 5 2 3 _ - A = Matrix.from_values( - [0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 1, 2], - [0, 1, 2, 3, 4, 5], - ) - M = Matrix.from_values( - [0, 0, 1, 1], - [1, 2, 0, 1], - [0, 1, 2, 3], - ) - m = M[0, :].new() - MT = M.T.new() - # Matrix structure mask - result = A[0, [0, 1]].new(input_mask=M.S) - expected = Vector.from_values([1], [1]) - assert result.isequal(expected) - # again - result.clear() - result(input_mask=M.S) << A[0, [0, 1]] - assert result.isequal(expected) - - # Vector mask - result = A[0, [0, 1]].new(input_mask=m.S) - assert result.isequal(expected) - # again - result.clear() - result(input_mask=m.S) << A[0, [0, 1]] - assert result.isequal(expected) - - # Matrix value mask - result = A[0, [1, 2]].new(input_mask=M.V) - expected = Vector.from_values([1], [2], size=2) - assert result.isequal(expected) - # again - result.clear() - result(input_mask=M.V) << A[0, [1, 2]] - assert result.isequal(expected) - - with pytest.raises(ValueError, match="Shape of `input_mask` does not match shape of input"): - A[0, [0, 1]].new(input_mask=MT.S) - with pytest.raises(ValueError, match="Shape of `input_mask` does not match shape of input"): - m(input_mask=MT.S) << A[0, [0, 1]] - with pytest.raises( - ValueError, match="Size of `input_mask` Vector does not match ncols of Matrix" - ): - A[0, [0]].new(input_mask=expected.S) - with pytest.raises( - ValueError, match="Size of `input_mask` Vector does not match ncols of Matrix" - ): - m(input_mask=expected.S) << A[0, [0]] - with pytest.raises( - ValueError, match="Size of `input_mask` Vector does not match nrows of Matrix" - ): - A[[0], 0].new(input_mask=m.S) - with pytest.raises( - ValueError, match="Size of `input_mask` Vector does not match nrows of Matrix" - ): - m(input_mask=m.S) << A[[0], 0] - with pytest.raises( - TypeError, match="Got Vector `input_mask` when extracting a submatrix from a Matrix" - ): - A[[0], [0]].new(input_mask=expected.S) - with pytest.raises( - TypeError, match="Got Vector `input_mask` when extracting a submatrix from a Matrix" - ): - A(input_mask=expected.S) << A[[0], [0]] - with pytest.raises(TypeError, match="mask is not allowed for single element extraction"): - A[0, 0].new(input_mask=M.S) - with pytest.raises(TypeError, match="mask and input_mask arguments cannot both be given"): - A[0, [0, 1]].new(input_mask=M.S, mask=expected.S) - with pytest.raises(TypeError, match="mask and input_mask arguments cannot both be given"): - 
A(input_mask=M.S, mask=expected.S) - with pytest.raises(TypeError, match=r"Mask must indicate values \(M.V\) or structure \(M.S\)"): - A[0, [0, 1]].new(input_mask=M) - with pytest.raises(TypeError, match=r"Mask must indicate values \(M.V\) or structure \(M.S\)"): - A(input_mask=M) - with pytest.raises(TypeError, match="Mask object must be type Vector"): - expected[[0, 1]].new(input_mask=M.S) - with pytest.raises(TypeError, match="Mask object must be type Vector"): - expected(input_mask=M.S) << expected[[0, 1]] - with pytest.raises(TypeError, match=r"new\(\) got an unexpected keyword argument 'input_mask'"): - A.new(input_mask=M.S) - with pytest.raises(TypeError, match="`input_mask` argument may only be used for extract"): - A(input_mask=M.S) << A.apply(unary.ainv) - with pytest.raises(TypeError, match="`input_mask` argument may only be used for extract"): - A(input_mask=M.S)[[0], [0]] = 1 - with pytest.raises(TypeError, match="`input_mask` argument may only be used for extract"): - A(input_mask=M.S)[[0], [0]] + As = [ + Matrix.from_values( + [0, 0, 0, 1, 1, 1], + [0, 1, 2, 0, 1, 2], + [0, 1, 2, 3, 4, 5], + ) + ] + As += [ + Matrix.from_values( + da.from_array([0, 0, 0, 1, 1, 1]), + da.from_array([0, 1, 2, 0, 1, 2]), + da.from_array([0, 1, 2, 3, 4, 5]), + ) + ] + Ms = [ + Matrix.from_values( + [0, 0, 1, 1], + [1, 2, 0, 1], + [0, 1, 2, 3], + ) + ] + Ms += [ + Matrix.from_values( + da.from_array([0, 0, 1, 1]), + da.from_array([1, 2, 0, 1]), + da.from_array([0, 1, 2, 3]), + ) + ] + for A_ in As: + for M_ in Ms: + A = A_.dup() + M = M_.dup() + m = M[0, :].new() + MT = M.T.new() + # Matrix structure mask + result = A[0, [0, 1]].new(input_mask=M.S) + expected = Vector.from_values([1], [1]) + assert result.isequal(expected) + # again + result.clear() + result(input_mask=M.S) << A[0, [0, 1]] + assert result.isequal(expected) + + # Vector mask + result = A[0, [0, 1]].new(input_mask=m.S) + assert result.isequal(expected) + # again + result.clear() + result(input_mask=m.S) << A[0, [0, 1]] + assert result.isequal(expected) + + # Matrix value mask + result = A[0, [1, 2]].new(input_mask=M.V) + expected = Vector.from_values([1], [2], size=2) + assert result.isequal(expected) + # again + result.clear() + result(input_mask=M.V) << A[0, [1, 2]] + assert result.isequal(expected) + + with pytest.raises( + ValueError, match="Shape of `input_mask` does not match shape of input" + ): + A[0, [0, 1]].new(input_mask=MT.S).compute() - # With transpose input value - # Matrix structure mask - result = A.T[[0, 1], 0].new(input_mask=MT.S) - expected = Vector.from_values([1], [1]) - assert result.isequal(expected) - # again - result.clear() - result(input_mask=MT.S) << A.T[[0, 1], 0] - assert result.isequal(expected) - - # Vector mask - result = A.T[[0, 1], 0].new(input_mask=m.S) - assert result.isequal(expected) - # again - result.clear() - result(input_mask=m.S) << A.T[[0, 1], 0] - assert result.isequal(expected) - - # Matrix value mask - result = A.T[[1, 2], 0].new(input_mask=MT.V) - expected = Vector.from_values([1], [2], size=2) - assert result.isequal(expected) - # again - result.clear() - result(input_mask=MT.V) << A.T[[1, 2], 0] - assert result.isequal(expected) + with pytest.raises( + ValueError, match="Shape of `input_mask` does not match shape of input" + ): + m(input_mask=MT.S) << A[0, [0, 1]] + m.compute() + + with pytest.raises( + ValueError, match="Size of `input_mask` Vector does not match ncols of Matrix" + ): + A[0, [0]].new(input_mask=expected.S).compute() + + m = M[0, :].new() + with pytest.raises( + 
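# `expected` was built with size=2 while A has 3 columns; since dask is
+                # lazy, the size mismatch only surfaces once compute() is called
+                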
ValueError, match="Size of `input_mask` Vector does not match ncols of Matrix" + ): + m(input_mask=expected.S) << A[0, [0]] + m.compute() + + m = M[0, :].new() + with pytest.raises( + ValueError, match="Size of `input_mask` Vector does not match nrows of Matrix" + ): + A[[0], 0].new(input_mask=m.S).compute() + + m = M[0, :].new() + with pytest.raises( + ValueError, match="Size of `input_mask` Vector does not match nrows of Matrix" + ): + m(input_mask=m.S) << A[[0], 0] + m.compute() + + with pytest.raises( + TypeError, match="Got Vector `input_mask` when extracting a submatrix from a Matrix" + ): + A[[0], [0]].new(input_mask=expected.S).compute() + + with pytest.raises( + TypeError, match="Got Vector `input_mask` when extracting a submatrix from a Matrix" + ): + A(input_mask=expected.S) << A[[0], [0]] + A.compute() + + A = A_.dup() + with pytest.raises( + TypeError, match="mask is not allowed for single element extraction" + ): + A[0, 0].new(input_mask=M.S).compute() + + with pytest.raises( + TypeError, match="mask and input_mask arguments cannot both be given" + ): + A[0, [0, 1]].new(input_mask=M.S, mask=expected.S).compute() + + with pytest.raises( + TypeError, match="mask and input_mask arguments cannot both be given" + ): + A(input_mask=M.S, mask=expected.S).compute() + + with pytest.raises( + TypeError, match=r"Mask must indicate values \(M.V\) or structure \(M.S\)" + ): + A[0, [0, 1]].new(input_mask=M).compute() + + with pytest.raises( + TypeError, match=r"Mask must indicate values \(M.V\) or structure \(M.S\)" + ): + A(input_mask=M).compute() + + with pytest.raises(TypeError, match="Mask object must be type Vector"): + expected[[0, 1]].new(input_mask=M.S).compute() + + with pytest.raises(TypeError, match="Mask object must be type Vector"): + expected(input_mask=M.S) << expected[[0, 1]] + expected.compute() + + with pytest.raises( + TypeError, match=r"new\(\) got an unexpected keyword argument 'input_mask'" + ): + A.new(input_mask=M.S).compute() + + with pytest.raises( + TypeError, match="`input_mask` argument may only be used for extract" + ): + A(input_mask=M.S) << A.apply(unary.ainv) + A.compute() + + A = A_.dup() + with pytest.raises( + TypeError, match="`input_mask` argument may only be used for extract" + ): + A(input_mask=M.S)[[0], [0]] = 1 + A.compute() + + A = A_.dup() + with pytest.raises( + TypeError, match="`input_mask` argument may only be used for extract" + ): + A(input_mask=M.S)[[0], [0]] + A.compute() + + A = A_.dup() + m = M[0, :].new() + # With transpose input value + # Matrix structure mask + result = A.T[[0, 1], 0].new(input_mask=MT.S) + expected = Vector.from_values([1], [1]) + assert result.isequal(expected) + # again + result.clear() + result(input_mask=MT.S) << A.T[[0, 1], 0] + assert result.isequal(expected) + + # Vector mask + result = A.T[[0, 1], 0].new(input_mask=m.S) + assert result.isequal(expected) + # again + result.clear() + result(input_mask=m.S) << A.T[[0, 1], 0] + assert result.isequal(expected) + + # Matrix value mask + result = A.T[[1, 2], 0].new(input_mask=MT.V) + expected = Vector.from_values([1], [2], size=2) + assert result.isequal(expected) + # again + result.clear() + result(input_mask=MT.V) << A.T[[1, 2], 0] + assert result.isequal(expected) # With transpose input value # Matrix structure mask - A = Matrix.from_values( - [0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 1, 2], - [0, 1, 2, 3, 4, 5], + As = [ + Matrix.from_values( + [0, 0, 0, 1, 1, 1], + [0, 1, 2, 0, 1, 2], + [0, 1, 2, 3, 4, 5], + ) + ] + As += [ + Matrix.from_values( + da.from_array([0, 
0, 0, 1, 1, 1]), + da.from_array([0, 1, 2, 0, 1, 2]), + da.from_array([0, 1, 2, 3, 4, 5]), + ) + ] + Ms = [ + Matrix.from_values( + [0, 0, 1, 1], + [1, 2, 0, 1], + [0, 1, 2, 3], + ) + ] + Ms += [ + Matrix.from_values( + da.from_array([0, 0, 1, 1]), + da.from_array([1, 2, 0, 1]), + da.from_array([0, 1, 2, 3]), + ) + ] + for A_ in As: + for M_ in Ms: + A = A_.dup() + M = M_.dup() + A.rechunk(chunks=((1, 1), (2, 1)), inplace=True) + result = A.T[[0, 1], 0].new(input_mask=MT.S) + expected = Vector.from_values([1], [1]) + assert result.isequal(expected) + # again + result.clear() + result(input_mask=MT.S) << A.T[[0, 1], 0] + assert result.isequal(expected) + + # Vector mask + result = A.T[[0, 1], 0].new(input_mask=m.S) + assert result.isequal(expected) + # again + result.clear() + result(input_mask=m.S) << A.T[[0, 1], 0] + assert result.isequal(expected) + + # Matrix value mask + result = A.T[[1, 2], 0].new(input_mask=MT.V) + expected = Vector.from_values([1], [2], size=2) + assert result.isequal(expected) + # again + result.clear() + result(input_mask=MT.V) << A.T[[1, 2], 0] + assert result.isequal(expected) + + +def test_extract_with_matrix(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + with pytest.raises(TypeError, match="Invalid type for index"): + A[A.T, 1].new() + with pytest.raises(TypeError, match="Invalid type for index"): + A[A, [1]].new() + with pytest.raises(TypeError, match="Invalid type for index"): + A[[0], A.V].new() + + +def test_assign(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + B = Matrix.from_values([0, 0, 1], [0, 1, 0], [9, 8, 7]) + result = Matrix.from_values( + [0, 0, 2, 3, 0, 3, 5, 6, 0, 6, 1, 6, 4, 1], + [0, 5, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 6], + [9, 8, 7, 3, 2, 3, 1, 5, 3, 7, 8, 3, 7, 4], + ) + C = A.dup() + C()[[0, 2], [0, 5]] = B + assert C.isequal(result) + C = A.dup() + C[:3:2, :6:5]() << B + assert C.isequal(result) + with pytest.raises(TypeError, match="will make the Matrix dense"): + C << 1 + nvals = C.nvals + C(C.S) << 1 + assert C.nvals == nvals + assert C.reduce_scalar().new() == nvals + with pytest.raises(TypeError, match="Invalid type for index"): + C[C, [1]] = C + B = B.T.new() + C = A.dup() + C()[[0, 2], [0, 5]] = B.T + assert C.isequal(result) + C = A.dup() + C[:3:2, :6:5]() << B.T + assert C.isequal(result) + + B.rechunk(chunks=1) + C = A.dup() + C()[[0, 2], [0, 5]] = B.T + assert C.isequal(result) + C = A.dup() + C[:3:2, :6:5]() << B.T + assert C.isequal(result) + + +def test_assign_wrong_dims(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + B = Matrix.from_values([0, 0, 1], [0, 1, 0], [9, 8, 7]) + with pytest.raises(DimensionMismatch): + A[[0, 2, 4], [0, 5]] = B + A.compute() + + +def test_assign_row(As, A_chunks, v): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + result = Matrix.from_values( + [3, 3, 5, 6, 6, 1, 6, 2, 4, 1, 0, 0, 0, 0], + [0, 2, 2, 2, 3, 4, 4, 5, 5, 6, 1, 3, 4, 6], + [3, 3, 1, 5, 7, 8, 3, 1, 7, 4, 1, 1, 2, 0], + ) + C = A.dup() + C[0, :] = v + assert C.isequal(result) + + +def test_subassign_row_col(A_chunks): + A_0 = Matrix.from_values( + [0, 0, 0, 1, 1, 1, 2, 2, 2], + [0, 1, 2, 0, 1, 2, 0, 1, 2], + [0, 1, 2, 3, 4, 5, 6, 7, 8], ) - M = Matrix.from_values( - [0, 0, 1, 1], - [1, 2, 0, 1], - [0, 1, 2, 3], + A_1 = Matrix.from_values( + da.from_array([0, 0, 0, 1, 1, 1, 2, 2, 2]), + 
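# same COO data as A_0, supplied as dask arrays to exercise the lazy path
+        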
da.from_array([0, 1, 2, 0, 1, 2, 0, 1, 2]), + da.from_array([0, 1, 2, 3, 4, 5, 6, 7, 8]), ) - A.rechunk(chunks=((1, 1), (2, 1)), inplace=True) - result = A.T[[0, 1], 0].new(input_mask=MT.S) - expected = Vector.from_values([1], [1]) - assert result.isequal(expected) - # again - result.clear() - result(input_mask=MT.S) << A.T[[0, 1], 0] - assert result.isequal(expected) - - # Vector mask - result = A.T[[0, 1], 0].new(input_mask=m.S) - assert result.isequal(expected) - # again - result.clear() - result(input_mask=m.S) << A.T[[0, 1], 0] - assert result.isequal(expected) - - # Matrix value mask - result = A.T[[1, 2], 0].new(input_mask=MT.V) - expected = Vector.from_values([1], [2], size=2) - assert result.isequal(expected) - # again - result.clear() - result(input_mask=MT.V) << A.T[[1, 2], 0] - assert result.isequal(expected) - - -def test_extract_with_matrix(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - with pytest.raises(TypeError, match="Invalid type for index"): - A[A.T, 1].new() - with pytest.raises(TypeError, match="Invalid type for index"): - A[A, [1]].new() - with pytest.raises(TypeError, match="Invalid type for index"): - A[[0], A.V].new() + As = [A_0, A_1] + for A_ in As: + for chunks in [3, 2, 1]: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + m = Vector.from_values([1], [True]) + v = Vector.from_values([0, 1], [10, 20]) + + A[[0, 1], 0](m.S) << v + result1 = Matrix.from_values( + [0, 0, 0, 1, 1, 1, 2, 2, 2], + [0, 1, 2, 0, 1, 2, 0, 1, 2], + [0, 1, 2, 20, 4, 5, 6, 7, 8], + ) + assert A.isequal(result1) + A[1, [1, 2]](m.V, accum=binary.plus).update(v) + result2 = Matrix.from_values( + [0, 0, 0, 1, 1, 1, 2, 2, 2], + [0, 1, 2, 0, 1, 2, 0, 1, 2], + [0, 1, 2, 20, 4, 25, 6, 7, 8], + ) + assert A.isequal(result2) -def test_assign(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - B = Matrix.from_values([0, 0, 1], [0, 1, 0], [9, 8, 7]) - result = Matrix.from_values( - [0, 0, 2, 3, 0, 3, 5, 6, 0, 6, 1, 6, 4, 1], - [0, 5, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 6], - [9, 8, 7, 3, 2, 3, 1, 5, 3, 7, 8, 3, 7, 4], - ) - C = A.dup() - C()[[0, 2], [0, 5]] = B - assert C.isequal(result) - C = A.dup() - C[:3:2, :6:5]() << B - assert C.isequal(result) - with pytest.raises(TypeError, match="will make the Matrix dense"): - C << 1 - nvals = C.nvals - C(C.S) << 1 - assert C.nvals == nvals - assert C.reduce_scalar().new() == nvals - with pytest.raises(TypeError, match="Invalid type for index"): - C[C, [1]] = C - - B = B.T.new() - C = A.dup() - C()[[0, 2], [0, 5]] = B.T - assert C.isequal(result) - C = A.dup() - C[:3:2, :6:5]() << B.T - assert C.isequal(result) - - B.rechunk(chunks=1) - C = A.dup() - C()[[0, 2], [0, 5]] = B.T - assert C.isequal(result) - C = A.dup() - C[:3:2, :6:5]() << B.T - assert C.isequal(result) - - -def test_assign_wrong_dims(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - B = Matrix.from_values([0, 0, 1], [0, 1, 0], [9, 8, 7]) - with pytest.raises(DimensionMismatch): - A[[0, 2, 4], [0, 5]] = B + A[[0, 1], 0](m.S, binary.plus, replace=True) << v + result3 = Matrix.from_values( + [0, 0, 1, 1, 1, 2, 2, 2], + [1, 2, 0, 1, 2, 0, 1, 2], + [1, 2, 40, 4, 25, 6, 7, 8], + ) + assert A.isequal(result3) + + _A = A.dup() + with pytest.raises(DimensionMismatch): + A(m.S)[[0, 1], 0] << v + A.compute() + + A = _A + A[[0, 1], 0](m.S) << 99 + result4 = Matrix.from_values( + [0, 0, 1, 1, 1, 2, 2, 2], + [1, 2, 0, 1, 2, 0, 1, 
2], + [1, 2, 99, 4, 25, 6, 7, 8], + ) + assert A.isequal(result4) + A[[1, 2], 0](m.S, binary.plus, replace=True) << 100 + result5 = Matrix.from_values( + [0, 0, 1, 1, 2, 2, 2], + [1, 2, 1, 2, 0, 1, 2], + [1, 2, 4, 25, 106, 7, 8], + ) + assert A.isequal(result5) -def test_assign_row(A, A_chunks, v): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - result = Matrix.from_values( - [3, 3, 5, 6, 6, 1, 6, 2, 4, 1, 0, 0, 0, 0], - [0, 2, 2, 2, 3, 4, 4, 5, 5, 6, 1, 3, 4, 6], - [3, 3, 1, 5, 7, 8, 3, 1, 7, 4, 1, 1, 2, 0], - ) - C = A.dup() - C[0, :] = v - assert C.isequal(result) + A[2, [0, 1]](m.S) << -1 + result6 = Matrix.from_values( + [0, 0, 1, 1, 2, 2, 2], + [1, 2, 1, 2, 0, 1, 2], + [1, 2, 4, 25, 106, -1, 8], + ) + assert A.isequal(result6) -def test_subassign_row_col(A_chunks): - A = Matrix.from_values( +def test_subassign_matrix(): + A_0 = Matrix.from_values( [0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 1, 2, 3, 4, 5, 6, 7, 8], ) - A_ = A - for chunks in [3, 2, 1]: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - m = Vector.from_values([1], [True]) - v = Vector.from_values([0, 1], [10, 20]) - - A[[0, 1], 0](m.S) << v + A_1 = Matrix.from_values( + da.from_array([0, 0, 0, 1, 1, 1, 2, 2, 2]), + da.from_array([0, 1, 2, 0, 1, 2, 0, 1, 2]), + da.from_array([0, 1, 2, 3, 4, 5, 6, 7, 8]), + ) + As = [A_0, A_1] + for A_i in As: + A = A_i.dup() + m = Matrix.from_values([1], [0], [True]) + v = Matrix.from_values([0, 1], [0, 0], [10, 20]) + mT = m.T.new() + + A[[0, 1], [0]](m.S) << v result1 = Matrix.from_values( [0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2], @@ -840,7 +1197,11 @@ def test_subassign_row_col(A_chunks): ) assert A.isequal(result1) - A[1, [1, 2]](m.V, accum=binary.plus).update(v) + A_ = A.dup() + _A = A.dup() + _A_ = A.dup() + + A[[1], [1, 2]](mT.V, accum=binary.plus) << v.T result2 = Matrix.from_values( [0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2], @@ -848,7 +1209,16 @@ def test_subassign_row_col(A_chunks): ) assert A.isequal(result2) - A[[0, 1], 0](m.S, binary.plus, replace=True) << v + A_[[1], 1:3](mT.V, accum=binary.plus) << v.T + assert A_.isequal(result2) + + _A[1:2, [1, 2]](mT.V, accum=binary.plus) << v.T + assert _A.isequal(result2) + + _A_[1:2, 1:3](mT.V, accum=binary.plus) << v.T + assert _A_.isequal(result2) + + A[[0, 1], [0]](m.S, binary.plus, replace=True) << v result3 = Matrix.from_values( [0, 0, 1, 1, 1, 2, 2, 2], [1, 2, 0, 1, 2, 0, 1, 2], @@ -856,10 +1226,13 @@ def test_subassign_row_col(A_chunks): ) assert A.isequal(result3) + A__ = A.dup() with pytest.raises(DimensionMismatch): - A(m.S)[[0, 1], 0] << v + A(m.S)[[0, 1], [0]] << v + A.compute() - A[[0, 1], 0](m.S) << 99 + A = A__ + A[[0, 1], [0]](m.S) << 99 result4 = Matrix.from_values( [0, 0, 1, 1, 1, 2, 2, 2], [1, 2, 0, 1, 2, 0, 1, 2], @@ -867,7 +1240,7 @@ def test_subassign_row_col(A_chunks): ) assert A.isequal(result4) - A[[1, 2], 0](m.S, binary.plus, replace=True) << 100 + A[[1, 2], [0]](m.S, binary.plus, replace=True) << 100 result5 = Matrix.from_values( [0, 0, 1, 1, 2, 2, 2], [1, 2, 1, 2, 0, 1, 2], @@ -875,7 +1248,7 @@ def test_subassign_row_col(A_chunks): ) assert A.isequal(result5) - A[2, [0, 1]](m.S) << -1 + A[[2], [0, 1]](mT.S) << -1 result6 = Matrix.from_values( [0, 0, 1, 1, 2, 2, 2], [1, 2, 1, 2, 0, 1, 2], @@ -884,715 +1257,705 @@ def test_subassign_row_col(A_chunks): assert A.isequal(result6) -def test_subassign_matrix(): - A = Matrix.from_values( - [0, 0, 0, 1, 1, 1, 2, 2, 2], - [0, 1, 2, 0, 1, 2, 0, 1, 2], - [0, 1, 2, 3, 
4, 5, 6, 7, 8], - ) - m = Matrix.from_values([1], [0], [True]) - v = Matrix.from_values([0, 1], [0, 0], [10, 20]) - mT = m.T.new() - - A[[0, 1], [0]](m.S) << v - result1 = Matrix.from_values( - [0, 0, 0, 1, 1, 1, 2, 2, 2], - [0, 1, 2, 0, 1, 2, 0, 1, 2], - [0, 1, 2, 20, 4, 5, 6, 7, 8], - ) - assert A.isequal(result1) - - A_ = A.dup() - _A = A.dup() - _A_ = A.dup() - - A[[1], [1, 2]](mT.V, accum=binary.plus) << v.T - result2 = Matrix.from_values( - [0, 0, 0, 1, 1, 1, 2, 2, 2], - [0, 1, 2, 0, 1, 2, 0, 1, 2], - [0, 1, 2, 20, 4, 25, 6, 7, 8], - ) - assert A.isequal(result2) - - A_[[1], 1:3](mT.V, accum=binary.plus) << v.T - assert A_.isequal(result2) - - _A[1:2, [1, 2]](mT.V, accum=binary.plus) << v.T - assert _A.isequal(result2) - - _A_[1:2, 1:3](mT.V, accum=binary.plus) << v.T - assert _A_.isequal(result2) - - A[[0, 1], [0]](m.S, binary.plus, replace=True) << v - result3 = Matrix.from_values( - [0, 0, 1, 1, 1, 2, 2, 2], - [1, 2, 0, 1, 2, 0, 1, 2], - [1, 2, 40, 4, 25, 6, 7, 8], - ) - assert A.isequal(result3) - - with pytest.raises(DimensionMismatch): - A(m.S)[[0, 1], [0]] << v - - A[[0, 1], [0]](m.S) << 99 - result4 = Matrix.from_values( - [0, 0, 1, 1, 1, 2, 2, 2], - [1, 2, 0, 1, 2, 0, 1, 2], - [1, 2, 99, 4, 25, 6, 7, 8], - ) - assert A.isequal(result4) - - A[[1, 2], [0]](m.S, binary.plus, replace=True) << 100 - result5 = Matrix.from_values( - [0, 0, 1, 1, 2, 2, 2], - [1, 2, 1, 2, 0, 1, 2], - [1, 2, 4, 25, 106, 7, 8], - ) - assert A.isequal(result5) +def test_assign_column(As, A_chunks, v): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + result = Matrix.from_values( + [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4, 6], + [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1, 1], + [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 1, 1, 2, 0], + ) + C = A.dup() + C[:, 1] = v + assert C.isequal(result) - A[[2], [0, 1]](mT.S) << -1 - result6 = Matrix.from_values( - [0, 0, 1, 1, 2, 2, 2], - [1, 2, 1, 2, 0, 1, 2], - [1, 2, 4, 25, 106, -1, 8], - ) - assert A.isequal(result6) +def test_assign_row_scalar(As, A_chunks, v): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + C = A.dup() + C[0, :](v.S) << v + D = A.dup() + D(v.S)[0, :] << v + assert C.isequal(D) + + C[:, :](C.S) << 1 + + C_ = C.dup() + with pytest.raises( + TypeError, match="Unable to use Vector mask on Matrix assignment to a Matrix" + ): + C[:, :](v.S) << 1 + C.compute() -def test_assign_column(A, A_chunks, v): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - result = Matrix.from_values( - [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4, 6], - [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1, 1], - [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 1, 1, 2, 0], - ) - C = A.dup() - C[:, 1] = v - assert C.isequal(result) + C = C_.dup() + with pytest.raises( + TypeError, + match="Unable to use Vector mask on single element assignment to a Matrix", + ): + C[0, 0](v.S) << 1 + C.compute() + C = C_.dup() + with pytest.raises(TypeError): + C[0, 0](v.S) << v + C.compute() -def test_assign_row_scalar(A, A_chunks, v): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - C = A.dup() - C[0, :](v.S) << v - D = A.dup() - D(v.S)[0, :] << v - assert C.isequal(D) + C = C_.dup() + with pytest.raises(TypeError): + C(v.S)[0, 0] << v + C.compute() - C[:, :](C.S) << 1 + C = C_.dup() + with pytest.raises(TypeError): + C[0, 0](C.S) << v + C.compute() - with pytest.raises( - TypeError, match="Unable to use Vector mask on Matrix assignment to 
a Matrix" - ): - C[:, :](v.S) << 1 - with pytest.raises( - TypeError, match="Unable to use Vector mask on single element assignment to a Matrix" - ): - C[0, 0](v.S) << 1 + C = C_.dup() + with pytest.raises(TypeError): + C(C.S)[0, 0] << v + C.compute() - with pytest.raises(TypeError): - C[0, 0](v.S) << v - with pytest.raises(TypeError): - C(v.S)[0, 0] << v - with pytest.raises(TypeError): - C[0, 0](C.S) << v - with pytest.raises(TypeError): - C(C.S)[0, 0] << v + C = C_.dup() + with pytest.raises(TypeError): + C[0, 0](v.S) << C + C.compute() - with pytest.raises(TypeError): - C[0, 0](v.S) << C - with pytest.raises(TypeError): - C[0, 0](C.S) << C - - C = A.dup() - C(v.S)[0, :] = 10 - result = Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 0, 0], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 4, 6], - [3, 10, 3, 1, 5, 10, 7, 8, 3, 1, 7, 4, 10, 10], - ) - assert C.isequal(result) + C = C_.dup() + with pytest.raises(TypeError): + C[0, 0](C.S) << C + C.compute() + + C = A.dup() + C(v.S)[0, :] = 10 + result = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 0, 0], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 4, 6], + [3, 10, 3, 1, 5, 10, 7, 8, 3, 1, 7, 4, 10, 10], + ) + assert C.isequal(result) def test_assign_row_col_matrix_mask(): # A B v1 v2 # 0 1 4 _ 100 10 # 2 _ 0 5 20 - A = Matrix.from_values([0, 0, 1], [0, 1, 0], [0, 1, 2]) - B = Matrix.from_values([0, 1, 1], [0, 0, 1], [4, 0, 5]) - v1 = Vector.from_values([0], [100]) - v2 = Vector.from_values([0, 1], [10, 20]) - - # row assign - C = A.dup() - C(B.S)[0, :] << v2 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 2]) - assert C.isequal(result) - - C = A.dup() - C(B.S, accum=binary.plus)[1, :] = v2 - result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 12, 20]) - assert C.isequal(result) - - C = A.dup() - C(B.S, replace=True)[1, :] << v2 - result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 10, 20]) - assert C.isequal(result) - - # col assign - C = A.dup() - C(B.S)[:, 0] = v2 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 20]) - assert C.isequal(result) - - C = A.dup() - C(B.S, accum=binary.plus)[:, 1] << v2 - result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 20]) - assert C.isequal(result) - - C = A.dup() - C(B.S, replace=True)[:, 1] = v2 - result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 2, 20]) - assert C.isequal(result) - - # row assign scalar (as a sanity check) - C = A.dup() - C(B.S)[0, :] = 100 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2]) - assert C.isequal(result) - - C = A.dup() - C(B.S, accum=binary.plus)[1, :] << 100 - result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 102, 100]) - assert C.isequal(result) - - C = A.dup() - C(B.S, replace=True)[1, :] = 100 - result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 100, 100]) - assert C.isequal(result) - - # col assign scalar (as a sanity check) - C = A.dup() - C(B.S)[:, 0] << 100 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 100]) - assert C.isequal(result) - - C = A.dup() - C(B.S, accum=binary.plus)[:, 1] = 100 - result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 100]) - assert C.isequal(result) - - C = A.dup() - C(B.S, replace=True)[:, 1] << 100 - result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 2, 100]) - assert C.isequal(result) - - # row subassign - C = A.dup() - C[0, :](v2.S) << v2 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 20, 2]) - assert C.isequal(result) - - C = A.dup() - C[0, [0]](v1.S) << v1 - result = Matrix.from_values([0, 0, 1], 
[0, 1, 0], [100, 1, 2]) - assert C.isequal(result) - - with pytest.raises( - TypeError, match="Indices for subassign imply Vector submask, but got Matrix mask instead" - ): - C[0, :](B.S) << v2 - - # col subassign - C = A.dup() - C[:, 0](v2.S) << v2 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 20]) - assert C.isequal(result) - - C = A.dup() - C[[0], 0](v1.S) << v1 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2]) - assert C.isequal(result) - - with pytest.raises( - TypeError, match="Indices for subassign imply Vector submask, but got Matrix mask instead" - ): - C[:, 0](B.S) << v2 - - # row subassign scalar - C = A.dup() - C[0, :](v2.S) << 100 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 100, 2]) - assert C.isequal(result) - - C = A.dup() - C[0, [0]](v1.S) << 100 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2]) - assert C.isequal(result) - - with pytest.raises( - TypeError, match="Indices for subassign imply Vector submask, but got Matrix mask instead" - ): - C[:, 0](B.S) << 100 - - # col subassign scalar - C = A.dup() - C[:, 0](v2.S) << 100 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 100]) - assert C.isequal(result) - - C = A.dup() - C[[0], 0](v1.S) << 100 - result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2]) - assert C.isequal(result) - - with pytest.raises( - TypeError, match="Indices for subassign imply Vector submask, but got Matrix mask instead" - ): - C[:, 0](B.S) << 100 - - # Bad subassign - with pytest.raises(TypeError, match="Single element assign does not accept a submask"): - C[0, 0](B.S) << 100 - - -def test_assign_column_scalar(A, A_chunks, v): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - C = A.dup() - C[:, 0](v.S) << v - D = A.dup() - D(v.S)[:, 0] << v - assert C.isequal(D) - - C = A.dup() - C[:, 1] = v - C(v.S)[:, 1] = 10 - result = Matrix.from_values( - [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4, 6], - [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1, 1], - [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 10, 10, 10, 10], - ) - assert C.isequal(result) + A_0 = Matrix.from_values([0, 0, 1], [0, 1, 0], [0, 1, 2]) + B_0 = Matrix.from_values([0, 1, 1], [0, 0, 1], [4, 0, 5]) + v1_0 = Vector.from_values([0], [100]) + v2_0 = Vector.from_values([0, 1], [10, 20]) - C(v.V, replace=True, accum=binary.plus)[:, 1] = 20 - result = Matrix.from_values( - [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4], - [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1], - [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 30, 30, 30], - ) - assert C.isequal(result) + A_1 = Matrix.from_values( + da.from_array([0, 0, 1]), da.from_array([0, 1, 0]), da.from_array([0, 1, 2]) + ) + B_1 = Matrix.from_values( + da.from_array([0, 1, 1]), da.from_array([0, 0, 1]), da.from_array([4, 0, 5]) + ) + v1_1 = Vector.from_values(da.from_array([0]), da.from_array([100])) + v2_1 = Vector.from_values(da.from_array([0, 1]), da.from_array([10, 20])) + + As = [A_0, A_1] + Bs = [B_0, B_1] + v1s = [v1_0, v1_1] + v2s = [v2_0, v2_1] + + for A in As: + for B in Bs: + for v1 in v1s: + for v2 in v2s: + # row assign + C = A.dup() + C(B.S)[0, :] << v2 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 2]) + assert C.isequal(result) + + C = A.dup() + C(B.S, accum=binary.plus)[1, :] = v2 + result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 12, 20]) + assert C.isequal(result) + + C = A.dup() + C(B.S, replace=True)[1, :] << v2 + result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 10, 20]) + assert C.isequal(result) + + # col assign + C = 
A.dup() + C(B.S)[:, 0] = v2 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 20]) + assert C.isequal(result) + + C = A.dup() + C(B.S, accum=binary.plus)[:, 1] << v2 + result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 20]) + assert C.isequal(result) + + C = A.dup() + C(B.S, replace=True)[:, 1] = v2 + result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 2, 20]) + assert C.isequal(result) + + # row assign scalar (as a sanity check) + C = A.dup() + C(B.S)[0, :] = 100 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2]) + assert C.isequal(result) + + C = A.dup() + C(B.S, accum=binary.plus)[1, :] << 100 + result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 102, 100]) + assert C.isequal(result) + + C = A.dup() + C(B.S, replace=True)[1, :] = 100 + result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 100, 100]) + assert C.isequal(result) + + # col assign scalar (as a sanity check) + C = A.dup() + C(B.S)[:, 0] << 100 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 100]) + assert C.isequal(result) + + C = A.dup() + C(B.S, accum=binary.plus)[:, 1] = 100 + result = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 2, 100]) + assert C.isequal(result) + + C = A.dup() + C(B.S, replace=True)[:, 1] << 100 + result = Matrix.from_values([0, 1, 1], [0, 0, 1], [0, 2, 100]) + assert C.isequal(result) + + # row subassign + C = A.dup() + C[0, :](v2.S) << v2 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 20, 2]) + assert C.isequal(result) + + C = A.dup() + C[0, [0]](v1.S) << v1 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2]) + assert C.isequal(result) + + with pytest.raises( + TypeError, + match="Indices for subassign imply Vector submask, but got Matrix mask instead", + ): + C[0, :](B.S) << v2 + C.compute() + + # col subassign + C = A.dup() + C[:, 0](v2.S) << v2 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [10, 1, 20]) + assert C.isequal(result) + + C = A.dup() + C[[0], 0](v1.S) << v1 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2]) + assert C.isequal(result) + + with pytest.raises( + TypeError, + match="Indices for subassign imply Vector submask, but got Matrix mask instead", + ): + C[:, 0](B.S) << v2 + C.compute() + + # row subassign scalar + C = A.dup() + C[0, :](v2.S) << 100 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 100, 2]) + assert C.isequal(result) + + C = A.dup() + C[0, [0]](v1.S) << 100 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2]) + assert C.isequal(result) + + with pytest.raises( + TypeError, + match="Indices for subassign imply Vector submask, but got Matrix mask instead", + ): + C[:, 0](B.S) << 100 + C.compute() + + # col subassign scalar + C = A.dup() + C[:, 0](v2.S) << 100 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 100]) + assert C.isequal(result) + + C = A.dup() + C[[0], 0](v1.S) << 100 + result = Matrix.from_values([0, 0, 1], [0, 1, 0], [100, 1, 2]) + assert C.isequal(result) + + with pytest.raises( + TypeError, + match="Indices for subassign imply Vector submask, but got Matrix mask instead", + ): + C[:, 0](B.S) << 100 + C.compute() + + # Bad subassign + C = A.dup() + with pytest.raises( + TypeError, match="Single element assign does not accept a submask" + ): + C[0, 0](B.S) << 100 + C.compute() + + +def test_assign_column_scalar(As, A_chunks, v): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + C = A.dup() + C[:, 0](v.S) << v + D = A.dup() + D(v.S)[:, 0] << v + assert 
C.isequal(D) + + C = A.dup() + C[:, 1] = v + C(v.S)[:, 1] = 10 + result = Matrix.from_values( + [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4, 6], + [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1, 1], + [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 10, 10, 10, 10], + ) + assert C.isequal(result) + C(v.V, replace=True, accum=binary.plus)[:, 1] = 20 + result = Matrix.from_values( + [3, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 1, 3, 4], + [0, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 1, 1], + [3, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 30, 30, 30], + ) + assert C.isequal(result) -def test_assign_scalar(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - # Test block - result_block = Matrix.from_values( - [3, 0, 6, 0, 6, 6, 2, 4, 1, 1, 3, 5, 1, 3, 5], - [0, 1, 2, 3, 3, 4, 5, 5, 6, 2, 2, 2, 4, 4, 4], - [3, 2, 5, 3, 7, 3, 1, 7, 4, 0, 0, 0, 0, 0, 0], - ) - C = A.dup() - C[[1, 3, 5], [2, 4]] = 0 - assert C.isequal(result_block) - C = A.dup() - C[[1, 3, 5], [2, 4]] = Scalar.from_value(0) - assert C.isequal(result_block) - C = A.dup() - C[1::2, 2:5:2] = 0 - assert C.isequal(result_block) - C = A.dup() - C[1::2, 2:5:2] = Scalar.from_value(0) - assert C.isequal(result_block) - # Test row - result_row = Matrix.from_values( - [3, 0, 6, 0, 6, 6, 2, 4, 1, 3, 5, 1, 1], - [0, 1, 2, 3, 3, 4, 5, 5, 6, 2, 2, 2, 4], - [3, 2, 5, 3, 7, 3, 1, 7, 4, 3, 1, 0, 0], - ) - C = A.dup() - C[1, [2, 4]] = 0 - assert C.isequal(result_row) - C = A.dup() - C[1, 2] = Scalar.from_value(0) - C[1, 4] = Scalar.from_value(0) - assert C.isequal(result_row) - C = A.dup() - C[1, 2:5:2] = 0 - assert C.isequal(result_row) - # Test column - result_column = Matrix.from_values( - [3, 0, 6, 0, 6, 6, 2, 4, 1, 1, 1, 3, 5], - [0, 1, 2, 3, 3, 4, 5, 5, 6, 4, 2, 2, 2], - [3, 2, 5, 3, 7, 3, 1, 7, 4, 8, 0, 0, 0], - ) - C = A.dup() - C[[1, 3, 5], 2] = 0 - assert C.isequal(result_column) - C = A.dup() - C[1::2, 2] = 0 - assert C.isequal(result_column) +def test_assign_scalar(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + # Test block + result_block = Matrix.from_values( + [3, 0, 6, 0, 6, 6, 2, 4, 1, 1, 3, 5, 1, 3, 5], + [0, 1, 2, 3, 3, 4, 5, 5, 6, 2, 2, 2, 4, 4, 4], + [3, 2, 5, 3, 7, 3, 1, 7, 4, 0, 0, 0, 0, 0, 0], + ) + C = A.dup() + C[[1, 3, 5], [2, 4]] = 0 + assert C.isequal(result_block) + C = A.dup() + C[[1, 3, 5], [2, 4]] = Scalar.from_value(0) + assert C.isequal(result_block) + C = A.dup() + C[1::2, 2:5:2] = 0 + assert C.isequal(result_block) + C = A.dup() + C[1::2, 2:5:2] = Scalar.from_value(0) + assert C.isequal(result_block) + # Test row + result_row = Matrix.from_values( + [3, 0, 6, 0, 6, 6, 2, 4, 1, 3, 5, 1, 1], + [0, 1, 2, 3, 3, 4, 5, 5, 6, 2, 2, 2, 4], + [3, 2, 5, 3, 7, 3, 1, 7, 4, 3, 1, 0, 0], + ) + C = A.dup() + C[1, [2, 4]] = 0 + assert C.isequal(result_row) + C = A.dup() + C[1, 2] = Scalar.from_value(0) + C[1, 4] = Scalar.from_value(0) + assert C.isequal(result_row) + C = A.dup() + C[1, 2:5:2] = 0 + assert C.isequal(result_row) + # Test column + result_column = Matrix.from_values( + [3, 0, 6, 0, 6, 6, 2, 4, 1, 1, 1, 3, 5], + [0, 1, 2, 3, 3, 4, 5, 5, 6, 4, 2, 2, 2], + [3, 2, 5, 3, 7, 3, 1, 7, 4, 8, 0, 0, 0], + ) + C = A.dup() + C[[1, 3, 5], 2] = 0 + assert C.isequal(result_column) + C = A.dup() + C[1::2, 2] = 0 + assert C.isequal(result_column) -def test_assign_bad(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - with pytest.raises(TypeError, match="Bad type"): - A[0, 0] = object() - with 
pytest.raises(TypeError, match="Bad type"): - A[:, 0] = object() - with pytest.raises(TypeError, match="Bad type"): - A[0, :] = object() - with pytest.raises(TypeError, match="Bad type"): - A[:, :] = object() - with pytest.raises(TypeError, match="Bad type"): - A[0, 0] = A - with pytest.raises(TypeError, match="Bad type"): - A[:, 0] = A - with pytest.raises(TypeError, match="Bad type"): - A[0, :] = A - v = A[0, :].new() - with pytest.raises(TypeError, match="Bad type"): - A[0, 0] = v - with pytest.raises(TypeError, match="Bad type"): - A[:, :] = v - - -def test_apply(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - result = Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [-3, -2, -3, -1, -5, -3, -7, -8, -3, -1, -7, -4], - ) - C = A.apply(unary.ainv).new() - assert C.isequal(result) +def test_assign_bad(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + with pytest.raises(TypeError, match="Bad type"): + A[0, 0] = object() + A.compute() + A = A_.dup() + with pytest.raises(TypeError, match="Bad type"): + A[:, 0] = object() + A.compute() + A = A_.dup() + with pytest.raises(TypeError, match="Bad type"): + A[0, :] = object() + A.compute() + A = A_.dup() + with pytest.raises(TypeError, match="Bad type"): + A[:, :] = object() + A.compute() + A = A_.dup() + with pytest.raises(TypeError, match="Bad type"): + A[0, 0] = A + A.compute() + A = A_.dup() + with pytest.raises(TypeError, match="Bad type"): + A[:, 0] = A + A.compute() + A = A_.dup() + with pytest.raises(TypeError, match="Bad type"): + A[0, :] = A + A.compute() + A = A_.dup() + v = A[0, :].new() + with pytest.raises(TypeError, match="Bad type"): + A[0, 0] = v + A.compute() + A = A_.dup() + with pytest.raises(TypeError, match="Bad type"): + A[:, :] = v + A.compute() -def test_apply_binary(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - result_right = Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1], - dtype=bool, - ) - w_right = A.apply(binary.gt, right=1).new() - w_right2 = A.apply(binary.gt, right=Scalar.from_value(1)).new() - assert w_right.isequal(result_right) - assert w_right2.isequal(result_right) - result_left = Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [5, 6, 5, 7, 3, 5, 1, 0, 5, 7, 1, 4], - ) - w_left = A.apply(binary.minus, left=8).new() - w_left2 = A.apply(binary.minus, left=Scalar.from_value(8)).new() - assert w_left.isequal(result_left) - assert w_left2.isequal(result_left) - with pytest.raises(TypeError): - A.apply(binary.plus, left=A) - with pytest.raises(TypeError): - A.apply(binary.plus, right=A) - with pytest.raises(TypeError, match="Cannot provide both"): - A.apply(binary.plus, left=1, right=1) - # allow monoids - w1 = A.apply(binary.plus, left=1).new() - w2 = A.apply(monoid.plus, left=1).new() - w3 = A.apply(monoid.plus, right=1).new() - assert w1.isequal(w2) - assert w1.isequal(w3) +def test_apply(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + result = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [-3, -2, -3, -1, -5, -3, -7, -8, -3, -1, -7, -4], + ) + C = A.apply(unary.ainv).new() + assert C.isequal(result) -def 
test_reduce_row(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15]) - w = A.reduce_rowwise(monoid.plus).new() - assert w.isequal(result) - w2 = A.reduce_rowwise(binary.plus).new() - assert w2.isequal(result) +def test_apply_binary(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + result_right = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1], + dtype=bool, + ) + w_right = A.apply(binary.gt, right=1).new() + w_right2 = A.apply(binary.gt, right=Scalar.from_value(1)).new() + assert w_right.isequal(result_right) + assert w_right2.isequal(result_right) + result_left = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [5, 6, 5, 7, 3, 5, 1, 0, 5, 7, 1, 4], + ) + w_left = A.apply(binary.minus, left=8).new() + w_left2 = A.apply(binary.minus, left=Scalar.from_value(8)).new() + assert w_left.isequal(result_left) + assert w_left2.isequal(result_left) + with pytest.raises(TypeError): + A.apply(binary.plus, left=A) + with pytest.raises(TypeError): + A.apply(binary.plus, right=A) + with pytest.raises(TypeError, match="Cannot provide both"): + A.apply(binary.plus, left=1, right=1) + # allow monoids + w1 = A.apply(binary.plus, left=1).new() + w2 = A.apply(monoid.plus, left=1).new() + w3 = A.apply(monoid.plus, right=1).new() + assert w1.isequal(w2) + assert w1.isequal(w3) -@pytest.mark.slow -def test_reduce_agg(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15]) - w1 = A.reduce_rowwise(agg.sum).new() - assert w1.isequal(result) - w2 = A.T.reduce_columnwise(agg.sum).new() - assert w2.isequal(result) - - counts = A.dup(dtype=bool).reduce_rowwise(monoid.plus[int]).new() - w3 = A.reduce_rowwise(agg.count).new() - assert w3.isequal(counts) - w4 = A.T.reduce_columnwise(agg.count).new() - assert w4.isequal(counts) - - Asquared = monoid.times(A & A).new() - squared = Asquared.reduce_rowwise(monoid.plus).new() - expected = unary.sqrt[float](squared).new() - w5 = A.reduce_rowwise(agg.hypot).new() - assert w5.isclose(expected) - w6 = A.reduce_rowwise(monoid.numpy.hypot[float]).new() - assert w6.isclose(expected) - w7 = Vector.new(w5.dtype, size=w5.size) - w7 << A.reduce_rowwise(agg.hypot) - assert w7.isclose(expected) - - w8 = A.reduce_rowwise(agg.logaddexp).new() - expected = A.reduce_rowwise(monoid.numpy.logaddexp[float]).new() - assert w8.isclose(w8) - - result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 9, 10, 11, 8, 4]) - w9 = A.reduce_columnwise(agg.sum).new() - assert w9.isequal(result) - w10 = A.T.reduce_rowwise(agg.sum).new() - assert w10.isequal(result) - - counts = A.dup(dtype=bool).reduce_columnwise(monoid.plus[int]).new() - w11 = A.reduce_columnwise(agg.count).new() - assert w11.isequal(counts) - w12 = A.T.reduce_rowwise(agg.count).new() - assert w12.isequal(counts) - - w13 = A.reduce_rowwise(agg.mean).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [2.5, 6, 1, 3, 7, 1, 5]) - assert w13.isequal(expected) - w14 = A.reduce_columnwise(agg.mean).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 3, 5, 5.5, 4, 4]) - assert w14.isequal(expected) - - w15 = A.reduce_rowwise(agg.exists).new() - w16 = 
A.reduce_columnwise(agg.exists).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 1, 1, 1, 1, 1, 1]) - assert w15.isequal(expected) - assert w16.isequal(expected) - - assert A.reduce_scalar(agg.sum).new() == 47 - assert A.reduce_scalar(agg.prod).new() == 1270080 - assert A.reduce_scalar(agg.count).new() == 12 - assert A.reduce_scalar(agg.count_nonzero).new() == 12 - assert A.reduce_scalar(agg.count_zero).new() == 0 - assert A.reduce_scalar(agg.sum_of_squares).new() == 245 - assert A.reduce_scalar(agg.hypot).new().isclose(245 ** 0.5) - assert A.reduce_scalar(agg.logaddexp).new().isclose(8.6071076) - assert A.reduce_scalar(agg.logaddexp2).new().isclose(9.2288187) - assert A.reduce_scalar(agg.mean).new().isclose(47 / 12) - assert A.reduce_scalar(agg.exists).new() == 1 - - silly = agg.Aggregator( - "silly", - composite=[agg.varp, agg.stdp], - finalize=lambda x, y: binary.times(x & y), - types=[agg.varp], - ) - v1 = A.reduce_rowwise(agg.varp).new() - v2 = A.reduce_rowwise(agg.stdp).new() - assert v1.isclose(binary.times(v2 & v2).new()) - v3 = A.reduce_rowwise(silly).new() - assert v3.isclose(binary.times(v1 & v2).new()) - s1 = A.reduce_scalar(agg.varp).new() - s2 = A.reduce_scalar(agg.stdp).new() - assert s1.isclose(s2.value.compute() * s2.value.compute()) - s3 = A.reduce_scalar(silly).new() - assert s3.isclose(s1.value.compute() * s2.value.compute()) +def test_reduce_row(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15]) + w = A.reduce_rowwise(monoid.plus).new() + assert w.isequal(result) + w2 = A.reduce_rowwise(binary.plus).new() + assert w2.isequal(result) -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_reduce_agg_argminmax(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - # reduce_rowwise - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 6, 5, 0, 5, 2, 4]) - w1b = A.reduce_rowwise(agg.argmin).new() - assert w1b.isequal(expected) - w1c = A.T.reduce_columnwise(agg.argmin).new() - assert w1c.isequal(expected) - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 4, 5, 0, 5, 2, 3]) - w2b = A.reduce_rowwise(agg.argmax).new() - assert w2b.isequal(expected) - w2c = A.T.reduce_columnwise(agg.argmax).new() - assert w2c.isequal(expected) - - # reduce_cols - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 5, 0, 6, 2, 1]) - w7b = A.reduce_columnwise(agg.argmin).new() - assert w7b.isequal(expected) - w7c = A.T.reduce_rowwise(agg.argmin).new() - assert w7c.isequal(expected) - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 6, 6, 1, 4, 1]) - w8b = A.reduce_columnwise(agg.argmax).new() - assert w8b.isequal(expected) - w8c = A.T.reduce_rowwise(agg.argmax).new() - assert w8c.isequal(expected) - - # reduce_scalar - with pytest.raises( - ValueError, match="Aggregator argmin may not be used with Matrix.reduce_scalar" - ): - A.reduce_scalar(agg.argmin) +@pytest.mark.slow +def test_reduce_agg(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15]) + w1 = A.reduce_rowwise(agg.sum).new() + assert w1.isequal(result) + w2 = A.T.reduce_columnwise(agg.sum).new() + assert w2.isequal(result) + + counts = A.dup(dtype=bool).reduce_rowwise(monoid.plus[int]).new() + w3 = A.reduce_rowwise(agg.count).new() + assert 
w3.isequal(counts) + w4 = A.T.reduce_columnwise(agg.count).new() + assert w4.isequal(counts) + + Asquared = monoid.times(A & A).new() + squared = Asquared.reduce_rowwise(monoid.plus).new() + expected = unary.sqrt[float](squared).new() + w5 = A.reduce_rowwise(agg.hypot).new() + assert w5.isclose(expected) + w6 = A.reduce_rowwise(monoid.numpy.hypot[float]).new() + assert w6.isclose(expected) + w7 = Vector.new(w5.dtype, size=w5.size) + w7 << A.reduce_rowwise(agg.hypot) + assert w7.isclose(expected) + + w8 = A.reduce_rowwise(agg.logaddexp).new() + expected = A.reduce_rowwise(monoid.numpy.logaddexp[float]).new() + assert w8.isclose(expected) + + result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 9, 10, 11, 8, 4]) + w9 = A.reduce_columnwise(agg.sum).new() + assert w9.isequal(result) + w10 = A.T.reduce_rowwise(agg.sum).new() + assert w10.isequal(result) + + counts = A.dup(dtype=bool).reduce_columnwise(monoid.plus[int]).new() + w11 = A.reduce_columnwise(agg.count).new() + assert w11.isequal(counts) + w12 = A.T.reduce_rowwise(agg.count).new() + assert w12.isequal(counts) + + w13 = A.reduce_rowwise(agg.mean).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [2.5, 6, 1, 3, 7, 1, 5]) + assert w13.isequal(expected) + w14 = A.reduce_columnwise(agg.mean).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 3, 5, 5.5, 4, 4]) + assert w14.isequal(expected) + + w15 = A.reduce_rowwise(agg.exists).new() + w16 = A.reduce_columnwise(agg.exists).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 1, 1, 1, 1, 1, 1]) + assert w15.isequal(expected) + assert w16.isequal(expected) + + assert A.reduce_scalar(agg.sum).new() == 47 + assert A.reduce_scalar(agg.prod).new() == 1270080 + assert A.reduce_scalar(agg.count).new() == 12 + assert A.reduce_scalar(agg.count_nonzero).new() == 12 + assert A.reduce_scalar(agg.count_zero).new() == 0 + assert A.reduce_scalar(agg.sum_of_squares).new() == 245 + assert A.reduce_scalar(agg.hypot).new().isclose(245 ** 0.5) + assert A.reduce_scalar(agg.logaddexp).new().isclose(8.6071076) + assert A.reduce_scalar(agg.logaddexp2).new().isclose(9.2288187) + assert A.reduce_scalar(agg.mean).new().isclose(47 / 12) + assert A.reduce_scalar(agg.exists).new() == 1 + + silly = agg.Aggregator( + "silly", + composite=[agg.varp, agg.stdp], + finalize=lambda x, y: binary.times(x & y), + types=[agg.varp], + ) + v1 = A.reduce_rowwise(agg.varp).new() + v2 = A.reduce_rowwise(agg.stdp).new() + assert v1.isclose(binary.times(v2 & v2).new()) + v3 = A.reduce_rowwise(silly).new() + assert v3.isclose(binary.times(v1 & v2).new()) + + s1 = A.reduce_scalar(agg.varp).new() + s2 = A.reduce_scalar(agg.stdp).new() + assert s1.isclose(s2.value.compute() * s2.value.compute()) + s3 = A.reduce_scalar(silly).new() + assert s3.isclose(s1.value.compute() * s2.value.compute()) + + +def test_reduce_agg_argminmax(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + # reduce_rowwise + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 6, 5, 0, 5, 2, 4]) + w1b = A.reduce_rowwise(agg.argmin).new() + assert w1b.isequal(expected) + w1c = A.T.reduce_columnwise(agg.argmin).new() + assert w1c.isequal(expected) + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 4, 5, 0, 5, 2, 3]) + w2b = A.reduce_rowwise(agg.argmax).new() + assert w2b.isequal(expected) + w2c = A.T.reduce_columnwise(agg.argmax).new() + assert w2c.isequal(expected) + + # reduce_cols + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 5, 0, 6, 2, 1]) +
w7b = A.reduce_columnwise(agg.argmin).new() + assert w7b.isequal(expected) + w7c = A.T.reduce_rowwise(agg.argmin).new() + assert w7c.isequal(expected) + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 6, 6, 1, 4, 1]) + w8b = A.reduce_columnwise(agg.argmax).new() + assert w8b.isequal(expected) + w8c = A.T.reduce_rowwise(agg.argmax).new() + assert w8c.isequal(expected) + + # reduce_scalar + with pytest.raises( + ValueError, match="Aggregator argmin may not be used with Matrix.reduce_scalar" + ): + A.reduce_scalar(agg.argmin) - silly = agg.Aggregator( - "silly", - composite=[agg.argmin, agg.argmax], - finalize=lambda x, y: binary.plus(x & y), - types=[agg.argmin], - ) - v1 = A.reduce_rowwise(agg.argmin).new() - v2 = A.reduce_rowwise(agg.argmax).new() - v3 = A.reduce_rowwise(silly).new() - assert v3.isequal(binary.plus(v1 & v2).new()) + silly = agg.Aggregator( + "silly", + composite=[agg.argmin, agg.argmax], + finalize=lambda x, y: binary.plus(x & y), + types=[agg.argmin], + ) + v1 = A.reduce_rowwise(agg.argmin).new() + v2 = A.reduce_rowwise(agg.argmax).new() + v3 = A.reduce_rowwise(silly).new() + assert v3.isequal(binary.plus(v1 & v2).new()) - v1 = A.reduce_columnwise(agg.argmin).new() - v2 = A.reduce_columnwise(agg.argmax).new() - v3 = A.reduce_columnwise(silly).new() - assert v3.isequal(binary.plus(v1 & v2).new()) + v1 = A.reduce_columnwise(agg.argmin).new() + v2 = A.reduce_columnwise(agg.argmax).new() + v3 = A.reduce_columnwise(silly).new() + assert v3.isequal(binary.plus(v1 & v2).new()) - with pytest.raises(ValueError, match="Aggregator"): - A.reduce_scalar(silly).new() + with pytest.raises(ValueError, match="Aggregator"): + A.reduce_scalar(silly).new() -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_reduce_agg_firstlast(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - # reduce_rowwise - w1 = A.reduce_rowwise(agg.first).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [2, 8, 1, 3, 7, 1, 5]) - assert w1.isequal(expected) - w1b = A.T.reduce_columnwise(agg.first).new() - assert w1b.isequal(expected) - w2 = A.reduce_rowwise(agg.last).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 4, 1, 3, 7, 1, 3]) - assert w2.isequal(expected) - w2b = A.T.reduce_columnwise(agg.last).new() - assert w2b.isequal(expected) - - # reduce_columnwise - w3 = A.reduce_columnwise(agg.first).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 3, 3, 8, 1, 4]) - assert w3.isequal(expected) - w3b = A.T.reduce_rowwise(agg.first).new() - assert w3b.isequal(expected) - w4 = A.reduce_columnwise(agg.last).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 5, 7, 3, 7, 4]) - assert w4.isequal(expected) - w4b = A.T.reduce_rowwise(agg.last).new() - assert w4b.isequal(expected) - - # reduce_scalar - w5 = A.reduce_scalar(agg.first).new() - assert w5 == 2 - w6 = A.reduce_scalar(agg.last).new() - assert w6 == 3 - B = Matrix.new(float, nrows=2, ncols=3) - assert B.reduce_scalar(agg.first).new().is_empty - assert B.reduce_scalar(agg.last).new().is_empty - w7 = B.reduce_rowwise(agg.first).new() - assert w7.isequal(Vector.new(float, size=B.nrows)) - w8 = B.reduce_columnwise(agg.last).new() - assert w8.isequal(Vector.new(float, size=B.ncols)) - - silly = agg.Aggregator( - "silly", - composite=[agg.first, agg.last], - finalize=lambda x, y: binary.plus(x & y), - types=[agg.first], - ) - v1 = A.reduce_rowwise(agg.first).new() - v2 = A.reduce_rowwise(agg.last).new() - v3 = 
A.reduce_rowwise(silly).new() - assert v3.isequal(binary.plus(v1 & v2).new()) +def test_reduce_agg_firstlast(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + # reduce_rowwise + w1 = A.reduce_rowwise(agg.first).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [2, 8, 1, 3, 7, 1, 5]) + assert w1.isequal(expected) + w1b = A.T.reduce_columnwise(agg.first).new() + assert w1b.isequal(expected) + w2 = A.reduce_rowwise(agg.last).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 4, 1, 3, 7, 1, 3]) + assert w2.isequal(expected) + w2b = A.T.reduce_columnwise(agg.last).new() + assert w2b.isequal(expected) + + # reduce_columnwise + w3 = A.reduce_columnwise(agg.first).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 3, 3, 8, 1, 4]) + assert w3.isequal(expected) + w3b = A.T.reduce_rowwise(agg.first).new() + assert w3b.isequal(expected) + w4 = A.reduce_columnwise(agg.last).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 5, 7, 3, 7, 4]) + assert w4.isequal(expected) + w4b = A.T.reduce_rowwise(agg.last).new() + assert w4b.isequal(expected) + + # reduce_scalar + w5 = A.reduce_scalar(agg.first).new() + assert w5 == 2 + w6 = A.reduce_scalar(agg.last).new() + assert w6 == 3 + B = Matrix.new(float, nrows=2, ncols=3) + assert B.reduce_scalar(agg.first).new().is_empty + assert B.reduce_scalar(agg.last).new().is_empty + w7 = B.reduce_rowwise(agg.first).new() + assert w7.isequal(Vector.new(float, size=B.nrows)) + w8 = B.reduce_columnwise(agg.last).new() + assert w8.isequal(Vector.new(float, size=B.ncols)) + + silly = agg.Aggregator( + "silly", + composite=[agg.first, agg.last], + finalize=lambda x, y: binary.plus(x & y), + types=[agg.first], + ) + v1 = A.reduce_rowwise(agg.first).new() + v2 = A.reduce_rowwise(agg.last).new() + v3 = A.reduce_rowwise(silly).new() + assert v3.isequal(binary.plus(v1 & v2).new()) - s1 = A.reduce_scalar(agg.first).new() - s2 = A.reduce_scalar(agg.last).new() - s3 = A.reduce_scalar(silly).new() - assert s3.isequal(s1.value.compute() + s2.value.compute()) + s1 = A.reduce_scalar(agg.first).new() + s2 = A.reduce_scalar(agg.last).new() + s3 = A.reduce_scalar(silly).new() + assert s3.isequal(s1.value.compute() + s2.value.compute()) -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_reduce_agg_firstlast_index(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - # reduce_rowwise - w1 = A.reduce_rowwise(agg.first_index).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 4, 5, 0, 5, 2, 2]) - assert w1.isequal(expected) - w1b = A.T.reduce_columnwise(agg.first_index).new() - assert w1b.isequal(expected) - w2 = A.reduce_rowwise(agg.last_index).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 6, 5, 2, 5, 2, 4]) - assert w2.isequal(expected) - w2b = A.T.reduce_columnwise(agg.last_index).new() - assert w2b.isequal(expected) - - # reduce_columnwise - w3 = A.reduce_columnwise(agg.first_index).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 3, 0, 1, 2, 1]) - assert w3.isequal(expected) - w3b = A.T.reduce_rowwise(agg.first_index).new() - assert w3b.isequal(expected) - w4 = A.reduce_columnwise(agg.last_index).new() - expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 6, 6, 6, 4, 1]) - assert w4.isequal(expected) - w4b = A.T.reduce_rowwise(agg.last_index).new() - assert w4b.isequal(expected) - - # reduce_scalar - with pytest.raises(ValueError, 
match="Aggregator first_index may not"): - A.reduce_scalar(agg.first_index).new() - with pytest.raises(ValueError, match="Aggregator last_index may not"): - A.reduce_scalar(agg.last_index).new() - - silly = agg.Aggregator( - "silly", - composite=[agg.first_index, agg.last_index], - finalize=lambda x, y: binary.plus(x & y), - types=[agg.first_index], - ) - v1 = A.reduce_rowwise(agg.first_index).new() - v2 = A.reduce_rowwise(agg.last_index).new() - v3 = A.reduce_rowwise(silly).new() - assert v3.isequal(binary.plus(v1 & v2).new()) +def test_reduce_agg_firstlast_index(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + # reduce_rowwise + w1 = A.reduce_rowwise(agg.first_index).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [1, 4, 5, 0, 5, 2, 2]) + assert w1.isequal(expected) + w1b = A.T.reduce_columnwise(agg.first_index).new() + assert w1b.isequal(expected) + w2 = A.reduce_rowwise(agg.last_index).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 6, 5, 2, 5, 2, 4]) + assert w2.isequal(expected) + w2b = A.T.reduce_columnwise(agg.last_index).new() + assert w2b.isequal(expected) + + # reduce_columnwise + w3 = A.reduce_columnwise(agg.first_index).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 3, 0, 1, 2, 1]) + assert w3.isequal(expected) + w3b = A.T.reduce_rowwise(agg.first_index).new() + assert w3b.isequal(expected) + w4 = A.reduce_columnwise(agg.last_index).new() + expected = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 0, 6, 6, 6, 4, 1]) + assert w4.isequal(expected) + w4b = A.T.reduce_rowwise(agg.last_index).new() + assert w4b.isequal(expected) + + # reduce_scalar + with pytest.raises(ValueError, match="Aggregator first_index may not"): + A.reduce_scalar(agg.first_index).new() + with pytest.raises(ValueError, match="Aggregator last_index may not"): + A.reduce_scalar(agg.last_index).new() + + silly = agg.Aggregator( + "silly", + composite=[agg.first_index, agg.last_index], + finalize=lambda x, y: binary.plus(x & y), + types=[agg.first_index], + ) + v1 = A.reduce_rowwise(agg.first_index).new() + v2 = A.reduce_rowwise(agg.last_index).new() + v3 = A.reduce_rowwise(silly).new() + assert v3.isequal(binary.plus(v1 & v2).new()) - with pytest.raises(ValueError, match="Aggregator"): - A.reduce_scalar(silly).new() + with pytest.raises(ValueError, match="Aggregator"): + A.reduce_scalar(silly).new() -@pytest.mark.xfail("'Needs investigation'", strict=True) def test_reduce_agg_empty(A_chunks): A = Matrix.new("UINT8", nrows=3, ncols=4) A_ = A @@ -1614,86 +1977,85 @@ def test_reduce_agg_empty(A_chunks): assert compute(s.value) is None -def test_reduce_row_udf(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15]) - binop = grblas.operator.BinaryOp.register_anonymous(lambda x, y: x + y) - with pytest.raises(NotImplementedException): - # Although allowed by the spec, SuiteSparse doesn't like user-defined binarops here - A.reduce_rowwise(binop).new() - # If the user creates a monoid from the binop, then we can use the monoid instead - monoid = grblas.operator.Monoid.register_anonymous(binop, 0) - w = A.reduce_rowwise(binop).new() - assert w.isequal(result) - w2 = A.reduce_rowwise(monoid).new() - assert w2.isequal(result) - - -def test_reduce_column(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - result = 
Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 9, 10, 11, 8, 4]) - w = A.reduce_columnwise(monoid.plus).new() - assert w.isequal(result) - w2 = A.reduce_columnwise(binary.plus).new() - assert w2.isequal(result) +def test_reduce_row_udf(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [5, 12, 1, 6, 7, 1, 15]) + binop = grblas.operator.BinaryOp.register_anonymous(lambda x, y: x + y) + with pytest.raises(NotImplementedException): + # Although allowed by the spec, SuiteSparse doesn't like user-defined binarops here + A.reduce_rowwise(binop).new() + # If the user creates a monoid from the binop, then we can use the monoid instead + monoid = grblas.operator.Monoid.register_anonymous(binop, 0) + w = A.reduce_rowwise(binop).new() + assert w.isequal(result) + w2 = A.reduce_rowwise(monoid).new() + assert w2.isequal(result) + + +def test_reduce_column(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + result = Vector.from_values([0, 1, 2, 3, 4, 5, 6], [3, 2, 9, 10, 11, 8, 4]) + w = A.reduce_columnwise(monoid.plus).new() + assert w.isequal(result) + w2 = A.reduce_columnwise(binary.plus).new() + assert w2.isequal(result) -def test_reduce_scalar(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - s = A.reduce_scalar(monoid.plus).new() - assert s == 47 - assert A.reduce_scalar(binary.plus).new() == 47 - with pytest.raises(TypeError, match="Expected type: Monoid"): - A.reduce_scalar(binary.minus) - - # test dtype coercion - assert A.dtype == dtypes.INT64 - s = A.reduce_scalar().new(dtype=float) - assert s == 47.0 - assert s.dtype == dtypes.FP64 - t = Scalar.new(float) - t << A.reduce_scalar(monoid.plus) - assert t == 47.0 - t = Scalar.new(float) - t() << A.reduce_scalar(monoid.plus) - assert t == 47.0 - t(accum=binary.times) << A.reduce_scalar(monoid.plus) - assert t == 47 * 47 - assert A.reduce_scalar(monoid.plus[dtypes.UINT64]).new() == 47 - # Make sure we accumulate as a float, not int - t.value = 1.23 - t(accum=binary.plus) << A.reduce_scalar() - assert t == 48.23 - - -def test_transpose(A, A_chunks): +def test_reduce_scalar(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + s = A.reduce_scalar(monoid.plus).new() + assert s == 47 + assert A.reduce_scalar(binary.plus).new() == 47 + with pytest.raises(TypeError, match="Expected type: Monoid"): + A.reduce_scalar(binary.minus) + + # test dtype coercion + assert A.dtype == dtypes.INT64 + s = A.reduce_scalar().new(dtype=float) + assert s == 47.0 + assert s.dtype == dtypes.FP64 + t = Scalar.new(float) + t << A.reduce_scalar(monoid.plus) + assert t == 47.0 + t = Scalar.new(float) + t() << A.reduce_scalar(monoid.plus) + assert t == 47.0 + t(accum=binary.times) << A.reduce_scalar(monoid.plus) + assert t == 47 * 47 + assert A.reduce_scalar(monoid.plus[dtypes.UINT64]).new() == 47 + # Make sure we accumulate as a float, not int + t.value = 1.23 + t(accum=binary.plus) << A.reduce_scalar() + assert t == 48.23 + + +def test_transpose(As, A_chunks): # C << A.T - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - rows, cols, vals = A.to_values() - result = Matrix.from_values(cols, rows, vals) - C = Matrix.new(A.dtype, A.ncols, A.nrows) - C << A.T - assert C.isequal(result) - C2 = A.T.new() - assert C2.isequal(result) - assert A.T.T 
is A - C3 = A.T.new(dtype=float) - assert C3.isequal(result) + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + rows, cols, vals = A.to_values() + result = Matrix.from_values(cols, rows, vals) + C = Matrix.new(A.dtype, A.ncols, A.nrows) + C << A.T + assert C.isequal(result) + C2 = A.T.new() + assert C2.isequal(result) + assert A.T.T is A + C3 = A.T.new(dtype=float) + assert C3.isequal(result) -@pytest.mark.xfail("'Needs investigation'", strict=True) def test_kronecker(): # A 0 1 B 0 1 2 # 0 [1 -] 0 [- 2 3] @@ -1704,174 +2066,199 @@ def test_kronecker(): # 1 [8 - 4 - - - ] # 2 [- 4 6 - 6 9 ] # 3 [16 - 8 24 - 12] - A = Matrix.from_values([0, 1, 1], [0, 0, 1], [1, 2, 3]) - B = Matrix.from_values([0, 0, 1, 1], [1, 2, 0, 2], [2, 3, 8, 4]) + A0 = Matrix.from_values([0, 1, 1], [0, 0, 1], [1, 2, 3]) + A1 = Matrix.from_values( + da.from_array([0, 1, 1]), + da.from_array([0, 0, 1]), + da.from_array([1, 2, 3]), + ) + As = [A0, A1] + B0 = Matrix.from_values([0, 0, 1, 1], [1, 2, 0, 2], [2, 3, 8, 4]) + B1 = Matrix.from_values( + da.from_array([0, 0, 1, 1]), + da.from_array([1, 2, 0, 2]), + da.from_array([2, 3, 8, 4]), + ) + Bs = [B0, B1] result = Matrix.from_values( [0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3], [1, 2, 0, 2, 1, 2, 4, 5, 0, 2, 3, 5], [2, 3, 8, 4, 4, 6, 6, 9, 16, 8, 24, 12], ) - C = A.kronecker(B, binary.times).new() - assert C.isequal(result) + for A in As: + for B in Bs: + C = A.kronecker(B, binary.times).new() + assert C.isequal(result) -def test_simple_assignment(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - # C << A - C = Matrix.new(A.dtype, A.nrows, A.ncols) - C << A - assert C.isequal(A) - - -def test_assign_transpose(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - C = Matrix.new(A.dtype, A.ncols, A.nrows) - C << A.T - assert C.isequal(A.T.new()) - - with pytest.raises(TypeError): - C.T << A - with pytest.raises(TypeError, match="does not support item assignment"): - C.T[:, :] << A - with pytest.raises(AttributeError): - C[:, :].T << A +def test_simple_assignment(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + # C << A + C = Matrix.new(A.dtype, A.nrows, A.ncols) + C << A + assert C.isequal(A) - C = Matrix.new(A.dtype, A.ncols + 1, A.nrows + 1) - C[: A.ncols, : A.nrows] << A.T - assert C[: A.ncols, : A.nrows].new().isequal(A.T.new()) +def test_assign_transpose(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + C = Matrix.new(A.dtype, A.ncols, A.nrows) + C << A.T + assert C.isequal(A.T.new()) -def test_isequal(A, A_chunks, v): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert A.isequal(A) - with pytest.raises(TypeError, match="Matrix"): - A.isequal(v) # equality is not type-checking - C = Matrix.from_values([1], [1], [1]) - assert not C.isequal(A) - D = Matrix.from_values([1], [2], [1]) - assert not C.isequal(D) - D2 = Matrix.from_values([0], [2], [1], nrows=D.nrows, ncols=D.ncols) - assert not D2.isequal(D) - C2 = Matrix.from_values([1], [1], [1], nrows=7, ncols=7) - assert not C2.isequal(A) - C3 = Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 3.0, 1.0, 7.0, 4.0], - ) - assert not C3.isequal(A, check_dtype=True), "different datatypes are not equal" - C4 
= Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [3.0, 2.0, 3.0, 1.0, 5.0, 3.000000000000000001, 7.0, 8.0, 3.0, 1 - 1e-11, 7.0, 4.0], - ) - assert not C4.isequal(A) + with pytest.raises(TypeError): + C.T << A + with pytest.raises(TypeError, match="does not support item assignment"): + C.T[:, :] << A + with pytest.raises(TypeError, match="autocompute"): + C[:, :].T << A + + nrows, ncols = A.nrows, A.ncols + if A.is_dOnion: + nrows, ncols = nrows.compute(), ncols.compute() + C = Matrix.new(A.dtype, ncols + 1, nrows + 1) + C[:ncols, :nrows] << A.T + assert C[:ncols, :nrows].new().isequal(A.T.new()) + + +def test_isequal(As, A_chunks, v): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert A.isequal(A) + with pytest.raises(TypeError, match="Matrix"): + A.isequal(v) # equality is not type-checking + C = Matrix.from_values([1], [1], [1]) + assert not C.isequal(A) + D = Matrix.from_values([1], [2], [1]) + assert not C.isequal(D) + D2 = Matrix.from_values([0], [2], [1], nrows=D.nrows, ncols=D.ncols) + assert not D2.isequal(D) + C2 = Matrix.from_values([1], [1], [1], nrows=7, ncols=7) + assert not C2.isequal(A) + C3 = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 3.0, 1.0, 7.0, 4.0], + ) + assert not C3.isequal(A, check_dtype=True), "different datatypes are not equal" + C4 = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [3.0, 2.0, 3.0, 1.0, 5.0, 3.000000000000000001, 7.0, 8.0, 3.0, 1 - 1e-11, 7.0, 4.0], + ) + assert not C4.isequal(A) -@pytest.mark.slow -def test_isclose(A, A_chunks, v): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert A.isclose(A) - with pytest.raises(TypeError, match="Matrix"): - A.isclose(v) # equality is not type-checking - C = Matrix.from_values([1], [1], [1]) # wrong size - assert not C.isclose(A) - D = Matrix.from_values([1], [2], [1]) - assert not C.isclose(D) - D2 = Matrix.from_values([0], [2], [1], nrows=D.nrows, ncols=D.ncols) - assert not D2.isclose(D) - C2 = Matrix.from_values([1], [1], [1], nrows=7, ncols=7) # missing values - assert not C2.isclose(A) - C3 = Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 0], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 2], - [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 3], - ) # extra values - assert not C3.isclose(A) - C4 = Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 3.0, 1.0, 7.0, 4.0], - ) - assert not C4.isclose(A, check_dtype=True), "different datatypes are not equal" - # fmt: off - C5 = Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [3.0, 2.0, 3.0, 1.0, 5.0, 3.000000000000000001, 7.0, 8.0, 3.0, 1 - 1e-11, 7.0, 4.0], - ) - # fmt: on - assert C5.isclose(A) - C6 = Matrix.from_values( - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [3.0, 2.000001, 3.0, 1.0, 5.0, 3.0, 7.0, 7.9999999, 3.0, 1.0, 7.0, 4.0], - ) - assert C6.isclose(A, rel_tol=1e-3) +def test_isclose(As, A_chunks, v): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert A.isclose(A) + with pytest.raises(TypeError, match="Matrix"): + A.isclose(v) # equality is not type-checking + C = Matrix.from_values([1], [1], [1]) # wrong size 
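+ # isclose, like isequal, compares shape and nvals before values, so this
+ # shape mismatch simply yields False rather than raising.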
+ assert not C.isclose(A) + D = Matrix.from_values([1], [2], [1]) + assert not C.isclose(D) + D2 = Matrix.from_values([0], [2], [1], nrows=D.nrows, ncols=D.ncols) + assert not D2.isclose(D) + C2 = Matrix.from_values([1], [1], [1], nrows=7, ncols=7) # missing values + assert not C2.isclose(A) + C3 = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1, 0], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 2], + [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4, 3], + ) # extra values + assert not C3.isclose(A) + C4 = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [3.0, 2.0, 3.0, 1.0, 5.0, 3.0, 7.0, 8.0, 3.0, 1.0, 7.0, 4.0], + ) + assert not C4.isclose(A, check_dtype=True), "different datatypes are not equal" + # fmt: off + C5 = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [3.0, 2.0, 3.0, 1.0, 5.0, 3.000000000000000001, 7.0, 8.0, 3.0, 1 - 1e-11, 7.0, 4.0], + ) + # fmt: on + assert C5.isclose(A) + C6 = Matrix.from_values( + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [3.0, 2.000001, 3.0, 1.0, 5.0, 3.0, 7.0, 7.9999999, 3.0, 1.0, 7.0, 4.0], + ) + assert C6.isclose(A, rel_tol=1e-3) -@pytest.mark.slow -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_transpose_equals(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - data = [ - [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], - [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], - [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4], - ] - B = Matrix.from_values(*data) - assert A.isequal(B.T) - assert B.isequal(A.T) - assert A.T.isequal(B) - assert A.T.isequal(A.T) - assert A.isclose(A) - assert A.isclose(B.T) - assert B.isclose(A.T) - assert A.T.isclose(B) - assert A.T.isclose(A.T) +def test_transpose_equals(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + data = [ + [0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6], + [3, 0, 3, 5, 6, 0, 6, 1, 6, 2, 4, 1], + [3, 2, 3, 1, 5, 3, 7, 8, 3, 1, 7, 4], + ] + B = Matrix.from_values(*data) + assert A.isequal(B.T) + assert B.isequal(A.T) + assert A.T.isequal(B) + assert A.T.isequal(A.T) + assert A.isclose(A) + assert A.isclose(B.T) + assert B.isclose(A.T) + assert A.T.isclose(B) + assert A.T.isclose(A.T) -@pytest.mark.xfail("'Needs investigation'", strict=True) def test_transpose_exceptional(): - A = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [True, True, False, True]) - B = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [1, 2, 3, 4]) - - with pytest.raises(TypeError, match="not callable"): - B.T(mask=A.V) << B.ewise_mult(B, op=binary.plus) - with pytest.raises(AttributeError): - B(mask=A.T.V) << B.ewise_mult(B, op=binary.plus) - with pytest.raises(AttributeError): - B.T(mask=A.T.V) << B.ewise_mult(B, op=binary.plus) - with pytest.raises(TypeError, match="does not support item assignment"): - B.T[1, 0] << 10 - with pytest.raises(TypeError, match="not callable"): - B.T[1, 0]() << 10 - with pytest.raises(TypeError, match="not callable"): - B.T()[1, 0] << 10 - # with pytest.raises(AttributeError): - # should use new instead--Now okay. - assert B.T.dup().isequal(B.T.new()) - # Not exceptional, but while we're here... 
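+ # Build each fixture twice below: A0/B0 from in-memory lists and A1/B1 from
+ # dask arrays, so both construction paths are covered.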
- C = B.T.new(mask=A.V) - D = B.T.new() - D = D.dup(mask=A.V) - assert C.isequal(D) - assert C.isequal(Matrix.from_values([0, 0, 1], [0, 1, 1], [1, 3, 4])) + A0 = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [True, True, False, True]) + B0 = Matrix.from_values([0, 0, 1, 1], [0, 1, 0, 1], [1, 2, 3, 4]) + A1 = Matrix.from_values( + da.from_array([0, 0, 1, 1]), + da.from_array([0, 1, 0, 1]), + da.from_array([True, True, False, True]), + ) + B1 = Matrix.from_values( + da.from_array([0, 0, 1, 1]), + da.from_array([0, 1, 0, 1]), + da.from_array([1, 2, 3, 4]), + ) + As, Bs = [A0, A1], [B0, B1] + for A in As: + for B in Bs: + with pytest.raises(TypeError, match="not callable"): + B.T(mask=A.V) << B.ewise_mult(B, op=binary.plus) + with pytest.raises(AttributeError): + B(mask=A.T.V) << B.ewise_mult(B, op=binary.plus) + with pytest.raises(AttributeError): + B.T(mask=A.T.V) << B.ewise_mult(B, op=binary.plus) + with pytest.raises(TypeError, match="does not support item assignment"): + B.T[1, 0] << 10 + with pytest.raises(TypeError, match="not callable"): + B.T[1, 0]() << 10 + with pytest.raises(TypeError, match="not callable"): + B.T()[1, 0] << 10 + # with pytest.raises(AttributeError): + # should use new instead--Now okay. + assert B.T.dup().isequal(B.T.new()) + # Not exceptional, but while we're here... + C = B.T.new(mask=A.V) + D = B.T.new() + D = D.dup(mask=A.V) + assert C.isequal(D) + assert C.isequal(Matrix.from_values([0, 0, 1], [0, 1, 1], [1, 3, 4])) def test_nested_matrix_operations(): @@ -1887,37 +2274,38 @@ def test_bad_init(): Matrix(None, float, name="bad_matrix") -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_equals(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert (A == A).new().reduce_scalar(monoid.land) +def test_equals(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert (A == A).new().reduce_scalar(monoid.land).new() -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_bad_update(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - with pytest.raises(TypeError, match="Assignment value must be a valid expression"): - A << None +def test_bad_update(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + with pytest.raises(TypeError, match="Assignment value must be a valid expression"): + A << None + A.compute() -def test_incompatible_shapes(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - B = A[:-1, :-1].new() - with pytest.raises(DimensionMismatch): - A.mxm(B) - with pytest.raises(DimensionMismatch): - A.ewise_add(B) - with pytest.raises(DimensionMismatch): - A.ewise_mult(B) +def test_incompatible_shapes(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + B = A[:-1, :-1].new() + with pytest.raises(DimensionMismatch): + A.mxm(B).new().compute() + A = A_.dup() + with pytest.raises(DimensionMismatch): + A.ewise_add(B).new().compute() + A = A_.dup() + with pytest.raises(DimensionMismatch): + A.ewise_mult(B).new().compute() @pytest.mark.xfail("'Needs investigation'", strict=True) @@ -2509,104 +2897,100 @@ def import_func(**kwargs): assert C_orig.ss.is_iso is do_iso -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_no_bool_or_eq(A, A_chunks): - A_ = A - for chunks in 
A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - with pytest.raises(TypeError, match="not defined"): - bool(A) - # with pytest.raises(TypeError, match="not defined"): - A == A - with pytest.raises(TypeError, match="not defined"): - bool(A.S) - with pytest.raises(TypeError, match="not defined"): - A.S == A.S - expr = A.ewise_mult(A) - with pytest.raises(TypeError, match="not defined"): - bool(expr) - with pytest.raises(TypeError, match="not enabled"): - expr == expr - assigner = A[1, 2]() - with pytest.raises(TypeError, match="not defined"): - bool(assigner) - with pytest.raises(TypeError, match="not defined"): - assigner == assigner - updater = A() - with pytest.raises(TypeError, match="not defined"): - bool(updater) - with pytest.raises(TypeError, match="not defined"): - updater == updater +def test_no_bool_or_eq(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + with pytest.raises(TypeError, match="not defined"): + bool(A) + # with pytest.raises(TypeError, match="not defined"): + A == A + with pytest.raises(TypeError, match="not defined"): + bool(A.S) + with pytest.raises(TypeError, match="not defined"): + A.S == A.S + expr = A.ewise_mult(A) + with pytest.raises(TypeError, match="not defined"): + bool(expr) + with pytest.raises(TypeError, match="not enabled"): + expr == expr + assigner = A[1, 2]() + with pytest.raises(TypeError, match="not defined"): + bool(assigner) + with pytest.raises(TypeError, match="not defined"): + assigner == assigner + updater = A() + with pytest.raises(TypeError, match="not defined"): + bool(updater) + with pytest.raises(TypeError, match="not defined"): + updater == updater @autocompute -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_bool_eq_on_scalar_expressions(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - expr = A.reduce_scalar() - assert expr == 47 - assert bool(expr) - assert int(expr) == 47 - assert float(expr) == 47.0 - assert range(expr) == range(47) - - expr = A[0, 1] - assert expr == 2 - assert bool(expr) - assert int(expr) == 2 - assert float(expr) == 2.0 - assert range(expr) == range(2) - - expr = A[0, [1, 1]] - with pytest.raises(TypeError, match="not defined"): - expr == expr - with pytest.raises(TypeError, match="not defined"): - bool(expr) - with pytest.raises(TypeError, match="not defined"): - int(expr) - with pytest.raises(TypeError, match="not defined"): - float(expr) - with pytest.raises(TypeError, match="not defined"): - range(expr) - - -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_bool_eq_on_scalar_expressions_no_auto(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - expr = A.reduce_scalar() - with pytest.raises(TypeError, match="autocompute"): - expr == 47 - with pytest.raises(TypeError, match="autocompute"): - bool(expr) - with pytest.raises(TypeError, match="autocompute"): - int(expr) +def test_bool_eq_on_scalar_expressions(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + expr = A.reduce_scalar() + assert expr == 47 + assert bool(expr) + assert int(expr) == 47 + assert float(expr) == 47.0 + assert range(expr) == range(47) + + expr = A[0, 1] + assert expr == 2 + assert bool(expr) + assert int(expr) == 2 + assert float(expr) == 2.0 + assert range(expr) == range(2) + + expr = A[0, [1, 1]] + # with pytest.raises(TypeError, match="not 
defined"): + expr == expr # Now okay + with pytest.raises(TypeError, match="not defined"): + bool(expr) + with pytest.raises(TypeError): + int(expr) + with pytest.raises(TypeError): + float(expr) + with pytest.raises(TypeError): + range(expr) -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_contains(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert (0, 1) in A - assert (1, 0) in A.T +def test_bool_eq_on_scalar_expressions_no_auto(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + expr = A.reduce_scalar() + with pytest.raises(TypeError, match="autocompute"): + expr == 47 + with pytest.raises(TypeError, match="autocompute"): + bool(expr) + with pytest.raises(TypeError, match="autocompute"): + int(expr) + + +def test_contains(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert (0, 1) in A + assert (1, 0) in A.T - assert (0, 1) not in A.T - assert (1, 0) not in A + assert (0, 1) not in A.T + assert (1, 0) not in A - with pytest.raises(TypeError): - 1 in A - with pytest.raises(TypeError): - (1,) in A.T - with pytest.raises(TypeError, match="Invalid index"): - (1, [1, 2]) in A + with pytest.raises(TypeError): + 1 in A + with pytest.raises(TypeError): + (1,) in A.T + with pytest.raises(TypeError, match="Invalid index"): + (1, [1, 2]) in A @pytest.mark.xfail("'Needs investigation'", strict=True) @@ -2845,111 +3229,109 @@ def test_nbytes(A, A_chunks): @autocompute -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_auto(A, A_chunks, v): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - expected = binary.land[bool](A & A).new() - B = A.dup(dtype=bool) - for expr in [(B & B), binary.land[bool](A & A)]: - assert expr.dtype == expected.dtype - assert expr.nrows == expected.nrows - assert expr.ncols == expected.ncols - assert expr.shape == expected.shape - assert expr.nvals == expected.nvals - assert expr.isclose(expected) - assert expected.isclose(expr) - assert expr.isequal(expected) - assert expected.isequal(expr) - assert expr.mxv(v).isequal(expected.mxv(v)) - assert expected.T.mxv(v).isequal(expr.T.mxv(v)) - for method in [ - # "ewise_add", - # "ewise_mult", - # "mxm", - # "__matmul__", - "__and__", - "__or__", - # "kronecker", - ]: - val1 = getattr(expected, method)(expected).new() - val2 = getattr(expected, method)(expr) - val3 = getattr(expr, method)(expected) - val4 = getattr(expr, method)(expr) - assert val1.isequal(val2) - assert val1.isequal(val3) - assert val1.isequal(val4) - for method in ["reduce_rowwise", "reduce_columnwise", "reduce_scalar"]: - s1 = getattr(expected, method)(monoid.lor).new() - s2 = getattr(expr, method)(monoid.lor) - assert s1.isequal(s2.new()) - assert s1.isequal(s2) - - expected = binary.times(A & A).new() - for expr in [binary.times(A & A)]: - assert expr.dtype == expected.dtype - assert expr.nrows == expected.nrows - assert expr.ncols == expected.ncols - assert expr.shape == expected.shape - assert expr.nvals == expected.nvals - assert expr.isclose(expected) - assert expected.isclose(expr) - assert expr.isequal(expected) - assert expected.isequal(expr) - assert expr.mxv(v).isequal(expected.mxv(v)) - assert expected.T.mxv(v).isequal(expr.T.mxv(v)) - for method in [ - "ewise_add", - "ewise_mult", - "mxm", - # "__matmul__", - # "__and__", - # "__or__", - "kronecker", - ]: - val1 = 
getattr(expected, method)(expected).new() - val2 = getattr(expected, method)(expr) - val3 = getattr(expr, method)(expected) - val4 = getattr(expr, method)(expr) - assert val1.isequal(val2) - assert val1.isequal(val3) - assert val1.isequal(val4) - for method in ["reduce_rowwise", "reduce_columnwise", "reduce_scalar"]: - s1 = getattr(expected, method)().new() - s2 = getattr(expr, method)() - assert s1.isequal(s2.new()) - assert s1.isequal(s2) - - expected = semiring.plus_times(A @ v).new() - for expr in [(A @ v), (v @ A.T), semiring.plus_times(A @ v)]: - assert expr.vxm(A).isequal(expected.vxm(A)) - assert expr.vxm(A).new(mask=expr.S).isequal(expected.vxm(A).new(mask=expected.S)) - assert expr.vxm(A).new(mask=expr.V).isequal(expected.vxm(A).new(mask=expected.V)) +def test_auto(As, A_chunks, v): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + expected = binary.land[bool](A & A).new() + B = A.dup(dtype=bool) + for expr in [(B & B), binary.land[bool](A & A)]: + assert expr.dtype == expected.dtype + assert expr.nrows == expected.nrows + assert expr.ncols == expected.ncols + assert expr.shape == expected.shape + assert expr.nvals == expected.nvals + assert expr.isclose(expected) + assert expected.isclose(expr) + assert expr.isequal(expected) + assert expected.isequal(expr) + assert expr.mxv(v).isequal(expected.mxv(v)) + assert expected.T.mxv(v).isequal(expr.T.mxv(v)) + for method in [ + # "ewise_add", + # "ewise_mult", + # "mxm", + # "__matmul__", + "__and__", + "__or__", + "kronecker", + ]: + val1 = getattr(expected, method)(expected).new() + val2 = getattr(expected, method)(expr) + val3 = getattr(expr, method)(expected) + val4 = getattr(expr, method)(expr) + assert val1.isequal(val2) + assert val1.isequal(val3) + assert val1.isequal(val4) + for method in ["reduce_rowwise", "reduce_columnwise", "reduce_scalar"]: + s1 = getattr(expected, method)(monoid.lor).new() + s2 = getattr(expr, method)(monoid.lor) + assert s1.isequal(s2.new()) + assert s1.isequal(s2) + + expected = binary.times(A & A).new() + for expr in [binary.times(A & A)]: + assert expr.dtype == expected.dtype + assert expr.nrows == expected.nrows + assert expr.ncols == expected.ncols + assert expr.shape == expected.shape + assert expr.nvals == expected.nvals + assert expr.isclose(expected) + assert expected.isclose(expr) + assert expr.isequal(expected) + assert expected.isequal(expr) + assert expr.mxv(v).isequal(expected.mxv(v)) + assert expected.T.mxv(v).isequal(expr.T.mxv(v)) + for method in [ + "ewise_add", + "ewise_mult", + "mxm", + # "__matmul__", + # "__and__", + # "__or__", + # "kronecker", + ]: + val1 = getattr(expected, method)(expected).new() + val2 = getattr(expected, method)(expr) + val3 = getattr(expr, method)(expected) + val4 = getattr(expr, method)(expr) + assert val1.isequal(val2) + assert val1.isequal(val3) + assert val1.isequal(val4) + for method in ["reduce_rowwise", "reduce_columnwise", "reduce_scalar"]: + s1 = getattr(expected, method)().new() + s2 = getattr(expr, method)() + assert s1.isequal(s2.new()) + assert s1.isequal(s2) + + expected = semiring.plus_times(A @ v).new() + for expr in [(A @ v), (v @ A.T), semiring.plus_times(A @ v)]: + assert expr.vxm(A).isequal(expected.vxm(A)) + assert expr.vxm(A).new(mask=expr.S).isequal(expected.vxm(A).new(mask=expected.S)) + assert expr.vxm(A).new(mask=expr.V).isequal(expected.vxm(A).new(mask=expected.V)) @autocompute -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_auto_assign(A, A_chunks): - A_ = A - 
for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - expected = A.dup() - B = A[1:4, 1:4].new(dtype=bool) - expr = B & B - expected[:3, :3] = expr.new() - A[:3, :3] = expr - assert expected.isequal(A) - with pytest.raises(TypeError): - # Not yet supported, but we could! +def test_auto_assign(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + expected = A.dup() + B = A[1:4, 1:4].new(dtype=bool) + expr = B & B + expected[:3, :3] = expr.new() + A[:3, :3] = expr + assert expected.isequal(A) + v = A[2:5, 5].new(dtype=bool) + expr = v & v + A[:3, 4] << expr + expected[:3, 4] << expr.new() + assert expected.isequal(A) + C = A[1:4, 1:4].new() A[:3, :3] = A[1:4, 1:4] - v = A[2:5, 5].new(dtype=bool) - expr = v & v - A[:3, 4] << expr - expected[:3, 4] << expr.new() - assert expected.isequal(A) + assert A[:3, :3].isequal(C) @autocompute @@ -3056,149 +3438,147 @@ def test_flatten(A, A_chunks): v.ss.reshape(A.shape + (1,)) -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_autocompute_argument_messages(A, A_chunks, v): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - with pytest.raises(TypeError, match="autocompute"): - A.ewise_mult(A & A) - with pytest.raises(TypeError, match="autocompute"): - A.mxv(A @ v) +def test_autocompute_argument_messages(As, A_chunks, v): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + with pytest.raises(TypeError, match="autocompute"): + A.ewise_mult(A & A) + with pytest.raises(TypeError, match="autocompute"): + A.mxv(A @ v) @autocompute -@pytest.mark.xfail("'Needs investigation'", strict=True) -def test_infix_sugar(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert type(A + 1) is not Matrix - assert binary.plus(A, 1).isequal(A + 1) - assert binary.plus(A.T, 1).isequal(A.T + 1) - assert binary.plus(1, A).isequal(1 + A) - assert binary.minus(A, 1).isequal(A - 1) - assert binary.minus(1, A).isequal(1 - A) - assert binary.times(A, 2).isequal(A * 2) - assert binary.times(2, A).isequal(2 * A) - assert binary.truediv(A, 2).isequal(A / 2) - assert binary.truediv(5, A).isequal(5 / A) - assert binary.floordiv(A, 2).isequal(A // 2) - assert binary.floordiv(5, A).isequal(5 // A) - assert binary.numpy.mod(A, 2).isequal(A % 2) - assert binary.numpy.mod(5, A).isequal(5 % A) - assert binary.pow(A, 2).isequal(A ** 2) - assert binary.pow(2, A).isequal(2 ** A) - assert binary.pow(A, 2).isequal(pow(A, 2)) - assert unary.ainv(A).isequal(-A) - assert unary.ainv(A.T).isequal(-A.T) - B = A.dup(dtype=bool) - assert unary.lnot(B).isequal(~B) - assert unary.lnot(B.T).isequal(~B.T) - with pytest.raises(TypeError): - assert unary.lnot(A).isequal(~A) - with pytest.raises(TypeError): - assert unary.lnot(A.T).isequal(~A.T) - assert binary.lxor(True, B).isequal(True ^ B) - assert binary.lxor(B, True).isequal(B ^ True) - with pytest.raises(TypeError): - A ^ True - with pytest.raises(TypeError): - A ^ B - with pytest.raises(TypeError): - 6 ^ B - assert binary.lt(A, 4).isequal(A < 4) - assert binary.le(A, 4).isequal(A <= 4) - assert binary.gt(A, 4).isequal(A > 4) - assert binary.ge(A, 4).isequal(A >= 4) - assert binary.eq(A, 4).isequal(A == 4) - assert binary.ne(A, 4).isequal(A != 4) - x, y = divmod(A, 3) - assert binary.floordiv(A, 3).isequal(x) - assert binary.numpy.mod(A, 3).isequal(y) - assert binary.fmod(A, 3).isequal(y) - assert 
A.isequal(binary.plus((3 * x) & y)) - x, y = divmod(-A, 3) - assert binary.floordiv(-A, 3).isequal(x) - assert binary.numpy.mod(-A, 3).isequal(y) - # assert binary.fmod(-A, 3).isequal(y) # The reason we use numpy.mod - assert (-A).isequal(binary.plus((3 * x) & y)) - x, y = divmod(3, A) - assert binary.floordiv(3, A).isequal(x) - assert binary.numpy.mod(3, A).isequal(y) - assert binary.fmod(3, A).isequal(y) - assert binary.plus(binary.times(A & x) & y).isequal(3 * unary.one(A)) - x, y = divmod(-3, A) - assert binary.floordiv(-3, A).isequal(x) - assert binary.numpy.mod(-3, A).isequal(y) - # assert binary.fmod(-3, A).isequal(y) # The reason we use numpy.mod - assert binary.plus(binary.times(A & x) & y).isequal(-3 * unary.one(A)) - - assert binary.eq(A & A).isequal(A == A) - assert binary.ne(A.T & A.T).isequal(A.T != A.T) - assert binary.lt(A & A.T).isequal(A < A.T) - assert binary.ge(A.T & A).isequal(A.T >= A) - - B = A.dup() - B += 1 - assert type(B) is Matrix - assert binary.plus(A, 1).isequal(B) - B = A.dup() - B -= 1 - assert type(B) is Matrix - assert binary.minus(A, 1).isequal(B) - B = A.dup() - B *= 2 - assert type(B) is Matrix - assert binary.times(A, 2).isequal(B) - B = A.dup(dtype=float) - B /= 2 - assert type(B) is Matrix - assert binary.truediv(A, 2).isequal(B) - B = A.dup() - B //= 2 - assert type(B) is Matrix - assert binary.floordiv(A, 2).isequal(B) - B = A.dup() - B %= 2 - assert type(B) is Matrix - assert binary.numpy.mod(A, 2).isequal(B) - B = A.dup() - B **= 2 - assert type(B) is Matrix - assert binary.pow(A, 2).isequal(B) - B = A.dup(dtype=bool) - B ^= True - assert type(B) is Matrix - assert B.isequal(~A.dup(dtype=bool)) - B = A.dup(dtype=bool) - B ^= B - assert type(B) is Matrix - assert not B.reduce_scalar(agg.any).new() - - expr = binary.plus(A & A) - assert unary.abs(expr).isequal(abs(expr)) - assert unary.ainv(expr).isequal(-expr) - with pytest.raises(TypeError): - assert unary.lnot(expr).isequal(~expr) - with pytest.raises(TypeError): - expr += 1 - with pytest.raises(TypeError): - expr -= 1 - with pytest.raises(TypeError): - expr *= 1 - with pytest.raises(TypeError): - expr /= 1 - with pytest.raises(TypeError): - expr //= 1 - with pytest.raises(TypeError): - expr %= 1 - with pytest.raises(TypeError): - expr **= 1 - with pytest.raises(TypeError): - expr ^= 1 +def test_infix_sugar(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert type(A + 1) is not Matrix + assert binary.plus(A, 1).isequal(A + 1) + assert binary.plus(A.T, 1).isequal(A.T + 1) + assert binary.plus(1, A).isequal(1 + A) + assert binary.minus(A, 1).isequal(A - 1) + assert binary.minus(1, A).isequal(1 - A) + assert binary.times(A, 2).isequal(A * 2) + assert binary.times(2, A).isequal(2 * A) + assert binary.truediv(A, 2).isequal(A / 2) + assert binary.truediv(5, A).isequal(5 / A) + assert binary.floordiv(A, 2).isequal(A // 2) + assert binary.floordiv(5, A).isequal(5 // A) + assert binary.numpy.mod(A, 2).isequal(A % 2) + assert binary.numpy.mod(5, A).isequal(5 % A) + assert binary.pow(A, 2).isequal(A ** 2) + assert binary.pow(2, A).isequal(2 ** A) + assert binary.pow(A, 2).isequal(pow(A, 2)) + assert unary.ainv(A).isequal(-A) + assert unary.ainv(A.T).isequal(-A.T) + B = A.dup(dtype=bool) + assert unary.lnot(B).isequal(~B) + assert unary.lnot(B.T).isequal(~B.T) + with pytest.raises(TypeError): + assert unary.lnot(A).isequal(~A) + with pytest.raises(TypeError): + assert unary.lnot(A.T).isequal(~A.T) + assert binary.lxor(True, 
B).isequal(True ^ B) + assert binary.lxor(B, True).isequal(B ^ True) + with pytest.raises(TypeError): + A ^ True + with pytest.raises(TypeError): + A ^ B + with pytest.raises(TypeError): + 6 ^ B + assert binary.lt(A, 4).isequal(A < 4) + assert binary.le(A, 4).isequal(A <= 4) + assert binary.gt(A, 4).isequal(A > 4) + assert binary.ge(A, 4).isequal(A >= 4) + assert binary.eq(A, 4).isequal(A == 4) + assert binary.ne(A, 4).isequal(A != 4) + x, y = divmod(A, 3) + assert binary.floordiv(A, 3).isequal(x) + assert binary.numpy.mod(A, 3).isequal(y) + assert binary.fmod(A, 3).isequal(y) + assert A.isequal(binary.plus((3 * x) & y)) + x, y = divmod(-A, 3) + assert binary.floordiv(-A, 3).isequal(x) + assert binary.numpy.mod(-A, 3).isequal(y) + # assert binary.fmod(-A, 3).isequal(y) # The reason we use numpy.mod + assert (-A).isequal(binary.plus((3 * x) & y)) + x, y = divmod(3, A) + assert binary.floordiv(3, A).isequal(x) + assert binary.numpy.mod(3, A).isequal(y) + assert binary.fmod(3, A).isequal(y) + assert binary.plus(binary.times(A & x) & y).isequal(3 * unary.one(A)) + x, y = divmod(-3, A) + assert binary.floordiv(-3, A).isequal(x) + assert binary.numpy.mod(-3, A).isequal(y) + # assert binary.fmod(-3, A).isequal(y) # The reason we use numpy.mod + assert binary.plus(binary.times(A & x) & y).isequal(-3 * unary.one(A)) + + assert binary.eq(A & A).isequal(A == A) + assert binary.ne(A.T & A.T).isequal(A.T != A.T) + assert binary.lt(A & A.T).isequal(A < A.T) + assert binary.ge(A.T & A).isequal(A.T >= A) + + B = A.dup() + B += 1 + assert type(B) is Matrix + assert binary.plus(A, 1).isequal(B) + B = A.dup() + B -= 1 + assert type(B) is Matrix + assert binary.minus(A, 1).isequal(B) + B = A.dup() + B *= 2 + assert type(B) is Matrix + assert binary.times(A, 2).isequal(B) + B = A.dup(dtype=float) + B /= 2 + assert type(B) is Matrix + assert binary.truediv(A, 2).isequal(B) + B = A.dup() + B //= 2 + assert type(B) is Matrix + assert binary.floordiv(A, 2).isequal(B) + B = A.dup() + B %= 2 + assert type(B) is Matrix + assert binary.numpy.mod(A, 2).isequal(B) + B = A.dup() + B **= 2 + assert type(B) is Matrix + assert binary.pow(A, 2).isequal(B) + B = A.dup(dtype=bool) + B ^= True + assert type(B) is Matrix + assert B.isequal(~A.dup(dtype=bool)) + B = A.dup(dtype=bool) + B ^= B + assert type(B) is Matrix + assert not B.reduce_scalar(agg.any).new() + + expr = binary.plus(A & A) + assert unary.abs(expr).isequal(abs(expr)) + assert unary.ainv(expr).isequal(-expr) + with pytest.raises(TypeError): + assert unary.lnot(expr).isequal(~expr) + with pytest.raises(TypeError): + expr += 1 + with pytest.raises(TypeError): + expr -= 1 + with pytest.raises(TypeError): + expr *= 1 + with pytest.raises(TypeError): + expr /= 1 + with pytest.raises(TypeError): + expr //= 1 + with pytest.raises(TypeError): + expr %= 1 + with pytest.raises(TypeError): + expr **= 1 + with pytest.raises(TypeError): + expr ^= 1 @pytest.mark.slow @@ -3511,15 +3891,15 @@ def test_deprecated(A, A_chunks): A.ss.scan_columns() -def test_ndim(A, A_chunks): - A_ = A - for chunks in A_chunks: - A = A_.dup() - A.rechunk(chunks=chunks, inplace=True) - assert A.ndim == 2 - assert A.ewise_mult(A).ndim == 2 - assert (A & A).ndim == 2 - assert (A @ A).ndim == 2 +def test_ndim(As, A_chunks): + for A_ in As: + for chunks in A_chunks: + A = A_.dup() + A.rechunk(chunks=chunks, inplace=True) + assert A.ndim == 2 + assert A.ewise_mult(A).ndim == 2 + assert (A & A).ndim == 2 + assert (A @ A).ndim == 2 @pytest.mark.xfail("'Needs investigation'", strict=True) diff --git 
a/tests/from_grblas2/test_vector.py b/tests/from_grblas2/test_vector.py index fdbd92f..c8bb84c 100644 --- a/tests/from_grblas2/test_vector.py +++ b/tests/from_grblas2/test_vector.py @@ -4,6 +4,7 @@ import sys import weakref +import dask.array as da import dask_grblas import grblas import numpy as np @@ -124,6 +125,138 @@ def test_from_values(): Vector.from_values([0], [1, 2]) +def test_from_values_dask(): + indices = da.from_array(np.array([0, 1, 3])) + values = da.from_array(np.array([True, False, True])) + u = Vector.from_values(indices, values) + assert u.size == 4 + assert u.nvals == 3 + assert u.dtype == bool + values = da.from_array(np.array([12.3, 12.4, 12.5])) + u2 = Vector.from_values(indices, values, size=17) + assert u2.size == 17 + assert u2.nvals == 3 + assert u2.dtype == float + indices = da.from_array(np.array([0, 1, 1])) + values = da.from_array(np.array([1, 2, 3], dtype=np.int64)) + u3 = Vector.from_values(indices, values, size=10, dup_op=binary.times) + assert u3.size == 10 + assert u3.nvals == 2 # duplicates were combined + assert u3.dtype == int + assert u3[1].value == 6 # 2*3 + values = da.from_array(np.array([True, True, True])) + with pytest.raises(ValueError, match="Duplicate indices found"): + # Duplicate indices requires a dup_op + Vector.from_values(indices, values).compute() + empty_da = da.from_array(np.array([])) + with pytest.raises(ValueError, match="No indices provided. Unable to infer size."): + Vector.from_values(empty_da, empty_da).compute() + + # Changed: Assume empty value is float64 (like numpy) + # with pytest.raises(ValueError, match="No values provided. Unable to determine type"): + w = Vector.from_values(empty_da, empty_da, size=10) + assert w.size == 10 + assert w.nvals == 0 + assert w.dtype == dtypes.FP64 + + with pytest.raises(ValueError, match="No indices provided. 
@@ -124,6 +125,138 @@ def test_from_values():
         Vector.from_values([0], [1, 2])


+def test_from_values_dask():
+    indices = da.from_array(np.array([0, 1, 3]))
+    values = da.from_array(np.array([True, False, True]))
+    u = Vector.from_values(indices, values)
+    assert u.size == 4
+    assert u.nvals == 3
+    assert u.dtype == bool
+    values = da.from_array(np.array([12.3, 12.4, 12.5]))
+    u2 = Vector.from_values(indices, values, size=17)
+    assert u2.size == 17
+    assert u2.nvals == 3
+    assert u2.dtype == float
+    indices = da.from_array(np.array([0, 1, 1]))
+    values = da.from_array(np.array([1, 2, 3], dtype=np.int64))
+    u3 = Vector.from_values(indices, values, size=10, dup_op=binary.times)
+    assert u3.size == 10
+    assert u3.nvals == 2  # duplicates were combined
+    assert u3.dtype == int
+    assert u3[1].value == 6  # 2*3
+    values = da.from_array(np.array([True, True, True]))
+    with pytest.raises(ValueError, match="Duplicate indices found"):
+        # Duplicate indices requires a dup_op
+        Vector.from_values(indices, values).compute()
+    empty_da = da.from_array(np.array([]))
+    with pytest.raises(ValueError, match="No indices provided. Unable to infer size."):
+        Vector.from_values(empty_da, empty_da).compute()
+
+    # Changed: Assume empty value is float64 (like numpy)
+    # with pytest.raises(ValueError, match="No values provided. Unable to determine type"):
+    w = Vector.from_values(empty_da, empty_da, size=10)
+    assert w.size == 10
+    assert w.nvals == 0
+    assert w.dtype == dtypes.FP64
+
+    with pytest.raises(ValueError, match="No indices provided. Unable to infer size"):
+        Vector.from_values(empty_da, empty_da, dtype=dtypes.INT64)
+    u4 = Vector.from_values(empty_da, empty_da, size=10, dtype=dtypes.INT64)
+    u5 = Vector.new(dtypes.INT64, size=10)
+    assert u4.isequal(u5, check_dtype=True)
+
+    # we check index dtype if given a dask array
+    indices = da.from_array(np.array([1.2, 3.4]))
+    values = da.from_array(np.array([1, 2]))
+    with pytest.raises(ValueError, match="indices must be integers, not float64"):
+        Vector.from_values(indices, values).compute()
+
+    # mis-matched sizes
+    indices = da.from_array(np.array([0]))
+    with pytest.raises(ValueError, match="`indices` and `values` lengths must match"):
+        Vector.from_values(indices, values).compute()
+
+
+def test_from_values_DOnion(v):
+    indices = da.from_array(np.array([0, 1, 3]))
+    values = da.from_array(np.array([True, False, True]))
+
+    # The following creates a Vector `u` with `type(u._delayed) == DOnion`
+    # because keyword argument `size` has not been specified:
+    u = Vector.from_values(indices, values)
+    assert u.size == 4
+    assert u.nvals == 3
+    assert u.dtype == bool
+
+    # The output of `.to_values()` is always a tuple of DOnions
+    indices, values = u.to_values()
+
+    # The following creates a Vector `v` with `type(v._delayed) == DOnion`
+    # because arguments `indices` and `values` are DOnions:
+    v = Vector.from_values(indices, values)
+    assert v.size == 4
+    assert v.nvals == 3
+    assert v.dtype == bool
+    values = da.from_array(np.array([12.3, 12.4, 12.5]))
+
+    # The following creates a Vector `u2` with `type(u2._delayed) == DOnion`
+    # because argument `indices` is a DOnion:
+    u2 = Vector.from_values(indices, values, size=17)
+    assert u2.size == 17
+    assert u2.nvals == 3
+    assert u2.dtype == float
+
+    indices = da.from_array(np.array([0, 1, 1]))
+    indices_ = da.from_array(np.array([1, 2, 3]))
+    i0 = Vector.from_values(indices_, indices)
+    _, indices = i0.to_values()
+    values = da.from_array(np.array([1, 2, 3], dtype=np.int64))
+
+    # The following creates a Vector `u3` with `type(u3._delayed) == DOnion`
+    # because arguments `indices` and `values` are DOnions:
+    u3 = Vector.from_values(indices, values, size=10, dup_op=binary.times)
+    assert u3.size == 10
+    assert u3.nvals == 2  # duplicates were combined
+    assert u3.dtype == int
+    assert u3[1].value == 6  # 2*3
+
+    values = da.from_array(np.array([True, True, True]))
+    with pytest.raises(ValueError, match="Duplicate indices found"):
+        # Duplicate indices requires a dup_op
+        Vector.from_values(indices, values).compute()
+    _, empty_da = Vector.new(float).to_values()
+    with pytest.raises(ValueError, match="No indices provided. Unable to infer size."):
+        Vector.from_values(empty_da, empty_da).compute()
+
+    # Changed: Assume empty value is float64 (like numpy)
+    # with pytest.raises(ValueError, match="No values provided. Unable to determine type"):
+    w = Vector.from_values(empty_da, empty_da, size=10)
+    assert w.size == 10
+    assert w.nvals == 0
+    assert w.dtype == dtypes.FP64
+
+    with pytest.raises(ValueError, match="No indices provided. Unable to infer size"):
+        Vector.from_values(empty_da, empty_da, dtype=dtypes.INT64).compute()
+    u4 = Vector.from_values(empty_da, empty_da, size=10, dtype=dtypes.INT64)
+    u5 = Vector.new(dtypes.INT64, size=10)
+    assert u4.isequal(u5, check_dtype=True)
+
+    # we check index dtype if given a dask array
+    indices = da.from_array(np.array([1.2, 3.4]))
+    values = da.from_array(np.array([1, 2]))
+    i0 = Vector.from_values(values, indices)
+    _, indices = i0.to_values()
+    with pytest.raises(ValueError, match="indices must be integers, not float64"):
+        Vector.from_values(indices, values).compute()
+
+    # mis-matched sizes
+    indices = da.from_array(np.array([0]))
+    i0 = Vector.from_values(indices, indices)
+    indices, _ = i0.to_values()
+    with pytest.raises(ValueError, match="`indices` and `values` lengths must match"):
+        Vector.from_values(indices, values).compute()
+
+
 def test_from_values_scalar():
     u = Vector.from_values([0, 1, 3], 7)
     assert u.size == 4
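test_from_values_DOnion above pins down when the DOnion wrapper appears: whenever a Vector's metadata (its size, say) is itself the result of a dask computation, `x._delayed` is a DOnion rather than a plain dask array, and `.to_values()` on such a Vector yields DOnions in turn. A sketch of the distinction, inferred from the comments in the test; like the test, it only inspects `type(x._delayed)`:

    import dask.array as da
    import numpy as np
    from dask_grblas import Vector

    indices = da.from_array(np.array([0, 1, 3]))
    values = da.from_array(np.array([True, False, True]))

    u = Vector.from_values(indices, values)          # size must be inferred lazily,
                                                     # so type(u._delayed) is DOnion
    w = Vector.from_values(indices, values, size=4)  # size known up front: no DOnion
    i, v = u.to_values()                             # a tuple of DOnions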
@@ -157,10 +290,10 @@ def test_resize(v):
     v.resize(20)
     assert v.size == 20
     assert v.nvals == 4
-    assert compute(v[19].value) is None
+    assert v[19].new().value == None
     v.resize(4)
     assert v.size == 4
-    assert v.nvals.compute() == 2
+    assert v.nvals == 2

     v = v_.dup()
     v.rechunk(chunks=2, inplace=True)
@@ -170,11 +303,11 @@
     v.resize(20, chunks=5)
     assert v.size == 20
     assert v.nvals == 4
-    assert compute(v[19].value) is None
+    assert v[19].new().value == None
     assert v._delayed.chunks == ((5, 5, 5, 5),)
     v.resize(4, chunks=3)
     assert v.size == 4
-    assert v.nvals.compute() == 2
+    assert v.nvals == 2
     assert v._delayed.chunks == ((3, 1),)
@@ -217,20 +350,20 @@ def test_build_scalar(v):

 def test_extract_values(v):
     idx, vals = v.to_values()
-    np.testing.assert_array_equal(idx, (1, 3, 4, 6))
-    np.testing.assert_array_equal(vals, (1, 1, 2, 0))
+    np.testing.assert_array_equal(idx.compute(), (1, 3, 4, 6))
+    np.testing.assert_array_equal(vals.compute(), (1, 1, 2, 0))
     assert idx.dtype == np.uint64
     assert vals.dtype == np.int64
     idx, vals = v.to_values(dtype=int)
-    np.testing.assert_array_equal(idx, (1, 3, 4, 6))
-    np.testing.assert_array_equal(vals, (1, 1, 2, 0))
+    np.testing.assert_array_equal(idx.compute(), (1, 3, 4, 6))
+    np.testing.assert_array_equal(vals.compute(), (1, 1, 2, 0))
     assert idx.dtype == np.uint64
     assert vals.dtype == np.int64
     idx, vals = v.to_values(dtype=float)
-    np.testing.assert_array_equal(idx, (1, 3, 4, 6))
-    np.testing.assert_array_equal(vals, (1, 1, 2, 0))
+    np.testing.assert_array_equal(idx.compute(), (1, 3, 4, 6))
+    np.testing.assert_array_equal(vals.compute(), (1, 1, 2, 0))
     assert idx.dtype == np.uint64
     assert vals.dtype == np.float64
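The test_extract_values changes above reflect that `to_values()` now returns lazy arrays: `.dtype` is still available eagerly, but the contents need an explicit `.compute()` before comparing with numpy. A minimal sketch:

    import numpy as np
    from dask_grblas import Vector

    v = Vector.from_values([1, 3, 4, 6], [1, 1, 2, 0])
    idx, vals = v.to_values()        # lazy outputs, not numpy arrays
    assert idx.dtype == np.uint64    # metadata needs no compute
    np.testing.assert_array_equal(idx.compute(), (1, 3, 4, 6))
    np.testing.assert_array_equal(vals.compute(), (1, 1, 2, 0))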
diff --git a/tests/test_functools.py b/tests/test_functools.py
new file mode 100644
index 0000000..92dcdd8
--- /dev/null
+++ b/tests/test_functools.py
@@ -0,0 +1,39 @@
+import pytest
+from functools import partial
+from dask_grblas.functools import flexible_partial, skip
+
+
+def func(a, b, c, d, e, f):
+    return a, b, c, d, e, f
+
+
+def funk(a, b, c, d, e, f, ka="a", kb="b", kc="c"):
+    return a, b, c, d, e, f, ka, kb, kc
+
+
+def test_flexible_partial():
+    # without keywords
+    part_func = flexible_partial(func, skip, 2, skip, skip, 5)
+    result = part_func(1, 3, 4, 6)
+    assert result == (1, 2, 3, 4, 5, 6)
+
+    # with keywords
+    part_funk = flexible_partial(funk, skip, 2, skip, skip, 5, kb="B")
+    result = part_funk(1, 3, 4, 6, kc="C")
+    assert result == (1, 2, 3, 4, 5, 6, "a", "B", "C")
+
+    # apply a 2nd `flexible_partial` on the first `flexible_partial`:
+    part_funk2 = flexible_partial(part_funk, 1, skip, 4, ka="A")
+    result = part_funk2(3, 6, kc="C")
+    assert result == (1, 2, 3, 4, 5, 6, "A", "B", "C")
+
+    # or apply a `partial` on the first `flexible_partial`:
+    part_funk2 = partial(part_funk, 1, 3, ka="A")
+    result = part_funk2(4, 6, kc="C")
+    assert result == (1, 2, 3, 4, 5, 6, "A", "B", "C")
+
+    # or apply a `flexible_partial` on a `partial`:
+    part_funk = partial(funk, 1, 2, kb="B")
+    part_funk2 = flexible_partial(part_funk, skip, 4, ka="A")
+    result = part_funk2(3, 5, 6, kc="C")
+    assert result == (1, 2, 3, 4, 5, 6, "A", "B", "C")
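flexible_partial, exercised above, generalizes functools.partial: the `skip` sentinel holds a positional slot open, and call-time arguments fill the skipped slots from left to right before extending past the frozen ones. A small usage sketch; `clamp` is a hypothetical function, not part of the library:

    from dask_grblas.functools import flexible_partial, skip

    def clamp(lo, x, hi):
        return max(lo, min(x, hi))

    clamp01 = flexible_partial(clamp, 0.0, skip, 1.0)  # freeze lo and hi, keep x open
    assert clamp01(1.7) == 1.0    # x=1.7 fills the skipped slot
    assert clamp01(-0.3) == 0.0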