Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,488 changes: 184 additions & 1,304 deletions .basedpyright/baseline.json

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ extend-ignore = [
"E226", # missing whitespace around arithmetic operator
"E402", # module-level import not at top of file
"RUF067", # non-empty-init-module
"TRY004",
"TRY300",
]

[tool.ruff.lint.flake8-quotes]
Expand All @@ -127,6 +129,8 @@ required-imports = ["from __future__ import annotations"]
[tool.ruff.lint.per-file-ignores]
"doc/**/*.py" = ["I002"]
"examples/**/*.py" = ["I002"]
"sumpy/test/test_*.py" = ["S102"]
"doc/conf.py" = ["S102"]

[tool.typos.default]
extend-ignore-re = [
Expand Down
1 change: 0 additions & 1 deletion sumpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ def __init__(self, new_flag, new_no_cache_kernels=()):
self.new_no_cache_kernels = new_no_cache_kernels

def __enter__(self):
global CACHING_ENABLED, NO_CACHE_KERNELS
self.previous_flag = CACHING_ENABLED
self.previous_kernels = NO_CACHE_KERNELS
set_caching_enabled(self.new_flag, self.new_no_cache_kernels)
Expand Down
16 changes: 11 additions & 5 deletions sumpy/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,14 @@


if TYPE_CHECKING:
from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
from collections.abc import (
Callable,
Iterable,
Iterator,
Mapping,
Sequence,
Set as AbstractSet,
)

from numpy.typing import DTypeLike

Expand Down Expand Up @@ -730,10 +737,10 @@ def map_common_subexpression_uncached(

def to_loopy_insns(
assignments: Iterable[tuple[str, sym.Expr]],
vector_names: Set[str] | None = None,
vector_names: AbstractSet[str] | None = None,
pymbolic_expr_maps: Sequence[Callable[[Expression], Expression]] = (),
complex_dtype: DTypeLike | None = None,
retain_names: Set[str] | None = None,
retain_names: AbstractSet[str] | None = None,
) -> Sequence[Assignment | CallInstruction]:
if vector_names is None:
vector_names = frozenset()
Expand Down Expand Up @@ -771,8 +778,7 @@ def cmb_mapper(expr: Expression, /) -> Expression:
expr = ssg(expr)
expr = bik(expr)
expr = cmr(expr)
expr = btog(expr)
return expr
return btog(expr)

def convert_expr(name: str, expr: Expression) -> Expression:
logger.debug("generate expression for: %s", name)
Expand Down
15 changes: 6 additions & 9 deletions sumpy/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from boxtree.distributed.calculation import DistributedExpansionWranglerMixin

import pytools.obj_array as obj_array
from pytools import obj_array

from sumpy.fmm import SumpyExpansionWrangler

Expand Down Expand Up @@ -61,23 +61,20 @@ def distribute_source_weights(self,
local_src_weight_vecs_host = super().distribute_source_weights(
actx, src_weight_vecs_host, src_idx_all_ranks)

local_src_weight_vecs_device = [
return [
actx.from_numpy(local_src_weight)
for local_src_weight in local_src_weight_vecs_host]

return local_src_weight_vecs_device

def gather_potential_results(self,
actx: ArrayContext, potentials, tgt_idx_all_ranks):
potentials_host_vec = [
actx.to_numpy(potentials_dev) for potentials_dev in potentials
]

gathered_potentials_host_vec = []
for potentials_host in potentials_host_vec:
gathered_potentials_host_vec.append(
super().gather_potential_results(
actx, potentials_host, tgt_idx_all_ranks))
gathered_potentials_host_vec = [
super().gather_potential_results(
actx, potentials_host, tgt_idx_all_ranks)
for potentials_host in potentials_host_vec]

if self.is_mpi_root:
return obj_array.new_1d([
Expand Down
44 changes: 13 additions & 31 deletions sumpy/e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,15 @@ def get_cache_key(self):
return (type(self).__name__, self.src_expansion, self.tgt_expansion)

@abstractmethod
def get_kernel(self):
@override
def get_kernel(self) -> lp.TranslationUnit:
pass

def get_optimized_kernel(self):
# FIXME
knl = self.get_kernel()
knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0")
return lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0")

return knl

# }}}

Expand Down Expand Up @@ -259,18 +259,14 @@ def get_kernel(self):
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)

loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
loopy_knl = lp.set_options(loopy_knl,
return lp.set_options(loopy_knl,
enforce_variable_access_ordered="no_check")

return loopy_knl

@override
def get_optimized_kernel(self):
# FIXME
knl = self.get_kernel()
knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0")

return knl
return lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0")

def __call__(self, actx: ArrayContext, **kwargs):
"""
Expand Down Expand Up @@ -511,11 +507,9 @@ def get_kernel(self, result_dtype):

def get_optimized_kernel(self, result_dtype):
knl = self.get_kernel(result_dtype)
knl = self.tgt_expansion.m2l_translation.optimize_loopy_kernel(
return self.tgt_expansion.m2l_translation.optimize_loopy_kernel(
knl, self.tgt_expansion, self.src_expansion)

return knl

def __call__(self, actx: ArrayContext, **kwargs):
"""
:arg src_expansions:
Expand Down Expand Up @@ -612,22 +606,18 @@ def get_kernel(self, result_dtype):

loopy_knl = lp.merge([loopy_knl, translation_classes_data_knl])
loopy_knl = lp.inline_callable_kernel(loopy_knl, "m2l_data")
loopy_knl = lp.set_options(loopy_knl,
return lp.set_options(loopy_knl,
enforce_variable_access_ordered="no_check",
# FIXME: Without this, Loopy spends an eternity checking
# scattered writes to global variables to see whether barriers
# need to be inserted.
disable_global_barriers=True)

return loopy_knl

def get_optimized_kernel(self, result_dtype):
# FIXME
knl = self.get_kernel(result_dtype)
knl = lp.tag_inames(knl, "idim*:unr")
knl = lp.tag_inames(knl, {"itr_class": "g.0"})

return knl
return lp.tag_inames(knl, {"itr_class": "g.0"})

def __call__(self, actx: ArrayContext, **kwargs):
"""
Expand Down Expand Up @@ -722,9 +712,7 @@ def get_kernel(self, result_dtype):
loopy_knl = expn.prepare_loopy_kernel(loopy_knl)

loopy_knl = lp.merge([loopy_knl, single_box_preprocess_knl])
loopy_knl = lp.inline_callable_kernel(loopy_knl, "m2l_preprocess_inner")

return loopy_knl
return lp.inline_callable_kernel(loopy_knl, "m2l_preprocess_inner")

def get_optimized_kernel(self, result_dtype):
knl = self.get_kernel(result_dtype)
Expand Down Expand Up @@ -822,18 +810,16 @@ def get_kernel(self, result_dtype):
loopy_knl = lp.merge([loopy_knl, single_box_postprocess_knl])
loopy_knl = lp.inline_callable_kernel(loopy_knl, "m2l_postprocess_inner")

loopy_knl = lp.set_options(loopy_knl,
return lp.set_options(loopy_knl,
enforce_variable_access_ordered="no_check")
return loopy_knl

def get_optimized_kernel(self, result_dtype):
knl = self.get_kernel(result_dtype)
knl = lp.tag_inames(knl, "itgt_box:g.0")
_, optimizations = self.get_inner_knl_and_optimizations(result_dtype)
for optimization in optimizations:
knl = optimization(knl)
knl = lp.add_inames_for_unused_hw_axes(knl)
return knl
return lp.add_inames_for_unused_hw_axes(knl)

def __call__(self, actx: ArrayContext, **kwargs):
"""
Expand Down Expand Up @@ -943,11 +929,9 @@ def get_kernel(self):
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)

loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
loopy_knl = lp.set_options(loopy_knl,
return lp.set_options(loopy_knl,
enforce_variable_access_ordered="no_check")

return loopy_knl

def __call__(self, actx: ArrayContext, **kwargs):
"""
:arg src_expansions:
Expand Down Expand Up @@ -1050,11 +1034,9 @@ def get_kernel(self):
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)

loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
loopy_knl = lp.set_options(loopy_knl,
return lp.set_options(loopy_knl,
enforce_variable_access_ordered="no_check")

return loopy_knl

def __call__(self, actx: ArrayContext, **kwargs):
"""
:arg src_expansions:
Expand Down
21 changes: 6 additions & 15 deletions sumpy/e2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
import numpy as np

import loopy as lp
import pytools.obj_array as obj_array
from loopy.version import MOST_RECENT_LANGUAGE_VERSION # noqa: F401
from pytools import obj_array

from sumpy.array_context import make_loopy_program
from sumpy.tools import KernelCacheMixin, gather_loopy_arguments
Expand Down Expand Up @@ -101,8 +101,7 @@ def add_loopy_eval_callable(
loopy_knl = lp.remove_unused_inames(loopy_knl)
for kernel in self.kernels:
loopy_knl = kernel.prepare_loopy_kernel(loopy_knl)
loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C")
return loopy_knl
return lp.tag_array_axes(loopy_knl, "targets", "sep,C")

def get_loopy_args(self):
return gather_loopy_arguments((self.expansion, *tuple(self.kernels)))
Expand Down Expand Up @@ -194,20 +193,16 @@ def get_kernel(self):

loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
loopy_knl = lp.tag_inames(loopy_knl, "iknl*:unr")
loopy_knl = self.add_loopy_eval_callable(loopy_knl)

return loopy_knl
return self.add_loopy_eval_callable(loopy_knl)

def get_optimized_kernel(self):
# FIXME
knl = self.get_kernel()
knl = lp.tag_inames(knl, {"itgt_box": "g.0"})
knl = lp.add_inames_to_insn(knl, "itgt_box", "id:kernel_scaling")
knl = lp.set_options(knl,
return lp.set_options(knl,
enforce_variable_access_ordered="no_check")

return knl

def __call__(self, actx: ArrayContext, **kwargs):
"""
:arg expansions:
Expand Down Expand Up @@ -322,20 +317,16 @@ def get_kernel(self):
loopy_knl = lp.tag_inames(loopy_knl, "iknl*:unr")
loopy_knl = lp.prioritize_loops(loopy_knl, "itgt_box,itgt,isrc_box")
loopy_knl = self.add_loopy_eval_callable(loopy_knl)
loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C")

return loopy_knl
return lp.tag_array_axes(loopy_knl, "targets", "sep,C")

def get_optimized_kernel(self):
# FIXME
knl = self.get_kernel()
knl = lp.tag_inames(knl, {"itgt_box": "g.0"})
knl = lp.add_inames_to_insn(knl, "itgt_box", "id:kernel_scaling")
knl = lp.set_options(knl,
return lp.set_options(knl,
enforce_variable_access_ordered="no_check")

return knl

def __call__(self, actx: ArrayContext, **kwargs):
centers = kwargs.pop("centers")
# "1" may be passed for rscale, which won't have its type
Expand Down
6 changes: 2 additions & 4 deletions sumpy/expansion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,7 @@ def _get_mi_hyperplanes(self) -> list[tuple[int, int]]:
axis `d`.
"""
d = self.dim - 1
hyperplanes = [(d, const) for const in range(self.order + 1)]
return hyperplanes
return [(d, const) for const in range(self.order + 1)]

@memoize_method
def _split_coeffs_into_hyperplanes(
Expand Down Expand Up @@ -648,8 +647,7 @@ def mi_key(ident: MultiIndex | DerivativeIdentifier) -> tuple[int, ...]:
mi = ident
key = [sum(mi)]

for i in range(dim):
key.append(mi[axis_permutation[i]])
key.extend(mi[axis_permutation[i]] for i in range(dim))

return tuple(key)

Expand Down
Loading
Loading