From 668b7fe5d71aaf71a692e8dacaa0696d5acd52a3 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Thu, 16 Jan 2025 16:23:39 -0600 Subject: [PATCH 1/7] Revert "KeyBuilder: use system byteorder to match numpy" This reverts commit cee5027972c5c5063f64f7f6624e20d1c9069497. --- pytools/persistent_dict.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 14eda873..02c311b8 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -163,12 +163,6 @@ class KeyBuilder: may stop working as early as 2022. .. versionadded:: 2021.2 - - .. note:: - - Some key-building uses system byte order, so the built keys may not match - across different systems. It would be desirable to fix this, but this is - not yet done. """ # this exists so that we can (conceivably) switch algorithms at some point @@ -277,10 +271,7 @@ def update_for_int(self, key_hash: Hash, key: int) -> None: sz = 8 while True: try: - # Must match system byte order so that numpy and this - # generate the same string of bytes. - # https://github.com/inducer/pytools/issues/259 - key_hash.update(key.to_bytes(sz, byteorder=sys.byteorder, signed=True)) + key_hash.update(key.to_bytes(sz, byteorder="little", signed=True)) return except OverflowError: sz *= 2 From 3046e99a4df4beafc5c364bd91a11ca60e7ccb26 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Thu, 16 Jan 2025 16:26:52 -0600 Subject: [PATCH 2/7] fix numpy hash for bigendian --- pytools/persistent_dict.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 02c311b8..f199e699 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -219,6 +219,8 @@ def rec(self, key_hash: Hash, key: Any) -> Hash: # Non-numpy scalars are handled above in the try block. method = self.update_for_numpy_scalar + # numpy arrays are mutable so they are not handled in KeyBuilder + if method is None: if issubclass(tp, Enum): method = self.update_for_enum @@ -324,6 +326,8 @@ def update_for_specific_dtype(self, key_hash: Hash, key: Any) -> None: def update_for_numpy_scalar(self, key_hash: Hash, key: Any) -> None: import numpy as np + if sys.byteorder == "big": + key = key.byteswap() if hasattr(np, "complex256") and key.dtype == np.dtype("complex256"): key_hash.update(repr(complex(key)).encode("utf8")) elif hasattr(np, "float128") and key.dtype == np.dtype("float128"): From 63e0455afcda877c0b8d1bbd0d4c2079c7342826 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Thu, 16 Jan 2025 16:32:49 -0600 Subject: [PATCH 3/7] speed up endianess determination --- pytools/persistent_dict.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index f199e699..8373852e 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -68,6 +68,9 @@ class RecommendedHashNotFoundWarning(UserWarning): _HAS_ATTRS = True +IS_BIGENDIAN = sys.byteorder == "big" + + logger = logging.getLogger(__name__) # NOTE: not always available so they get hardcoded here @@ -271,6 +274,8 @@ def update_for_type(self, key_hash: Hash, key: type) -> None: def update_for_int(self, key_hash: Hash, key: int) -> None: sz = 8 + # Note: this must match the hash for numpy integers, since + # np.int64(1) == 1 while True: try: key_hash.update(key.to_bytes(sz, byteorder="little", signed=True)) @@ -326,7 +331,7 @@ def update_for_specific_dtype(self, key_hash: Hash, key: Any) -> None: def update_for_numpy_scalar(self, key_hash: Hash, key: Any) -> None: import numpy as np - if sys.byteorder == "big": + if IS_BIGENDIAN: key = key.byteswap() if hasattr(np, "complex256") and key.dtype == np.dtype("complex256"): key_hash.update(repr(complex(key)).encode("utf8")) From dbe4e5f0850a77c058279200622a27988691008e Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Thu, 16 Jan 2025 17:36:08 -0600 Subject: [PATCH 4/7] cleanups, add mock test --- pytools/persistent_dict.py | 7 ++++--- pytools/test/test_persistent_dict.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 8373852e..72cd2d14 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -331,14 +331,15 @@ def update_for_specific_dtype(self, key_hash: Hash, key: Any) -> None: def update_for_numpy_scalar(self, key_hash: Hash, key: Any) -> None: import numpy as np - if IS_BIGENDIAN: - key = key.byteswap() if hasattr(np, "complex256") and key.dtype == np.dtype("complex256"): key_hash.update(repr(complex(key)).encode("utf8")) elif hasattr(np, "float128") and key.dtype == np.dtype("float128"): key_hash.update(repr(float(key)).encode("utf8")) else: - key_hash.update(np.array(key).tobytes()) + if IS_BIGENDIAN: + key_hash.update(np.array(key).byteswap().tobytes()) + else: + key_hash.update(np.array(key).tobytes()) def update_for_dataclass(self, key_hash: Hash, key: Any) -> None: self.rec(key_hash, f"{type(key).__qualname__}.{type(key).__name__}") diff --git a/pytools/test/test_persistent_dict.py b/pytools/test/test_persistent_dict.py index b6dec54f..298ff0ee 100644 --- a/pytools/test/test_persistent_dict.py +++ b/pytools/test/test_persistent_dict.py @@ -479,6 +479,19 @@ def test_scalar_hashing() -> None: assert keyb(np.complex256(1.1+2.2j)) == keyb(np.complex256(1.1+2.2j)) assert keyb(np.clongdouble(1.1+2.2j)) == keyb(np.clongdouble(1.1+2.2j)) + h_int64 = keyb(np.int64(1)) + + import unittest.mock + with unittest.mock.patch("pytools.persistent_dict.IS_BIGENDIAN", True): + # Very crude big-endian test. Apparently, there is no way to + # create a numpy scalar (not an np.array!) of a specific byte order. + if sys.byteorder == "big": + # no need to swap + be_val = np.int64(1) + else: + be_val = np.int64(1).byteswap() + assert h_int64 == keyb(be_val) + @pytest.mark.parametrize("dict_impl", ("immutabledict", "frozendict", "constantdict", From 4ce5d88b649b3663b303255047ff9fe408027c46 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 17 Jan 2025 09:42:24 -0600 Subject: [PATCH 5/7] broaden test slightly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Górny --- pytools/test/test_persistent_dict.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pytools/test/test_persistent_dict.py b/pytools/test/test_persistent_dict.py index 298ff0ee..100db69e 100644 --- a/pytools/test/test_persistent_dict.py +++ b/pytools/test/test_persistent_dict.py @@ -482,14 +482,11 @@ def test_scalar_hashing() -> None: h_int64 = keyb(np.int64(1)) import unittest.mock - with unittest.mock.patch("pytools.persistent_dict.IS_BIGENDIAN", True): - # Very crude big-endian test. Apparently, there is no way to - # create a numpy scalar (not an np.array!) of a specific byte order. - if sys.byteorder == "big": - # no need to swap - be_val = np.int64(1) - else: - be_val = np.int64(1).byteswap() + + from pytools.persistent_dict import IS_BIGENDIAN + with unittest.mock.patch("pytools.persistent_dict.IS_BIGENDIAN", not IS_BIGENDIAN): + # Very crude test that the other endianness works as well. + be_val = np.int64(1).byteswap() assert h_int64 == keyb(be_val) From 49820c79a16672e3aeecea1a715d08fa60978fe1 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 17 Jan 2025 11:35:01 -0600 Subject: [PATCH 6/7] rename _IS_BIGENDIAN --- pytools/persistent_dict.py | 4 ++-- pytools/test/test_persistent_dict.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 72cd2d14..9cbead31 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -68,7 +68,7 @@ class RecommendedHashNotFoundWarning(UserWarning): _HAS_ATTRS = True -IS_BIGENDIAN = sys.byteorder == "big" +_IS_BIGENDIAN = sys.byteorder == "big" logger = logging.getLogger(__name__) @@ -336,7 +336,7 @@ def update_for_numpy_scalar(self, key_hash: Hash, key: Any) -> None: elif hasattr(np, "float128") and key.dtype == np.dtype("float128"): key_hash.update(repr(float(key)).encode("utf8")) else: - if IS_BIGENDIAN: + if _IS_BIGENDIAN: key_hash.update(np.array(key).byteswap().tobytes()) else: key_hash.update(np.array(key).tobytes()) diff --git a/pytools/test/test_persistent_dict.py b/pytools/test/test_persistent_dict.py index 100db69e..76135e05 100644 --- a/pytools/test/test_persistent_dict.py +++ b/pytools/test/test_persistent_dict.py @@ -483,8 +483,9 @@ def test_scalar_hashing() -> None: import unittest.mock - from pytools.persistent_dict import IS_BIGENDIAN - with unittest.mock.patch("pytools.persistent_dict.IS_BIGENDIAN", not IS_BIGENDIAN): + from pytools.persistent_dict import _IS_BIGENDIAN + with unittest.mock.patch("pytools.persistent_dict._IS_BIGENDIAN", + not _IS_BIGENDIAN): # Very crude test that the other endianness works as well. be_val = np.int64(1).byteswap() assert h_int64 == keyb(be_val) From 0194fc66affee8204d0a3d2a46f46cfd9d587122 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 17 Jan 2025 11:38:52 -0600 Subject: [PATCH 7/7] add comments --- pytools/persistent_dict.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 9cbead31..cafd34b3 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -68,6 +68,7 @@ class RecommendedHashNotFoundWarning(UserWarning): _HAS_ATTRS = True +# Used to speed up the comparison in update_for_numpy_scalar and for mock testing _IS_BIGENDIAN = sys.byteorder == "big" @@ -331,6 +332,9 @@ def update_for_specific_dtype(self, key_hash: Hash, key: Any) -> None: def update_for_numpy_scalar(self, key_hash: Hash, key: Any) -> None: import numpy as np + + # tobytes() of np.complex256 and np.float128 are non-deterministic, + # so we need to use their repr() instead. if hasattr(np, "complex256") and key.dtype == np.dtype("complex256"): key_hash.update(repr(complex(key)).encode("utf8")) elif hasattr(np, "float128") and key.dtype == np.dtype("float128"):