From d7aecb6f43eefe13f11b7f423856d66787a3932e Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 5 Feb 2025 14:18:17 -0600 Subject: [PATCH 1/4] KeyBuilder: warn on encountering NaN --- pytools/persistent_dict.py | 8 ++++++++ pytools/test/test_persistent_dict.py | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index cafd34b3..58b22937 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -291,6 +291,14 @@ def update_for_bool(self, key_hash: Hash, key: bool) -> None: key_hash.update(str(key).encode("utf8")) def update_for_float(self, key_hash: Hash, key: float) -> None: + import math + if math.isnan(key): + # Also applies to np.nan, float("nan") + warn("Encountered a NaN while hashing. Since NaNs compare unequal " + "to themselves, the resulting key can not be retrieved from a " + "PersistentDict and will lead to a collision error on retrieval.", + stacklevel=1) + key_hash.update(key.hex().encode("utf8")) def update_for_complex(self, key_hash: Hash, key: float) -> None: diff --git a/pytools/test/test_persistent_dict.py b/pytools/test/test_persistent_dict.py index cd805062..d231b214 100644 --- a/pytools/test/test_persistent_dict.py +++ b/pytools/test/test_persistent_dict.py @@ -1048,6 +1048,32 @@ def test_concurrency_threads() -> None: # }}} +def test_nan_keys() -> None: + # test for https://github.com/inducer/pytools/issues/287 + + try: + tmpdir = tempfile.mkdtemp() + keyb = KeyBuilder() + pdict = PersistentDict("pytools-test", container_dir=tmpdir, + safe_sync=False, key_builder=keyb) + + import math + + import numpy as np + + for nan_value in [math.nan, np.nan, float("nan")]: + assert nan_value != nan_value + assert keyb(nan_value) == keyb(nan_value) + + pdict[nan_value] = 42 + + with (pytest.warns(CollisionWarning), + pytest.raises(NoSuchEntryCollisionError)): + pdict[math.nan] + finally: + shutil.rmtree(tmpdir) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) From 9045fa31c7c16b104d44d3d3255c6c0bc5ebf609 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 5 Feb 2025 14:23:22 -0600 Subject: [PATCH 2/4] add warning to docs --- pytools/persistent_dict.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 58b22937..5641d125 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -669,6 +669,12 @@ class WriteOncePersistentDict(_PersistentDictBase[K, V]): and if they occur, almost always due to a bug in the hash key generation code (:class:`KeyBuilder`). + .. warning:: + + Since NaNs compare unequal to themselves, keys that include NaNs can + not be retrieved from a :class:`WriteOncePersistentDict` and will lead to a + :exc:`NoSuchEntryCollisionError` on retrieval. + .. automethod:: __init__ .. automethod:: __getitem__ .. automethod:: __setitem__ @@ -782,6 +788,12 @@ class PersistentDict(_PersistentDictBase[K, V]): and if they occur, almost always due to a bug in the hash key generation code (:class:`KeyBuilder`). + .. warning:: + + Since NaNs compare unequal to themselves, keys that include NaNs can + not be retrieved from a :class:`PersistentDict` and will lead to a + :exc:`NoSuchEntryCollisionError` on retrieval. + .. automethod:: __init__ .. automethod:: __getitem__ .. automethod:: __setitem__ From 95e5d8d98b34a63702a53482d678a92cf700cb20 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 5 Feb 2025 14:24:53 -0600 Subject: [PATCH 3/4] mypy --- pytools/test/test_persistent_dict.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytools/test/test_persistent_dict.py b/pytools/test/test_persistent_dict.py index d231b214..3deb6518 100644 --- a/pytools/test/test_persistent_dict.py +++ b/pytools/test/test_persistent_dict.py @@ -1054,8 +1054,10 @@ def test_nan_keys() -> None: try: tmpdir = tempfile.mkdtemp() keyb = KeyBuilder() - pdict = PersistentDict("pytools-test", container_dir=tmpdir, - safe_sync=False, key_builder=keyb) + pdict: PersistentDict[float, int] = PersistentDict("pytools-test", + container_dir=tmpdir, + safe_sync=False, + key_builder=keyb) import math From cf3002a74d2603efd4453a503e75aaffcc66aece Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 5 Feb 2025 14:27:42 -0600 Subject: [PATCH 4/4] make numpy optional in test --- pytools/test/test_persistent_dict.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pytools/test/test_persistent_dict.py b/pytools/test/test_persistent_dict.py index 3deb6518..33cfb33c 100644 --- a/pytools/test/test_persistent_dict.py +++ b/pytools/test/test_persistent_dict.py @@ -1061,9 +1061,15 @@ def test_nan_keys() -> None: import math - import numpy as np + nan_values = [math.nan, float("nan")] + + try: + import numpy as np + nan_values.append(np.nan) + except ImportError: + pass - for nan_value in [math.nan, np.nan, float("nan")]: + for nan_value in nan_values: assert nan_value != nan_value assert keyb(nan_value) == keyb(nan_value)