From cb2f89ddbb5a288020db6c0bfb3b9002083f822c Mon Sep 17 00:00:00 2001
From: John Ewart
Date: Mon, 23 Feb 2026 13:09:18 -0800
Subject: [PATCH 01/50] Add base Redis cache manager

---
 src/fides/common/cache/__init__.py |  75 ++++++++++++++++++++
 src/fides/common/cache/manager.py  | 106 +++++++++++++++++++++++++++++
 2 files changed, 181 insertions(+)
 create mode 100644 src/fides/common/cache/__init__.py
 create mode 100644 src/fides/common/cache/manager.py

diff --git a/src/fides/common/cache/__init__.py b/src/fides/common/cache/__init__.py
new file mode 100644
index 00000000000..45cd9cb307c
--- /dev/null
+++ b/src/fides/common/cache/__init__.py
@@ -0,0 +1,75 @@
+"""
+Shared Redis cache utilities and the RedisCacheManager.
+
+RedisCacheManager provides modern Redis patterns such as key indexes.
+DSRCacheStore wraps that with DSR-specific key naming (dsr:{id}:{part})
+and index-backed list/clear with lazy migration for legacy keys.
+
+Usage:
+    with get_dsr_cache_store() as store:
+        store.write_identity(privacy_request_id, "email", "user@example.com")
+        store.clear(privacy_request_id)
+"""
+
+from contextlib import contextmanager
+from typing import Iterator
+
+from fides.common.cache.dsr_store import (
+    DSR_KEY_PREFIX,
+    DSRCacheStore,
+)
+from fides.common.cache.manager import (
+    INDEX_KEY_PREFIX,
+    RedisCacheManager,
+)
+
+
+from fides.api.util.cache import get_cache
+
+@contextmanager
+def get_dsr_cache_store(
+    *,
+    backfill_index_on_legacy_read: bool = True,
+    migrate_legacy_on_read: bool = True,
+) -> Iterator[DSRCacheStore]:
+    """
+    Context manager that yields a DSRCacheStore backed by the application Redis connection.
+
+    The store handles both new-format keys (dsr:{id}:{part}) and legacy keys
+    (id-{id}-{field}-{attr}) with automatic migration on read when migrate_legacy_on_read=True.
+
+    Args:
+        backfill_index_on_legacy_read: When listing keys and falling back to SCAN for
+            legacy keys, add those keys to the index. Default True.
+        migrate_legacy_on_read: When a get finds value in legacy key only, write to
+            new key, delete legacy key, add new key to index. Default True.
+
+    Yields:
+        DSRCacheStore instance ready for use
+
+    Usage:
+        with get_dsr_cache_store() as store:
+            store.clear(privacy_request_id)
+
+        with get_dsr_cache_store() as store:
+            store.write_identity(pr_id, "email", "user@example.com")
+            value = store.get_identity(pr_id, "email")
+    """
+    redis_client = get_cache()
+    manager = RedisCacheManager(redis_client)
+    store = DSRCacheStore(
+        manager,
+        backfill_index_on_legacy_read=backfill_index_on_legacy_read,
+        migrate_legacy_on_read=migrate_legacy_on_read,
+    )
+    yield store
+    # No cleanup needed; store doesn't own the Redis connection
+
+
+__all__ = [
+    "DSR_KEY_PREFIX",
+    "DSRCacheStore",
+    "INDEX_KEY_PREFIX",
+    "RedisCacheManager",
+    "get_dsr_cache_store",
+]

diff --git a/src/fides/common/cache/manager.py b/src/fides/common/cache/manager.py
new file mode 100644
index 00000000000..5327aa5f2a2
--- /dev/null
+++ b/src/fides/common/cache/manager.py
@@ -0,0 +1,106 @@
+"""
+RedisCacheManager: Redis client wrapper with key-index support.
+
+Key indexes allow listing and deleting keys by a logical prefix without
+using Redis KEYS or SCAN. Each index is stored as a Redis SET at
+__idx:{index_prefix}; members are the actual cache key names.
+"""
+
+from typing import List, Optional, Union
+
+from redis import Redis
+
+# Redis key prefix for index sets. Index key = INDEX_KEY_PREFIX + index_prefix.
+INDEX_KEY_PREFIX = "__idx:" + +# Value types supported by Redis SET and basic ops +RedisValue = Union[bytes, float, int, str] + + +class RedisCacheManager: + """ + Wraps a Redis client and adds key-index operations. + + Use key indexes when you need to list or delete keys by a logical + prefix (e.g. all keys for a privacy request) without scanning the + key space. Call add_key_to_index when you set a key, and + remove_key_from_index when you delete it (or use the combined + set/delete helpers). + """ + + def __init__(self, redis_client: Redis) -> None: + """ + Args: + redis_client: Any Redis client (e.g. FidesopsRedis from get_cache()). + """ + self._redis = redis_client + + def _index_key(self, index_prefix: str) -> str: + """Return the Redis key used to store the set of keys for this index.""" + return f"{INDEX_KEY_PREFIX}{index_prefix}" + + def add_key_to_index(self, index_prefix: str, key: str) -> None: + """Register a key under an index prefix so it can be listed by prefix.""" + self._redis.sadd(self._index_key(index_prefix), key) + + def remove_key_from_index(self, index_prefix: str, key: str) -> None: + """Unregister a key from an index prefix.""" + self._redis.srem(self._index_key(index_prefix), key) + + def get_keys_by_index(self, index_prefix: str) -> List[str]: + """ + Return all keys registered under this index prefix. + O(set size), no key-space scan. + """ + members = self._redis.smembers(self._index_key(index_prefix)) + return list(members) if members else [] + + def delete_index(self, index_prefix: str) -> None: + """Remove the index set. Does not delete the data keys themselves.""" + self._redis.delete(self._index_key(index_prefix)) + + def delete_keys_by_index( + self, + index_prefix: str, + ) -> None: + """ + Delete every key in the index and then remove the index set. + Use this instead of KEYS/SCAN when you have been maintaining an index. + """ + keys = self.get_keys_by_index(index_prefix) + if keys: + self._redis.delete(*keys) + self.delete_index(index_prefix) + + def set_with_index( + self, + key: str, + value: RedisValue, + index_prefix: str, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """ + Set a key and add it to an index in one step. + If expire_seconds is set, the key will expire after that many seconds. + Returns the result of SET (e.g. True). + """ + if expire_seconds is not None: + result = self._redis.set(key, value, ex=expire_seconds) + else: + result = self._redis.set(key, value) + self.add_key_to_index(index_prefix, key) + return result + + def delete_key_and_remove_from_index( + self, + key: str, + index_prefix: str, + ) -> None: + """Delete a key and remove it from its index.""" + self._redis.delete(key) + self.remove_key_from_index(index_prefix, key) + + @property + def redis(self) -> Redis: + """Access the underlying Redis client for operations not on the manager.""" + return self._redis From 990dfde43bb9445608950f20fe4353080e489eb6 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:24:33 -0800 Subject: [PATCH 02/50] Add pipelining to index operations for atomicity --- src/fides/common/cache/manager.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/fides/common/cache/manager.py b/src/fides/common/cache/manager.py index 5327aa5f2a2..2640e685d0f 100644 --- a/src/fides/common/cache/manager.py +++ b/src/fides/common/cache/manager.py @@ -68,9 +68,15 @@ def delete_keys_by_index( Use this instead of KEYS/SCAN when you have been maintaining an index. 
""" keys = self.get_keys_by_index(index_prefix) + idx_key = self._index_key(index_prefix) if keys: - self._redis.delete(*keys) - self.delete_index(index_prefix) + pipe = self._redis.pipeline() + for k in keys: + pipe.delete(k) + pipe.delete(idx_key) + pipe.execute() + else: + self._redis.delete(idx_key) def set_with_index( self, @@ -84,12 +90,14 @@ def set_with_index( If expire_seconds is set, the key will expire after that many seconds. Returns the result of SET (e.g. True). """ + pipe = self._redis.pipeline() if expire_seconds is not None: - result = self._redis.set(key, value, ex=expire_seconds) + pipe.set(key, value, ex=expire_seconds) else: - result = self._redis.set(key, value) - self.add_key_to_index(index_prefix, key) - return result + pipe.set(key, value) + pipe.sadd(self._index_key(index_prefix), key) + results = pipe.execute() + return results[0] def delete_key_and_remove_from_index( self, @@ -97,8 +105,10 @@ def delete_key_and_remove_from_index( index_prefix: str, ) -> None: """Delete a key and remove it from its index.""" - self._redis.delete(key) - self.remove_key_from_index(index_prefix, key) + pipe = self._redis.pipeline() + pipe.delete(key) + pipe.srem(self._index_key(index_prefix), key) + pipe.execute() @property def redis(self) -> Redis: From 60bd3ffb955820f32f6680762b594f8602554fae Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:38:43 -0800 Subject: [PATCH 03/50] Add tests and make index operations pipelined --- src/fides/common/cache/__init__.py | 60 --------- tests/common/__init__.py | 0 tests/common/cache/__init__.py | 0 tests/common/cache/test_manager.py | 195 +++++++++++++++++++++++++++++ 4 files changed, 195 insertions(+), 60 deletions(-) create mode 100644 tests/common/__init__.py create mode 100644 tests/common/cache/__init__.py create mode 100644 tests/common/cache/test_manager.py diff --git a/src/fides/common/cache/__init__.py b/src/fides/common/cache/__init__.py index 45cd9cb307c..1dc9c9e021d 100644 --- a/src/fides/common/cache/__init__.py +++ b/src/fides/common/cache/__init__.py @@ -2,74 +2,14 @@ Shared Redis cache utilities and the RedisCacheManager. RedisCacheManager provides modern Redis patterns such as key indexes. -DSRCacheStore wraps that with DSR-specific key naming (dsr:{id}:{part}) -and index-backed list/clear with lazy migration for legacy keys. - -Usage: - with get_dsr_cache_store() as store: - store.write_identity(privacy_request_id, "email", "user@example.com") - store.clear(privacy_request_id) """ -from contextlib import contextmanager -from typing import Iterator - -from fides.common.cache.dsr_store import ( - DSR_KEY_PREFIX, - DSRCacheStore, -) from fides.common.cache.manager import ( INDEX_KEY_PREFIX, RedisCacheManager, ) - -from fides.api.util.cache import get_cache - -@contextmanager -def get_dsr_cache_store( - *, - backfill_index_on_legacy_read: bool = True, - migrate_legacy_on_read: bool = True, -) -> Iterator[DSRCacheStore]: - """ - Context manager that yields a DSRCacheStore backed by the application Redis connection. - - The store handles both new-format keys (dsr:{id}:{part}) and legacy keys - (id-{id}-{field}-{attr}) with automatic migration on read when migrate_legacy_on_read=True. - - Args: - backfill_index_on_legacy_read: When listing keys and falling back to SCAN for - legacy keys, add those keys to the index. Default True. - migrate_legacy_on_read: When a get finds value in legacy key only, write to - new key, delete legacy key, add new key to index. Default True. 
- - Yields: - DSRCacheStore instance ready for use - - Usage: - with get_dsr_cache_store() as store: - store.clear(privacy_request_id) - - with get_dsr_cache_store() as store: - store.write_identity(pr_id, "email", "user@example.com") - value = store.get_identity(pr_id, "email") - """ - redis_client = get_cache() - manager = RedisCacheManager(redis_client) - store = DSRCacheStore( - manager, - backfill_index_on_legacy_read=backfill_index_on_legacy_read, - migrate_legacy_on_read=migrate_legacy_on_read, - ) - yield store - # No cleanup needed; store doesn't own the Redis connection - - __all__ = [ - "DSR_KEY_PREFIX", - "DSRCacheStore", "INDEX_KEY_PREFIX", "RedisCacheManager", - "get_dsr_cache_store", ] diff --git a/tests/common/__init__.py b/tests/common/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/common/cache/__init__.py b/tests/common/cache/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/common/cache/test_manager.py b/tests/common/cache/test_manager.py new file mode 100644 index 00000000000..278ec803b0d --- /dev/null +++ b/tests/common/cache/test_manager.py @@ -0,0 +1,195 @@ +""" +Unit tests for RedisCacheManager pipeline operations. + +MockRedis and MockPipeline are inline for easy updates when MockClient +lands upstream - replace with an import. +""" + +import fnmatch + +import pytest + +from fides.common.cache.manager import RedisCacheManager + + +# --- Inline MockRedis (replace with MockClient import when available) --- + + +class MockPipeline: + """In-memory pipeline that batches commands and executes atomically.""" + + def __init__(self, data: dict, sets: dict) -> None: + self._data = data + self._sets = sets + self._commands: list = [] + + def set(self, key: str, value, ex=None) -> "MockPipeline": + self._commands.append(("set", (key, value, ex))) + return self + + def sadd(self, key: str, member: str) -> "MockPipeline": + self._commands.append(("sadd", (key, member))) + return self + + def delete(self, *keys: str) -> "MockPipeline": + self._commands.append(("delete", keys)) + return self + + def srem(self, key: str, member: str) -> "MockPipeline": + self._commands.append(("srem", (key, member))) + return self + + def execute(self) -> list: + results = [] + for op, args in self._commands: + if op == "set": + key, value, _ = args + self._data[key] = value + results.append(True) + elif op == "sadd": + key, member = args + if key not in self._sets: + self._sets[key] = set() + self._sets[key].add(member) + results.append(1) + elif op == "delete": + for k in args: + self._data.pop(k, None) + self._sets.pop(k, None) + results.append(len(args)) + elif op == "srem": + key, member = args + if key in self._sets: + self._sets[key].discard(member) + if not self._sets[key]: + del self._sets[key] + results.append(1) + self._commands = [] + return results + + +class MockRedis: + """In-memory Redis mock for RedisCacheManager tests.""" + + def __init__(self) -> None: + self._data: dict = {} + self._sets: dict = {} + + def get(self, key: str): + return self._data.get(key) + + def set(self, key: str, value, ex=None) -> bool: + self._data[key] = value + return True + + def delete(self, *keys: str) -> int: + count = 0 + for k in keys: + if k in self._data: + del self._data[k] + count += 1 + if k in self._sets: + del self._sets[k] + count += 1 + return count + + def sadd(self, key: str, member: str) -> int: + if key not in self._sets: + self._sets[key] = set() + self._sets[key].add(member) + return 1 + + def srem(self, key: str, 
member: str) -> int: + if key in self._sets: + self._sets[key].discard(member) + if not self._sets[key]: + del self._sets[key] + return 1 + return 0 + + def smembers(self, key: str) -> set: + return self._sets.get(key, set()).copy() + + def keys(self, pattern: str = "*") -> list: + all_keys = set(self._data) | set(self._sets) + return [k for k in all_keys if fnmatch.fnmatch(k, pattern)] + + def pipeline(self) -> MockPipeline: + return MockPipeline(self._data, self._sets) + + +# --- Fixtures --- + + +@pytest.fixture +def mock_redis() -> MockRedis: + return MockRedis() + + +@pytest.fixture +def manager(mock_redis: MockRedis) -> RedisCacheManager: + return RedisCacheManager(mock_redis) + + +# --- Tests --- + + +@pytest.mark.unit +class TestRedisCacheManagerPipeline: + """Tests for RedisCacheManager pipeline-based index operations.""" + + def test_set_with_index_uses_pipeline_and_returns_set_result( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """set_with_index stores key, adds to index, and returns SET result.""" + result = manager.set_with_index("k1", "v1", "idx1") + + assert result is True + assert mock_redis.get("k1") == "v1" + assert "k1" in mock_redis.smembers("__idx:idx1") + + def test_set_with_index_with_expiry( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """set_with_index with expire_seconds stores value and adds to index.""" + result = manager.set_with_index("k2", "v2", "idx2", expire_seconds=60) + + assert result is True + assert mock_redis.get("k2") == "v2" + assert "k2" in mock_redis.smembers("__idx:idx2") + + def test_delete_key_and_remove_from_index_atomic( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """delete_key_and_remove_from_index removes key and index entry atomically.""" + manager.set_with_index("k3", "v3", "idx3") + assert mock_redis.get("k3") == "v3" + assert "k3" in mock_redis.smembers("__idx:idx3") + + manager.delete_key_and_remove_from_index("k3", "idx3") + + assert mock_redis.get("k3") is None + assert "k3" not in mock_redis.smembers("__idx:idx3") + + def test_delete_keys_by_index_batches_deletes( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """delete_keys_by_index removes all indexed keys and the index in one pipeline.""" + manager.set_with_index("k4a", "v4a", "idx4") + manager.set_with_index("k4b", "v4b", "idx4") + manager.set_with_index("k4c", "v4c", "idx4") + + manager.delete_keys_by_index("idx4") + + assert mock_redis.get("k4a") is None + assert mock_redis.get("k4b") is None + assert mock_redis.get("k4c") is None + assert mock_redis.smembers("__idx:idx4") == set() + + def test_delete_keys_by_index_empty_index( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """delete_keys_by_index on empty index deletes index set without error.""" + manager.delete_keys_by_index("idx5") + + assert mock_redis.smembers("__idx:idx5") == set() From 22319598f81c05237c95749f221fea02db70f6a6 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:43:24 -0800 Subject: [PATCH 04/50] Formatting fixes --- tests/common/cache/test_manager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/common/cache/test_manager.py b/tests/common/cache/test_manager.py index 278ec803b0d..654a1dc7988 100644 --- a/tests/common/cache/test_manager.py +++ b/tests/common/cache/test_manager.py @@ -11,7 +11,6 @@ from fides.common.cache.manager import RedisCacheManager - # --- Inline MockRedis (replace with MockClient import when available) --- From 
f9bb52edbf2269313fdf6dc20b5634463e461d6d Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:57:16 -0800 Subject: [PATCH 05/50] Cleanup tests --- tests/common/cache/test_manager.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/common/cache/test_manager.py b/tests/common/cache/test_manager.py index 654a1dc7988..0cae1eeede3 100644 --- a/tests/common/cache/test_manager.py +++ b/tests/common/cache/test_manager.py @@ -1,18 +1,9 @@ -""" -Unit tests for RedisCacheManager pipeline operations. - -MockRedis and MockPipeline are inline for easy updates when MockClient -lands upstream - replace with an import. -""" - import fnmatch import pytest from fides.common.cache.manager import RedisCacheManager -# --- Inline MockRedis (replace with MockClient import when available) --- - class MockPipeline: """In-memory pipeline that batches commands and executes atomically.""" From 64a5f7fb034255286b0745bd8367bd8dc0ffca34 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 24 Feb 2026 08:58:03 -0800 Subject: [PATCH 06/50] Add tests for index operations --- tests/common/cache/test_manager.py | 103 +++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/tests/common/cache/test_manager.py b/tests/common/cache/test_manager.py index 0cae1eeede3..25f5181bf25 100644 --- a/tests/common/cache/test_manager.py +++ b/tests/common/cache/test_manager.py @@ -183,3 +183,106 @@ def test_delete_keys_by_index_empty_index( manager.delete_keys_by_index("idx5") assert mock_redis.smembers("__idx:idx5") == set() + + +@pytest.mark.unit +class TestRedisCacheManagerIndexOperations: + """Tests for add_key_to_index, remove_key_from_index, get_keys_by_index, delete_index.""" + + def test_add_key_to_index_registers_key( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Ensure that add_key_to_index adds the key and creates the index set if it doesn't exist.""" + manager.add_key_to_index("myidx", "cache_key_1") + + assert "cache_key_1" in mock_redis.smembers("__idx:myidx") + + def test_add_key_to_index_multiple_keys( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Ensure that add_key_to_index can add multiple keys to the same index.""" + manager.add_key_to_index("idx6", "key_a") + manager.add_key_to_index("idx6", "key_b") + manager.add_key_to_index("idx6", "key_c") + + members = mock_redis.smembers("__idx:idx6") + assert members == {"key_a", "key_b", "key_c"} + + def test_remove_key_from_index_idempotent(self, manager: RedisCacheManager, mock_redis: MockRedis) -> None: + """Ensure that remove_key_from_index is idempotent and does not error when the specified key is not in the index.""" + manager.set_with_index("key_a", "value_a", "idx6") + manager.set_with_index("key_b", "value_b", "idx6") + + manager.remove_key_from_index("idx6", "key_a") + + # Should not remove other keys when the key is in the index and does not remove the key from the cache + assert mock_redis.smembers("__idx:idx6") == {"key_b"} + assert mock_redis.get("key_a") == "value_a" + assert mock_redis.get("key_b") == "value_b" + + + # Should not error when the key is not in the index and does not remove other keys + manager.remove_key_from_index("idx6", "key_a") + + assert mock_redis.smembers("__idx:idx6") == {"key_b"} + assert mock_redis.get("key_a") == "value_a" + assert mock_redis.get("key_b") == "value_b" + + def test_remove_key_from_index_unregisters_key( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Ensure that remove_key_from_index 
removes a key from the index and does not remove other keys.""" + manager.add_key_to_index("idx7", "keep") + manager.add_key_to_index("idx7", "remove_me") + + manager.remove_key_from_index("idx7", "remove_me") + + assert mock_redis.smembers("__idx:idx7") == {"keep"} + + def test_remove_key_from_index_does_not_error_when_missing( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Ensure that remove_key_from_index does not error when the specified key is not in the index, and does not remove other keys.""" + manager.add_key_to_index("idx8", "existing") + + manager.remove_key_from_index("idx8", "nonexistent") + + assert mock_redis.smembers("__idx:idx8") == {"existing"} + + def test_get_keys_by_index_returns_empty_for_missing_index( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Ensure that get_keys_by_index returns an empty list when the specified index does not exist.""" + keys = manager.get_keys_by_index("never_used") + + assert keys == [] + + def test_get_keys_by_index_returns_registered_keys( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Ensure get_keys_by_index returns all the keys in the index.""" + manager.add_key_to_index("idx9", "k1") + manager.add_key_to_index("idx9", "k2") + + keys = manager.get_keys_by_index("idx9") + + assert set(keys) == {"k1", "k2"} + assert len(keys) == 2 + + def test_delete_index_removes_index_set_only( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Ensure that delete_index removes the index set but NOT the data keys that are still in the cache.""" + mock_redis.set("data_key_1", "value1") + manager.add_key_to_index("idx10", "data_key_1") + + manager.delete_index("idx10") + + assert mock_redis.smembers("__idx:idx10") == set() + assert mock_redis.get("data_key_1") == "value1" + + def test_delete_index_does_not_error_when_empty( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Ensure that delete_index does not error when the specified index does not exist.""" + manager.delete_index("nonexistent_idx") From 901effd5447f5f45b345d7670b1a60f4e4a3fa0f Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 24 Feb 2026 10:36:22 -0800 Subject: [PATCH 07/50] Update src/fides/common/cache/manager.py Deserialize keys to strings when in `get_keys_by_index` - preferring deliberate conversion over `decode_responses` Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- src/fides/common/cache/manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fides/common/cache/manager.py b/src/fides/common/cache/manager.py index 2640e685d0f..9c85f8d75e3 100644 --- a/src/fides/common/cache/manager.py +++ b/src/fides/common/cache/manager.py @@ -53,7 +53,8 @@ def get_keys_by_index(self, index_prefix: str) -> List[str]: O(set size), no key-space scan. """ members = self._redis.smembers(self._index_key(index_prefix)) - return list(members) if members else [] + # Convert bytes to strings for consistent return type + return [m.decode('utf-8') if isinstance(m, bytes) else m for m in members] if members else [] def delete_index(self, index_prefix: str) -> None: """Remove the index set. 
Does not delete the data keys themselves.""" From 2d3d6e693005d05a9d13faf6e23dd501fa78c9be Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 24 Feb 2026 11:57:20 -0800 Subject: [PATCH 08/50] Add TTL management (opt-in) to manager --- src/fides/common/cache/manager.py | 15 ++++++ tests/common/cache/test_manager.py | 81 ++++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 3 deletions(-) diff --git a/src/fides/common/cache/manager.py b/src/fides/common/cache/manager.py index 9c85f8d75e3..7a5fe227c01 100644 --- a/src/fides/common/cache/manager.py +++ b/src/fides/common/cache/manager.py @@ -13,6 +13,9 @@ # Redis key prefix for index sets. Index key = INDEX_KEY_PREFIX + index_prefix. INDEX_KEY_PREFIX = "__idx:" +# Default additional TTL for index sets when set_with_index is used with index_ttl_enabled=True +INDEX_TTL_EXTRA_SECONDS = 60 * 60 * 24 # 24 hours + # Value types supported by Redis SET and basic ops RedisValue = Union[bytes, float, int, str] @@ -85,11 +88,17 @@ def set_with_index( value: RedisValue, index_prefix: str, expire_seconds: Optional[int] = None, + *, + index_ttl_enabled: bool = False, ) -> Optional[bool]: """ Set a key and add it to an index in one step. If expire_seconds is set, the key will expire after that many seconds. Returns the result of SET (e.g. True). + + Index TTL (opt-in safety): When index_ttl_enabled=True and expire_seconds is set, + the index set will be given a TTL. If the key's TTL is farther out than the + index's current TTL, the index TTL is extended. Disabled by default. """ pipe = self._redis.pipeline() if expire_seconds is not None: @@ -98,6 +107,12 @@ def set_with_index( pipe.set(key, value) pipe.sadd(self._index_key(index_prefix), key) results = pipe.execute() + if index_ttl_enabled and expire_seconds is not None: + idx_key = self._index_key(index_prefix) + current_ttl = self._redis.ttl(idx_key) + proposed_index_ttl = expire_seconds + INDEX_TTL_EXTRA_SECONDS + if current_ttl < 0 or proposed_index_ttl > current_ttl: + self._redis.expire(idx_key, proposed_index_ttl) return results[0] def delete_key_and_remove_from_index( diff --git a/tests/common/cache/test_manager.py b/tests/common/cache/test_manager.py index 25f5181bf25..f90db5e215a 100644 --- a/tests/common/cache/test_manager.py +++ b/tests/common/cache/test_manager.py @@ -2,7 +2,7 @@ import pytest -from fides.common.cache.manager import RedisCacheManager +from fides.common.cache.manager import INDEX_TTL_EXTRA_SECONDS, RedisCacheManager class MockPipeline: @@ -64,6 +64,7 @@ class MockRedis: def __init__(self) -> None: self._data: dict = {} self._sets: dict = {} + self._ttl: dict = {} # key -> seconds until expiry (simplified; no decay) def get(self, key: str): return self._data.get(key) @@ -81,6 +82,7 @@ def delete(self, *keys: str) -> int: if k in self._sets: del self._sets[k] count += 1 + self._ttl.pop(k, None) return count def sadd(self, key: str, member: str) -> int: @@ -104,6 +106,17 @@ def keys(self, pattern: str = "*") -> list: all_keys = set(self._data) | set(self._sets) return [k for k in all_keys if fnmatch.fnmatch(k, pattern)] + def ttl(self, key: str) -> int: + if key not in self._data and key not in self._sets: + return -2 + return self._ttl.get(key, -1) + + def expire(self, key: str, seconds: int) -> bool: + if key in self._data or key in self._sets: + self._ttl[key] = seconds + return True + return False + def pipeline(self) -> MockPipeline: return MockPipeline(self._data, self._sets) @@ -208,7 +221,9 @@ def test_add_key_to_index_multiple_keys( members = 
mock_redis.smembers("__idx:idx6") assert members == {"key_a", "key_b", "key_c"} - def test_remove_key_from_index_idempotent(self, manager: RedisCacheManager, mock_redis: MockRedis) -> None: + def test_remove_key_from_index_idempotent( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: """Ensure that remove_key_from_index is idempotent and does not error when the specified key is not in the index.""" manager.set_with_index("key_a", "value_a", "idx6") manager.set_with_index("key_b", "value_b", "idx6") @@ -220,7 +235,6 @@ def test_remove_key_from_index_idempotent(self, manager: RedisCacheManager, mock assert mock_redis.get("key_a") == "value_a" assert mock_redis.get("key_b") == "value_b" - # Should not error when the key is not in the index and does not remove other keys manager.remove_key_from_index("idx6", "key_a") @@ -286,3 +300,64 @@ def test_delete_index_does_not_error_when_empty( ) -> None: """Ensure that delete_index does not error when the specified index does not exist.""" manager.delete_index("nonexistent_idx") + + +@pytest.mark.unit +class TestRedisCacheManagerIndexTTL: + """Tests for optional index TTL (index_ttl_enabled).""" + + def test_index_ttl_disabled_by_default( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Without index_ttl_enabled, index has no TTL.""" + manager.set_with_index("k", "v", "idx", expire_seconds=60) + + assert mock_redis.ttl("__idx:idx") == -1 + + def test_index_ttl_applied_when_enabled( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """With index_ttl_enabled, index gets TTL matching key.""" + manager.set_with_index( + "k", "v", "idx", expire_seconds=120, index_ttl_enabled=True + ) + + assert mock_redis.ttl("__idx:idx") == 120 + INDEX_TTL_EXTRA_SECONDS + + def test_index_ttl_extended_when_key_ttl_farther_out( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Index TTL is pushed out when adding key with longer TTL.""" + manager.set_with_index( + "k1", "v1", "idx", expire_seconds=60, index_ttl_enabled=True + ) + assert mock_redis.ttl("__idx:idx") == 60 + INDEX_TTL_EXTRA_SECONDS + + manager.set_with_index( + "k2", "v2", "idx", expire_seconds=300, index_ttl_enabled=True + ) + + assert mock_redis.ttl("__idx:idx") == 300 + INDEX_TTL_EXTRA_SECONDS + + def test_index_ttl_not_shortened_when_key_ttl_shorter( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """Index TTL is NOT shortened when adding key with shorter TTL.""" + manager.set_with_index( + "k1", "v1", "idx", expire_seconds=300, index_ttl_enabled=True + ) + assert mock_redis.ttl("__idx:idx") == 300 + INDEX_TTL_EXTRA_SECONDS + + manager.set_with_index( + "k2", "v2", "idx", expire_seconds=60, index_ttl_enabled=True + ) + + assert mock_redis.ttl("__idx:idx") == 300 + INDEX_TTL_EXTRA_SECONDS + + def test_index_ttl_ignored_when_no_expire_seconds( + self, manager: RedisCacheManager, mock_redis: MockRedis + ) -> None: + """index_ttl_enabled has no effect when expire_seconds is not set.""" + manager.set_with_index("k", "v", "idx", index_ttl_enabled=True) + + assert mock_redis.ttl("__idx:idx") == -1 From bbd2926c0e3bae4fb8ce93165ff51ab5046f3dc6 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 24 Feb 2026 12:13:29 -0800 Subject: [PATCH 09/50] Add new tests dir to noxfiles --- noxfiles/ci_nox.py | 1 + noxfiles/setup_tests_nox.py | 1 + 2 files changed, 2 insertions(+) diff --git a/noxfiles/ci_nox.py b/noxfiles/ci_nox.py index b059711c7b0..e49f66ca1cf 100644 --- a/noxfiles/ci_nox.py +++ 
b/noxfiles/ci_nox.py
@@ -347,6 +347,7 @@ def load_tests(session: nox.Session) -> None:
 # This maps actual test directories to the test groups that cover them
 TEST_DIRECTORY_COVERAGE = {
     "tests/api/": ["api"],
+    "tests/common/": ["misc-unit"],
     "tests/ctl/": ["ctl-unit", "ctl-not-external", "ctl-integration", "ctl-external"],
     "tests/lib/": ["lib"],
     "tests/ops/": [
diff --git a/noxfiles/setup_tests_nox.py b/noxfiles/setup_tests_nox.py
index 4df79fde830..85145982821 100644
--- a/noxfiles/setup_tests_nox.py
+++ b/noxfiles/setup_tests_nox.py
@@ -458,6 +458,7 @@ def pytest_misc_unit(session: Session, pytest_config: PytestConfig) -> None:
         "/opt/fides/bin/python",
         "pytest",
         *pytest_config.args,
+        "tests/common/",
         "tests/service/",
         "tests/task/",
         "tests/util/",

From fe7bd8b7e28b9bb982936960777452e6b6b635fa Mon Sep 17 00:00:00 2001
From: John Ewart
Date: Tue, 24 Feb 2026 13:33:10 -0800
Subject: [PATCH 10/50] Formatting

---
 src/fides/common/cache/manager.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/fides/common/cache/manager.py b/src/fides/common/cache/manager.py
index 7a5fe227c01..8895c567c1a 100644
--- a/src/fides/common/cache/manager.py
+++ b/src/fides/common/cache/manager.py
@@ -57,7 +57,11 @@ def get_keys_by_index(self, index_prefix: str) -> List[str]:
         """
         members = self._redis.smembers(self._index_key(index_prefix))
         # Convert bytes to strings for consistent return type
-        return [m.decode('utf-8') if isinstance(m, bytes) else m for m in members] if members else []
+        return (
+            [m.decode("utf-8") if isinstance(m, bytes) else m for m in members]
+            if members
+            else []
+        )

     def delete_index(self, index_prefix: str) -> None:
         """Remove the index set. Does not delete the data keys themselves."""

From b1a43cc3d96e467e844934f98c7f0af8b10b3a37 Mon Sep 17 00:00:00 2001
From: John Ewart
Date: Tue, 24 Feb 2026 14:22:43 -0800
Subject: [PATCH 11/50] Add changelog

---
 changelog/7462.yaml | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 changelog/7462.yaml

diff --git a/changelog/7462.yaml b/changelog/7462.yaml
new file mode 100644
index 00000000000..e74052b4852
--- /dev/null
+++ b/changelog/7462.yaml
@@ -0,0 +1,4 @@
+type: Developer Experience
+description: Added a Redis cache manager for internal development use
+pr: 7462
+labels: []

From 42495fa4b91246766039e804604f033caa80e635 Mon Sep 17 00:00:00 2001
From: John Ewart
Date: Tue, 10 Feb 2026 21:04:20 -0800
Subject: [PATCH 12/50] Part 2 of ENG-740

This uses the `clear` method on the DSR cache store, which under the
hood uses `scan` (if needed; it will first try to use the set-based key
index if that particular ID is being tracked).
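As a minimal sketch of the pattern this patch adopts (with
`privacy_request_id` standing in for the model's `self.id`):

    from fides.common.cache import get_dsr_cache_store

    with get_dsr_cache_store() as store:
        store.clear(privacy_request_id)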
Adds some tests and removes the dependency on get_all_cache_keys_for_privacy_request --- .../models/privacy_request/privacy_request.py | 8 +- .../cache/test_dsr_store_clear_integration.py | 165 ++++++++++++++++++ 2 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 tests/common/cache/test_dsr_store_clear_integration.py diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index 63ab6bad213..826ae361288 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -109,7 +109,6 @@ from fides.api.tasks import celery_app from fides.api.util.cache import ( FidesopsRedis, - get_all_cache_keys_for_privacy_request, get_async_task_tracking_cache_key, get_cache, get_custom_privacy_request_field_cache_key, @@ -117,6 +116,7 @@ get_encryption_cache_key, get_identity_cache_key, ) +from fides.common.cache import get_dsr_cache_store from fides.api.util.collection_util import Row, extract_key_for_address from fides.api.util.constants import API_DATE_FORMAT from fides.api.util.custom_json_encoder import CustomJSONEncoder @@ -483,10 +483,8 @@ def clear_cached_values(self) -> None: Clears all cached values associated with this privacy request from Redis. """ logger.info(f"Clearing cached values for privacy request {self.id}") - cache: FidesopsRedis = get_cache() - all_keys = get_all_cache_keys_for_privacy_request(privacy_request_id=self.id) - for key in all_keys: - cache.delete(key) + with get_dsr_cache_store() as store: + store.clear(self.id) def delete(self, db: Session) -> None: """ diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py new file mode 100644 index 00000000000..95fc5449bfe --- /dev/null +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -0,0 +1,165 @@ +""" +Tests for privacy_request.clear_cached_values() integration with DSRCacheStore. + +Verifies that clearing uses the store and handles both legacy and new cache keys. 
+""" + +import uuid +from unittest.mock import MagicMock, patch + +import pytest + +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager + + +class MockRedis: + """Minimal mock Redis for testing clear behavior.""" + + def __init__(self): + self._data = {} + self._sets = {} + + def set(self, key, value, ex=None): + self._data[key] = value + return True + + def get(self, key): + return self._data.get(key) + + def delete(self, *keys): + return sum(1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None)) + + def keys(self, pattern): + import fnmatch + return [k for k in self._data if fnmatch.fnmatch(k, pattern)] + + def scan_iter(self, match="*", count=None): + return iter(self.keys(match)) + + def sadd(self, key, *members): + s = self._sets.setdefault(key, set()) + before = len(s) + s.update(members) + return len(s) - before + + def srem(self, key, *members): + if key not in self._sets: + return 0 + before = len(self._sets[key]) + self._sets[key].difference_update(members) + return before - len(self._sets[key]) + + def smembers(self, key): + return self._sets.get(key, set()).copy() + + +@pytest.mark.unit +class TestPrivacyRequestClearCachedValues: + """Test clear_cached_values() with DSR store.""" + + def test_clear_removes_legacy_keys(self): + """clear_cached_values removes legacy cache keys.""" + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + + # Simulate legacy cached data + mock_redis.set(f"id-{pr_id}-identity-email", "test@example.com") + mock_redis.set(f"id-{pr_id}-identity-phone_number", "+1234567890") + mock_redis.set(f"id-{pr_id}-encryption-key", "encryption-key") + + # Mock privacy request + pr = MagicMock() + pr.id = pr_id + + # Patch get_cache in the api.util.cache module where get_dsr_cache_store calls it + with patch("fides.api.util.cache.get_cache", return_value=mock_redis): + # Import here to avoid app initialization + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + PrivacyRequest.clear_cached_values(pr) + + # Verify all keys deleted + assert len(mock_redis.keys(f"*{pr_id}*")) == 0 + + def test_clear_removes_new_keys(self): + """clear_cached_values removes new-format cache keys.""" + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + + # Simulate new cached data via store + manager = RedisCacheManager(mock_redis) + store = DSRCacheStore(manager) + store.write_identity(pr_id, "email", "test@example.com") + store.write_encryption(pr_id, "key", "encryption-key") + + pr = MagicMock() + pr.id = pr_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + PrivacyRequest.clear_cached_values(pr) + + assert len(mock_redis.keys(f"*{pr_id}*")) == 0 + + def test_clear_removes_mixed_keys(self): + """clear_cached_values removes both legacy and new keys.""" + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + + # Mixed: legacy identity, new encryption + mock_redis.set(f"id-{pr_id}-identity-email", "legacy@example.com") + mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-dept", "Engineering") + + manager = RedisCacheManager(mock_redis) + store = DSRCacheStore(manager) + store.write_encryption(pr_id, "key", "new-encryption-key") + store.write_async_execution(pr_id, "task-123") + + pr = MagicMock() + pr.id = pr_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis): + from fides.api.models.privacy_request.privacy_request 
import PrivacyRequest + + PrivacyRequest.clear_cached_values(pr) + + assert len(mock_redis.keys(f"*{pr_id}*")) == 0 + + def test_clear_removes_index(self): + """clear_cached_values removes the DSR index.""" + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + + manager = RedisCacheManager(mock_redis) + store = DSRCacheStore(manager) + store.write_identity(pr_id, "email", "test@example.com") + + # Verify index exists + assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) > 0 + + pr = MagicMock() + pr.id = pr_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + PrivacyRequest.clear_cached_values(pr) + + # Index should be deleted + assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) == 0 + + +def _run_standalone_tests(): + """Run tests standalone.""" + tests = TestPrivacyRequestClearCachedValues() + tests.test_clear_removes_legacy_keys() + tests.test_clear_removes_new_keys() + tests.test_clear_removes_mixed_keys() + tests.test_clear_removes_index() + print("All clear_cached_values integration tests passed.") + + +if __name__ == "__main__": + _run_standalone_tests() From 34b86a2f4174511ed88f7546d0a30f69ed3d2053 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 13:10:19 -0800 Subject: [PATCH 13/50] Add DSR cache store + tests --- src/fides/common/cache/dsr_store.py | 299 ++++++++++++++++++ src/fides/common/cache/key_mapping.py | 140 ++++++++ tests/common/cache/conftest.py | 75 +++++ tests/common/cache/test_dsr_store.py | 246 ++++++++++++++ .../cache/test_dsr_store_key_mapping.py | 111 +++++++ .../common/cache/test_dsr_store_migration.py | 209 ++++++++++++ 6 files changed, 1080 insertions(+) create mode 100644 src/fides/common/cache/dsr_store.py create mode 100644 src/fides/common/cache/key_mapping.py create mode 100644 tests/common/cache/conftest.py create mode 100644 tests/common/cache/test_dsr_store.py create mode 100644 tests/common/cache/test_dsr_store_key_mapping.py create mode 100644 tests/common/cache/test_dsr_store_migration.py diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py new file mode 100644 index 00000000000..d6819fc22c7 --- /dev/null +++ b/src/fides/common/cache/dsr_store.py @@ -0,0 +1,299 @@ +""" +DSR cache store: single place for all DSR (privacy request) cache access. + +Enforces: +- Key naming: dsr:{dsr_id}:{part} for every key (part = field_type:field_key) +- Index: one set per DSR (__idx:dsr:{dsr_id}) listing all keys for that DSR +- Legacy: each field type has a legacy key format; reads try new key then legacy, + and can lazily migrate (copy legacy -> new, delete legacy) on first read. + +Hash alternative (future): Using a single Redis HASH per DSR (key=dsr:{id}, +fields=part names) would give one key per DSR, no index, and atomic HSET/HGET +per field so concurrent writers don't touch an index. Tradeoff: one TTL for the +whole DSR and a different storage shape; can introduce a hash-backed backend +later if we want to avoid index consistency concerns. 
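+
+For illustration, a hash-backed variant would look roughly like the
+following (hypothetical sketch against plain redis-py; not implemented
+in this module):
+
+    r.hset(f"dsr:{dsr_id}", mapping={"identity:email": value})
+    r.hget(f"dsr:{dsr_id}", "identity:email")
+    r.delete(f"dsr:{dsr_id}")  # one DELETE clears the whole DSR, no index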
+""" + +from typing import List, Optional, Union + +from redis import Redis + +from fides.common.cache.key_mapping import KeyMapper +from fides.common.cache.manager import RedisCacheManager, RedisValue + +# Key format: dsr:{dsr_id}:{part} (re-export for callers; KeyMapper builds these) +DSR_KEY_PREFIX = "dsr:" + + +def _dsr_key(dsr_id: str, part: str) -> str: + """Build the Redis key for a DSR cache part.""" + return f"{DSR_KEY_PREFIX}{dsr_id}:{part}" + + +def _dsr_index_prefix(dsr_id: str) -> str: + """Index prefix for this DSR; index set is __idx:dsr:{dsr_id}.""" + return f"{DSR_KEY_PREFIX}{dsr_id}" + + +class DSRCacheStore: + """ + Cache store for DSR (privacy request) data with enforced naming and indexing. + + All keys are stored as dsr:{dsr_id}:{part}. Every write is registered in + an index set so listing and clearing by DSR is O(index size) without KEYS/SCAN. + Convenience methods (write_custom_field, get_custom_field, etc.) map to part + names and support lazy migration from legacy key formats on read. + """ + + def __init__( + self, + cache_manager: RedisCacheManager, + *, + backfill_index_on_legacy_read: bool = True, + migrate_legacy_on_read: bool = True, + ) -> None: + """ + Args: + cache_manager: RedisCacheManager (e.g. from get_redis_cache_manager()). + backfill_index_on_legacy_read: When listing keys and we fall back to + KEYS for legacy keys, add those keys to the index. Default True. + migrate_legacy_on_read: When a get finds value in legacy key only, + write to new key, delete legacy key, add new key to index. + Default True. + """ + self._manager = cache_manager + self._redis: Redis = cache_manager.redis + self._backfill = backfill_index_on_legacy_read + self._migrate_on_read = migrate_legacy_on_read + + def write( + self, + dsr_id: str, + field_type: str, + field_key: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """ + Low-level write: set dsr:{dsr_id}:{field_type}:{field_key} and add to index. + Prefer convenience methods (write_custom_field, etc.) so legacy mapping + stays in one place. + """ + part = f"{field_type}:{field_key}" if field_key else field_type + return self.set(dsr_id, part, value, expire_seconds) + + def get_with_legacy( + self, + dsr_id: str, + part: str, + legacy_key: str, + ) -> Optional[Union[str, bytes]]: + """ + Get value for part; if missing, try legacy_key. If found in legacy only + and migrate_legacy_on_read, copy to new key, delete legacy, add to index. + """ + val = self._redis.get(_dsr_key(dsr_id, part)) + if val is not None: + return val + val = self._redis.get(legacy_key) + if val is None: + return None + if self._migrate_on_read: + self.set(dsr_id, part, val) + self._redis.delete(legacy_key) + return val + + def get(self, dsr_id: str, part: str) -> Optional[Union[str, bytes]]: + """Get a value for the given DSR and part. Returns None if missing.""" + return self._redis.get(_dsr_key(dsr_id, part)) + + def set( + self, + dsr_id: str, + part: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """ + Set a value for the given DSR and part. Registers the key in the DSR index. 
+ """ + key = _dsr_key(dsr_id, part) + return self._manager.set_with_index( + key, value, _dsr_index_prefix(dsr_id), expire_seconds + ) + + def delete(self, dsr_id: str, part: str) -> None: + """Delete a single part and remove it from the DSR index.""" + key = _dsr_key(dsr_id, part) + self._manager.delete_key_and_remove_from_index(key, _dsr_index_prefix(dsr_id)) + + # --- Convenience: custom privacy request fields --- + + def write_custom_field( + self, + dsr_id: str, + field_key: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write a custom privacy request field. New key: dsr:{id}:custom_field:{field_key}.""" + return self.write(dsr_id, "custom_field", field_key, value, expire_seconds) + + def get_custom_field( + self, dsr_id: str, field_key: str + ) -> Optional[Union[str, bytes]]: + """Get custom field; reads from legacy id-{id}-custom-privacy-request-field-{key} if needed.""" + part = f"custom_field:{field_key}" + return self.get_with_legacy( + dsr_id, part, KeyMapper.custom_field(dsr_id, field_key)[1] + ) + + # --- Convenience: identity --- + + def write_identity( + self, + dsr_id: str, + attr: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write an identity attribute. New key: dsr:{id}:identity:{attr}.""" + return self.write(dsr_id, "identity", attr, value, expire_seconds) + + def get_identity(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: + """Get identity attribute; reads from legacy id-{id}-identity-{attr} if needed.""" + part = f"identity:{attr}" + return self.get_with_legacy(dsr_id, part, KeyMapper.identity(dsr_id, attr)[1]) + + # --- Convenience: encryption --- + + def write_encryption( + self, + dsr_id: str, + attr: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write an encryption attribute. New key: dsr:{id}:encryption:{attr}.""" + return self.write(dsr_id, "encryption", attr, value, expire_seconds) + + def get_encryption(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: + """Get encryption attribute; reads from legacy id-{id}-encryption-{attr} if needed.""" + part = f"encryption:{attr}" + return self.get_with_legacy(dsr_id, part, KeyMapper.encryption(dsr_id, attr)[1]) + + # --- Convenience: DRP request body --- + + def write_drp( + self, + dsr_id: str, + attr: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write DRP request body attribute. New key: dsr:{id}:drp:{attr}.""" + return self.write(dsr_id, "drp", attr, value, expire_seconds) + + def get_drp(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: + """Get DRP attribute; reads from legacy id-{id}-drp-{attr} if needed.""" + part = f"drp:{attr}" + return self.get_with_legacy(dsr_id, part, KeyMapper.drp(dsr_id, attr)[1]) + + # --- Convenience: masking secret --- + + def write_masking_secret( + self, + dsr_id: str, + strategy: str, + secret_type: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write masking secret. 
New key: dsr:{id}:masking_secret:{strategy}:{secret_type}.""" + part = f"masking_secret:{strategy}:{secret_type}" + return self.set(dsr_id, part, value, expire_seconds) + + def get_masking_secret( + self, + dsr_id: str, + strategy: str, + secret_type: str, + ) -> Optional[Union[str, bytes]]: + """Get masking secret; reads from legacy id-{id}-masking-secret-{strategy}-{type} if needed.""" + part = f"masking_secret:{strategy}:{secret_type}" + return self.get_with_legacy( + dsr_id, + part, + KeyMapper.masking_secret(dsr_id, strategy, secret_type)[1], + ) + + # --- Convenience: async execution (single value per DSR) --- + + def write_async_execution( + self, + dsr_id: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write async task id. New key: dsr:{id}:async_execution.""" + return self.write(dsr_id, "async_execution", "", value, expire_seconds) + + def get_async_execution(self, dsr_id: str) -> Optional[Union[str, bytes]]: + """Get async task id; reads from legacy id-{id}-async-execution if needed.""" + part = "async_execution" + return self.get_with_legacy(dsr_id, part, KeyMapper.async_execution(dsr_id)[1]) + + # --- Convenience: retry count --- + + def write_retry_count( + self, + dsr_id: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write privacy request retry count. New key: dsr:{id}:retry_count.""" + return self.write(dsr_id, "retry_count", "", value, expire_seconds) + + def get_retry_count(self, dsr_id: str) -> Optional[Union[str, bytes]]: + """Get retry count; reads from legacy id-{id}-privacy-request-retry-count if needed.""" + part = "retry_count" + return self.get_with_legacy(dsr_id, part, KeyMapper.retry_count(dsr_id)[1]) + + # --- List / clear (unchanged) --- + + def get_all_keys(self, dsr_id: str) -> List[str]: + """ + Return all cache keys for this DSR. + Uses the index first; if empty, falls back to SCAN for legacy keys + and optionally backfills the index. + """ + index_prefix = _dsr_index_prefix(dsr_id) + keys = self._manager.get_keys_by_index(index_prefix) + if keys: + return keys + legacy_keys = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) + if not legacy_keys: + return [] + if self._backfill: + for k in legacy_keys: + self._manager.add_key_to_index(index_prefix, k) + return list(legacy_keys) + + def clear(self, dsr_id: str) -> None: + """ + Delete all cache keys for this DSR and remove the index. + + Always uses SCAN to find all keys (both indexed and legacy) to ensure + complete cleanup in mixed-key scenarios. + """ + # Use SCAN to find ALL keys (indexed + legacy) + all_keys_via_scan = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) + + index_prefix = _dsr_index_prefix(dsr_id) + + # Delete all found keys in batch + if all_keys_via_scan: + self._redis.delete(*all_keys_via_scan) + + # Delete the index itself + self._manager.delete_index(index_prefix) diff --git a/src/fides/common/cache/key_mapping.py b/src/fides/common/cache/key_mapping.py new file mode 100644 index 00000000000..9944c4e3493 --- /dev/null +++ b/src/fides/common/cache/key_mapping.py @@ -0,0 +1,140 @@ +""" +Key mapping: legacy Redis key patterns to the DSR store key format. + +Maps (dsr_id, field_type, field_key, ...) to: +- new_key: dsr:{dsr_id}:{part} (part = field_type:field_key or field_type) +- legacy_key: the key used by the old cache API (for encoded objects, the + logical key; set_encoded_object stores under EN_ + logical key in Redis). 
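+
+Example of the shapes the mappers below produce:
+
+    KeyMapper.identity("pr-1", "email")
+    # -> ("dsr:pr-1:identity:email", "id-pr-1-identity-email")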
+""" + +from typing import Tuple + +DSR_KEY_PREFIX = "dsr:" + + +def _new_key(dsr_id: str, part: str) -> str: + """Build the new-format Redis key.""" + return f"{DSR_KEY_PREFIX}{dsr_id}:{part}" + + +def _part(field_type: str, field_key: str = "") -> str: + """Build the part string (field_type or field_type:field_key).""" + return f"{field_type}:{field_key}" if field_key else field_type + + +class KeyMapper: + """ + Maps DSR cache field types to new keys and legacy keys. + All patterns discovered in the privacy request cache audit are encoded here. + """ + + # --- Simple key-value (legacy = Redis key as used with set_with_autoexpire / get) --- + + @staticmethod + def identity(dsr_id: str, attr: str) -> Tuple[str, str]: + """New: dsr:{id}:identity:{attr}. Legacy: id-{id}-identity-{attr}.""" + part = _part("identity", attr) + return _new_key(dsr_id, part), f"id-{dsr_id}-identity-{attr}" + + @staticmethod + def custom_field(dsr_id: str, field_key: str) -> Tuple[str, str]: + """New: dsr:{id}:custom_field:{key}. Legacy: id-{id}-custom-privacy-request-field-{key}.""" + part = _part("custom_field", field_key) + return _new_key(dsr_id, part), f"id-{dsr_id}-custom-privacy-request-field-{field_key}" + + @staticmethod + def drp(dsr_id: str, attr: str) -> Tuple[str, str]: + """New: dsr:{id}:drp:{attr}. Legacy: id-{id}-drp-{attr}.""" + part = _part("drp", attr) + return _new_key(dsr_id, part), f"id-{dsr_id}-drp-{attr}" + + @staticmethod + def encryption(dsr_id: str, attr: str) -> Tuple[str, str]: + """New: dsr:{id}:encryption:{attr}. Legacy: id-{id}-encryption-{attr}.""" + part = _part("encryption", attr) + return _new_key(dsr_id, part), f"id-{dsr_id}-encryption-{attr}" + + @staticmethod + def masking_secret( + dsr_id: str, strategy: str, secret_type: str + ) -> Tuple[str, str]: + """New: dsr:{id}:masking_secret:{strategy}:{secret_type}. Legacy: id-{id}-masking-secret-{strategy}-{secret_type}.""" + part = f"masking_secret:{strategy}:{secret_type}" + return _new_key(dsr_id, part), f"id-{dsr_id}-masking-secret-{strategy}-{secret_type}" + + @staticmethod + def async_execution(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:async_execution. Legacy: id-{id}-async-execution.""" + part = "async_execution" + return _new_key(dsr_id, part), f"id-{dsr_id}-async-execution" + + @staticmethod + def retry_count(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:retry_count. Legacy: id-{id}-privacy-request-retry-count.""" + part = "retry_count" + return _new_key(dsr_id, part), f"id-{dsr_id}-privacy-request-retry-count" + + # --- Encoded objects (legacy = logical key; Redis stores EN_ + logical) --- + + @staticmethod + def webhook_manual_access(dsr_id: str, webhook_id: str) -> Tuple[str, str]: + """New: dsr:{id}:webhook_manual_access:{webhook_id}. Legacy logical: WEBHOOK_MANUAL_ACCESS_INPUT__{id}__{webhook_id}.""" + part = _part("webhook_manual_access", webhook_id) + return _new_key(dsr_id, part), f"WEBHOOK_MANUAL_ACCESS_INPUT__{dsr_id}__{webhook_id}" + + @staticmethod + def webhook_manual_erasure(dsr_id: str, webhook_id: str) -> Tuple[str, str]: + """New: dsr:{id}:webhook_manual_erasure:{webhook_id}. Legacy logical: WEBHOOK_MANUAL_ERASURE_INPUT__{id}__{webhook_id}.""" + part = _part("webhook_manual_erasure", webhook_id) + return _new_key(dsr_id, part), f"WEBHOOK_MANUAL_ERASURE_INPUT__{dsr_id}__{webhook_id}" + + @staticmethod + def data_use_map(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:data_use_map. 
Legacy logical: DATA_USE_MAP__{id}.""" + part = "data_use_map" + return _new_key(dsr_id, part), f"DATA_USE_MAP__{dsr_id}" + + @staticmethod + def email_info( + dsr_id: str, step: str, dataset: str, collection: str + ) -> Tuple[str, str]: + """New: dsr:{id}:email_info:{step}:{dataset}:{collection}. Legacy logical: EMAIL_INFORMATION__{id}__{step}__{dataset}__{collection}.""" + part = f"email_info:{step}:{dataset}:{collection}" + return _new_key(dsr_id, part), f"EMAIL_INFORMATION__{dsr_id}__{step}__{dataset}__{collection}" + + @staticmethod + def paused_location(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:paused_location. Legacy logical: PAUSED_LOCATION__{id}.""" + part = "paused_location" + return _new_key(dsr_id, part), f"PAUSED_LOCATION__{dsr_id}" + + @staticmethod + def failed_location(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:failed_location. Legacy logical: FAILED_LOCATION__{id}.""" + part = "failed_location" + return _new_key(dsr_id, part), f"FAILED_LOCATION__{dsr_id}" + + @staticmethod + def access_request(dsr_id: str, key: str) -> Tuple[str, str]: + """New: dsr:{id}:access_request:{key}. Legacy logical: {id}__{key} (key e.g. access_request__dataset:collection).""" + part = _part("access_request", key) + return _new_key(dsr_id, part), f"{dsr_id}__{key}" + + @staticmethod + def erasure_request(dsr_id: str, key: str) -> Tuple[str, str]: + """New: dsr:{id}:erasure_request:{key}. Legacy logical: {id}__erasure_request__{key}.""" + part = _part("erasure_request", key) + return _new_key(dsr_id, part), f"{dsr_id}__erasure_request__{key}" + + @staticmethod + def placeholder_results(dsr_id: str, key: str) -> Tuple[str, str]: + """New: dsr:{id}:placeholder_results:{key}. Legacy logical: PLACEHOLDER_RESULTS__{id}__{key}.""" + part = _part("placeholder_results", key) + return _new_key(dsr_id, part), f"PLACEHOLDER_RESULTS__{dsr_id}__{key}" + + # --- Index prefix (for get_all_keys / clear) --- + + @staticmethod + def index_prefix(dsr_id: str) -> str: + """Index set key prefix for this DSR: __idx:dsr:{id}.""" + return f"__idx:{DSR_KEY_PREFIX}{dsr_id}" diff --git a/tests/common/cache/conftest.py b/tests/common/cache/conftest.py new file mode 100644 index 00000000000..180e8c56ca8 --- /dev/null +++ b/tests/common/cache/conftest.py @@ -0,0 +1,75 @@ +""" +Conftest for common/cache tests. Overrides session-scoped fixtures so the +real FastAPI app, DB, and Celery worker are not started when running only these tests. 
+""" + +import pytest +from unittest.mock import MagicMock + + +@pytest.fixture(scope="session") +def test_client(): + """Minimal test client mock so app/DB are not started for cache-only test runs.""" + client = MagicMock() + response = MagicMock() + response.status_code = 200 + client.get = MagicMock(return_value=response) + client.post = MagicMock(return_value=response) + client.put = MagicMock(return_value=response) + client.patch = MagicMock(return_value=response) + client.delete = MagicMock(return_value=response) + yield client + + +@pytest.fixture(scope="session") +def api_client(): + """Minimal API client mock so app/DB are not started for cache-only test runs.""" + client = MagicMock() + response = MagicMock() + response.status_code = 200 + client.get = MagicMock(return_value=response) + client.post = MagicMock(return_value=response) + client.put = MagicMock(return_value=response) + client.patch = MagicMock(return_value=response) + client.delete = MagicMock(return_value=response) + yield client + + +@pytest.fixture(scope="session", autouse=True) +def app(): + """Mock app fixture so FastAPI doesn't start.""" + yield MagicMock() + + +@pytest.fixture(scope="session") +def config(): + """Mock config so we don't pull in real config.""" + from fides.config import get_config + config = get_config() + config.test_mode = True + yield config + + +@pytest.fixture(scope="session") +def db(api_client, config): + """Override db fixture to prevent database connection.""" + yield MagicMock() + + +@pytest.fixture(scope="session") +async def async_session(): + """Override async_session fixture to prevent database connection.""" + yield MagicMock() + + +@pytest.fixture(scope="function", autouse=True) +async def clear_db_tables(db, async_session): + """Override clear_db_tables to no-op for cache-only tests.""" + yield + # No cleanup needed for MockRedis tests + + +@pytest.fixture(autouse=True, scope="session") +def celery_use_virtual_worker(): + """No-op so we don't start a real Celery worker (and pull in DB) for cache tests.""" + yield None diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py new file mode 100644 index 00000000000..9bde6e5110d --- /dev/null +++ b/tests/common/cache/test_dsr_store.py @@ -0,0 +1,246 @@ +""" +Tests for DSRCacheStore using an in-memory RedisCacheManager (dict + set). +No real Redis required. 
+""" + +import fnmatch +from typing import Any, Dict, List, Optional, Set, Union + +import pytest + +from fides.common.cache.dsr_store import DSRCacheStore + +RedisValue = Union[bytes, float, int, str] + + +class InMemoryRedis: + """Minimal Redis-like interface: get, set, delete, keys (glob pattern).""" + + def __init__(self) -> None: + self._data: Dict[str, RedisValue] = {} + + def get(self, key: str) -> Optional[Union[str, bytes]]: + val = self._data.get(key) + if val is None: + return None + return val if isinstance(val, (str, bytes)) else str(val) + + def set( + self, + key: str, + value: RedisValue, + ex: Optional[int] = None, + ) -> Optional[bool]: + self._data[key] = value + return True + + def delete(self, *keys: str) -> int: + n = 0 + for key in keys: + if key in self._data: + del self._data[key] + n += 1 + return n + + def keys(self, pattern: str) -> List[str]: + """Glob-style: * matches any number of chars.""" + return [k for k in self._data if fnmatch.fnmatch(k, pattern)] + + def scan_iter( + self, match: str = "*", count: Optional[int] = None + ): + """SCAN-compatible iterator; yields keys matching pattern (count ignored in-memory).""" + return iter(self.keys(match)) + + +class InMemoryRedisCacheManager: + """ + In-memory implementation of the RedisCacheManager interface: a dict for + key -> value and a dict of index_prefix -> set of keys for set_with_index. + """ + + def __init__(self) -> None: + self._redis = InMemoryRedis() + self._index: Dict[str, Set[str]] = {} + + def add_key_to_index(self, index_prefix: str, key: str) -> None: + self._index.setdefault(index_prefix, set()).add(key) + + def remove_key_from_index(self, index_prefix: str, key: str) -> None: + s = self._index.get(index_prefix) + if s is not None: + s.discard(key) + + def get_keys_by_index(self, index_prefix: str) -> List[str]: + return list(self._index.get(index_prefix, set())) + + def delete_index(self, index_prefix: str) -> None: + self._index.pop(index_prefix, None) + + def set_with_index( + self, + key: str, + value: RedisValue, + index_prefix: str, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + result = self._redis.set(key, value, ex=expire_seconds) + self.add_key_to_index(index_prefix, key) + return result + + def delete_key_and_remove_from_index( + self, + key: str, + index_prefix: str, + ) -> None: + self._redis.delete(key) + self.remove_key_from_index(index_prefix, key) + + @property + def redis(self) -> InMemoryRedis: + return self._redis + + +@pytest.fixture +def in_memory_manager() -> InMemoryRedisCacheManager: + return InMemoryRedisCacheManager() + + +@pytest.fixture +def dsr_store(in_memory_manager: InMemoryRedisCacheManager) -> DSRCacheStore: + return DSRCacheStore(in_memory_manager) + + +@pytest.mark.unit +class TestDSRCacheStoreWithInMemoryManager: + """DSRCacheStore behavior with an in-memory RedisCacheManager.""" + + def test_set_and_get(self, dsr_store: DSRCacheStore) -> None: + dsr_store.set("pr-1", "identity:email", "user@example.com") + assert dsr_store.get("pr-1", "identity:email") == "user@example.com" + + def test_get_missing_returns_none(self, dsr_store: DSRCacheStore) -> None: + assert dsr_store.get("pr-1", "identity:email") is None + + def test_set_with_index_registers_key_in_index( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.set("pr-1", "custom_field:foo", "bar") + keys = in_memory_manager.get_keys_by_index("dsr:pr-1") + assert "dsr:pr-1:custom_field:foo" in keys + assert len(keys) == 1 + + def 
test_get_all_keys_returns_indexed_keys( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.write_custom_field("pr-1", "f1", "v1") + dsr_store.write_identity("pr-1", "email", "e@x.com") + keys = dsr_store.get_all_keys("pr-1") + assert set(keys) == { + "dsr:pr-1:custom_field:f1", + "dsr:pr-1:identity:email", + } + + def test_clear_removes_all_keys_and_index( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.write_custom_field("pr-1", "f1", "v1") + dsr_store.write_identity("pr-1", "email", "e@x.com") + dsr_store.clear("pr-1") + assert dsr_store.get_all_keys("pr-1") == [] + assert dsr_store.get("pr-1", "custom_field:f1") is None + assert dsr_store.get("pr-1", "identity:email") is None + + def test_delete_removes_key_and_index_entry( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.set("pr-1", "identity:email", "e@x.com") + dsr_store.delete("pr-1", "identity:email") + assert dsr_store.get("pr-1", "identity:email") is None + assert "dsr:pr-1:identity:email" not in dsr_store.get_all_keys("pr-1") + + def test_get_with_legacy_reads_new_key_first( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.write_identity("pr-1", "email", "new@example.com") + # Legacy key not set; should still get from new key + assert dsr_store.get_identity("pr-1", "email") == "new@example.com" + + def test_get_with_legacy_migrates_from_legacy_key( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + # Simulate legacy data only (no new key) + in_memory_manager.redis.set("id-pr-1-identity-email", "legacy@example.com") + result = dsr_store.get_identity("pr-1", "email") + assert result == "legacy@example.com" + # After migrate: new key should exist and legacy should be gone + assert dsr_store.get("pr-1", "identity:email") == "legacy@example.com" + assert in_memory_manager.redis.get("id-pr-1-identity-email") is None + + def test_write_custom_field_and_get_custom_field( + self, dsr_store: DSRCacheStore + ) -> None: + dsr_store.write_custom_field("pr-1", "my_field", "my_value") + assert dsr_store.get_custom_field("pr-1", "my_field") == "my_value" + + def test_convenience_async_execution(self, dsr_store: DSRCacheStore) -> None: + dsr_store.write_async_execution("pr-1", "celery-task-id-xyz") + assert dsr_store.get_async_execution("pr-1") == "celery-task-id-xyz" + + def test_retry_count( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + """Mirrors cache.py get/increment/reset_privacy_request_retry_count.""" + assert dsr_store.get_retry_count("pr-1") is None + dsr_store.write_retry_count("pr-1", "3", expire_seconds=86400) + assert dsr_store.get_retry_count("pr-1") == "3" + dsr_store.delete("pr-1", "retry_count") + assert dsr_store.get_retry_count("pr-1") is None + # Legacy key migration + in_memory_manager.redis.set("id-pr-2-privacy-request-retry-count", "1") + assert dsr_store.get_retry_count("pr-2") == "1" + assert in_memory_manager.redis.get("id-pr-2-privacy-request-retry-count") is None + + def test_drp( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + """Mirrors privacy_request.py DRP body cache (get_drp_request_body_cache_key).""" + dsr_store.write_drp("pr-1", "address", "encrypted-body", expire_seconds=300) + assert dsr_store.get_drp("pr-1", "address") == "encrypted-body" + assert 
dsr_store.get_drp("pr-1", "email") is None + # Legacy key migration + in_memory_manager.redis.set("id-pr-2-drp-email", "legacy-drp") + assert dsr_store.get_drp("pr-2", "email") == "legacy-drp" + assert in_memory_manager.redis.get("id-pr-2-drp-email") is None + + def test_encryption( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + """Mirrors privacy_request.py / encryption_utils.py encryption key cache.""" + dsr_store.write_encryption("pr-1", "key", "enc-key-123", expire_seconds=3600) + assert dsr_store.get_encryption("pr-1", "key") == "enc-key-123" + assert dsr_store.get_encryption("pr-1", "other") is None + # Legacy key migration + in_memory_manager.redis.set("id-pr-2-encryption-key", "legacy-enc") + assert dsr_store.get_encryption("pr-2", "key") == "legacy-enc" + assert in_memory_manager.redis.get("id-pr-2-encryption-key") is None + + def test_masking_secret( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + """Mirrors secrets_util.get_masking_secret cache read (and write path).""" + dsr_store.write_masking_secret( + "pr-1", "hash", "salt", "encoded-secret", expire_seconds=600 + ) + assert dsr_store.get_masking_secret("pr-1", "hash", "salt") == "encoded-secret" + assert dsr_store.get_masking_secret("pr-1", "hash", "other") is None + # Legacy key migration + in_memory_manager.redis.set( + "id-pr-2-masking-secret-hash-pepper", "legacy-masking" + ) + assert ( + dsr_store.get_masking_secret("pr-2", "hash", "pepper") == "legacy-masking" + ) + assert ( + in_memory_manager.redis.get("id-pr-2-masking-secret-hash-pepper") is None + ) diff --git a/tests/common/cache/test_dsr_store_key_mapping.py b/tests/common/cache/test_dsr_store_key_mapping.py new file mode 100644 index 00000000000..1c853a63a93 --- /dev/null +++ b/tests/common/cache/test_dsr_store_key_mapping.py @@ -0,0 +1,111 @@ +""" +Tests that KeyMapper maps legacy key patterns to the new dsr:{id}:{part} format correctly. +No Redis required; exercises fides.common.cache.key_mapping only. 
+""" + +import pytest + +from fides.common.cache.key_mapping import DSR_KEY_PREFIX, KeyMapper + + +@pytest.mark.unit +class TestKeyMapper: + """Ensure each field type produces the expected new_key and legacy_key.""" + + def test_identity(self) -> None: + new_key, legacy_key = KeyMapper.identity("pr-123", "email") + assert new_key == f"{DSR_KEY_PREFIX}pr-123:identity:email" + assert legacy_key == "id-pr-123-identity-email" + + def test_custom_field(self) -> None: + new_key, legacy_key = KeyMapper.custom_field("pr-456", "my_field") + assert new_key == f"{DSR_KEY_PREFIX}pr-456:custom_field:my_field" + assert legacy_key == "id-pr-456-custom-privacy-request-field-my_field" + + def test_drp(self) -> None: + new_key, legacy_key = KeyMapper.drp("pr-789", "address") + assert new_key == f"{DSR_KEY_PREFIX}pr-789:drp:address" + assert legacy_key == "id-pr-789-drp-address" + + def test_encryption(self) -> None: + new_key, legacy_key = KeyMapper.encryption("pr-abc", "key") + assert new_key == f"{DSR_KEY_PREFIX}pr-abc:encryption:key" + assert legacy_key == "id-pr-abc-encryption-key" + + def test_masking_secret(self) -> None: + new_key, legacy_key = KeyMapper.masking_secret( + "pr-def", "hash", "salt" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-def:masking_secret:hash:salt" + assert legacy_key == "id-pr-def-masking-secret-hash-salt" + + def test_async_execution(self) -> None: + new_key, legacy_key = KeyMapper.async_execution("pr-ghi") + assert new_key == f"{DSR_KEY_PREFIX}pr-ghi:async_execution" + assert legacy_key == "id-pr-ghi-async-execution" + + def test_retry_count(self) -> None: + new_key, legacy_key = KeyMapper.retry_count("pr-jkl") + assert new_key == f"{DSR_KEY_PREFIX}pr-jkl:retry_count" + assert legacy_key == "id-pr-jkl-privacy-request-retry-count" + + def test_webhook_manual_access(self) -> None: + new_key, legacy_key = KeyMapper.webhook_manual_access( + "pr-mno", "webhook-uuid" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-mno:webhook_manual_access:webhook-uuid" + assert legacy_key == "WEBHOOK_MANUAL_ACCESS_INPUT__pr-mno__webhook-uuid" + + def test_webhook_manual_erasure(self) -> None: + new_key, legacy_key = KeyMapper.webhook_manual_erasure( + "pr-pqr", "webhook-uuid-2" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-pqr:webhook_manual_erasure:webhook-uuid-2" + assert legacy_key == "WEBHOOK_MANUAL_ERASURE_INPUT__pr-pqr__webhook-uuid-2" + + def test_data_use_map(self) -> None: + new_key, legacy_key = KeyMapper.data_use_map("pr-stu") + assert new_key == f"{DSR_KEY_PREFIX}pr-stu:data_use_map" + assert legacy_key == "DATA_USE_MAP__pr-stu" + + def test_email_info(self) -> None: + new_key, legacy_key = KeyMapper.email_info( + "pr-vwx", "access", "postgres_example", "address" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-vwx:email_info:access:postgres_example:address" + assert legacy_key == "EMAIL_INFORMATION__pr-vwx__access__postgres_example__address" + + def test_paused_location(self) -> None: + new_key, legacy_key = KeyMapper.paused_location("pr-yz1") + assert new_key == f"{DSR_KEY_PREFIX}pr-yz1:paused_location" + assert legacy_key == "PAUSED_LOCATION__pr-yz1" + + def test_failed_location(self) -> None: + new_key, legacy_key = KeyMapper.failed_location("pr-yz2") + assert new_key == f"{DSR_KEY_PREFIX}pr-yz2:failed_location" + assert legacy_key == "FAILED_LOCATION__pr-yz2" + + def test_access_request(self) -> None: + new_key, legacy_key = KeyMapper.access_request( + "pr-yz3", "access_request__postgres_example:address" + ) + assert new_key == 
f"{DSR_KEY_PREFIX}pr-yz3:access_request:access_request__postgres_example:address" + assert legacy_key == "pr-yz3__access_request__postgres_example:address" + + def test_erasure_request(self) -> None: + new_key, legacy_key = KeyMapper.erasure_request( + "pr-yz4", "postgres_example:address" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-yz4:erasure_request:postgres_example:address" + assert legacy_key == "pr-yz4__erasure_request__postgres_example:address" + + def test_placeholder_results(self) -> None: + new_key, legacy_key = KeyMapper.placeholder_results( + "pr-yz5", "postgres_example:customer" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-yz5:placeholder_results:postgres_example:customer" + assert legacy_key == "PLACEHOLDER_RESULTS__pr-yz5__postgres_example:customer" + + def test_index_prefix(self) -> None: + prefix = KeyMapper.index_prefix("pr-123") + assert prefix == "__idx:dsr:pr-123" diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py new file mode 100644 index 00000000000..b389d8ae051 --- /dev/null +++ b/tests/common/cache/test_dsr_store_migration.py @@ -0,0 +1,209 @@ +""" +Tests for DSRCacheStore migration behavior with legacy keys. + +Verifies existing cached data (legacy format) is correctly read, migrated, and cleared. +""" + +import fnmatch +import uuid +from typing import Dict, List, Optional, Set, Union + +import pytest + +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager + +RedisValue = Union[bytes, float, int, str] + + +class MockRedis: + """Mock Redis with minimal interface for DSRCacheStore.""" + + def __init__(self) -> None: + self._data: Dict[str, RedisValue] = {} + self._sets: Dict[str, Set[Union[str, bytes]]] = {} + + def get(self, key: str) -> Optional[Union[str, bytes]]: + val = self._data.get(key) + return val if isinstance(val, (str, bytes)) else str(val) if val else None + + def set(self, key: str, value: RedisValue, ex: Optional[int] = None) -> bool: + self._data[key] = value + return True + + def delete(self, *keys: str) -> int: + deleted = sum(1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None)) + return deleted + + def keys(self, pattern: str) -> List[str]: + return [k for k in self._data if fnmatch.fnmatch(k, pattern)] + + def scan_iter(self, match: str = "*", count: Optional[int] = None): + return iter(self.keys(match)) + + def sadd(self, key: str, *members: Union[str, bytes]) -> int: + s = self._sets.setdefault(key, set()) + before = len(s) + s.update(members) + return len(s) - before + + def srem(self, key: str, *members: Union[str, bytes]) -> int: + if key not in self._sets: + return 0 + before = len(self._sets[key]) + self._sets[key].difference_update(members) + return before - len(self._sets[key]) + + def smembers(self, key: str) -> Set[Union[str, bytes]]: + return self._sets.get(key, set()).copy() + + +# Test data factories +def make_dsr_id() -> str: + """Generate unique DSR ID.""" + return f"test-pr-{uuid.uuid4()}" + + +def make_legacy_key(dsr_id: str, field_type: str, field_name: str = "") -> str: + """Build legacy key format.""" + if field_name: + return f"id-{dsr_id}-{field_type}-{field_name}" + return f"id-{dsr_id}-{field_type}" + + +def make_new_key(dsr_id: str, part: str) -> str: + """Build new DSR key format.""" + return f"dsr:{dsr_id}:{part}" + + +@pytest.fixture +def mock_redis(): + return MockRedis() + + +@pytest.fixture +def dsr_store(mock_redis): + return DSRCacheStore(RedisCacheManager(mock_redis)) + + 
+@pytest.fixture +def dsr_id(): + return make_dsr_id() + + +@pytest.mark.unit +class TestLegacyKeyMigration: + """Test legacy key formats are readable and migrated correctly.""" + + @pytest.mark.parametrize("field_type,getter,field_key,value", [ + ("identity", "get_identity", "email", "user@example.com"), + ("custom-privacy-request-field", "get_custom_field", "dept", "Engineering"), + ("encryption", "get_encryption", "key", "encryption-key-123"), + ("async-execution", "get_async_execution", "", "celery-task-123"), + ("privacy-request-retry-count", "get_retry_count", "", "3"), + ("drp", "get_drp", "email", "drp@example.com"), + ("masking-secret-hash", "get_masking_secret", "salt", "secret-123"), + ]) + def test_legacy_keys_readable( + self, mock_redis, dsr_store, dsr_id, field_type, getter, field_key, value + ): + """Legacy keys are readable via store convenience methods.""" + legacy_key = make_legacy_key(dsr_id, field_type, field_key) + mock_redis.set(legacy_key, value) + + # Call appropriate getter + if getter == "get_masking_secret": + result = dsr_store.get_masking_secret(dsr_id, "hash", field_key) + elif field_key: + result = getattr(dsr_store, getter)(dsr_id, field_key) + else: + result = getattr(dsr_store, getter)(dsr_id) + + assert result == value + + def test_legacy_key_migrated_on_read(self, mock_redis, dsr_store, dsr_id): + """Legacy key is migrated to new format on first read.""" + mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "migrate@test.com") + + email = dsr_store.get_identity(dsr_id, "email") + assert email == "migrate@test.com" + + # New key exists, legacy deleted, index updated + assert mock_redis.get(make_new_key(dsr_id, "identity:email")) == "migrate@test.com" + assert mock_redis.get(make_legacy_key(dsr_id, "identity", "email")) is None + assert make_new_key(dsr_id, "identity:email") in mock_redis.smembers(f"__idx:dsr:{dsr_id}") + + def test_new_writes_create_indexed_keys_only(self, mock_redis, dsr_store, dsr_id): + """New writes create new-format keys and index them; no legacy keys written.""" + dsr_store.write_identity(dsr_id, "email", "new@example.com") + dsr_store.write_custom_field(dsr_id, "department", "Sales") + + assert mock_redis.get(make_new_key(dsr_id, "identity:email")) == "new@example.com" + assert mock_redis.get(make_new_key(dsr_id, "custom_field:department")) == "Sales" + assert mock_redis.get(make_legacy_key(dsr_id, "identity", "email")) is None + assert mock_redis.get(make_legacy_key(dsr_id, "custom-privacy-request-field", "department")) is None + + def test_clear_removes_mixed_keys(self, mock_redis, dsr_store, dsr_id): + """clear() removes both legacy and new keys using SCAN.""" + mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "legacy@test.com") + mock_redis.set(make_legacy_key(dsr_id, "encryption", "key"), "legacy-key") + dsr_store.write_identity(dsr_id, "phone_number", "+1234567890") + dsr_store.write_custom_field(dsr_id, "department", "Engineering") + + dsr_store.clear(dsr_id) + + assert len(mock_redis.keys(f"*{dsr_id}*")) == 0 + + def test_index_backfill(self, mock_redis, dsr_id): + """Legacy keys are backfilled into index when enabled.""" + mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "test@example.com") + mock_redis.set(make_legacy_key(dsr_id, "identity", "phone_number"), "+1234567890") + + store = DSRCacheStore(RedisCacheManager(mock_redis), backfill_index_on_legacy_read=True) + keys = store.get_all_keys(dsr_id) + + assert len(keys) == 2 + assert len(mock_redis.smembers(f"__idx:dsr:{dsr_id}")) == 2 + 
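+
+# A sketch of the lazy-migration read path that TestLegacyKeyMigration above
+# exercises, reconstructed from the documented semantics (read the new key
+# first; on a legacy-only hit, write the new key, index it, and delete the
+# legacy key). Illustrative only -- attribute names such as `store.manager`
+# are assumptions here, not the actual DSRCacheStore source:
+#
+#   def get_with_legacy(store, dsr_id, part, legacy_key):
+#       new_key = f"dsr:{dsr_id}:{part}"      # e.g. dsr:pr-1:identity:email
+#       value = store.redis.get(new_key)
+#       if value is not None:
+#           return value                      # new-format hit, nothing to migrate
+#       value = store.redis.get(legacy_key)   # e.g. id-pr-1-identity-email
+#       if value is not None:
+#           # migrate on read: write the new key, index it, drop the legacy key
+#           store.manager.set_with_index(new_key, value, f"dsr:{dsr_id}")
+#           store.redis.delete(legacy_key)
+#       return value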
+ +@pytest.mark.unit +class TestMultipleRequestIsolation: + """Test DSR IDs don't interfere with each other.""" + + def test_mixed_dsr_states(self, mock_redis): + """Operations on one DSR don't affect others (legacy, new, mixed).""" + dsr1, dsr2, dsr3 = make_dsr_id(), make_dsr_id(), make_dsr_id() + store = DSRCacheStore(RedisCacheManager(mock_redis)) + + # DSR1: legacy, DSR2: new, DSR3: mixed + mock_redis.set(make_legacy_key(dsr1, "identity", "email"), "dsr1@test.com") + store.write_identity(dsr2, "email", "dsr2@test.com") + mock_redis.set(make_legacy_key(dsr3, "identity", "email"), "dsr3@test.com") + store.write_identity(dsr3, "phone_number", "+1234567890") + + # Verify all readable + assert store.get_identity(dsr1, "email") == "dsr1@test.com" + assert store.get_identity(dsr2, "email") == "dsr2@test.com" + assert store.get_identity(dsr3, "email") == "dsr3@test.com" + assert store.get_identity(dsr3, "phone_number") == "+1234567890" + + # Clear DSR2 doesn't affect others + store.clear(dsr2) + assert store.get_identity(dsr1, "email") == "dsr1@test.com" + assert store.get_identity(dsr3, "email") == "dsr3@test.com" + assert store.get_identity(dsr2, "email") is None + assert store.get_all_keys(dsr2) == [] + + def test_clear_isolation(self, mock_redis): + """Clearing one DSR doesn't delete another's keys.""" + dsr1, dsr2 = make_dsr_id(), make_dsr_id() + store = DSRCacheStore(RedisCacheManager(mock_redis)) + + store.write_identity(dsr1, "email", "dsr1@test.com") + store.write_identity(dsr2, "email", "dsr2@test.com") + + store.clear(dsr1) + + assert mock_redis.get(make_new_key(dsr1, "identity:email")) is None + assert mock_redis.get(make_new_key(dsr2, "identity:email")) == "dsr2@test.com" + From 1e953d09ace3eb34870cbf0ff4319895c12cf95c Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 10 Feb 2026 22:19:07 -0800 Subject: [PATCH 14/50] Cleanup tests --- .../cache/test_dsr_store_clear_integration.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index 95fc5449bfe..5b9c4538b78 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -150,16 +150,3 @@ def test_clear_removes_index(self): # Index should be deleted assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) == 0 - -def _run_standalone_tests(): - """Run tests standalone.""" - tests = TestPrivacyRequestClearCachedValues() - tests.test_clear_removes_legacy_keys() - tests.test_clear_removes_new_keys() - tests.test_clear_removes_mixed_keys() - tests.test_clear_removes_index() - print("All clear_cached_values integration tests passed.") - - -if __name__ == "__main__": - _run_standalone_tests() From 5a80d71cd1fbdd0c7e23ba66c7d1523ec8b95aef Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:40:51 -0800 Subject: [PATCH 15/50] Add DSR store to exports --- src/fides/common/cache/__init__.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/fides/common/cache/__init__.py b/src/fides/common/cache/__init__.py index 1dc9c9e021d..6b24f4828f6 100644 --- a/src/fides/common/cache/__init__.py +++ b/src/fides/common/cache/__init__.py @@ -2,14 +2,26 @@ Shared Redis cache utilities and the RedisCacheManager. RedisCacheManager provides modern Redis patterns such as key indexes. 
+ +DSRCacheStore wraps that with DSR-specific key naming (dsr:{id}:{part}) +and index-backed list/clear with lazy migration for legacy keys. + +Usage: + with get_dsr_cache_store() as store: + store.write_identity(privacy_request_id, "email", "user@example.com") + store.clear(privacy_request_id) """ +from contextlib import contextmanager +from typing import Iterator + +from fides.common.cache.dsr_store import ( + DSR_KEY_PREFIX, + DSRCacheStore, +) from fides.common.cache.manager import ( INDEX_KEY_PREFIX, RedisCacheManager, ) -__all__ = [ - "INDEX_KEY_PREFIX", - "RedisCacheManager", -] +__all__ = ["INDEX_KEY_PREFIX", "RedisCacheManager", "DSR_KEY_PREFIX", "DSRCacheStore"] From 9ed6d6eaa06382557e98edaf3cd79ee887c89f70 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:47:14 -0800 Subject: [PATCH 16/50] Formatting --- .../models/privacy_request/privacy_request.py | 2 +- .../cache/test_dsr_store_clear_integration.py | 20 ++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index 826ae361288..b8f059343eb 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -116,7 +116,6 @@ get_encryption_cache_key, get_identity_cache_key, ) -from fides.common.cache import get_dsr_cache_store from fides.api.util.collection_util import Row, extract_key_for_address from fides.api.util.constants import API_DATE_FORMAT from fides.api.util.custom_json_encoder import CustomJSONEncoder @@ -124,6 +123,7 @@ from fides.api.util.identity_verification import IdentityVerificationMixin from fides.api.util.logger import Pii from fides.api.util.logger_context_utils import Contextualizable, LoggerContextKeys +from fides.common.cache import get_dsr_cache_store from fides.config import CONFIG from fides.service.attachment_service import AttachmentService diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index 5b9c4538b78..3bbbf80e323 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -28,10 +28,13 @@ def get(self, key): return self._data.get(key) def delete(self, *keys): - return sum(1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None)) + return sum( + 1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None) + ) def keys(self, pattern): import fnmatch + return [k for k in self._data if fnmatch.fnmatch(k, pattern)] def scan_iter(self, match="*", count=None): @@ -76,7 +79,7 @@ def test_clear_removes_legacy_keys(self): with patch("fides.api.util.cache.get_cache", return_value=mock_redis): # Import here to avoid app initialization from fides.api.models.privacy_request.privacy_request import PrivacyRequest - + PrivacyRequest.clear_cached_values(pr) # Verify all keys deleted @@ -86,7 +89,7 @@ def test_clear_removes_new_keys(self): """clear_cached_values removes new-format cache keys.""" mock_redis = MockRedis() pr_id = f"test-pr-{uuid.uuid4()}" - + # Simulate new cached data via store manager = RedisCacheManager(mock_redis) store = DSRCacheStore(manager) @@ -98,7 +101,7 @@ def test_clear_removes_new_keys(self): with patch("fides.api.util.cache.get_cache", return_value=mock_redis): from fides.api.models.privacy_request.privacy_request import PrivacyRequest - + PrivacyRequest.clear_cached_values(pr) assert 
len(mock_redis.keys(f"*{pr_id}*")) == 0 @@ -111,7 +114,7 @@ def test_clear_removes_mixed_keys(self): # Mixed: legacy identity, new encryption mock_redis.set(f"id-{pr_id}-identity-email", "legacy@example.com") mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-dept", "Engineering") - + manager = RedisCacheManager(mock_redis) store = DSRCacheStore(manager) store.write_encryption(pr_id, "key", "new-encryption-key") @@ -122,7 +125,7 @@ def test_clear_removes_mixed_keys(self): with patch("fides.api.util.cache.get_cache", return_value=mock_redis): from fides.api.models.privacy_request.privacy_request import PrivacyRequest - + PrivacyRequest.clear_cached_values(pr) assert len(mock_redis.keys(f"*{pr_id}*")) == 0 @@ -135,7 +138,7 @@ def test_clear_removes_index(self): manager = RedisCacheManager(mock_redis) store = DSRCacheStore(manager) store.write_identity(pr_id, "email", "test@example.com") - + # Verify index exists assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) > 0 @@ -144,9 +147,8 @@ def test_clear_removes_index(self): with patch("fides.api.util.cache.get_cache", return_value=mock_redis): from fides.api.models.privacy_request.privacy_request import PrivacyRequest - + PrivacyRequest.clear_cached_values(pr) # Index should be deleted assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) == 0 - From 93654512bba4be868a16289ffa717e0f56d3523c Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:44:42 -0800 Subject: [PATCH 17/50] Formatting fixes --- src/fides/common/cache/dsr_store.py | 8 +-- src/fides/common/cache/key_mapping.py | 24 +++++--- tests/common/cache/conftest.py | 4 +- tests/common/cache/test_dsr_store.py | 12 ++-- .../cache/test_dsr_store_key_mapping.py | 36 ++++++++---- .../common/cache/test_dsr_store_migration.py | 57 +++++++++++++------ 6 files changed, 91 insertions(+), 50 deletions(-) diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index d6819fc22c7..1151b92b30e 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -282,18 +282,18 @@ def get_all_keys(self, dsr_id: str) -> List[str]: def clear(self, dsr_id: str) -> None: """ Delete all cache keys for this DSR and remove the index. - + Always uses SCAN to find all keys (both indexed and legacy) to ensure complete cleanup in mixed-key scenarios. """ # Use SCAN to find ALL keys (indexed + legacy) all_keys_via_scan = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) - + index_prefix = _dsr_index_prefix(dsr_id) - + # Delete all found keys in batch if all_keys_via_scan: self._redis.delete(*all_keys_via_scan) - + # Delete the index itself self._manager.delete_index(index_prefix) diff --git a/src/fides/common/cache/key_mapping.py b/src/fides/common/cache/key_mapping.py index 9944c4e3493..d312417afd7 100644 --- a/src/fides/common/cache/key_mapping.py +++ b/src/fides/common/cache/key_mapping.py @@ -40,7 +40,9 @@ def identity(dsr_id: str, attr: str) -> Tuple[str, str]: def custom_field(dsr_id: str, field_key: str) -> Tuple[str, str]: """New: dsr:{id}:custom_field:{key}. 
Legacy: id-{id}-custom-privacy-request-field-{key}.""" part = _part("custom_field", field_key) - return _new_key(dsr_id, part), f"id-{dsr_id}-custom-privacy-request-field-{field_key}" + return _new_key( + dsr_id, part + ), f"id-{dsr_id}-custom-privacy-request-field-{field_key}" @staticmethod def drp(dsr_id: str, attr: str) -> Tuple[str, str]: @@ -55,12 +57,12 @@ def encryption(dsr_id: str, attr: str) -> Tuple[str, str]: return _new_key(dsr_id, part), f"id-{dsr_id}-encryption-{attr}" @staticmethod - def masking_secret( - dsr_id: str, strategy: str, secret_type: str - ) -> Tuple[str, str]: + def masking_secret(dsr_id: str, strategy: str, secret_type: str) -> Tuple[str, str]: """New: dsr:{id}:masking_secret:{strategy}:{secret_type}. Legacy: id-{id}-masking-secret-{strategy}-{secret_type}.""" part = f"masking_secret:{strategy}:{secret_type}" - return _new_key(dsr_id, part), f"id-{dsr_id}-masking-secret-{strategy}-{secret_type}" + return _new_key( + dsr_id, part + ), f"id-{dsr_id}-masking-secret-{strategy}-{secret_type}" @staticmethod def async_execution(dsr_id: str) -> Tuple[str, str]: @@ -80,13 +82,17 @@ def retry_count(dsr_id: str) -> Tuple[str, str]: def webhook_manual_access(dsr_id: str, webhook_id: str) -> Tuple[str, str]: """New: dsr:{id}:webhook_manual_access:{webhook_id}. Legacy logical: WEBHOOK_MANUAL_ACCESS_INPUT__{id}__{webhook_id}.""" part = _part("webhook_manual_access", webhook_id) - return _new_key(dsr_id, part), f"WEBHOOK_MANUAL_ACCESS_INPUT__{dsr_id}__{webhook_id}" + return _new_key( + dsr_id, part + ), f"WEBHOOK_MANUAL_ACCESS_INPUT__{dsr_id}__{webhook_id}" @staticmethod def webhook_manual_erasure(dsr_id: str, webhook_id: str) -> Tuple[str, str]: """New: dsr:{id}:webhook_manual_erasure:{webhook_id}. Legacy logical: WEBHOOK_MANUAL_ERASURE_INPUT__{id}__{webhook_id}.""" part = _part("webhook_manual_erasure", webhook_id) - return _new_key(dsr_id, part), f"WEBHOOK_MANUAL_ERASURE_INPUT__{dsr_id}__{webhook_id}" + return _new_key( + dsr_id, part + ), f"WEBHOOK_MANUAL_ERASURE_INPUT__{dsr_id}__{webhook_id}" @staticmethod def data_use_map(dsr_id: str) -> Tuple[str, str]: @@ -100,7 +106,9 @@ def email_info( ) -> Tuple[str, str]: """New: dsr:{id}:email_info:{step}:{dataset}:{collection}. Legacy logical: EMAIL_INFORMATION__{id}__{step}__{dataset}__{collection}.""" part = f"email_info:{step}:{dataset}:{collection}" - return _new_key(dsr_id, part), f"EMAIL_INFORMATION__{dsr_id}__{step}__{dataset}__{collection}" + return _new_key( + dsr_id, part + ), f"EMAIL_INFORMATION__{dsr_id}__{step}__{dataset}__{collection}" @staticmethod def paused_location(dsr_id: str) -> Tuple[str, str]: diff --git a/tests/common/cache/conftest.py b/tests/common/cache/conftest.py index 180e8c56ca8..3c9a57a610d 100644 --- a/tests/common/cache/conftest.py +++ b/tests/common/cache/conftest.py @@ -3,9 +3,10 @@ real FastAPI app, DB, and Celery worker are not started when running only these tests. 
""" -import pytest from unittest.mock import MagicMock +import pytest + @pytest.fixture(scope="session") def test_client(): @@ -45,6 +46,7 @@ def app(): def config(): """Mock config so we don't pull in real config.""" from fides.config import get_config + config = get_config() config.test_mode = True yield config diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py index 9bde6e5110d..86dabb3e1df 100644 --- a/tests/common/cache/test_dsr_store.py +++ b/tests/common/cache/test_dsr_store.py @@ -46,9 +46,7 @@ def keys(self, pattern: str) -> List[str]: """Glob-style: * matches any number of chars.""" return [k for k in self._data if fnmatch.fnmatch(k, pattern)] - def scan_iter( - self, match: str = "*", count: Optional[int] = None - ): + def scan_iter(self, match: str = "*", count: Optional[int] = None): """SCAN-compatible iterator; yields keys matching pattern (count ignored in-memory).""" return iter(self.keys(match)) @@ -199,7 +197,9 @@ def test_retry_count( # Legacy key migration in_memory_manager.redis.set("id-pr-2-privacy-request-retry-count", "1") assert dsr_store.get_retry_count("pr-2") == "1" - assert in_memory_manager.redis.get("id-pr-2-privacy-request-retry-count") is None + assert ( + in_memory_manager.redis.get("id-pr-2-privacy-request-retry-count") is None + ) def test_drp( self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager @@ -241,6 +241,4 @@ def test_masking_secret( assert ( dsr_store.get_masking_secret("pr-2", "hash", "pepper") == "legacy-masking" ) - assert ( - in_memory_manager.redis.get("id-pr-2-masking-secret-hash-pepper") is None - ) + assert in_memory_manager.redis.get("id-pr-2-masking-secret-hash-pepper") is None diff --git a/tests/common/cache/test_dsr_store_key_mapping.py b/tests/common/cache/test_dsr_store_key_mapping.py index 1c853a63a93..1ba518719dd 100644 --- a/tests/common/cache/test_dsr_store_key_mapping.py +++ b/tests/common/cache/test_dsr_store_key_mapping.py @@ -33,9 +33,7 @@ def test_encryption(self) -> None: assert legacy_key == "id-pr-abc-encryption-key" def test_masking_secret(self) -> None: - new_key, legacy_key = KeyMapper.masking_secret( - "pr-def", "hash", "salt" - ) + new_key, legacy_key = KeyMapper.masking_secret("pr-def", "hash", "salt") assert new_key == f"{DSR_KEY_PREFIX}pr-def:masking_secret:hash:salt" assert legacy_key == "id-pr-def-masking-secret-hash-salt" @@ -50,9 +48,7 @@ def test_retry_count(self) -> None: assert legacy_key == "id-pr-jkl-privacy-request-retry-count" def test_webhook_manual_access(self) -> None: - new_key, legacy_key = KeyMapper.webhook_manual_access( - "pr-mno", "webhook-uuid" - ) + new_key, legacy_key = KeyMapper.webhook_manual_access("pr-mno", "webhook-uuid") assert new_key == f"{DSR_KEY_PREFIX}pr-mno:webhook_manual_access:webhook-uuid" assert legacy_key == "WEBHOOK_MANUAL_ACCESS_INPUT__pr-mno__webhook-uuid" @@ -60,7 +56,9 @@ def test_webhook_manual_erasure(self) -> None: new_key, legacy_key = KeyMapper.webhook_manual_erasure( "pr-pqr", "webhook-uuid-2" ) - assert new_key == f"{DSR_KEY_PREFIX}pr-pqr:webhook_manual_erasure:webhook-uuid-2" + assert ( + new_key == f"{DSR_KEY_PREFIX}pr-pqr:webhook_manual_erasure:webhook-uuid-2" + ) assert legacy_key == "WEBHOOK_MANUAL_ERASURE_INPUT__pr-pqr__webhook-uuid-2" def test_data_use_map(self) -> None: @@ -72,8 +70,13 @@ def test_email_info(self) -> None: new_key, legacy_key = KeyMapper.email_info( "pr-vwx", "access", "postgres_example", "address" ) - assert new_key == 
f"{DSR_KEY_PREFIX}pr-vwx:email_info:access:postgres_example:address" - assert legacy_key == "EMAIL_INFORMATION__pr-vwx__access__postgres_example__address" + assert ( + new_key + == f"{DSR_KEY_PREFIX}pr-vwx:email_info:access:postgres_example:address" + ) + assert ( + legacy_key == "EMAIL_INFORMATION__pr-vwx__access__postgres_example__address" + ) def test_paused_location(self) -> None: new_key, legacy_key = KeyMapper.paused_location("pr-yz1") @@ -89,21 +92,30 @@ def test_access_request(self) -> None: new_key, legacy_key = KeyMapper.access_request( "pr-yz3", "access_request__postgres_example:address" ) - assert new_key == f"{DSR_KEY_PREFIX}pr-yz3:access_request:access_request__postgres_example:address" + assert ( + new_key + == f"{DSR_KEY_PREFIX}pr-yz3:access_request:access_request__postgres_example:address" + ) assert legacy_key == "pr-yz3__access_request__postgres_example:address" def test_erasure_request(self) -> None: new_key, legacy_key = KeyMapper.erasure_request( "pr-yz4", "postgres_example:address" ) - assert new_key == f"{DSR_KEY_PREFIX}pr-yz4:erasure_request:postgres_example:address" + assert ( + new_key + == f"{DSR_KEY_PREFIX}pr-yz4:erasure_request:postgres_example:address" + ) assert legacy_key == "pr-yz4__erasure_request__postgres_example:address" def test_placeholder_results(self) -> None: new_key, legacy_key = KeyMapper.placeholder_results( "pr-yz5", "postgres_example:customer" ) - assert new_key == f"{DSR_KEY_PREFIX}pr-yz5:placeholder_results:postgres_example:customer" + assert ( + new_key + == f"{DSR_KEY_PREFIX}pr-yz5:placeholder_results:postgres_example:customer" + ) assert legacy_key == "PLACEHOLDER_RESULTS__pr-yz5__postgres_example:customer" def test_index_prefix(self) -> None: diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py index b389d8ae051..c5f41e297bc 100644 --- a/tests/common/cache/test_dsr_store_migration.py +++ b/tests/common/cache/test_dsr_store_migration.py @@ -32,7 +32,9 @@ def set(self, key: str, value: RedisValue, ex: Optional[int] = None) -> bool: return True def delete(self, *keys: str) -> int: - deleted = sum(1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None)) + deleted = sum( + 1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None) + ) return deleted def keys(self, pattern: str) -> List[str]: @@ -95,15 +97,18 @@ def dsr_id(): class TestLegacyKeyMigration: """Test legacy key formats are readable and migrated correctly.""" - @pytest.mark.parametrize("field_type,getter,field_key,value", [ - ("identity", "get_identity", "email", "user@example.com"), - ("custom-privacy-request-field", "get_custom_field", "dept", "Engineering"), - ("encryption", "get_encryption", "key", "encryption-key-123"), - ("async-execution", "get_async_execution", "", "celery-task-123"), - ("privacy-request-retry-count", "get_retry_count", "", "3"), - ("drp", "get_drp", "email", "drp@example.com"), - ("masking-secret-hash", "get_masking_secret", "salt", "secret-123"), - ]) + @pytest.mark.parametrize( + "field_type,getter,field_key,value", + [ + ("identity", "get_identity", "email", "user@example.com"), + ("custom-privacy-request-field", "get_custom_field", "dept", "Engineering"), + ("encryption", "get_encryption", "key", "encryption-key-123"), + ("async-execution", "get_async_execution", "", "celery-task-123"), + ("privacy-request-retry-count", "get_retry_count", "", "3"), + ("drp", "get_drp", "email", "drp@example.com"), + ("masking-secret-hash", "get_masking_secret", "salt", "secret-123"), + ], 
+ ) def test_legacy_keys_readable( self, mock_redis, dsr_store, dsr_id, field_type, getter, field_key, value ): @@ -129,19 +134,32 @@ def test_legacy_key_migrated_on_read(self, mock_redis, dsr_store, dsr_id): assert email == "migrate@test.com" # New key exists, legacy deleted, index updated - assert mock_redis.get(make_new_key(dsr_id, "identity:email")) == "migrate@test.com" + assert ( + mock_redis.get(make_new_key(dsr_id, "identity:email")) == "migrate@test.com" + ) assert mock_redis.get(make_legacy_key(dsr_id, "identity", "email")) is None - assert make_new_key(dsr_id, "identity:email") in mock_redis.smembers(f"__idx:dsr:{dsr_id}") + assert make_new_key(dsr_id, "identity:email") in mock_redis.smembers( + f"__idx:dsr:{dsr_id}" + ) def test_new_writes_create_indexed_keys_only(self, mock_redis, dsr_store, dsr_id): """New writes create new-format keys and index them; no legacy keys written.""" dsr_store.write_identity(dsr_id, "email", "new@example.com") dsr_store.write_custom_field(dsr_id, "department", "Sales") - assert mock_redis.get(make_new_key(dsr_id, "identity:email")) == "new@example.com" - assert mock_redis.get(make_new_key(dsr_id, "custom_field:department")) == "Sales" + assert ( + mock_redis.get(make_new_key(dsr_id, "identity:email")) == "new@example.com" + ) + assert ( + mock_redis.get(make_new_key(dsr_id, "custom_field:department")) == "Sales" + ) assert mock_redis.get(make_legacy_key(dsr_id, "identity", "email")) is None - assert mock_redis.get(make_legacy_key(dsr_id, "custom-privacy-request-field", "department")) is None + assert ( + mock_redis.get( + make_legacy_key(dsr_id, "custom-privacy-request-field", "department") + ) + is None + ) def test_clear_removes_mixed_keys(self, mock_redis, dsr_store, dsr_id): """clear() removes both legacy and new keys using SCAN.""" @@ -157,9 +175,13 @@ def test_clear_removes_mixed_keys(self, mock_redis, dsr_store, dsr_id): def test_index_backfill(self, mock_redis, dsr_id): """Legacy keys are backfilled into index when enabled.""" mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "test@example.com") - mock_redis.set(make_legacy_key(dsr_id, "identity", "phone_number"), "+1234567890") + mock_redis.set( + make_legacy_key(dsr_id, "identity", "phone_number"), "+1234567890" + ) - store = DSRCacheStore(RedisCacheManager(mock_redis), backfill_index_on_legacy_read=True) + store = DSRCacheStore( + RedisCacheManager(mock_redis), backfill_index_on_legacy_read=True + ) keys = store.get_all_keys(dsr_id) assert len(keys) == 2 @@ -206,4 +228,3 @@ def test_clear_isolation(self, mock_redis): assert mock_redis.get(make_new_key(dsr1, "identity:email")) is None assert mock_redis.get(make_new_key(dsr2, "identity:email")) == "dsr2@test.com" - From 932774fdddcb238660df15a80b76eb46bfc8e6d6 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 10 Feb 2026 21:30:55 -0800 Subject: [PATCH 18/50] Part 3: Add identity operations to DSR cache store; added tests for the new functionality --- .../models/privacy_request/privacy_request.py | 60 ++-- src/fides/common/cache/dsr_store.py | 52 +++- tests/common/cache/mock_redis.py | 50 ++++ .../cache/test_dsr_store_clear_integration.py | 44 +-- .../test_dsr_store_identity_integration.py | 256 ++++++++++++++++++ .../common/cache/test_dsr_store_migration.py | 47 +--- 6 files changed, 388 insertions(+), 121 deletions(-) create mode 100644 tests/common/cache/mock_redis.py create mode 100644 tests/common/cache/test_dsr_store_identity_integration.py diff --git a/src/fides/api/models/privacy_request/privacy_request.py 
b/src/fides/api/models/privacy_request/privacy_request.py index b8f059343eb..e91b0e4e2e7 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -114,7 +114,6 @@ get_custom_privacy_request_field_cache_key, get_drp_request_body_cache_key, get_encryption_cache_key, - get_identity_cache_key, ) from fides.api.util.collection_util import Row, extract_key_for_address from fides.api.util.constants import API_DATE_FORMAT @@ -516,19 +515,23 @@ def cache_identity( self, identity: Union[Identity, Dict[str, LabeledIdentity]] ) -> None: """Sets the identity's values at their specific locations in the Fides app cache""" - cache: FidesopsRedis = get_cache() - if isinstance(identity, dict): identity = Identity(**identity) identity_dict: Dict[str, Any] = identity.labeled_dict() - for key, value in identity_dict.items(): - if value is not None: - cache.set_with_autoexpire( - get_identity_cache_key(self.id, key), - FidesopsRedis.encode_obj(value), - ) + with get_dsr_cache_store() as store: + # Encode values for Redis storage + encoded_dict = { + key: FidesopsRedis.encode_obj(value) + for key, value in identity_dict.items() + if value is not None + } + store.cache_identity_data( + self.id, + encoded_dict, + expire_seconds=CONFIG.redis.default_ttl_seconds, + ) def cache_custom_privacy_request_fields( self, @@ -755,43 +758,36 @@ def persist_masking_secrets( }, ) - def identity_prefix_cache_and_keys(self) -> Tuple[str, FidesopsRedis, List[str]]: - """Returns the prefix and cache keys for the identity data for this request""" - prefix = f"id-{self.id}-identity-*" - cache: FidesopsRedis = get_cache() - keys = cache.keys(prefix) - return prefix, cache, keys - def verify_cache_for_identity_data(self) -> bool: """Verifies if the identity data is cached for this request""" - _, _, keys = self.identity_prefix_cache_and_keys() - return len(keys) > 0 + with get_dsr_cache_store() as store: + return store.has_cached_identity_data(self.id) def get_cached_identity_data(self) -> Dict[str, Any]: """Retrieves any identity data pertaining to this request from the cache""" - result: Dict[str, Any] = {} - prefix, cache, keys = self.identity_prefix_cache_and_keys() + with get_dsr_cache_store() as store: + result = store.get_cached_identity_data(self.id) - if not keys: - logger.debug(f"Cache miss for request {self.id}, falling back to DB") - identity = self.get_persisted_identity() - self.cache_identity(identity) - keys = cache.keys(prefix) + if not result: + logger.debug(f"Cache miss for request {self.id}, falling back to DB") + identity = self.get_persisted_identity() + self.cache_identity(identity) + result = store.get_cached_identity_data(self.id) - for key in keys: - value = cache.get(key) - if value: + # Parse JSON values for backward compatibility + parsed_result: Dict[str, Any] = {} + for key, value in result.items(): try: # try parsing the value as JSON - parsed_value = json.loads(value) + parsed_result[key] = json.loads(value) except json.JSONDecodeError: # if parsing as JSON fails, assume it's a string. # this is purely for backward compatibility: to ensure # that identity data stored pre-2.34.0 in the "old" format # can still be correctly retrieved from the cache. 
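+                # NOTE: `key` here is already a plain attribute name (the
+                # store returns attributes, not raw Redis keys), so the old
+                # key.split("-")[-1] parsing removed below is unnecessary.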
- parsed_value = value - result[key.split("-")[-1]] = parsed_value - return result + parsed_result[key] = value + + return parsed_result def get_cached_custom_privacy_request_fields(self) -> Dict[str, Any]: """Retrieves any custom fields pertaining to this request from the cache""" diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index e4a307bd88a..563e7665f32 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -14,7 +14,7 @@ later if we want to avoid index consistency concerns. """ -from typing import List, Optional, Union +from typing import Any, Dict, List, Optional, Union from redis import Redis @@ -162,6 +162,56 @@ def get_identity(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: part = f"identity:{attr}" return self.get_with_legacy(dsr_id, part, KeyMapper.identity(dsr_id, attr)[1]) + def cache_identity_data( + self, dsr_id: str, identity_dict: Dict[str, Any], expire_seconds: Optional[int] = None + ) -> None: + """ + Cache all identity attributes for a DSR. + + Writes each non-None attribute to dsr:{id}:identity:{attr} format. + """ + for key, value in identity_dict.items(): + if value is not None: + self.write_identity(dsr_id, key, value, expire_seconds) + + def get_cached_identity_data(self, dsr_id: str) -> Dict[str, Any]: + """ + Retrieve all cached identity data for a DSR. + + Returns dict with identity attributes. Automatically migrates legacy keys on read. + Returns empty dict if no identity data cached. + """ + result: Dict[str, Any] = {} + all_keys = self.get_all_keys(dsr_id) + + # Filter for identity keys (both new and legacy formats) + identity_keys = [k for k in all_keys if ":identity:" in k or "-identity-" in k] + + for key in identity_keys: + # Extract attribute name from key + # New format: dsr:{id}:identity:{attr} + # Legacy format: id-{id}-identity-{attr} + if ":identity:" in key: + attr = key.split(":")[-1] + else: + # Legacy format + attr = key.split("-")[-1] + + value = self.get_identity(dsr_id, attr) + if value: + result[attr] = value + + return result + + def has_cached_identity_data(self, dsr_id: str) -> bool: + """ + Check if any identity data is cached for this DSR. + + Returns True if any identity keys exist (legacy or new format). + """ + all_keys = self.get_all_keys(dsr_id) + return any(":identity:" in k or "-identity-" in k for k in all_keys) + # --- Convenience: encryption --- def write_encryption( diff --git a/tests/common/cache/mock_redis.py b/tests/common/cache/mock_redis.py new file mode 100644 index 00000000000..15b99929bb1 --- /dev/null +++ b/tests/common/cache/mock_redis.py @@ -0,0 +1,50 @@ +""" +Shared MockRedis implementation for DSR cache store tests. 
+""" + +import fnmatch +from typing import Dict, List, Optional, Set, Union + +RedisValue = Union[bytes, float, int, str] + + +class MockRedis: + """Mock Redis with minimal interface for DSRCacheStore.""" + + def __init__(self) -> None: + self._data: Dict[str, RedisValue] = {} + self._sets: Dict[str, Set[Union[str, bytes]]] = {} + + def get(self, key: str) -> Optional[Union[str, bytes]]: + val = self._data.get(key) + return val if isinstance(val, (str, bytes)) else str(val) if val else None + + def set(self, key: str, value: RedisValue, ex: Optional[int] = None) -> bool: + self._data[key] = value + return True + + def delete(self, *keys: str) -> int: + deleted = sum(1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None)) + return deleted + + def keys(self, pattern: str) -> List[str]: + return [k for k in self._data if fnmatch.fnmatch(k, pattern)] + + def scan_iter(self, match: str = "*", count: Optional[int] = None): + return iter(self.keys(match)) + + def sadd(self, key: str, *members: Union[str, bytes]) -> int: + s = self._sets.setdefault(key, set()) + before = len(s) + s.update(members) + return len(s) - before + + def srem(self, key: str, *members: Union[str, bytes]) -> int: + if key not in self._sets: + return 0 + before = len(self._sets[key]) + self._sets[key].difference_update(members) + return before - len(self._sets[key]) + + def smembers(self, key: str) -> Set[Union[str, bytes]]: + return self._sets.get(key, set()).copy() diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index 3bbbf80e323..cf4d32f91fe 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -12,49 +12,7 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager - -class MockRedis: - """Minimal mock Redis for testing clear behavior.""" - - def __init__(self): - self._data = {} - self._sets = {} - - def set(self, key, value, ex=None): - self._data[key] = value - return True - - def get(self, key): - return self._data.get(key) - - def delete(self, *keys): - return sum( - 1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None) - ) - - def keys(self, pattern): - import fnmatch - - return [k for k in self._data if fnmatch.fnmatch(k, pattern)] - - def scan_iter(self, match="*", count=None): - return iter(self.keys(match)) - - def sadd(self, key, *members): - s = self._sets.setdefault(key, set()) - before = len(s) - s.update(members) - return len(s) - before - - def srem(self, key, *members): - if key not in self._sets: - return 0 - before = len(self._sets[key]) - self._sets[key].difference_update(members) - return before - len(self._sets[key]) - - def smembers(self, key): - return self._sets.get(key, set()).copy() +from tests.common.cache.mock_redis import MockRedis @pytest.mark.unit diff --git a/tests/common/cache/test_dsr_store_identity_integration.py b/tests/common/cache/test_dsr_store_identity_integration.py new file mode 100644 index 00000000000..2e63c84983c --- /dev/null +++ b/tests/common/cache/test_dsr_store_identity_integration.py @@ -0,0 +1,256 @@ +""" +Tests for identity cache operations integration with DSRCacheStore. + +Verifies cache_identity, get_cached_identity_data, and verify_cache_for_identity_data +work correctly with both legacy and new cache keys. 
+""" + +import json +import uuid +from contextlib import contextmanager +from unittest.mock import MagicMock, patch + +import pytest + +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager + +from tests.common.cache.mock_redis import MockRedis + + +@pytest.fixture +def mock_redis(): + """Shared MockRedis instance.""" + return MockRedis() + + +@pytest.fixture +def dsr_store(mock_redis): + """DSRCacheStore backed by MockRedis.""" + return DSRCacheStore(RedisCacheManager(mock_redis)) + + +@pytest.fixture +def pr_id(): + """Generate unique privacy request ID.""" + return f"test-pr-{uuid.uuid4()}" + + +@pytest.fixture +def identity_data(): + """Sample identity data for tests.""" + return { + "email": "user@example.com", + "phone_number": "+1234567890", + } + + +@contextmanager +def patch_get_cache(mock_redis): + """Context manager to patch get_cache with mock Redis.""" + with patch("fides.api.util.cache.get_cache", return_value=mock_redis): + with patch("fides.api.util.cache.CONFIG.redis.default_ttl_seconds", 3600): + yield + + +def create_mock_privacy_request(pr_id): + """Create mock PrivacyRequest with given ID.""" + pr = MagicMock() + pr.id = pr_id + return pr + + +# Mark all tests as unit tests that don't require full test infrastructure +pytestmark = pytest.mark.unit + + +class TestIdentityCacheOperations: + """Test identity cache operations with DSR store.""" + + def test_cache_identity_writes_new_format_only(self, mock_redis, pr_id, identity_data): + """cache_identity writes only new-format keys.""" + pr = create_mock_privacy_request(pr_id) + + with patch_get_cache(mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + from fides.api.schemas.redis_cache import Identity + + identity = Identity(**identity_data) + PrivacyRequest.cache_identity(pr, identity) + + # New keys exist + assert mock_redis.get(f"dsr:{pr_id}:identity:email") is not None + assert mock_redis.get(f"dsr:{pr_id}:identity:phone_number") is not None + + # Legacy keys do NOT exist + assert mock_redis.get(f"id-{pr_id}-identity-email") is None + assert mock_redis.get(f"id-{pr_id}-identity-phone_number") is None + + def test_store_cache_identity_data_service_method(self, dsr_store, pr_id): + """DSRCacheStore.cache_identity_data writes all attributes.""" + identity_data = { + "email": json.dumps("user@example.com"), + "phone_number": json.dumps("+1234567890"), + } + + dsr_store.cache_identity_data(pr_id, identity_data, expire_seconds=3600) + + # All keys written in new format + assert dsr_store._redis.get(f"dsr:{pr_id}:identity:email") == json.dumps("user@example.com") + assert dsr_store._redis.get(f"dsr:{pr_id}:identity:phone_number") == json.dumps("+1234567890") + + def test_get_cached_identity_data_reads_legacy_keys(self, mock_redis, pr_id, identity_data): + """get_cached_identity_data reads legacy keys and migrates them.""" + # Write legacy format with JSON encoding + for key, value in identity_data.items(): + mock_redis.set(f"id-{pr_id}-identity-{key}", json.dumps(value)) + + pr = create_mock_privacy_request(pr_id) + + with patch_get_cache(mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + result = PrivacyRequest.get_cached_identity_data(pr) + + # Values are returned correctly + assert result["email"] == "user@example.com" + assert result["phone_number"] == "+1234567890" + + # Legacy keys migrated to new format + assert mock_redis.get(f"dsr:{pr_id}:identity:email") is not None + 
assert mock_redis.get(f"id-{pr_id}-identity-email") is None + + def test_store_get_cached_identity_data_service_method(self, dsr_store, pr_id): + """DSRCacheStore.get_cached_identity_data reads all attributes.""" + # Write some identity data + identity_data = { + "email": json.dumps("user@example.com"), + "phone_number": json.dumps("+1234567890"), + } + dsr_store.cache_identity_data(pr_id, identity_data) + + # Read it back + result = dsr_store.get_cached_identity_data(pr_id) + + assert result["email"] == json.dumps("user@example.com") + assert result["phone_number"] == json.dumps("+1234567890") + + def test_get_cached_identity_data_reads_new_keys(self, dsr_store, mock_redis, pr_id, identity_data): + """get_cached_identity_data reads new-format keys.""" + # Write via store + for key, value in identity_data.items(): + dsr_store.write_identity(pr_id, key, json.dumps(value)) + + pr = create_mock_privacy_request(pr_id) + + with patch_get_cache(mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + result = PrivacyRequest.get_cached_identity_data(pr) + + assert result["email"] == "user@example.com" + assert result["phone_number"] == "+1234567890" + + def test_verify_cache_for_identity_data_detects_legacy(self, mock_redis, pr_id): + """verify_cache_for_identity_data returns True for legacy keys.""" + mock_redis.set(f"id-{pr_id}-identity-email", json.dumps("test@example.com")) + + pr = create_mock_privacy_request(pr_id) + + with patch_get_cache(mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + has_cache = PrivacyRequest.verify_cache_for_identity_data(pr) + + assert has_cache is True + + def test_store_has_cached_identity_data_service_method(self, dsr_store, mock_redis, pr_id): + """DSRCacheStore.has_cached_identity_data detects both formats.""" + # Empty initially + assert dsr_store.has_cached_identity_data(pr_id) is False + + # Add legacy key + mock_redis.set(f"id-{pr_id}-identity-email", json.dumps("test@example.com")) + assert dsr_store.has_cached_identity_data(pr_id) is True + + # Clear and test new format + mock_redis._data.clear() + dsr_store.write_identity(pr_id, "email", json.dumps("test@example.com")) + assert dsr_store.has_cached_identity_data(pr_id) is True + + def test_verify_cache_for_identity_data_detects_new(self, dsr_store, mock_redis, pr_id): + """verify_cache_for_identity_data returns True for new keys.""" + dsr_store.write_identity(pr_id, "email", json.dumps("test@example.com")) + + pr = create_mock_privacy_request(pr_id) + + with patch_get_cache(mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + has_cache = PrivacyRequest.verify_cache_for_identity_data(pr) + + assert has_cache is True + + def test_verify_cache_returns_false_when_empty(self, mock_redis, pr_id): + """verify_cache_for_identity_data returns False when no cache exists.""" + pr = create_mock_privacy_request(pr_id) + + with patch_get_cache(mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + has_cache = PrivacyRequest.verify_cache_for_identity_data(pr) + + assert has_cache is False + + +def _run_standalone_tests(): + """Run tests standalone.""" + # Create shared fixtures manually for standalone execution + mock_redis = MockRedis() + dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) + + tests = TestIdentityCacheOperations() + pr_id = f"test-pr-{uuid.uuid4()}" + identity_data = {"email": "user@example.com", "phone_number": "+1234567890"} + + 
tests.test_cache_identity_writes_new_format_only(mock_redis, pr_id, identity_data) + + pr_id = f"test-pr-{uuid.uuid4()}" + tests.test_store_cache_identity_data_service_method(dsr_store, pr_id) + + mock_redis = MockRedis() + dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) + pr_id = f"test-pr-{uuid.uuid4()}" + tests.test_get_cached_identity_data_reads_legacy_keys(mock_redis, pr_id, identity_data) + + pr_id = f"test-pr-{uuid.uuid4()}" + tests.test_store_get_cached_identity_data_service_method(dsr_store, pr_id) + + mock_redis = MockRedis() + dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) + pr_id = f"test-pr-{uuid.uuid4()}" + tests.test_get_cached_identity_data_reads_new_keys(dsr_store, mock_redis, pr_id, identity_data) + + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + tests.test_verify_cache_for_identity_data_detects_legacy(mock_redis, pr_id) + + mock_redis = MockRedis() + dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) + pr_id = f"test-pr-{uuid.uuid4()}" + tests.test_store_has_cached_identity_data_service_method(dsr_store, mock_redis, pr_id) + + mock_redis = MockRedis() + dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) + pr_id = f"test-pr-{uuid.uuid4()}" + tests.test_verify_cache_for_identity_data_detects_new(dsr_store, mock_redis, pr_id) + + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + tests.test_verify_cache_returns_false_when_empty(mock_redis, pr_id) + + print("All identity cache integration tests passed.") + + +if __name__ == "__main__": + _run_standalone_tests() diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py index c5f41e297bc..49649563e43 100644 --- a/tests/common/cache/test_dsr_store_migration.py +++ b/tests/common/cache/test_dsr_store_migration.py @@ -4,7 +4,6 @@ Verifies existing cached data (legacy format) is correctly read, migrated, and cleared. 
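+
+Migration pattern exercised here (sketch; names illustrative): a read that
+finds only a legacy key returns its value, rewrites it under the new
+dsr:{id}:{part} key, and deletes the legacy key:
+
+    redis.set(f"id-{pr_id}-identity-email", value)   # legacy write
+    store.get_identity(pr_id, "email")               # value returned, key migrated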
""" -import fnmatch import uuid from typing import Dict, List, Optional, Set, Union @@ -13,51 +12,9 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager -RedisValue = Union[bytes, float, int, str] - - -class MockRedis: - """Mock Redis with minimal interface for DSRCacheStore.""" - - def __init__(self) -> None: - self._data: Dict[str, RedisValue] = {} - self._sets: Dict[str, Set[Union[str, bytes]]] = {} +from tests.common.cache.mock_redis import MockRedis - def get(self, key: str) -> Optional[Union[str, bytes]]: - val = self._data.get(key) - return val if isinstance(val, (str, bytes)) else str(val) if val else None - - def set(self, key: str, value: RedisValue, ex: Optional[int] = None) -> bool: - self._data[key] = value - return True - - def delete(self, *keys: str) -> int: - deleted = sum( - 1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None) - ) - return deleted - - def keys(self, pattern: str) -> List[str]: - return [k for k in self._data if fnmatch.fnmatch(k, pattern)] - - def scan_iter(self, match: str = "*", count: Optional[int] = None): - return iter(self.keys(match)) - - def sadd(self, key: str, *members: Union[str, bytes]) -> int: - s = self._sets.setdefault(key, set()) - before = len(s) - s.update(members) - return len(s) - before - - def srem(self, key: str, *members: Union[str, bytes]) -> int: - if key not in self._sets: - return 0 - before = len(self._sets[key]) - self._sets[key].difference_update(members) - return before - len(self._sets[key]) - - def smembers(self, key: str) -> Set[Union[str, bytes]]: - return self._sets.get(key, set()).copy() +RedisValue = Union[bytes, float, int, str] # Test data factories From 48c0bb7889c378b74901a906d4316dc6f2e3efaa Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 24 Feb 2026 08:57:25 -0800 Subject: [PATCH 19/50] Dedupe import and add a TODO about moving it --- src/fides/common/cache/dsr_store.py | 5 +---- src/fides/common/cache/key_mapping.py | 4 ++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index 1151b92b30e..e4a307bd88a 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -18,12 +18,9 @@ from redis import Redis -from fides.common.cache.key_mapping import KeyMapper +from fides.common.cache.key_mapping import KeyMapper, DSR_KEY_PREFIX from fides.common.cache.manager import RedisCacheManager, RedisValue -# Key format: dsr:{dsr_id}:{part} (re-export for callers; KeyMapper builds these) -DSR_KEY_PREFIX = "dsr:" - def _dsr_key(dsr_id: str, part: str) -> str: """Build the Redis key for a DSR cache part.""" diff --git a/src/fides/common/cache/key_mapping.py b/src/fides/common/cache/key_mapping.py index d312417afd7..3284f5b0e43 100644 --- a/src/fides/common/cache/key_mapping.py +++ b/src/fides/common/cache/key_mapping.py @@ -9,6 +9,10 @@ from typing import Tuple +# TODO: Move to dsr_store.py when deprecating +# Once we don't need to do migrations we can get rid of this file, +# the prefix should move to the dsr_store.py (not there to avoid circular +# dependencies since the store depends on this) DSR_KEY_PREFIX = "dsr:" From fb37da5b2b986b817e69fb306a3cb652a0968cc4 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 10 Feb 2026 22:03:50 -0800 Subject: [PATCH 20/50] Simplify some tests --- .../test_dsr_store_identity_integration.py | 205 +++--------------- 1 file changed, 28 insertions(+), 177 deletions(-) diff --git 
a/tests/common/cache/test_dsr_store_identity_integration.py b/tests/common/cache/test_dsr_store_identity_integration.py index 2e63c84983c..19f7d2c5b84 100644 --- a/tests/common/cache/test_dsr_store_identity_integration.py +++ b/tests/common/cache/test_dsr_store_identity_integration.py @@ -1,14 +1,11 @@ """ -Tests for identity cache operations integration with DSRCacheStore. +Tests for identity cache operations in DSRCacheStore. -Verifies cache_identity, get_cached_identity_data, and verify_cache_for_identity_data -work correctly with both legacy and new cache keys. +Tests the service layer directly with MockRedis - no patching needed. """ import json import uuid -from contextlib import contextmanager -from unittest.mock import MagicMock, patch import pytest @@ -45,49 +42,15 @@ def identity_data(): } -@contextmanager -def patch_get_cache(mock_redis): - """Context manager to patch get_cache with mock Redis.""" - with patch("fides.api.util.cache.get_cache", return_value=mock_redis): - with patch("fides.api.util.cache.CONFIG.redis.default_ttl_seconds", 3600): - yield - - -def create_mock_privacy_request(pr_id): - """Create mock PrivacyRequest with given ID.""" - pr = MagicMock() - pr.id = pr_id - return pr - - -# Mark all tests as unit tests that don't require full test infrastructure +# Mark all tests as unit tests pytestmark = pytest.mark.unit -class TestIdentityCacheOperations: - """Test identity cache operations with DSR store.""" - - def test_cache_identity_writes_new_format_only(self, mock_redis, pr_id, identity_data): - """cache_identity writes only new-format keys.""" - pr = create_mock_privacy_request(pr_id) - - with patch_get_cache(mock_redis): - from fides.api.models.privacy_request.privacy_request import PrivacyRequest - from fides.api.schemas.redis_cache import Identity - - identity = Identity(**identity_data) - PrivacyRequest.cache_identity(pr, identity) - - # New keys exist - assert mock_redis.get(f"dsr:{pr_id}:identity:email") is not None - assert mock_redis.get(f"dsr:{pr_id}:identity:phone_number") is not None - - # Legacy keys do NOT exist - assert mock_redis.get(f"id-{pr_id}-identity-email") is None - assert mock_redis.get(f"id-{pr_id}-identity-phone_number") is None +class TestDSRCacheStoreIdentity: + """Test identity cache operations in DSRCacheStore.""" - def test_store_cache_identity_data_service_method(self, dsr_store, pr_id): - """DSRCacheStore.cache_identity_data writes all attributes.""" + def test_cache_identity_data_writes_all_attributes(self, dsr_store, pr_id): + """cache_identity_data writes all identity attributes to new-format keys.""" identity_data = { "email": json.dumps("user@example.com"), "phone_number": json.dumps("+1234567890"), @@ -98,74 +61,39 @@ def test_store_cache_identity_data_service_method(self, dsr_store, pr_id): # All keys written in new format assert dsr_store._redis.get(f"dsr:{pr_id}:identity:email") == json.dumps("user@example.com") assert dsr_store._redis.get(f"dsr:{pr_id}:identity:phone_number") == json.dumps("+1234567890") + + # Legacy keys do NOT exist + assert dsr_store._redis.get(f"id-{pr_id}-identity-email") is None - def test_get_cached_identity_data_reads_legacy_keys(self, mock_redis, pr_id, identity_data): - """get_cached_identity_data reads legacy keys and migrates them.""" - # Write legacy format with JSON encoding - for key, value in identity_data.items(): - mock_redis.set(f"id-{pr_id}-identity-{key}", json.dumps(value)) - - pr = create_mock_privacy_request(pr_id) - - with patch_get_cache(mock_redis): - from 
fides.api.models.privacy_request.privacy_request import PrivacyRequest - - result = PrivacyRequest.get_cached_identity_data(pr) - - # Values are returned correctly - assert result["email"] == "user@example.com" - assert result["phone_number"] == "+1234567890" - - # Legacy keys migrated to new format - assert mock_redis.get(f"dsr:{pr_id}:identity:email") is not None - assert mock_redis.get(f"id-{pr_id}-identity-email") is None - - def test_store_get_cached_identity_data_service_method(self, dsr_store, pr_id): - """DSRCacheStore.get_cached_identity_data reads all attributes.""" - # Write some identity data - identity_data = { - "email": json.dumps("user@example.com"), - "phone_number": json.dumps("+1234567890"), - } - dsr_store.cache_identity_data(pr_id, identity_data) + def test_get_cached_identity_data_reads_all_attributes(self, dsr_store, pr_id, identity_data): + """get_cached_identity_data reads all identity attributes from new-format keys.""" + # Write via store + encoded_data = {k: json.dumps(v) for k, v in identity_data.items()} + dsr_store.cache_identity_data(pr_id, encoded_data) - # Read it back result = dsr_store.get_cached_identity_data(pr_id) assert result["email"] == json.dumps("user@example.com") assert result["phone_number"] == json.dumps("+1234567890") - def test_get_cached_identity_data_reads_new_keys(self, dsr_store, mock_redis, pr_id, identity_data): - """get_cached_identity_data reads new-format keys.""" - # Write via store + def test_get_cached_identity_data_migrates_legacy_keys(self, dsr_store, mock_redis, pr_id, identity_data): + """get_cached_identity_data reads and migrates legacy keys on first access.""" + # Write legacy format with JSON encoding for key, value in identity_data.items(): - dsr_store.write_identity(pr_id, key, json.dumps(value)) - - pr = create_mock_privacy_request(pr_id) - - with patch_get_cache(mock_redis): - from fides.api.models.privacy_request.privacy_request import PrivacyRequest - - result = PrivacyRequest.get_cached_identity_data(pr) - - assert result["email"] == "user@example.com" - assert result["phone_number"] == "+1234567890" - - def test_verify_cache_for_identity_data_detects_legacy(self, mock_redis, pr_id): - """verify_cache_for_identity_data returns True for legacy keys.""" - mock_redis.set(f"id-{pr_id}-identity-email", json.dumps("test@example.com")) - - pr = create_mock_privacy_request(pr_id) + mock_redis.set(f"id-{pr_id}-identity-{key}", json.dumps(value)) - with patch_get_cache(mock_redis): - from fides.api.models.privacy_request.privacy_request import PrivacyRequest + result = dsr_store.get_cached_identity_data(pr_id) - has_cache = PrivacyRequest.verify_cache_for_identity_data(pr) + # Values are returned correctly + assert result["email"] == json.dumps("user@example.com") + assert result["phone_number"] == json.dumps("+1234567890") - assert has_cache is True + # Legacy keys migrated to new format + assert mock_redis.get(f"dsr:{pr_id}:identity:email") is not None + assert mock_redis.get(f"id-{pr_id}-identity-email") is None - def test_store_has_cached_identity_data_service_method(self, dsr_store, mock_redis, pr_id): - """DSRCacheStore.has_cached_identity_data detects both formats.""" + def test_has_cached_identity_data_detects_both_formats(self, dsr_store, mock_redis, pr_id): + """has_cached_identity_data detects identity data in both legacy and new formats.""" # Empty initially assert dsr_store.has_cached_identity_data(pr_id) is False @@ -177,80 +105,3 @@ def test_store_has_cached_identity_data_service_method(self, dsr_store, 
mock_red mock_redis._data.clear() dsr_store.write_identity(pr_id, "email", json.dumps("test@example.com")) assert dsr_store.has_cached_identity_data(pr_id) is True - - def test_verify_cache_for_identity_data_detects_new(self, dsr_store, mock_redis, pr_id): - """verify_cache_for_identity_data returns True for new keys.""" - dsr_store.write_identity(pr_id, "email", json.dumps("test@example.com")) - - pr = create_mock_privacy_request(pr_id) - - with patch_get_cache(mock_redis): - from fides.api.models.privacy_request.privacy_request import PrivacyRequest - - has_cache = PrivacyRequest.verify_cache_for_identity_data(pr) - - assert has_cache is True - - def test_verify_cache_returns_false_when_empty(self, mock_redis, pr_id): - """verify_cache_for_identity_data returns False when no cache exists.""" - pr = create_mock_privacy_request(pr_id) - - with patch_get_cache(mock_redis): - from fides.api.models.privacy_request.privacy_request import PrivacyRequest - - has_cache = PrivacyRequest.verify_cache_for_identity_data(pr) - - assert has_cache is False - - -def _run_standalone_tests(): - """Run tests standalone.""" - # Create shared fixtures manually for standalone execution - mock_redis = MockRedis() - dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) - - tests = TestIdentityCacheOperations() - pr_id = f"test-pr-{uuid.uuid4()}" - identity_data = {"email": "user@example.com", "phone_number": "+1234567890"} - - tests.test_cache_identity_writes_new_format_only(mock_redis, pr_id, identity_data) - - pr_id = f"test-pr-{uuid.uuid4()}" - tests.test_store_cache_identity_data_service_method(dsr_store, pr_id) - - mock_redis = MockRedis() - dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) - pr_id = f"test-pr-{uuid.uuid4()}" - tests.test_get_cached_identity_data_reads_legacy_keys(mock_redis, pr_id, identity_data) - - pr_id = f"test-pr-{uuid.uuid4()}" - tests.test_store_get_cached_identity_data_service_method(dsr_store, pr_id) - - mock_redis = MockRedis() - dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) - pr_id = f"test-pr-{uuid.uuid4()}" - tests.test_get_cached_identity_data_reads_new_keys(dsr_store, mock_redis, pr_id, identity_data) - - mock_redis = MockRedis() - pr_id = f"test-pr-{uuid.uuid4()}" - tests.test_verify_cache_for_identity_data_detects_legacy(mock_redis, pr_id) - - mock_redis = MockRedis() - dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) - pr_id = f"test-pr-{uuid.uuid4()}" - tests.test_store_has_cached_identity_data_service_method(dsr_store, mock_redis, pr_id) - - mock_redis = MockRedis() - dsr_store = DSRCacheStore(RedisCacheManager(mock_redis)) - pr_id = f"test-pr-{uuid.uuid4()}" - tests.test_verify_cache_for_identity_data_detects_new(dsr_store, mock_redis, pr_id) - - mock_redis = MockRedis() - pr_id = f"test-pr-{uuid.uuid4()}" - tests.test_verify_cache_returns_false_when_empty(mock_redis, pr_id) - - print("All identity cache integration tests passed.") - - -if __name__ == "__main__": - _run_standalone_tests() From 2bf58e0a326bd27f51196f8a47d9ef44d8708b89 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 10 Feb 2026 22:20:38 -0800 Subject: [PATCH 21/50] Part 4: custom fields and encryption --- .../models/privacy_request/privacy_request.py | 73 +++++----- src/fides/common/cache/dsr_store.py | 56 ++++++++ ...est_dsr_store_custom_fields_integration.py | 127 ++++++++++++++++++ 3 files changed, 221 insertions(+), 35 deletions(-) create mode 100644 tests/common/cache/test_dsr_store_custom_fields_integration.py diff --git 
a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index e91b0e4e2e7..3de569a089f 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -111,9 +111,7 @@ FidesopsRedis, get_async_task_tracking_cache_key, get_cache, - get_custom_privacy_request_field_cache_key, get_drp_request_body_cache_key, - get_encryption_cache_key, ) from fides.api.util.collection_util import Row, extract_key_for_address from fides.api.util.constants import API_DATE_FORMAT @@ -547,13 +545,18 @@ def cache_custom_privacy_request_fields( return if CONFIG.execution.allow_custom_privacy_request_fields_in_request_execution: - cache: FidesopsRedis = get_cache() - for key, item in custom_privacy_request_fields.items(): - if item is not None: - cache.set_with_autoexpire( - get_custom_privacy_request_field_cache_key(self.id, key), - json.dumps(item.value, cls=CustomJSONEncoder), - ) + with get_dsr_cache_store() as store: + # Encode values for Redis storage + encoded_fields = { + key: json.dumps(item.value, cls=CustomJSONEncoder) + for key, item in custom_privacy_request_fields.items() + if item is not None + } + store.cache_custom_fields( + self.id, + encoded_fields, + expire_seconds=CONFIG.redis.default_ttl_seconds, + ) else: logger.info( "Custom fields from privacy request {}, but config setting 'CONFIG.execution.allow_custom_privacy_request_fields_in_request_execution' is set to false and prevents their usage.", @@ -733,11 +736,13 @@ def cache_encryption(self, encryption_key: Optional[str] = None) -> None: if not encryption_key: return - cache: FidesopsRedis = get_cache() - cache.set_with_autoexpire( - get_encryption_cache_key(self.id, "key"), - encryption_key, - ) + with get_dsr_cache_store() as store: + store.write_encryption( + self.id, + "key", + encryption_key, + expire_seconds=CONFIG.redis.default_ttl_seconds, + ) def persist_masking_secrets( self, masking_secrets: List[MaskingSecretCache] @@ -791,30 +796,28 @@ def get_cached_identity_data(self) -> Dict[str, Any]: def get_cached_custom_privacy_request_fields(self) -> Dict[str, Any]: """Retrieves any custom fields pertaining to this request from the cache""" - result: Dict[str, Any] = {} - prefix = f"id-{self.id}-custom-privacy-request-field-*" + with get_dsr_cache_store() as store: + result = store.get_cached_custom_fields(self.id) - cache: FidesopsRedis = get_cache() - keys = cache.keys(prefix) + if not result: + logger.debug(f"Cache miss for request {self.id}, falling back to DB") + custom_privacy_request_fields = ( + self.get_persisted_custom_privacy_request_fields() + ) + self.cache_custom_privacy_request_fields( + { + key: CustomPrivacyRequestFieldSchema(**value) + for key, value in custom_privacy_request_fields.items() + } + ) + result = store.get_cached_custom_fields(self.id) - if not keys: - logger.debug(f"Cache miss for request {self.id}, falling back to DB") - custom_privacy_request_fields = ( - self.get_persisted_custom_privacy_request_fields() - ) - self.cache_custom_privacy_request_fields( - { - key: CustomPrivacyRequestFieldSchema(**value) - for key, value in custom_privacy_request_fields.items() - } - ) - keys = cache.keys(prefix) + # Parse JSON values + parsed_result: Dict[str, Any] = {} + for key, value in result.items(): + parsed_result[key] = json.loads(value) - for key in keys: - value = cache.get(key) - if value: - result[key.split("-")[-1]] = json.loads(value) - return result + return parsed_result def 
get_results(self) -> Dict[str, Any]: """Retrieves all cached identity data associated with this Privacy Request diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index 563e7665f32..4e385be8d55 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -145,6 +145,62 @@ def get_custom_field( dsr_id, part, KeyMapper.custom_field(dsr_id, field_key)[1] ) + def cache_custom_fields( + self, dsr_id: str, custom_fields: Dict[str, Any], expire_seconds: Optional[int] = None + ) -> None: + """ + Cache all custom privacy request fields for a DSR. + + Writes each non-None field to dsr:{id}:custom_field:{field_key} format. + """ + for key, value in custom_fields.items(): + if value is not None: + self.write_custom_field(dsr_id, key, value, expire_seconds) + + def get_cached_custom_fields(self, dsr_id: str) -> Dict[str, Any]: + """ + Retrieve all cached custom fields for a DSR. + + Returns dict with custom field values. Automatically migrates legacy keys on read. + Returns empty dict if no custom fields cached. + """ + result: Dict[str, Any] = {} + all_keys = self.get_all_keys(dsr_id) + + # Filter for custom field keys (both new and legacy formats) + # New: dsr:{id}:custom_field:{key} + # Legacy: id-{id}-custom-privacy-request-field-{key} + custom_keys = [ + k for k in all_keys + if ":custom_field:" in k or "-custom-privacy-request-field-" in k + ] + + for key in custom_keys: + # Extract field name from key + if ":custom_field:" in key: + field_key = key.split(":")[-1] + else: + # Legacy format + field_key = key.split("-")[-1] + + value = self.get_custom_field(dsr_id, field_key) + if value: + result[field_key] = value + + return result + + def has_cached_custom_fields(self, dsr_id: str) -> bool: + """ + Check if any custom fields are cached for this DSR. + + Returns True if any custom field keys exist (legacy or new format). + """ + all_keys = self.get_all_keys(dsr_id) + return any( + ":custom_field:" in k or "-custom-privacy-request-field-" in k + for k in all_keys + ) + # --- Convenience: identity --- def write_identity( diff --git a/tests/common/cache/test_dsr_store_custom_fields_integration.py b/tests/common/cache/test_dsr_store_custom_fields_integration.py new file mode 100644 index 00000000000..2506182ab2b --- /dev/null +++ b/tests/common/cache/test_dsr_store_custom_fields_integration.py @@ -0,0 +1,127 @@ +""" +Tests for custom fields and encryption cache operations in DSRCacheStore. + +Tests the service layer directly with MockRedis - no patching needed. 
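+
+Typical flow exercised here (sketch):
+
+    store.cache_custom_fields(pr_id, {"department": json.dumps("Engineering")})
+    store.get_cached_custom_fields(pr_id)   # -> {"department": '"Engineering"'}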
+""" + +import json +import uuid + +import pytest + +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager + +from tests.common.cache.mock_redis import MockRedis + + +@pytest.fixture +def mock_redis(): + """Shared MockRedis instance.""" + return MockRedis() + + +@pytest.fixture +def dsr_store(mock_redis): + """DSRCacheStore backed by MockRedis.""" + return DSRCacheStore(RedisCacheManager(mock_redis)) + + +@pytest.fixture +def pr_id(): + """Generate unique privacy request ID.""" + return f"test-pr-{uuid.uuid4()}" + + +# Mark all tests as unit tests +pytestmark = pytest.mark.unit + + +class TestDSRCacheStoreCustomFields: + """Test custom fields cache operations in DSRCacheStore.""" + + def test_cache_custom_fields_writes_all_fields(self, dsr_store, pr_id): + """cache_custom_fields writes all fields to new-format keys.""" + custom_fields = { + "department": json.dumps("Engineering"), + "employee_id": json.dumps("E12345"), + } + + dsr_store.cache_custom_fields(pr_id, custom_fields, expire_seconds=3600) + + # All keys written in new format + assert dsr_store._redis.get(f"dsr:{pr_id}:custom_field:department") == json.dumps("Engineering") + assert dsr_store._redis.get(f"dsr:{pr_id}:custom_field:employee_id") == json.dumps("E12345") + + # Legacy keys do NOT exist + assert dsr_store._redis.get(f"id-{pr_id}-custom-privacy-request-field-department") is None + + def test_get_cached_custom_fields_reads_all_fields(self, dsr_store, pr_id): + """get_cached_custom_fields reads all fields from new-format keys.""" + custom_fields = { + "department": json.dumps("Engineering"), + "employee_id": json.dumps("E12345"), + } + dsr_store.cache_custom_fields(pr_id, custom_fields) + + result = dsr_store.get_cached_custom_fields(pr_id) + + assert result["department"] == json.dumps("Engineering") + assert result["employee_id"] == json.dumps("E12345") + + def test_get_cached_custom_fields_migrates_legacy_keys(self, dsr_store, mock_redis, pr_id): + """get_cached_custom_fields reads and migrates legacy keys on first access.""" + # Write legacy format + mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-department", json.dumps("Engineering")) + mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-employee_id", json.dumps("E12345")) + + result = dsr_store.get_cached_custom_fields(pr_id) + + # Values are returned correctly + assert result["department"] == json.dumps("Engineering") + assert result["employee_id"] == json.dumps("E12345") + + # Legacy keys migrated to new format + assert mock_redis.get(f"dsr:{pr_id}:custom_field:department") is not None + assert mock_redis.get(f"id-{pr_id}-custom-privacy-request-field-department") is None + + def test_has_cached_custom_fields_detects_both_formats(self, dsr_store, mock_redis, pr_id): + """has_cached_custom_fields detects fields in both legacy and new formats.""" + # Empty initially + assert dsr_store.has_cached_custom_fields(pr_id) is False + + # Add legacy key + mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-department", json.dumps("Engineering")) + assert dsr_store.has_cached_custom_fields(pr_id) is True + + # Clear and test new format + mock_redis._data.clear() + dsr_store.write_custom_field(pr_id, "department", json.dumps("Engineering")) + assert dsr_store.has_cached_custom_fields(pr_id) is True + + +class TestDSRCacheStoreEncryption: + """Test encryption key cache operations in DSRCacheStore.""" + + def test_write_encryption_writes_key(self, dsr_store, pr_id): + """write_encryption writes 
encryption key to new-format key.""" + dsr_store.write_encryption(pr_id, "key", "test-encryption-key-12345", expire_seconds=3600) + + assert dsr_store._redis.get(f"dsr:{pr_id}:encryption:key") == "test-encryption-key-12345" + + # Legacy key does NOT exist + assert dsr_store._redis.get(f"id-{pr_id}-encryption-key") is None + + def test_get_encryption_migrates_legacy_key(self, dsr_store, mock_redis, pr_id): + """get_encryption reads and migrates legacy encryption keys.""" + # Write legacy format + mock_redis.set(f"id-{pr_id}-encryption-key", "test-encryption-key-12345") + + # Read via store + value = dsr_store.get_encryption(pr_id, "key") + + assert value == "test-encryption-key-12345" + + # Legacy key migrated + assert mock_redis.get(f"dsr:{pr_id}:encryption:key") == "test-encryption-key-12345" + assert mock_redis.get(f"id-{pr_id}-encryption-key") is None From 6f938c40c5242c7a63c38e4a1b31904c2155a80d Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 17 Feb 2026 17:08:31 -0800 Subject: [PATCH 22/50] DRP request body caching updates --- .../models/privacy_request/privacy_request.py | 26 +-- src/fides/common/cache/dsr_store.py | 47 ++++++ .../cache/test_dsr_store_drp_integration.py | 159 ++++++++++++++++++ 3 files changed, 220 insertions(+), 12 deletions(-) create mode 100644 tests/common/cache/test_dsr_store_drp_integration.py diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index 3de569a089f..b34d0504615 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -111,7 +111,6 @@ FidesopsRedis, get_async_task_tracking_cache_key, get_cache, - get_drp_request_body_cache_key, ) from fides.api.util.collection_util import Row, extract_key_for_address from fides.api.util.constants import API_DATE_FORMAT @@ -714,22 +713,25 @@ def get_async_execution_task(self) -> Optional[AsyncResult]: return res def cache_drp_request_body(self, drp_request_body: DrpPrivacyRequestCreate) -> None: - """Sets the identity's values at their specific locations in the Fides app cache""" - cache: FidesopsRedis = get_cache() + """Sets the DRP request body values at their specific locations in the Fides app cache""" drp_request_body_dict: Dict[str, Any] = dict(drp_request_body) + + # Serialize complex objects to repr format for storage + serialized_body = {} for key, value in drp_request_body_dict.items(): if value is not None: - # handle nested dict/objects + # Handle nested dict/objects if not isinstance(value, (bytes, str, int, float)): - cache.set_with_autoexpire( - get_drp_request_body_cache_key(self.id, key), - repr(value), - ) + serialized_body[key] = repr(value) else: - cache.set_with_autoexpire( - get_drp_request_body_cache_key(self.id, key), - value, - ) + serialized_body[key] = value + + with get_dsr_cache_store() as store: + store.cache_drp_request_body( + self.id, + serialized_body, + expire_seconds=CONFIG.redis.default_ttl_seconds, + ) def cache_encryption(self, encryption_key: Optional[str] = None) -> None: """Sets the encryption key in the Fides app cache if provided""" diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index 4e385be8d55..920d2c36488 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -302,6 +302,53 @@ def get_drp(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: part = f"drp:{attr}" return self.get_with_legacy(dsr_id, part, KeyMapper.drp(dsr_id, 
attr)[1]) + def cache_drp_request_body( + self, dsr_id: str, drp_body: Dict[str, Any], expire_seconds: Optional[int] = None + ) -> None: + """ + Cache all DRP request body fields for a DSR. + Writes each non-None field to dsr:{id}:drp:{field_key} format. + """ + for key, value in drp_body.items(): + if value is not None: + self.write_drp(dsr_id, key, value, expire_seconds) + + def get_cached_drp_request_body(self, dsr_id: str) -> Dict[str, Any]: + """ + Retrieve all cached DRP request body data for a DSR. + Returns dict with DRP fields. Automatically migrates legacy keys on read. + Returns empty dict if no DRP data cached. + """ + result: Dict[str, Any] = {} + all_keys = self.get_all_keys(dsr_id) + + # Filter for DRP keys (both new and legacy formats) + drp_keys = [k for k in all_keys if ":drp:" in k or "-drp-" in k] + + for key in drp_keys: + # Extract field name from key + # New format: dsr:{id}:drp:{field} + # Legacy format: id-{id}-drp-{field} + if ":drp:" in key: + field = key.split(":")[-1] + else: + # Legacy format + field = key.split("-")[-1] + + value = self.get_drp(dsr_id, field) + if value: + result[field] = value + + return result + + def has_cached_drp_request_body(self, dsr_id: str) -> bool: + """ + Check if any DRP request body data is cached for this DSR. + Checks both new and legacy key formats. + """ + all_keys = self.get_all_keys(dsr_id) + return any(":drp:" in k or "-drp-" in k for k in all_keys) + # --- Convenience: masking secret --- def write_masking_secret( diff --git a/tests/common/cache/test_dsr_store_drp_integration.py b/tests/common/cache/test_dsr_store_drp_integration.py new file mode 100644 index 00000000000..63af17a9884 --- /dev/null +++ b/tests/common/cache/test_dsr_store_drp_integration.py @@ -0,0 +1,159 @@ +""" +Tests for DSRCacheStore DRP request body caching. 
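+
+A representative round-trip (sketch):
+
+    store.cache_drp_request_body(pr_id, {"regime": "gdpr"})
+    store.get_cached_drp_request_body(pr_id)   # -> {"regime": "gdpr"}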
+ +Focuses on service-layer methods for DRP data management, including: +- Writing DRP fields in new format +- Reading DRP fields from both new and legacy formats +- Automatic migration on read +""" + +import pytest +import uuid +from typing import Dict, Any + +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager +from tests.common.cache.mock_redis import MockRedis + + +@pytest.fixture +def mock_redis(): + """In-memory Redis mock for isolated testing.""" + return MockRedis() + + +@pytest.fixture +def dsr_store(mock_redis): + """DSRCacheStore instance with mock Redis backend.""" + manager = RedisCacheManager(mock_redis) + return DSRCacheStore(manager) + + +@pytest.fixture +def pr_id(): + """Generate unique privacy request ID for each test.""" + return f"test-pr-{uuid.uuid4()}" + + +class TestDSRCacheStoreDRP: + """Test DSRCacheStore DRP request body methods.""" + + def test_cache_drp_request_body_writes_all_fields(self, dsr_store, pr_id): + """cache_drp_request_body writes all fields to new-format keys.""" + drp_body = { + "meta": "metadata_value", + "regime": "gdpr", + "exercise": "access", + "identity": '{"email": "user@example.com"}', + } + + dsr_store.cache_drp_request_body(pr_id, drp_body, expire_seconds=3600) + + # Verify all fields written to new format + assert dsr_store.get_drp(pr_id, "meta") == "metadata_value" + assert dsr_store.get_drp(pr_id, "regime") == "gdpr" + assert dsr_store.get_drp(pr_id, "exercise") == "access" + assert dsr_store.get_drp(pr_id, "identity") == '{"email": "user@example.com"}' + + def test_cache_drp_request_body_skips_none_values(self, dsr_store, pr_id): + """cache_drp_request_body skips None values.""" + drp_body = { + "meta": "metadata_value", + "regime": None, + "exercise": "access", + } + + dsr_store.cache_drp_request_body(pr_id, drp_body) + + # Only non-None fields should be written + assert dsr_store.get_drp(pr_id, "meta") == "metadata_value" + assert dsr_store.get_drp(pr_id, "regime") is None + assert dsr_store.get_drp(pr_id, "exercise") == "access" + + def test_get_cached_drp_request_body_reads_all_fields(self, dsr_store, pr_id): + """get_cached_drp_request_body reads all fields from new-format keys.""" + drp_body = { + "meta": "metadata_value", + "regime": "gdpr", + "exercise": "access", + } + dsr_store.cache_drp_request_body(pr_id, drp_body) + + result = dsr_store.get_cached_drp_request_body(pr_id) + + assert result == { + "meta": "metadata_value", + "regime": "gdpr", + "exercise": "access", + } + + def test_get_cached_drp_request_body_migrates_legacy_keys( + self, dsr_store, mock_redis, pr_id + ): + """get_cached_drp_request_body reads and migrates legacy keys on first access.""" + # Write legacy format directly + mock_redis.set(f"id-{pr_id}-drp-meta", "legacy_metadata") + mock_redis.set(f"id-{pr_id}-drp-regime", "ccpa") + + result = dsr_store.get_cached_drp_request_body(pr_id) + + assert result == { + "meta": "legacy_metadata", + "regime": "ccpa", + } + + # Verify migration happened (new keys exist, legacy keys deleted) + assert mock_redis.get(f"dsr:{pr_id}:drp:meta") == "legacy_metadata" + assert mock_redis.get(f"dsr:{pr_id}:drp:regime") == "ccpa" + assert mock_redis.get(f"id-{pr_id}-drp-meta") is None + assert mock_redis.get(f"id-{pr_id}-drp-regime") is None + + def test_has_cached_drp_request_body_detects_both_formats( + self, dsr_store, mock_redis, pr_id + ): + """has_cached_drp_request_body detects DRP data in both legacy and new formats.""" + # Empty initially + assert 
dsr_store.has_cached_drp_request_body(pr_id) is False
+
+        # Write new format
+        dsr_store.write_drp(pr_id, "meta", "metadata")
+        assert dsr_store.has_cached_drp_request_body(pr_id) is True
+
+        # Clear and test legacy format
+        dsr_store.clear(pr_id)
+        assert dsr_store.has_cached_drp_request_body(pr_id) is False
+
+        mock_redis.set(f"id-{pr_id}-drp-regime", "gdpr")
+        assert dsr_store.has_cached_drp_request_body(pr_id) is True
+
+    def test_get_cached_drp_request_body_returns_empty_dict_when_no_data(
+        self, dsr_store, pr_id
+    ):
+        """get_cached_drp_request_body returns empty dict when no DRP data cached."""
+        result = dsr_store.get_cached_drp_request_body(pr_id)
+        assert result == {}
+
+    def test_drp_migration_then_new_writes(self, dsr_store, mock_redis, pr_id):
+        """After migrating legacy keys, new writes use indexed format."""
+        # Start with legacy keys
+        mock_redis.set(f"id-{pr_id}-drp-meta", "legacy_metadata")
+
+        # Read triggers migration
+        result1 = dsr_store.get_cached_drp_request_body(pr_id)
+        assert result1["meta"] == "legacy_metadata"
+
+        # Now write new fields - should use indexed format
+        dsr_store.write_drp(pr_id, "regime", "gdpr")
+        dsr_store.write_drp(pr_id, "exercise", "access")
+
+        # Read all - should get both migrated and new
+        result2 = dsr_store.get_cached_drp_request_body(pr_id)
+        assert result2["meta"] == "legacy_metadata"
+        assert result2["regime"] == "gdpr"
+        assert result2["exercise"] == "access"
+
+        # Verify all keys are now indexed
+        all_keys = dsr_store.get_all_keys(pr_id)
+        assert f"dsr:{pr_id}:drp:meta" in all_keys
+        assert f"dsr:{pr_id}:drp:regime" in all_keys
+        assert f"dsr:{pr_id}:drp:exercise" in all_keys

From ebfd156a77f6f8f20612698df337c20c2aa7e479 Mon Sep 17 00:00:00 2001
From: John Ewart
Date: Tue, 24 Feb 2026 17:23:32 -0800
Subject: [PATCH 23/50] Update tests/common/cache/test_dsr_store.py

Co-authored-by: greptile-apps[bot]
 <165735046+greptile-apps[bot]@users.noreply.github.com>
---
 tests/common/cache/test_dsr_store.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py
index 86dabb3e1df..83483592623 100644
--- a/tests/common/cache/test_dsr_store.py
+++ b/tests/common/cache/test_dsr_store.py
@@ -34,13 +34,13 @@ def set(
         self._data[key] = value
         return True
 
     def delete(self, *keys: str) -> int:
-        n = 0
+        deleted_count = 0
         for key in keys:
             if key in self._data:
                 del self._data[key]
-                n += 1
-        return n
+                deleted_count += 1
+        return deleted_count
 
     def keys(self, pattern: str) -> List[str]:
         """Glob-style: * matches any number of chars."""

From 36af735c167e63d5b3bcf0bd2015405afaf84542 Mon Sep 17 00:00:00 2001
From: John Ewart
Date: Mon, 23 Feb 2026 13:10:19 -0800
Subject: [PATCH 24/50] Add DSR cache store + tests

---
 src/fides/common/cache/dsr_store.py           | 299 ++++++++++++++++++
 src/fides/common/cache/key_mapping.py         | 140 ++++++++
 tests/common/cache/conftest.py                |  75 +++++
 tests/common/cache/test_dsr_store.py          | 246 ++++++++++++++
 .../cache/test_dsr_store_key_mapping.py       | 111 +++++++
 .../common/cache/test_dsr_store_migration.py  | 209 ++++++++++++
 6 files changed, 1080 insertions(+)
 create mode 100644 src/fides/common/cache/dsr_store.py
 create mode 100644 src/fides/common/cache/key_mapping.py
 create mode 100644 tests/common/cache/conftest.py
 create mode 100644 tests/common/cache/test_dsr_store.py
 create mode 100644 tests/common/cache/test_dsr_store_key_mapping.py
 create mode 100644 tests/common/cache/test_dsr_store_migration.py

diff --git 
a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py new file mode 100644 index 00000000000..d6819fc22c7 --- /dev/null +++ b/src/fides/common/cache/dsr_store.py @@ -0,0 +1,299 @@ +""" +DSR cache store: single place for all DSR (privacy request) cache access. + +Enforces: +- Key naming: dsr:{dsr_id}:{part} for every key (part = field_type:field_key) +- Index: one set per DSR (__idx:dsr:{dsr_id}) listing all keys for that DSR +- Legacy: each field type has a legacy key format; reads try new key then legacy, + and can lazily migrate (copy legacy -> new, delete legacy) on first read. + +Hash alternative (future): Using a single Redis HASH per DSR (key=dsr:{id}, +fields=part names) would give one key per DSR, no index, and atomic HSET/HGET +per field so concurrent writers don't touch an index. Tradeoff: one TTL for the +whole DSR and a different storage shape; can introduce a hash-backed backend +later if we want to avoid index consistency concerns. +""" + +from typing import List, Optional, Union + +from redis import Redis + +from fides.common.cache.key_mapping import KeyMapper +from fides.common.cache.manager import RedisCacheManager, RedisValue + +# Key format: dsr:{dsr_id}:{part} (re-export for callers; KeyMapper builds these) +DSR_KEY_PREFIX = "dsr:" + + +def _dsr_key(dsr_id: str, part: str) -> str: + """Build the Redis key for a DSR cache part.""" + return f"{DSR_KEY_PREFIX}{dsr_id}:{part}" + + +def _dsr_index_prefix(dsr_id: str) -> str: + """Index prefix for this DSR; index set is __idx:dsr:{dsr_id}.""" + return f"{DSR_KEY_PREFIX}{dsr_id}" + + +class DSRCacheStore: + """ + Cache store for DSR (privacy request) data with enforced naming and indexing. + + All keys are stored as dsr:{dsr_id}:{part}. Every write is registered in + an index set so listing and clearing by DSR is O(index size) without KEYS/SCAN. + Convenience methods (write_custom_field, get_custom_field, etc.) map to part + names and support lazy migration from legacy key formats on read. + """ + + def __init__( + self, + cache_manager: RedisCacheManager, + *, + backfill_index_on_legacy_read: bool = True, + migrate_legacy_on_read: bool = True, + ) -> None: + """ + Args: + cache_manager: RedisCacheManager (e.g. from get_redis_cache_manager()). + backfill_index_on_legacy_read: When listing keys and we fall back to + KEYS for legacy keys, add those keys to the index. Default True. + migrate_legacy_on_read: When a get finds value in legacy key only, + write to new key, delete legacy key, add new key to index. + Default True. + """ + self._manager = cache_manager + self._redis: Redis = cache_manager.redis + self._backfill = backfill_index_on_legacy_read + self._migrate_on_read = migrate_legacy_on_read + + def write( + self, + dsr_id: str, + field_type: str, + field_key: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """ + Low-level write: set dsr:{dsr_id}:{field_type}:{field_key} and add to index. + Prefer convenience methods (write_custom_field, etc.) so legacy mapping + stays in one place. + """ + part = f"{field_type}:{field_key}" if field_key else field_type + return self.set(dsr_id, part, value, expire_seconds) + + def get_with_legacy( + self, + dsr_id: str, + part: str, + legacy_key: str, + ) -> Optional[Union[str, bytes]]: + """ + Get value for part; if missing, try legacy_key. If found in legacy only + and migrate_legacy_on_read, copy to new key, delete legacy, add to index. 
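+
+        Example (sketch): with only the legacy key "id-pr-1-identity-email"
+        set, get_with_legacy("pr-1", "identity:email",
+        "id-pr-1-identity-email") returns the cached value and, when
+        migrate_legacy_on_read is enabled, re-stores it under
+        "dsr:pr-1:identity:email", deletes the legacy key, and indexes the
+        new key.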
+ """ + val = self._redis.get(_dsr_key(dsr_id, part)) + if val is not None: + return val + val = self._redis.get(legacy_key) + if val is None: + return None + if self._migrate_on_read: + self.set(dsr_id, part, val) + self._redis.delete(legacy_key) + return val + + def get(self, dsr_id: str, part: str) -> Optional[Union[str, bytes]]: + """Get a value for the given DSR and part. Returns None if missing.""" + return self._redis.get(_dsr_key(dsr_id, part)) + + def set( + self, + dsr_id: str, + part: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """ + Set a value for the given DSR and part. Registers the key in the DSR index. + """ + key = _dsr_key(dsr_id, part) + return self._manager.set_with_index( + key, value, _dsr_index_prefix(dsr_id), expire_seconds + ) + + def delete(self, dsr_id: str, part: str) -> None: + """Delete a single part and remove it from the DSR index.""" + key = _dsr_key(dsr_id, part) + self._manager.delete_key_and_remove_from_index(key, _dsr_index_prefix(dsr_id)) + + # --- Convenience: custom privacy request fields --- + + def write_custom_field( + self, + dsr_id: str, + field_key: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write a custom privacy request field. New key: dsr:{id}:custom_field:{field_key}.""" + return self.write(dsr_id, "custom_field", field_key, value, expire_seconds) + + def get_custom_field( + self, dsr_id: str, field_key: str + ) -> Optional[Union[str, bytes]]: + """Get custom field; reads from legacy id-{id}-custom-privacy-request-field-{key} if needed.""" + part = f"custom_field:{field_key}" + return self.get_with_legacy( + dsr_id, part, KeyMapper.custom_field(dsr_id, field_key)[1] + ) + + # --- Convenience: identity --- + + def write_identity( + self, + dsr_id: str, + attr: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write an identity attribute. New key: dsr:{id}:identity:{attr}.""" + return self.write(dsr_id, "identity", attr, value, expire_seconds) + + def get_identity(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: + """Get identity attribute; reads from legacy id-{id}-identity-{attr} if needed.""" + part = f"identity:{attr}" + return self.get_with_legacy(dsr_id, part, KeyMapper.identity(dsr_id, attr)[1]) + + # --- Convenience: encryption --- + + def write_encryption( + self, + dsr_id: str, + attr: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write an encryption attribute. New key: dsr:{id}:encryption:{attr}.""" + return self.write(dsr_id, "encryption", attr, value, expire_seconds) + + def get_encryption(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: + """Get encryption attribute; reads from legacy id-{id}-encryption-{attr} if needed.""" + part = f"encryption:{attr}" + return self.get_with_legacy(dsr_id, part, KeyMapper.encryption(dsr_id, attr)[1]) + + # --- Convenience: DRP request body --- + + def write_drp( + self, + dsr_id: str, + attr: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write DRP request body attribute. 
New key: dsr:{id}:drp:{attr}.""" + return self.write(dsr_id, "drp", attr, value, expire_seconds) + + def get_drp(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: + """Get DRP attribute; reads from legacy id-{id}-drp-{attr} if needed.""" + part = f"drp:{attr}" + return self.get_with_legacy(dsr_id, part, KeyMapper.drp(dsr_id, attr)[1]) + + # --- Convenience: masking secret --- + + def write_masking_secret( + self, + dsr_id: str, + strategy: str, + secret_type: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write masking secret. New key: dsr:{id}:masking_secret:{strategy}:{secret_type}.""" + part = f"masking_secret:{strategy}:{secret_type}" + return self.set(dsr_id, part, value, expire_seconds) + + def get_masking_secret( + self, + dsr_id: str, + strategy: str, + secret_type: str, + ) -> Optional[Union[str, bytes]]: + """Get masking secret; reads from legacy id-{id}-masking-secret-{strategy}-{type} if needed.""" + part = f"masking_secret:{strategy}:{secret_type}" + return self.get_with_legacy( + dsr_id, + part, + KeyMapper.masking_secret(dsr_id, strategy, secret_type)[1], + ) + + # --- Convenience: async execution (single value per DSR) --- + + def write_async_execution( + self, + dsr_id: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write async task id. New key: dsr:{id}:async_execution.""" + return self.write(dsr_id, "async_execution", "", value, expire_seconds) + + def get_async_execution(self, dsr_id: str) -> Optional[Union[str, bytes]]: + """Get async task id; reads from legacy id-{id}-async-execution if needed.""" + part = "async_execution" + return self.get_with_legacy(dsr_id, part, KeyMapper.async_execution(dsr_id)[1]) + + # --- Convenience: retry count --- + + def write_retry_count( + self, + dsr_id: str, + value: RedisValue, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + """Write privacy request retry count. New key: dsr:{id}:retry_count.""" + return self.write(dsr_id, "retry_count", "", value, expire_seconds) + + def get_retry_count(self, dsr_id: str) -> Optional[Union[str, bytes]]: + """Get retry count; reads from legacy id-{id}-privacy-request-retry-count if needed.""" + part = "retry_count" + return self.get_with_legacy(dsr_id, part, KeyMapper.retry_count(dsr_id)[1]) + + # --- List / clear (unchanged) --- + + def get_all_keys(self, dsr_id: str) -> List[str]: + """ + Return all cache keys for this DSR. + Uses the index first; if empty, falls back to SCAN for legacy keys + and optionally backfills the index. + """ + index_prefix = _dsr_index_prefix(dsr_id) + keys = self._manager.get_keys_by_index(index_prefix) + if keys: + return keys + legacy_keys = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) + if not legacy_keys: + return [] + if self._backfill: + for k in legacy_keys: + self._manager.add_key_to_index(index_prefix, k) + return list(legacy_keys) + + def clear(self, dsr_id: str) -> None: + """ + Delete all cache keys for this DSR and remove the index. + + Always uses SCAN to find all keys (both indexed and legacy) to ensure + complete cleanup in mixed-key scenarios. 
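+
+        Example (sketch):
+            store.write_identity("pr-1", "email", "a@b.co")   # indexed key
+            redis.set("id-pr-1-identity-email", "a@b.co")     # legacy key
+            store.clear("pr-1")   # deletes both keys and the __idx:dsr:pr-1 set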
+ """ + # Use SCAN to find ALL keys (indexed + legacy) + all_keys_via_scan = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) + + index_prefix = _dsr_index_prefix(dsr_id) + + # Delete all found keys in batch + if all_keys_via_scan: + self._redis.delete(*all_keys_via_scan) + + # Delete the index itself + self._manager.delete_index(index_prefix) diff --git a/src/fides/common/cache/key_mapping.py b/src/fides/common/cache/key_mapping.py new file mode 100644 index 00000000000..9944c4e3493 --- /dev/null +++ b/src/fides/common/cache/key_mapping.py @@ -0,0 +1,140 @@ +""" +Key mapping: legacy Redis key patterns to the DSR store key format. + +Maps (dsr_id, field_type, field_key, ...) to: +- new_key: dsr:{dsr_id}:{part} (part = field_type:field_key or field_type) +- legacy_key: the key used by the old cache API (for encoded objects, the + logical key; set_encoded_object stores under EN_ + logical key in Redis). +""" + +from typing import Tuple + +DSR_KEY_PREFIX = "dsr:" + + +def _new_key(dsr_id: str, part: str) -> str: + """Build the new-format Redis key.""" + return f"{DSR_KEY_PREFIX}{dsr_id}:{part}" + + +def _part(field_type: str, field_key: str = "") -> str: + """Build the part string (field_type or field_type:field_key).""" + return f"{field_type}:{field_key}" if field_key else field_type + + +class KeyMapper: + """ + Maps DSR cache field types to new keys and legacy keys. + All patterns discovered in the privacy request cache audit are encoded here. + """ + + # --- Simple key-value (legacy = Redis key as used with set_with_autoexpire / get) --- + + @staticmethod + def identity(dsr_id: str, attr: str) -> Tuple[str, str]: + """New: dsr:{id}:identity:{attr}. Legacy: id-{id}-identity-{attr}.""" + part = _part("identity", attr) + return _new_key(dsr_id, part), f"id-{dsr_id}-identity-{attr}" + + @staticmethod + def custom_field(dsr_id: str, field_key: str) -> Tuple[str, str]: + """New: dsr:{id}:custom_field:{key}. Legacy: id-{id}-custom-privacy-request-field-{key}.""" + part = _part("custom_field", field_key) + return _new_key(dsr_id, part), f"id-{dsr_id}-custom-privacy-request-field-{field_key}" + + @staticmethod + def drp(dsr_id: str, attr: str) -> Tuple[str, str]: + """New: dsr:{id}:drp:{attr}. Legacy: id-{id}-drp-{attr}.""" + part = _part("drp", attr) + return _new_key(dsr_id, part), f"id-{dsr_id}-drp-{attr}" + + @staticmethod + def encryption(dsr_id: str, attr: str) -> Tuple[str, str]: + """New: dsr:{id}:encryption:{attr}. Legacy: id-{id}-encryption-{attr}.""" + part = _part("encryption", attr) + return _new_key(dsr_id, part), f"id-{dsr_id}-encryption-{attr}" + + @staticmethod + def masking_secret( + dsr_id: str, strategy: str, secret_type: str + ) -> Tuple[str, str]: + """New: dsr:{id}:masking_secret:{strategy}:{secret_type}. Legacy: id-{id}-masking-secret-{strategy}-{secret_type}.""" + part = f"masking_secret:{strategy}:{secret_type}" + return _new_key(dsr_id, part), f"id-{dsr_id}-masking-secret-{strategy}-{secret_type}" + + @staticmethod + def async_execution(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:async_execution. Legacy: id-{id}-async-execution.""" + part = "async_execution" + return _new_key(dsr_id, part), f"id-{dsr_id}-async-execution" + + @staticmethod + def retry_count(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:retry_count. 
Legacy: id-{id}-privacy-request-retry-count.""" + part = "retry_count" + return _new_key(dsr_id, part), f"id-{dsr_id}-privacy-request-retry-count" + + # --- Encoded objects (legacy = logical key; Redis stores EN_ + logical) --- + + @staticmethod + def webhook_manual_access(dsr_id: str, webhook_id: str) -> Tuple[str, str]: + """New: dsr:{id}:webhook_manual_access:{webhook_id}. Legacy logical: WEBHOOK_MANUAL_ACCESS_INPUT__{id}__{webhook_id}.""" + part = _part("webhook_manual_access", webhook_id) + return _new_key(dsr_id, part), f"WEBHOOK_MANUAL_ACCESS_INPUT__{dsr_id}__{webhook_id}" + + @staticmethod + def webhook_manual_erasure(dsr_id: str, webhook_id: str) -> Tuple[str, str]: + """New: dsr:{id}:webhook_manual_erasure:{webhook_id}. Legacy logical: WEBHOOK_MANUAL_ERASURE_INPUT__{id}__{webhook_id}.""" + part = _part("webhook_manual_erasure", webhook_id) + return _new_key(dsr_id, part), f"WEBHOOK_MANUAL_ERASURE_INPUT__{dsr_id}__{webhook_id}" + + @staticmethod + def data_use_map(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:data_use_map. Legacy logical: DATA_USE_MAP__{id}.""" + part = "data_use_map" + return _new_key(dsr_id, part), f"DATA_USE_MAP__{dsr_id}" + + @staticmethod + def email_info( + dsr_id: str, step: str, dataset: str, collection: str + ) -> Tuple[str, str]: + """New: dsr:{id}:email_info:{step}:{dataset}:{collection}. Legacy logical: EMAIL_INFORMATION__{id}__{step}__{dataset}__{collection}.""" + part = f"email_info:{step}:{dataset}:{collection}" + return _new_key(dsr_id, part), f"EMAIL_INFORMATION__{dsr_id}__{step}__{dataset}__{collection}" + + @staticmethod + def paused_location(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:paused_location. Legacy logical: PAUSED_LOCATION__{id}.""" + part = "paused_location" + return _new_key(dsr_id, part), f"PAUSED_LOCATION__{dsr_id}" + + @staticmethod + def failed_location(dsr_id: str) -> Tuple[str, str]: + """New: dsr:{id}:failed_location. Legacy logical: FAILED_LOCATION__{id}.""" + part = "failed_location" + return _new_key(dsr_id, part), f"FAILED_LOCATION__{dsr_id}" + + @staticmethod + def access_request(dsr_id: str, key: str) -> Tuple[str, str]: + """New: dsr:{id}:access_request:{key}. Legacy logical: {id}__{key} (key e.g. access_request__dataset:collection).""" + part = _part("access_request", key) + return _new_key(dsr_id, part), f"{dsr_id}__{key}" + + @staticmethod + def erasure_request(dsr_id: str, key: str) -> Tuple[str, str]: + """New: dsr:{id}:erasure_request:{key}. Legacy logical: {id}__erasure_request__{key}.""" + part = _part("erasure_request", key) + return _new_key(dsr_id, part), f"{dsr_id}__erasure_request__{key}" + + @staticmethod + def placeholder_results(dsr_id: str, key: str) -> Tuple[str, str]: + """New: dsr:{id}:placeholder_results:{key}. Legacy logical: PLACEHOLDER_RESULTS__{id}__{key}.""" + part = _part("placeholder_results", key) + return _new_key(dsr_id, part), f"PLACEHOLDER_RESULTS__{dsr_id}__{key}" + + # --- Index prefix (for get_all_keys / clear) --- + + @staticmethod + def index_prefix(dsr_id: str) -> str: + """Index set key prefix for this DSR: __idx:dsr:{id}.""" + return f"__idx:{DSR_KEY_PREFIX}{dsr_id}" diff --git a/tests/common/cache/conftest.py b/tests/common/cache/conftest.py new file mode 100644 index 00000000000..180e8c56ca8 --- /dev/null +++ b/tests/common/cache/conftest.py @@ -0,0 +1,75 @@ +""" +Conftest for common/cache tests. Overrides session-scoped fixtures so the +real FastAPI app, DB, and Celery worker are not started when running only these tests. 
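+
+With these overrides in place the cache tests run without external services;
+a typical invocation (illustrative) is:
+
+    pytest tests/common/cache -m unit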
+""" + +import pytest +from unittest.mock import MagicMock + + +@pytest.fixture(scope="session") +def test_client(): + """Minimal test client mock so app/DB are not started for cache-only test runs.""" + client = MagicMock() + response = MagicMock() + response.status_code = 200 + client.get = MagicMock(return_value=response) + client.post = MagicMock(return_value=response) + client.put = MagicMock(return_value=response) + client.patch = MagicMock(return_value=response) + client.delete = MagicMock(return_value=response) + yield client + + +@pytest.fixture(scope="session") +def api_client(): + """Minimal API client mock so app/DB are not started for cache-only test runs.""" + client = MagicMock() + response = MagicMock() + response.status_code = 200 + client.get = MagicMock(return_value=response) + client.post = MagicMock(return_value=response) + client.put = MagicMock(return_value=response) + client.patch = MagicMock(return_value=response) + client.delete = MagicMock(return_value=response) + yield client + + +@pytest.fixture(scope="session", autouse=True) +def app(): + """Mock app fixture so FastAPI doesn't start.""" + yield MagicMock() + + +@pytest.fixture(scope="session") +def config(): + """Mock config so we don't pull in real config.""" + from fides.config import get_config + config = get_config() + config.test_mode = True + yield config + + +@pytest.fixture(scope="session") +def db(api_client, config): + """Override db fixture to prevent database connection.""" + yield MagicMock() + + +@pytest.fixture(scope="session") +async def async_session(): + """Override async_session fixture to prevent database connection.""" + yield MagicMock() + + +@pytest.fixture(scope="function", autouse=True) +async def clear_db_tables(db, async_session): + """Override clear_db_tables to no-op for cache-only tests.""" + yield + # No cleanup needed for MockRedis tests + + +@pytest.fixture(autouse=True, scope="session") +def celery_use_virtual_worker(): + """No-op so we don't start a real Celery worker (and pull in DB) for cache tests.""" + yield None diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py new file mode 100644 index 00000000000..9bde6e5110d --- /dev/null +++ b/tests/common/cache/test_dsr_store.py @@ -0,0 +1,246 @@ +""" +Tests for DSRCacheStore using an in-memory RedisCacheManager (dict + set). +No real Redis required. 
+""" + +import fnmatch +from typing import Any, Dict, List, Optional, Set, Union + +import pytest + +from fides.common.cache.dsr_store import DSRCacheStore + +RedisValue = Union[bytes, float, int, str] + + +class InMemoryRedis: + """Minimal Redis-like interface: get, set, delete, keys (glob pattern).""" + + def __init__(self) -> None: + self._data: Dict[str, RedisValue] = {} + + def get(self, key: str) -> Optional[Union[str, bytes]]: + val = self._data.get(key) + if val is None: + return None + return val if isinstance(val, (str, bytes)) else str(val) + + def set( + self, + key: str, + value: RedisValue, + ex: Optional[int] = None, + ) -> Optional[bool]: + self._data[key] = value + return True + + def delete(self, *keys: str) -> int: + n = 0 + for key in keys: + if key in self._data: + del self._data[key] + n += 1 + return n + + def keys(self, pattern: str) -> List[str]: + """Glob-style: * matches any number of chars.""" + return [k for k in self._data if fnmatch.fnmatch(k, pattern)] + + def scan_iter( + self, match: str = "*", count: Optional[int] = None + ): + """SCAN-compatible iterator; yields keys matching pattern (count ignored in-memory).""" + return iter(self.keys(match)) + + +class InMemoryRedisCacheManager: + """ + In-memory implementation of the RedisCacheManager interface: a dict for + key -> value and a dict of index_prefix -> set of keys for set_with_index. + """ + + def __init__(self) -> None: + self._redis = InMemoryRedis() + self._index: Dict[str, Set[str]] = {} + + def add_key_to_index(self, index_prefix: str, key: str) -> None: + self._index.setdefault(index_prefix, set()).add(key) + + def remove_key_from_index(self, index_prefix: str, key: str) -> None: + s = self._index.get(index_prefix) + if s is not None: + s.discard(key) + + def get_keys_by_index(self, index_prefix: str) -> List[str]: + return list(self._index.get(index_prefix, set())) + + def delete_index(self, index_prefix: str) -> None: + self._index.pop(index_prefix, None) + + def set_with_index( + self, + key: str, + value: RedisValue, + index_prefix: str, + expire_seconds: Optional[int] = None, + ) -> Optional[bool]: + result = self._redis.set(key, value, ex=expire_seconds) + self.add_key_to_index(index_prefix, key) + return result + + def delete_key_and_remove_from_index( + self, + key: str, + index_prefix: str, + ) -> None: + self._redis.delete(key) + self.remove_key_from_index(index_prefix, key) + + @property + def redis(self) -> InMemoryRedis: + return self._redis + + +@pytest.fixture +def in_memory_manager() -> InMemoryRedisCacheManager: + return InMemoryRedisCacheManager() + + +@pytest.fixture +def dsr_store(in_memory_manager: InMemoryRedisCacheManager) -> DSRCacheStore: + return DSRCacheStore(in_memory_manager) + + +@pytest.mark.unit +class TestDSRCacheStoreWithInMemoryManager: + """DSRCacheStore behavior with an in-memory RedisCacheManager.""" + + def test_set_and_get(self, dsr_store: DSRCacheStore) -> None: + dsr_store.set("pr-1", "identity:email", "user@example.com") + assert dsr_store.get("pr-1", "identity:email") == "user@example.com" + + def test_get_missing_returns_none(self, dsr_store: DSRCacheStore) -> None: + assert dsr_store.get("pr-1", "identity:email") is None + + def test_set_with_index_registers_key_in_index( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.set("pr-1", "custom_field:foo", "bar") + keys = in_memory_manager.get_keys_by_index("dsr:pr-1") + assert "dsr:pr-1:custom_field:foo" in keys + assert len(keys) == 1 + + def 
test_get_all_keys_returns_indexed_keys( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.write_custom_field("pr-1", "f1", "v1") + dsr_store.write_identity("pr-1", "email", "e@x.com") + keys = dsr_store.get_all_keys("pr-1") + assert set(keys) == { + "dsr:pr-1:custom_field:f1", + "dsr:pr-1:identity:email", + } + + def test_clear_removes_all_keys_and_index( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.write_custom_field("pr-1", "f1", "v1") + dsr_store.write_identity("pr-1", "email", "e@x.com") + dsr_store.clear("pr-1") + assert dsr_store.get_all_keys("pr-1") == [] + assert dsr_store.get("pr-1", "custom_field:f1") is None + assert dsr_store.get("pr-1", "identity:email") is None + + def test_delete_removes_key_and_index_entry( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.set("pr-1", "identity:email", "e@x.com") + dsr_store.delete("pr-1", "identity:email") + assert dsr_store.get("pr-1", "identity:email") is None + assert "dsr:pr-1:identity:email" not in dsr_store.get_all_keys("pr-1") + + def test_get_with_legacy_reads_new_key_first( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + dsr_store.write_identity("pr-1", "email", "new@example.com") + # Legacy key not set; should still get from new key + assert dsr_store.get_identity("pr-1", "email") == "new@example.com" + + def test_get_with_legacy_migrates_from_legacy_key( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + # Simulate legacy data only (no new key) + in_memory_manager.redis.set("id-pr-1-identity-email", "legacy@example.com") + result = dsr_store.get_identity("pr-1", "email") + assert result == "legacy@example.com" + # After migrate: new key should exist and legacy should be gone + assert dsr_store.get("pr-1", "identity:email") == "legacy@example.com" + assert in_memory_manager.redis.get("id-pr-1-identity-email") is None + + def test_write_custom_field_and_get_custom_field( + self, dsr_store: DSRCacheStore + ) -> None: + dsr_store.write_custom_field("pr-1", "my_field", "my_value") + assert dsr_store.get_custom_field("pr-1", "my_field") == "my_value" + + def test_convenience_async_execution(self, dsr_store: DSRCacheStore) -> None: + dsr_store.write_async_execution("pr-1", "celery-task-id-xyz") + assert dsr_store.get_async_execution("pr-1") == "celery-task-id-xyz" + + def test_retry_count( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + """Mirrors cache.py get/increment/reset_privacy_request_retry_count.""" + assert dsr_store.get_retry_count("pr-1") is None + dsr_store.write_retry_count("pr-1", "3", expire_seconds=86400) + assert dsr_store.get_retry_count("pr-1") == "3" + dsr_store.delete("pr-1", "retry_count") + assert dsr_store.get_retry_count("pr-1") is None + # Legacy key migration + in_memory_manager.redis.set("id-pr-2-privacy-request-retry-count", "1") + assert dsr_store.get_retry_count("pr-2") == "1" + assert in_memory_manager.redis.get("id-pr-2-privacy-request-retry-count") is None + + def test_drp( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + """Mirrors privacy_request.py DRP body cache (get_drp_request_body_cache_key).""" + dsr_store.write_drp("pr-1", "address", "encrypted-body", expire_seconds=300) + assert dsr_store.get_drp("pr-1", "address") == "encrypted-body" + assert 
dsr_store.get_drp("pr-1", "email") is None + # Legacy key migration + in_memory_manager.redis.set("id-pr-2-drp-email", "legacy-drp") + assert dsr_store.get_drp("pr-2", "email") == "legacy-drp" + assert in_memory_manager.redis.get("id-pr-2-drp-email") is None + + def test_encryption( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + """Mirrors privacy_request.py / encryption_utils.py encryption key cache.""" + dsr_store.write_encryption("pr-1", "key", "enc-key-123", expire_seconds=3600) + assert dsr_store.get_encryption("pr-1", "key") == "enc-key-123" + assert dsr_store.get_encryption("pr-1", "other") is None + # Legacy key migration + in_memory_manager.redis.set("id-pr-2-encryption-key", "legacy-enc") + assert dsr_store.get_encryption("pr-2", "key") == "legacy-enc" + assert in_memory_manager.redis.get("id-pr-2-encryption-key") is None + + def test_masking_secret( + self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + ) -> None: + """Mirrors secrets_util.get_masking_secret cache read (and write path).""" + dsr_store.write_masking_secret( + "pr-1", "hash", "salt", "encoded-secret", expire_seconds=600 + ) + assert dsr_store.get_masking_secret("pr-1", "hash", "salt") == "encoded-secret" + assert dsr_store.get_masking_secret("pr-1", "hash", "other") is None + # Legacy key migration + in_memory_manager.redis.set( + "id-pr-2-masking-secret-hash-pepper", "legacy-masking" + ) + assert ( + dsr_store.get_masking_secret("pr-2", "hash", "pepper") == "legacy-masking" + ) + assert ( + in_memory_manager.redis.get("id-pr-2-masking-secret-hash-pepper") is None + ) diff --git a/tests/common/cache/test_dsr_store_key_mapping.py b/tests/common/cache/test_dsr_store_key_mapping.py new file mode 100644 index 00000000000..1c853a63a93 --- /dev/null +++ b/tests/common/cache/test_dsr_store_key_mapping.py @@ -0,0 +1,111 @@ +""" +Tests that KeyMapper maps legacy key patterns to the new dsr:{id}:{part} format correctly. +No Redis required; exercises fides.common.cache.key_mapping only. 
+""" + +import pytest + +from fides.common.cache.key_mapping import DSR_KEY_PREFIX, KeyMapper + + +@pytest.mark.unit +class TestKeyMapper: + """Ensure each field type produces the expected new_key and legacy_key.""" + + def test_identity(self) -> None: + new_key, legacy_key = KeyMapper.identity("pr-123", "email") + assert new_key == f"{DSR_KEY_PREFIX}pr-123:identity:email" + assert legacy_key == "id-pr-123-identity-email" + + def test_custom_field(self) -> None: + new_key, legacy_key = KeyMapper.custom_field("pr-456", "my_field") + assert new_key == f"{DSR_KEY_PREFIX}pr-456:custom_field:my_field" + assert legacy_key == "id-pr-456-custom-privacy-request-field-my_field" + + def test_drp(self) -> None: + new_key, legacy_key = KeyMapper.drp("pr-789", "address") + assert new_key == f"{DSR_KEY_PREFIX}pr-789:drp:address" + assert legacy_key == "id-pr-789-drp-address" + + def test_encryption(self) -> None: + new_key, legacy_key = KeyMapper.encryption("pr-abc", "key") + assert new_key == f"{DSR_KEY_PREFIX}pr-abc:encryption:key" + assert legacy_key == "id-pr-abc-encryption-key" + + def test_masking_secret(self) -> None: + new_key, legacy_key = KeyMapper.masking_secret( + "pr-def", "hash", "salt" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-def:masking_secret:hash:salt" + assert legacy_key == "id-pr-def-masking-secret-hash-salt" + + def test_async_execution(self) -> None: + new_key, legacy_key = KeyMapper.async_execution("pr-ghi") + assert new_key == f"{DSR_KEY_PREFIX}pr-ghi:async_execution" + assert legacy_key == "id-pr-ghi-async-execution" + + def test_retry_count(self) -> None: + new_key, legacy_key = KeyMapper.retry_count("pr-jkl") + assert new_key == f"{DSR_KEY_PREFIX}pr-jkl:retry_count" + assert legacy_key == "id-pr-jkl-privacy-request-retry-count" + + def test_webhook_manual_access(self) -> None: + new_key, legacy_key = KeyMapper.webhook_manual_access( + "pr-mno", "webhook-uuid" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-mno:webhook_manual_access:webhook-uuid" + assert legacy_key == "WEBHOOK_MANUAL_ACCESS_INPUT__pr-mno__webhook-uuid" + + def test_webhook_manual_erasure(self) -> None: + new_key, legacy_key = KeyMapper.webhook_manual_erasure( + "pr-pqr", "webhook-uuid-2" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-pqr:webhook_manual_erasure:webhook-uuid-2" + assert legacy_key == "WEBHOOK_MANUAL_ERASURE_INPUT__pr-pqr__webhook-uuid-2" + + def test_data_use_map(self) -> None: + new_key, legacy_key = KeyMapper.data_use_map("pr-stu") + assert new_key == f"{DSR_KEY_PREFIX}pr-stu:data_use_map" + assert legacy_key == "DATA_USE_MAP__pr-stu" + + def test_email_info(self) -> None: + new_key, legacy_key = KeyMapper.email_info( + "pr-vwx", "access", "postgres_example", "address" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-vwx:email_info:access:postgres_example:address" + assert legacy_key == "EMAIL_INFORMATION__pr-vwx__access__postgres_example__address" + + def test_paused_location(self) -> None: + new_key, legacy_key = KeyMapper.paused_location("pr-yz1") + assert new_key == f"{DSR_KEY_PREFIX}pr-yz1:paused_location" + assert legacy_key == "PAUSED_LOCATION__pr-yz1" + + def test_failed_location(self) -> None: + new_key, legacy_key = KeyMapper.failed_location("pr-yz2") + assert new_key == f"{DSR_KEY_PREFIX}pr-yz2:failed_location" + assert legacy_key == "FAILED_LOCATION__pr-yz2" + + def test_access_request(self) -> None: + new_key, legacy_key = KeyMapper.access_request( + "pr-yz3", "access_request__postgres_example:address" + ) + assert new_key == 
f"{DSR_KEY_PREFIX}pr-yz3:access_request:access_request__postgres_example:address" + assert legacy_key == "pr-yz3__access_request__postgres_example:address" + + def test_erasure_request(self) -> None: + new_key, legacy_key = KeyMapper.erasure_request( + "pr-yz4", "postgres_example:address" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-yz4:erasure_request:postgres_example:address" + assert legacy_key == "pr-yz4__erasure_request__postgres_example:address" + + def test_placeholder_results(self) -> None: + new_key, legacy_key = KeyMapper.placeholder_results( + "pr-yz5", "postgres_example:customer" + ) + assert new_key == f"{DSR_KEY_PREFIX}pr-yz5:placeholder_results:postgres_example:customer" + assert legacy_key == "PLACEHOLDER_RESULTS__pr-yz5__postgres_example:customer" + + def test_index_prefix(self) -> None: + prefix = KeyMapper.index_prefix("pr-123") + assert prefix == "__idx:dsr:pr-123" diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py new file mode 100644 index 00000000000..b389d8ae051 --- /dev/null +++ b/tests/common/cache/test_dsr_store_migration.py @@ -0,0 +1,209 @@ +""" +Tests for DSRCacheStore migration behavior with legacy keys. + +Verifies existing cached data (legacy format) is correctly read, migrated, and cleared. +""" + +import fnmatch +import uuid +from typing import Dict, List, Optional, Set, Union + +import pytest + +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager + +RedisValue = Union[bytes, float, int, str] + + +class MockRedis: + """Mock Redis with minimal interface for DSRCacheStore.""" + + def __init__(self) -> None: + self._data: Dict[str, RedisValue] = {} + self._sets: Dict[str, Set[Union[str, bytes]]] = {} + + def get(self, key: str) -> Optional[Union[str, bytes]]: + val = self._data.get(key) + return val if isinstance(val, (str, bytes)) else str(val) if val else None + + def set(self, key: str, value: RedisValue, ex: Optional[int] = None) -> bool: + self._data[key] = value + return True + + def delete(self, *keys: str) -> int: + deleted = sum(1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None)) + return deleted + + def keys(self, pattern: str) -> List[str]: + return [k for k in self._data if fnmatch.fnmatch(k, pattern)] + + def scan_iter(self, match: str = "*", count: Optional[int] = None): + return iter(self.keys(match)) + + def sadd(self, key: str, *members: Union[str, bytes]) -> int: + s = self._sets.setdefault(key, set()) + before = len(s) + s.update(members) + return len(s) - before + + def srem(self, key: str, *members: Union[str, bytes]) -> int: + if key not in self._sets: + return 0 + before = len(self._sets[key]) + self._sets[key].difference_update(members) + return before - len(self._sets[key]) + + def smembers(self, key: str) -> Set[Union[str, bytes]]: + return self._sets.get(key, set()).copy() + + +# Test data factories +def make_dsr_id() -> str: + """Generate unique DSR ID.""" + return f"test-pr-{uuid.uuid4()}" + + +def make_legacy_key(dsr_id: str, field_type: str, field_name: str = "") -> str: + """Build legacy key format.""" + if field_name: + return f"id-{dsr_id}-{field_type}-{field_name}" + return f"id-{dsr_id}-{field_type}" + + +def make_new_key(dsr_id: str, part: str) -> str: + """Build new DSR key format.""" + return f"dsr:{dsr_id}:{part}" + + +@pytest.fixture +def mock_redis(): + return MockRedis() + + +@pytest.fixture +def dsr_store(mock_redis): + return DSRCacheStore(RedisCacheManager(mock_redis)) + + 
+@pytest.fixture +def dsr_id(): + return make_dsr_id() + + +@pytest.mark.unit +class TestLegacyKeyMigration: + """Test legacy key formats are readable and migrated correctly.""" + + @pytest.mark.parametrize("field_type,getter,field_key,value", [ + ("identity", "get_identity", "email", "user@example.com"), + ("custom-privacy-request-field", "get_custom_field", "dept", "Engineering"), + ("encryption", "get_encryption", "key", "encryption-key-123"), + ("async-execution", "get_async_execution", "", "celery-task-123"), + ("privacy-request-retry-count", "get_retry_count", "", "3"), + ("drp", "get_drp", "email", "drp@example.com"), + ("masking-secret-hash", "get_masking_secret", "salt", "secret-123"), + ]) + def test_legacy_keys_readable( + self, mock_redis, dsr_store, dsr_id, field_type, getter, field_key, value + ): + """Legacy keys are readable via store convenience methods.""" + legacy_key = make_legacy_key(dsr_id, field_type, field_key) + mock_redis.set(legacy_key, value) + + # Call appropriate getter + if getter == "get_masking_secret": + result = dsr_store.get_masking_secret(dsr_id, "hash", field_key) + elif field_key: + result = getattr(dsr_store, getter)(dsr_id, field_key) + else: + result = getattr(dsr_store, getter)(dsr_id) + + assert result == value + + def test_legacy_key_migrated_on_read(self, mock_redis, dsr_store, dsr_id): + """Legacy key is migrated to new format on first read.""" + mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "migrate@test.com") + + email = dsr_store.get_identity(dsr_id, "email") + assert email == "migrate@test.com" + + # New key exists, legacy deleted, index updated + assert mock_redis.get(make_new_key(dsr_id, "identity:email")) == "migrate@test.com" + assert mock_redis.get(make_legacy_key(dsr_id, "identity", "email")) is None + assert make_new_key(dsr_id, "identity:email") in mock_redis.smembers(f"__idx:dsr:{dsr_id}") + + def test_new_writes_create_indexed_keys_only(self, mock_redis, dsr_store, dsr_id): + """New writes create new-format keys and index them; no legacy keys written.""" + dsr_store.write_identity(dsr_id, "email", "new@example.com") + dsr_store.write_custom_field(dsr_id, "department", "Sales") + + assert mock_redis.get(make_new_key(dsr_id, "identity:email")) == "new@example.com" + assert mock_redis.get(make_new_key(dsr_id, "custom_field:department")) == "Sales" + assert mock_redis.get(make_legacy_key(dsr_id, "identity", "email")) is None + assert mock_redis.get(make_legacy_key(dsr_id, "custom-privacy-request-field", "department")) is None + + def test_clear_removes_mixed_keys(self, mock_redis, dsr_store, dsr_id): + """clear() removes both legacy and new keys using SCAN.""" + mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "legacy@test.com") + mock_redis.set(make_legacy_key(dsr_id, "encryption", "key"), "legacy-key") + dsr_store.write_identity(dsr_id, "phone_number", "+1234567890") + dsr_store.write_custom_field(dsr_id, "department", "Engineering") + + dsr_store.clear(dsr_id) + + assert len(mock_redis.keys(f"*{dsr_id}*")) == 0 + + def test_index_backfill(self, mock_redis, dsr_id): + """Legacy keys are backfilled into index when enabled.""" + mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "test@example.com") + mock_redis.set(make_legacy_key(dsr_id, "identity", "phone_number"), "+1234567890") + + store = DSRCacheStore(RedisCacheManager(mock_redis), backfill_index_on_legacy_read=True) + keys = store.get_all_keys(dsr_id) + + assert len(keys) == 2 + assert len(mock_redis.smembers(f"__idx:dsr:{dsr_id}")) == 2 + 
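(Editorial note: as a companion to the backfill test above, a minimal sketch of the guarantee clear() provides in a mixed-key state. Not part of the patch; it assumes the SCAN-based clear() implementation shown earlier in the series, which matches *{dsr_id}* and then drops the index set.)

    # Sketch only: clear() removes legacy keys, new keys, and the index.
    redis = MockRedis()
    store = DSRCacheStore(RedisCacheManager(redis))
    dsr_id = make_dsr_id()

    # One legacy key written out-of-band, one new-format key via the store.
    redis.set(make_legacy_key(dsr_id, "encryption", "key"), "legacy-enc")
    store.write_identity(dsr_id, "email", "user@example.com")

    store.clear(dsr_id)

    assert redis.keys(f"*{dsr_id}*") == []
    assert redis.smembers(f"__idx:dsr:{dsr_id}") == set()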
+ +@pytest.mark.unit +class TestMultipleRequestIsolation: + """Test DSR IDs don't interfere with each other.""" + + def test_mixed_dsr_states(self, mock_redis): + """Operations on one DSR don't affect others (legacy, new, mixed).""" + dsr1, dsr2, dsr3 = make_dsr_id(), make_dsr_id(), make_dsr_id() + store = DSRCacheStore(RedisCacheManager(mock_redis)) + + # DSR1: legacy, DSR2: new, DSR3: mixed + mock_redis.set(make_legacy_key(dsr1, "identity", "email"), "dsr1@test.com") + store.write_identity(dsr2, "email", "dsr2@test.com") + mock_redis.set(make_legacy_key(dsr3, "identity", "email"), "dsr3@test.com") + store.write_identity(dsr3, "phone_number", "+1234567890") + + # Verify all readable + assert store.get_identity(dsr1, "email") == "dsr1@test.com" + assert store.get_identity(dsr2, "email") == "dsr2@test.com" + assert store.get_identity(dsr3, "email") == "dsr3@test.com" + assert store.get_identity(dsr3, "phone_number") == "+1234567890" + + # Clear DSR2 doesn't affect others + store.clear(dsr2) + assert store.get_identity(dsr1, "email") == "dsr1@test.com" + assert store.get_identity(dsr3, "email") == "dsr3@test.com" + assert store.get_identity(dsr2, "email") is None + assert store.get_all_keys(dsr2) == [] + + def test_clear_isolation(self, mock_redis): + """Clearing one DSR doesn't delete another's keys.""" + dsr1, dsr2 = make_dsr_id(), make_dsr_id() + store = DSRCacheStore(RedisCacheManager(mock_redis)) + + store.write_identity(dsr1, "email", "dsr1@test.com") + store.write_identity(dsr2, "email", "dsr2@test.com") + + store.clear(dsr1) + + assert mock_redis.get(make_new_key(dsr1, "identity:email")) is None + assert mock_redis.get(make_new_key(dsr2, "identity:email")) == "dsr2@test.com" + From 0181bdafb68482a1429266662d1f6ae64a05caa8 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:40:51 -0800 Subject: [PATCH 25/50] Add DSR store to exports --- src/fides/common/cache/__init__.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/fides/common/cache/__init__.py b/src/fides/common/cache/__init__.py index 1dc9c9e021d..6b24f4828f6 100644 --- a/src/fides/common/cache/__init__.py +++ b/src/fides/common/cache/__init__.py @@ -2,14 +2,26 @@ Shared Redis cache utilities and the RedisCacheManager. RedisCacheManager provides modern Redis patterns such as key indexes. + +DSRCacheStore wraps that with DSR-specific key naming (dsr:{id}:{part}) +and index-backed list/clear with lazy migration for legacy keys. 
+ +Usage: + with get_dsr_cache_store() as store: + store.write_identity(privacy_request_id, "email", "user@example.com") + store.clear(privacy_request_id) """ +from contextlib import contextmanager +from typing import Iterator + +from fides.common.cache.dsr_store import ( + DSR_KEY_PREFIX, + DSRCacheStore, +) from fides.common.cache.manager import ( INDEX_KEY_PREFIX, RedisCacheManager, ) -__all__ = [ - "INDEX_KEY_PREFIX", - "RedisCacheManager", -] +__all__ = ["INDEX_KEY_PREFIX", "RedisCacheManager", "DSR_KEY_PREFIX", "DSRCacheStore"] From 90ec2c14d1c9381095ab526cf7f5502d518359bb Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:44:42 -0800 Subject: [PATCH 26/50] Formatting fixes --- src/fides/common/cache/dsr_store.py | 8 +-- src/fides/common/cache/key_mapping.py | 24 +++++--- tests/common/cache/conftest.py | 4 +- tests/common/cache/test_dsr_store.py | 12 ++-- .../cache/test_dsr_store_key_mapping.py | 36 ++++++++---- .../common/cache/test_dsr_store_migration.py | 57 +++++++++++++------ 6 files changed, 91 insertions(+), 50 deletions(-) diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index d6819fc22c7..1151b92b30e 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -282,18 +282,18 @@ def get_all_keys(self, dsr_id: str) -> List[str]: def clear(self, dsr_id: str) -> None: """ Delete all cache keys for this DSR and remove the index. - + Always uses SCAN to find all keys (both indexed and legacy) to ensure complete cleanup in mixed-key scenarios. """ # Use SCAN to find ALL keys (indexed + legacy) all_keys_via_scan = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) - + index_prefix = _dsr_index_prefix(dsr_id) - + # Delete all found keys in batch if all_keys_via_scan: self._redis.delete(*all_keys_via_scan) - + # Delete the index itself self._manager.delete_index(index_prefix) diff --git a/src/fides/common/cache/key_mapping.py b/src/fides/common/cache/key_mapping.py index 9944c4e3493..d312417afd7 100644 --- a/src/fides/common/cache/key_mapping.py +++ b/src/fides/common/cache/key_mapping.py @@ -40,7 +40,9 @@ def identity(dsr_id: str, attr: str) -> Tuple[str, str]: def custom_field(dsr_id: str, field_key: str) -> Tuple[str, str]: """New: dsr:{id}:custom_field:{key}. Legacy: id-{id}-custom-privacy-request-field-{key}.""" part = _part("custom_field", field_key) - return _new_key(dsr_id, part), f"id-{dsr_id}-custom-privacy-request-field-{field_key}" + return _new_key( + dsr_id, part + ), f"id-{dsr_id}-custom-privacy-request-field-{field_key}" @staticmethod def drp(dsr_id: str, attr: str) -> Tuple[str, str]: @@ -55,12 +57,12 @@ def encryption(dsr_id: str, attr: str) -> Tuple[str, str]: return _new_key(dsr_id, part), f"id-{dsr_id}-encryption-{attr}" @staticmethod - def masking_secret( - dsr_id: str, strategy: str, secret_type: str - ) -> Tuple[str, str]: + def masking_secret(dsr_id: str, strategy: str, secret_type: str) -> Tuple[str, str]: """New: dsr:{id}:masking_secret:{strategy}:{secret_type}. 
Legacy: id-{id}-masking-secret-{strategy}-{secret_type}.""" part = f"masking_secret:{strategy}:{secret_type}" - return _new_key(dsr_id, part), f"id-{dsr_id}-masking-secret-{strategy}-{secret_type}" + return _new_key( + dsr_id, part + ), f"id-{dsr_id}-masking-secret-{strategy}-{secret_type}" @staticmethod def async_execution(dsr_id: str) -> Tuple[str, str]: @@ -80,13 +82,17 @@ def retry_count(dsr_id: str) -> Tuple[str, str]: def webhook_manual_access(dsr_id: str, webhook_id: str) -> Tuple[str, str]: """New: dsr:{id}:webhook_manual_access:{webhook_id}. Legacy logical: WEBHOOK_MANUAL_ACCESS_INPUT__{id}__{webhook_id}.""" part = _part("webhook_manual_access", webhook_id) - return _new_key(dsr_id, part), f"WEBHOOK_MANUAL_ACCESS_INPUT__{dsr_id}__{webhook_id}" + return _new_key( + dsr_id, part + ), f"WEBHOOK_MANUAL_ACCESS_INPUT__{dsr_id}__{webhook_id}" @staticmethod def webhook_manual_erasure(dsr_id: str, webhook_id: str) -> Tuple[str, str]: """New: dsr:{id}:webhook_manual_erasure:{webhook_id}. Legacy logical: WEBHOOK_MANUAL_ERASURE_INPUT__{id}__{webhook_id}.""" part = _part("webhook_manual_erasure", webhook_id) - return _new_key(dsr_id, part), f"WEBHOOK_MANUAL_ERASURE_INPUT__{dsr_id}__{webhook_id}" + return _new_key( + dsr_id, part + ), f"WEBHOOK_MANUAL_ERASURE_INPUT__{dsr_id}__{webhook_id}" @staticmethod def data_use_map(dsr_id: str) -> Tuple[str, str]: @@ -100,7 +106,9 @@ def email_info( ) -> Tuple[str, str]: """New: dsr:{id}:email_info:{step}:{dataset}:{collection}. Legacy logical: EMAIL_INFORMATION__{id}__{step}__{dataset}__{collection}.""" part = f"email_info:{step}:{dataset}:{collection}" - return _new_key(dsr_id, part), f"EMAIL_INFORMATION__{dsr_id}__{step}__{dataset}__{collection}" + return _new_key( + dsr_id, part + ), f"EMAIL_INFORMATION__{dsr_id}__{step}__{dataset}__{collection}" @staticmethod def paused_location(dsr_id: str) -> Tuple[str, str]: diff --git a/tests/common/cache/conftest.py b/tests/common/cache/conftest.py index 180e8c56ca8..3c9a57a610d 100644 --- a/tests/common/cache/conftest.py +++ b/tests/common/cache/conftest.py @@ -3,9 +3,10 @@ real FastAPI app, DB, and Celery worker are not started when running only these tests. 
""" -import pytest from unittest.mock import MagicMock +import pytest + @pytest.fixture(scope="session") def test_client(): @@ -45,6 +46,7 @@ def app(): def config(): """Mock config so we don't pull in real config.""" from fides.config import get_config + config = get_config() config.test_mode = True yield config diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py index 9bde6e5110d..86dabb3e1df 100644 --- a/tests/common/cache/test_dsr_store.py +++ b/tests/common/cache/test_dsr_store.py @@ -46,9 +46,7 @@ def keys(self, pattern: str) -> List[str]: """Glob-style: * matches any number of chars.""" return [k for k in self._data if fnmatch.fnmatch(k, pattern)] - def scan_iter( - self, match: str = "*", count: Optional[int] = None - ): + def scan_iter(self, match: str = "*", count: Optional[int] = None): """SCAN-compatible iterator; yields keys matching pattern (count ignored in-memory).""" return iter(self.keys(match)) @@ -199,7 +197,9 @@ def test_retry_count( # Legacy key migration in_memory_manager.redis.set("id-pr-2-privacy-request-retry-count", "1") assert dsr_store.get_retry_count("pr-2") == "1" - assert in_memory_manager.redis.get("id-pr-2-privacy-request-retry-count") is None + assert ( + in_memory_manager.redis.get("id-pr-2-privacy-request-retry-count") is None + ) def test_drp( self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager @@ -241,6 +241,4 @@ def test_masking_secret( assert ( dsr_store.get_masking_secret("pr-2", "hash", "pepper") == "legacy-masking" ) - assert ( - in_memory_manager.redis.get("id-pr-2-masking-secret-hash-pepper") is None - ) + assert in_memory_manager.redis.get("id-pr-2-masking-secret-hash-pepper") is None diff --git a/tests/common/cache/test_dsr_store_key_mapping.py b/tests/common/cache/test_dsr_store_key_mapping.py index 1c853a63a93..1ba518719dd 100644 --- a/tests/common/cache/test_dsr_store_key_mapping.py +++ b/tests/common/cache/test_dsr_store_key_mapping.py @@ -33,9 +33,7 @@ def test_encryption(self) -> None: assert legacy_key == "id-pr-abc-encryption-key" def test_masking_secret(self) -> None: - new_key, legacy_key = KeyMapper.masking_secret( - "pr-def", "hash", "salt" - ) + new_key, legacy_key = KeyMapper.masking_secret("pr-def", "hash", "salt") assert new_key == f"{DSR_KEY_PREFIX}pr-def:masking_secret:hash:salt" assert legacy_key == "id-pr-def-masking-secret-hash-salt" @@ -50,9 +48,7 @@ def test_retry_count(self) -> None: assert legacy_key == "id-pr-jkl-privacy-request-retry-count" def test_webhook_manual_access(self) -> None: - new_key, legacy_key = KeyMapper.webhook_manual_access( - "pr-mno", "webhook-uuid" - ) + new_key, legacy_key = KeyMapper.webhook_manual_access("pr-mno", "webhook-uuid") assert new_key == f"{DSR_KEY_PREFIX}pr-mno:webhook_manual_access:webhook-uuid" assert legacy_key == "WEBHOOK_MANUAL_ACCESS_INPUT__pr-mno__webhook-uuid" @@ -60,7 +56,9 @@ def test_webhook_manual_erasure(self) -> None: new_key, legacy_key = KeyMapper.webhook_manual_erasure( "pr-pqr", "webhook-uuid-2" ) - assert new_key == f"{DSR_KEY_PREFIX}pr-pqr:webhook_manual_erasure:webhook-uuid-2" + assert ( + new_key == f"{DSR_KEY_PREFIX}pr-pqr:webhook_manual_erasure:webhook-uuid-2" + ) assert legacy_key == "WEBHOOK_MANUAL_ERASURE_INPUT__pr-pqr__webhook-uuid-2" def test_data_use_map(self) -> None: @@ -72,8 +70,13 @@ def test_email_info(self) -> None: new_key, legacy_key = KeyMapper.email_info( "pr-vwx", "access", "postgres_example", "address" ) - assert new_key == 
f"{DSR_KEY_PREFIX}pr-vwx:email_info:access:postgres_example:address" - assert legacy_key == "EMAIL_INFORMATION__pr-vwx__access__postgres_example__address" + assert ( + new_key + == f"{DSR_KEY_PREFIX}pr-vwx:email_info:access:postgres_example:address" + ) + assert ( + legacy_key == "EMAIL_INFORMATION__pr-vwx__access__postgres_example__address" + ) def test_paused_location(self) -> None: new_key, legacy_key = KeyMapper.paused_location("pr-yz1") @@ -89,21 +92,30 @@ def test_access_request(self) -> None: new_key, legacy_key = KeyMapper.access_request( "pr-yz3", "access_request__postgres_example:address" ) - assert new_key == f"{DSR_KEY_PREFIX}pr-yz3:access_request:access_request__postgres_example:address" + assert ( + new_key + == f"{DSR_KEY_PREFIX}pr-yz3:access_request:access_request__postgres_example:address" + ) assert legacy_key == "pr-yz3__access_request__postgres_example:address" def test_erasure_request(self) -> None: new_key, legacy_key = KeyMapper.erasure_request( "pr-yz4", "postgres_example:address" ) - assert new_key == f"{DSR_KEY_PREFIX}pr-yz4:erasure_request:postgres_example:address" + assert ( + new_key + == f"{DSR_KEY_PREFIX}pr-yz4:erasure_request:postgres_example:address" + ) assert legacy_key == "pr-yz4__erasure_request__postgres_example:address" def test_placeholder_results(self) -> None: new_key, legacy_key = KeyMapper.placeholder_results( "pr-yz5", "postgres_example:customer" ) - assert new_key == f"{DSR_KEY_PREFIX}pr-yz5:placeholder_results:postgres_example:customer" + assert ( + new_key + == f"{DSR_KEY_PREFIX}pr-yz5:placeholder_results:postgres_example:customer" + ) assert legacy_key == "PLACEHOLDER_RESULTS__pr-yz5__postgres_example:customer" def test_index_prefix(self) -> None: diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py index b389d8ae051..c5f41e297bc 100644 --- a/tests/common/cache/test_dsr_store_migration.py +++ b/tests/common/cache/test_dsr_store_migration.py @@ -32,7 +32,9 @@ def set(self, key: str, value: RedisValue, ex: Optional[int] = None) -> bool: return True def delete(self, *keys: str) -> int: - deleted = sum(1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None)) + deleted = sum( + 1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None) + ) return deleted def keys(self, pattern: str) -> List[str]: @@ -95,15 +97,18 @@ def dsr_id(): class TestLegacyKeyMigration: """Test legacy key formats are readable and migrated correctly.""" - @pytest.mark.parametrize("field_type,getter,field_key,value", [ - ("identity", "get_identity", "email", "user@example.com"), - ("custom-privacy-request-field", "get_custom_field", "dept", "Engineering"), - ("encryption", "get_encryption", "key", "encryption-key-123"), - ("async-execution", "get_async_execution", "", "celery-task-123"), - ("privacy-request-retry-count", "get_retry_count", "", "3"), - ("drp", "get_drp", "email", "drp@example.com"), - ("masking-secret-hash", "get_masking_secret", "salt", "secret-123"), - ]) + @pytest.mark.parametrize( + "field_type,getter,field_key,value", + [ + ("identity", "get_identity", "email", "user@example.com"), + ("custom-privacy-request-field", "get_custom_field", "dept", "Engineering"), + ("encryption", "get_encryption", "key", "encryption-key-123"), + ("async-execution", "get_async_execution", "", "celery-task-123"), + ("privacy-request-retry-count", "get_retry_count", "", "3"), + ("drp", "get_drp", "email", "drp@example.com"), + ("masking-secret-hash", "get_masking_secret", "salt", "secret-123"), + ], 
+ ) def test_legacy_keys_readable( self, mock_redis, dsr_store, dsr_id, field_type, getter, field_key, value ): @@ -129,19 +134,32 @@ def test_legacy_key_migrated_on_read(self, mock_redis, dsr_store, dsr_id): assert email == "migrate@test.com" # New key exists, legacy deleted, index updated - assert mock_redis.get(make_new_key(dsr_id, "identity:email")) == "migrate@test.com" + assert ( + mock_redis.get(make_new_key(dsr_id, "identity:email")) == "migrate@test.com" + ) assert mock_redis.get(make_legacy_key(dsr_id, "identity", "email")) is None - assert make_new_key(dsr_id, "identity:email") in mock_redis.smembers(f"__idx:dsr:{dsr_id}") + assert make_new_key(dsr_id, "identity:email") in mock_redis.smembers( + f"__idx:dsr:{dsr_id}" + ) def test_new_writes_create_indexed_keys_only(self, mock_redis, dsr_store, dsr_id): """New writes create new-format keys and index them; no legacy keys written.""" dsr_store.write_identity(dsr_id, "email", "new@example.com") dsr_store.write_custom_field(dsr_id, "department", "Sales") - assert mock_redis.get(make_new_key(dsr_id, "identity:email")) == "new@example.com" - assert mock_redis.get(make_new_key(dsr_id, "custom_field:department")) == "Sales" + assert ( + mock_redis.get(make_new_key(dsr_id, "identity:email")) == "new@example.com" + ) + assert ( + mock_redis.get(make_new_key(dsr_id, "custom_field:department")) == "Sales" + ) assert mock_redis.get(make_legacy_key(dsr_id, "identity", "email")) is None - assert mock_redis.get(make_legacy_key(dsr_id, "custom-privacy-request-field", "department")) is None + assert ( + mock_redis.get( + make_legacy_key(dsr_id, "custom-privacy-request-field", "department") + ) + is None + ) def test_clear_removes_mixed_keys(self, mock_redis, dsr_store, dsr_id): """clear() removes both legacy and new keys using SCAN.""" @@ -157,9 +175,13 @@ def test_clear_removes_mixed_keys(self, mock_redis, dsr_store, dsr_id): def test_index_backfill(self, mock_redis, dsr_id): """Legacy keys are backfilled into index when enabled.""" mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "test@example.com") - mock_redis.set(make_legacy_key(dsr_id, "identity", "phone_number"), "+1234567890") + mock_redis.set( + make_legacy_key(dsr_id, "identity", "phone_number"), "+1234567890" + ) - store = DSRCacheStore(RedisCacheManager(mock_redis), backfill_index_on_legacy_read=True) + store = DSRCacheStore( + RedisCacheManager(mock_redis), backfill_index_on_legacy_read=True + ) keys = store.get_all_keys(dsr_id) assert len(keys) == 2 @@ -206,4 +228,3 @@ def test_clear_isolation(self, mock_redis): assert mock_redis.get(make_new_key(dsr1, "identity:email")) is None assert mock_redis.get(make_new_key(dsr2, "identity:email")) == "dsr2@test.com" - From c0536fb3633a2164c635bbfecaed970d3863372d Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 24 Feb 2026 08:57:25 -0800 Subject: [PATCH 27/50] Dedupe import and add a TODO about moving it --- src/fides/common/cache/dsr_store.py | 5 +---- src/fides/common/cache/key_mapping.py | 4 ++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index 1151b92b30e..e4a307bd88a 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -18,12 +18,9 @@ from redis import Redis -from fides.common.cache.key_mapping import KeyMapper +from fides.common.cache.key_mapping import KeyMapper, DSR_KEY_PREFIX from fides.common.cache.manager import RedisCacheManager, RedisValue -# Key format: dsr:{dsr_id}:{part} 
(re-export for callers; KeyMapper builds these)
-DSR_KEY_PREFIX = "dsr:"
-
 
 def _dsr_key(dsr_id: str, part: str) -> str:
     """Build the Redis key for a DSR cache part."""
diff --git a/src/fides/common/cache/key_mapping.py b/src/fides/common/cache/key_mapping.py
index d312417afd7..3284f5b0e43 100644
--- a/src/fides/common/cache/key_mapping.py
+++ b/src/fides/common/cache/key_mapping.py
@@ -9,6 +9,10 @@
 from typing import Tuple
 
+# TODO: Move to dsr_store.py when deprecating the legacy formats.
+# Once we no longer need to do migrations we can get rid of this file;
+# the prefix should then move to dsr_store.py (it is not there today to
+# avoid a circular dependency, since the store depends on this module)
 DSR_KEY_PREFIX = "dsr:"
 

From 864e94e5f254edcf6eea1138ab30230f2f11d2a8 Mon Sep 17 00:00:00 2001
From: John Ewart
Date: Tue, 10 Feb 2026 21:04:20 -0800
Subject: [PATCH 28/50] Part 2 of ENG-740

This uses the `clear` method on the DSR cache store, which under the hood
uses `scan` (when needed; it first tries the set-based key index if that
particular ID is being tracked). Adds some tests and removes the dependency
on get_all_cache_keys_for_privacy_request.
---
 .../models/privacy_request/privacy_request.py |  11 +-
 .../cache/test_dsr_store_clear_integration.py | 165 ++++++++++++++++++
 2 files changed, 171 insertions(+), 5 deletions(-)
 create mode 100644 tests/common/cache/test_dsr_store_clear_integration.py

diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py
index dc0e9aae17f..022fb66aac8 100644
--- a/src/fides/api/models/privacy_request/privacy_request.py
+++ b/src/fides/api/models/privacy_request/privacy_request.py
@@ -106,7 +106,6 @@
 from fides.api.tasks import celery_app
 from fides.api.util.cache import (
     FidesopsRedis,
-    get_all_cache_keys_for_privacy_request,
     get_async_task_tracking_cache_key,
     get_cache,
     get_custom_privacy_request_field_cache_key,
     get_drp_request_body_cache_key,
@@ -114,7 +113,11 @@
     get_encryption_cache_key,
     get_identity_cache_key,
 )
+
 from fides.api.util.collection_util import Row
+from fides.common.cache import get_dsr_cache_store
+from fides.api.util.collection_util import Row, extract_key_for_address
+
 from fides.api.util.constants import API_DATE_FORMAT
 from fides.api.util.custom_json_encoder import CustomJSONEncoder
 from fides.api.util.decrypted_identity_automaton import DecryptedIdentityAutomatonMixin
@@ -470,10 +473,8 @@ def clear_cached_values(self) -> None:
         """
         Clears all cached values associated with this privacy request from Redis.
         """
         logger.info(f"Clearing cached values for privacy request {self.id}")
-        cache: FidesopsRedis = get_cache()
-        all_keys = get_all_cache_keys_for_privacy_request(privacy_request_id=self.id)
-        for key in all_keys:
-            cache.delete(key)
+        with get_dsr_cache_store() as store:
+            store.clear(self.id)
 
     def delete(self, db: Session) -> None:
         """
diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py
new file mode 100644
index 00000000000..95fc5449bfe
--- /dev/null
+++ b/tests/common/cache/test_dsr_store_clear_integration.py
@@ -0,0 +1,165 @@
+"""
+Tests for privacy_request.clear_cached_values() integration with DSRCacheStore.
+
+Verifies that clearing uses the store and handles both legacy and new cache keys.
+""" + +import uuid +from unittest.mock import MagicMock, patch + +import pytest + +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager + + +class MockRedis: + """Minimal mock Redis for testing clear behavior.""" + + def __init__(self): + self._data = {} + self._sets = {} + + def set(self, key, value, ex=None): + self._data[key] = value + return True + + def get(self, key): + return self._data.get(key) + + def delete(self, *keys): + return sum(1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None)) + + def keys(self, pattern): + import fnmatch + return [k for k in self._data if fnmatch.fnmatch(k, pattern)] + + def scan_iter(self, match="*", count=None): + return iter(self.keys(match)) + + def sadd(self, key, *members): + s = self._sets.setdefault(key, set()) + before = len(s) + s.update(members) + return len(s) - before + + def srem(self, key, *members): + if key not in self._sets: + return 0 + before = len(self._sets[key]) + self._sets[key].difference_update(members) + return before - len(self._sets[key]) + + def smembers(self, key): + return self._sets.get(key, set()).copy() + + +@pytest.mark.unit +class TestPrivacyRequestClearCachedValues: + """Test clear_cached_values() with DSR store.""" + + def test_clear_removes_legacy_keys(self): + """clear_cached_values removes legacy cache keys.""" + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + + # Simulate legacy cached data + mock_redis.set(f"id-{pr_id}-identity-email", "test@example.com") + mock_redis.set(f"id-{pr_id}-identity-phone_number", "+1234567890") + mock_redis.set(f"id-{pr_id}-encryption-key", "encryption-key") + + # Mock privacy request + pr = MagicMock() + pr.id = pr_id + + # Patch get_cache in the api.util.cache module where get_dsr_cache_store calls it + with patch("fides.api.util.cache.get_cache", return_value=mock_redis): + # Import here to avoid app initialization + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + PrivacyRequest.clear_cached_values(pr) + + # Verify all keys deleted + assert len(mock_redis.keys(f"*{pr_id}*")) == 0 + + def test_clear_removes_new_keys(self): + """clear_cached_values removes new-format cache keys.""" + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + + # Simulate new cached data via store + manager = RedisCacheManager(mock_redis) + store = DSRCacheStore(manager) + store.write_identity(pr_id, "email", "test@example.com") + store.write_encryption(pr_id, "key", "encryption-key") + + pr = MagicMock() + pr.id = pr_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + PrivacyRequest.clear_cached_values(pr) + + assert len(mock_redis.keys(f"*{pr_id}*")) == 0 + + def test_clear_removes_mixed_keys(self): + """clear_cached_values removes both legacy and new keys.""" + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + + # Mixed: legacy identity, new encryption + mock_redis.set(f"id-{pr_id}-identity-email", "legacy@example.com") + mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-dept", "Engineering") + + manager = RedisCacheManager(mock_redis) + store = DSRCacheStore(manager) + store.write_encryption(pr_id, "key", "new-encryption-key") + store.write_async_execution(pr_id, "task-123") + + pr = MagicMock() + pr.id = pr_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis): + from fides.api.models.privacy_request.privacy_request 
import PrivacyRequest + + PrivacyRequest.clear_cached_values(pr) + + assert len(mock_redis.keys(f"*{pr_id}*")) == 0 + + def test_clear_removes_index(self): + """clear_cached_values removes the DSR index.""" + mock_redis = MockRedis() + pr_id = f"test-pr-{uuid.uuid4()}" + + manager = RedisCacheManager(mock_redis) + store = DSRCacheStore(manager) + store.write_identity(pr_id, "email", "test@example.com") + + # Verify index exists + assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) > 0 + + pr = MagicMock() + pr.id = pr_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis): + from fides.api.models.privacy_request.privacy_request import PrivacyRequest + + PrivacyRequest.clear_cached_values(pr) + + # Index should be deleted + assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) == 0 + + +def _run_standalone_tests(): + """Run tests standalone.""" + tests = TestPrivacyRequestClearCachedValues() + tests.test_clear_removes_legacy_keys() + tests.test_clear_removes_new_keys() + tests.test_clear_removes_mixed_keys() + tests.test_clear_removes_index() + print("All clear_cached_values integration tests passed.") + + +if __name__ == "__main__": + _run_standalone_tests() From f9450f5bcb80bc64936f2e8be08b58df0e5dbb17 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 10 Feb 2026 22:19:07 -0800 Subject: [PATCH 29/50] Cleanup tests --- .../cache/test_dsr_store_clear_integration.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index 95fc5449bfe..5b9c4538b78 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -150,16 +150,3 @@ def test_clear_removes_index(self): # Index should be deleted assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) == 0 - -def _run_standalone_tests(): - """Run tests standalone.""" - tests = TestPrivacyRequestClearCachedValues() - tests.test_clear_removes_legacy_keys() - tests.test_clear_removes_new_keys() - tests.test_clear_removes_mixed_keys() - tests.test_clear_removes_index() - print("All clear_cached_values integration tests passed.") - - -if __name__ == "__main__": - _run_standalone_tests() From 0fefc95ed839807f36cc339e9fb6bd919a35a5b4 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Mon, 23 Feb 2026 14:47:14 -0800 Subject: [PATCH 30/50] Formatting --- .../models/privacy_request/privacy_request.py | 5 +---- .../cache/test_dsr_store_clear_integration.py | 20 ++++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index 022fb66aac8..8e021bcf494 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -113,17 +113,14 @@ get_encryption_cache_key, get_identity_cache_key, ) - from fides.api.util.collection_util import Row -from fides.common.cache import get_dsr_cache_store -from fides.api.util.collection_util import Row, extract_key_for_address - from fides.api.util.constants import API_DATE_FORMAT from fides.api.util.custom_json_encoder import CustomJSONEncoder from fides.api.util.decrypted_identity_automaton import DecryptedIdentityAutomatonMixin from fides.api.util.identity_verification import IdentityVerificationMixin from fides.api.util.logger import Pii from fides.api.util.logger_context_utils import Contextualizable, 
LoggerContextKeys +from fides.common.cache import get_dsr_cache_store from fides.config import CONFIG from fides.service.attachment_service import AttachmentService diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index 5b9c4538b78..3bbbf80e323 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -28,10 +28,13 @@ def get(self, key): return self._data.get(key) def delete(self, *keys): - return sum(1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None)) + return sum( + 1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None) + ) def keys(self, pattern): import fnmatch + return [k for k in self._data if fnmatch.fnmatch(k, pattern)] def scan_iter(self, match="*", count=None): @@ -76,7 +79,7 @@ def test_clear_removes_legacy_keys(self): with patch("fides.api.util.cache.get_cache", return_value=mock_redis): # Import here to avoid app initialization from fides.api.models.privacy_request.privacy_request import PrivacyRequest - + PrivacyRequest.clear_cached_values(pr) # Verify all keys deleted @@ -86,7 +89,7 @@ def test_clear_removes_new_keys(self): """clear_cached_values removes new-format cache keys.""" mock_redis = MockRedis() pr_id = f"test-pr-{uuid.uuid4()}" - + # Simulate new cached data via store manager = RedisCacheManager(mock_redis) store = DSRCacheStore(manager) @@ -98,7 +101,7 @@ def test_clear_removes_new_keys(self): with patch("fides.api.util.cache.get_cache", return_value=mock_redis): from fides.api.models.privacy_request.privacy_request import PrivacyRequest - + PrivacyRequest.clear_cached_values(pr) assert len(mock_redis.keys(f"*{pr_id}*")) == 0 @@ -111,7 +114,7 @@ def test_clear_removes_mixed_keys(self): # Mixed: legacy identity, new encryption mock_redis.set(f"id-{pr_id}-identity-email", "legacy@example.com") mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-dept", "Engineering") - + manager = RedisCacheManager(mock_redis) store = DSRCacheStore(manager) store.write_encryption(pr_id, "key", "new-encryption-key") @@ -122,7 +125,7 @@ def test_clear_removes_mixed_keys(self): with patch("fides.api.util.cache.get_cache", return_value=mock_redis): from fides.api.models.privacy_request.privacy_request import PrivacyRequest - + PrivacyRequest.clear_cached_values(pr) assert len(mock_redis.keys(f"*{pr_id}*")) == 0 @@ -135,7 +138,7 @@ def test_clear_removes_index(self): manager = RedisCacheManager(mock_redis) store = DSRCacheStore(manager) store.write_identity(pr_id, "email", "test@example.com") - + # Verify index exists assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) > 0 @@ -144,9 +147,8 @@ def test_clear_removes_index(self): with patch("fides.api.util.cache.get_cache", return_value=mock_redis): from fides.api.models.privacy_request.privacy_request import PrivacyRequest - + PrivacyRequest.clear_cached_values(pr) # Index should be deleted assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) == 0 - From bada4fb162ddd150318f5dc58222bc95d9e3240c Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 24 Feb 2026 14:50:55 -0800 Subject: [PATCH 31/50] Style fix --- src/fides/common/cache/dsr_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index e4a307bd88a..d38fc09e475 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -18,7 +18,7 @@ from redis import Redis -from 
fides.common.cache.key_mapping import KeyMapper, DSR_KEY_PREFIX +from fides.common.cache.key_mapping import DSR_KEY_PREFIX, KeyMapper from fides.common.cache.manager import RedisCacheManager, RedisValue From 94be49ba8d0c9a7ab7e1e94c05b09483e0852f76 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 24 Feb 2026 15:06:41 -0800 Subject: [PATCH 32/50] Add missing commit from PR split --- .../models/privacy_request/privacy_request.py | 2 +- src/fides/api/util/cache.py | 14 ++ src/fides/common/cache/__init__.py | 7 - tests/common/cache/mock_redis.py | 129 ++++++++++++++++++ .../cache/test_dsr_store_clear_integration.py | 45 +----- .../common/cache/test_dsr_store_migration.py | 50 +------ tests/common/cache/test_manager.py | 120 +--------------- 7 files changed, 148 insertions(+), 219 deletions(-) create mode 100644 tests/common/cache/mock_redis.py diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index 8e021bcf494..4b78fd402fc 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -110,6 +110,7 @@ get_cache, get_custom_privacy_request_field_cache_key, get_drp_request_body_cache_key, + get_dsr_cache_store, get_encryption_cache_key, get_identity_cache_key, ) @@ -120,7 +121,6 @@ from fides.api.util.identity_verification import IdentityVerificationMixin from fides.api.util.logger import Pii from fides.api.util.logger_context_utils import Contextualizable, LoggerContextKeys -from fides.common.cache import get_dsr_cache_store from fides.config import CONFIG from fides.service.attachment_service import AttachmentService diff --git a/src/fides/api/util/cache.py b/src/fides/api/util/cache.py index 66d51322c9b..937ea097019 100644 --- a/src/fides/api/util/cache.py +++ b/src/fides/api/util/cache.py @@ -1,5 +1,6 @@ import json import os +from contextlib import contextmanager from typing import Any, Dict, List, Optional, Union, cast from urllib.parse import unquote_to_bytes @@ -28,6 +29,8 @@ celery_app, ) from fides.api.util.custom_json_encoder import CustomJSONEncoder, _custom_decoder +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager from fides.config import CONFIG # This constant represents every type a redis key may contain, and can be @@ -318,6 +321,17 @@ def get_cache() -> FidesopsRedis: return _connection +def get_redis_cache_manager() -> RedisCacheManager: + """Return a RedisCacheManager wrapping the default Redis connection.""" + return RedisCacheManager(get_cache()) + + +@contextmanager +def get_dsr_cache_store() -> Generator[DSRCacheStore, None, None]: + """Context manager yielding a DSRCacheStore for privacy request cache operations.""" + yield DSRCacheStore(get_redis_cache_manager()) + + def get_read_only_cache() -> FidesopsRedis: """ Return a singleton connection to the read-only Redis cache. diff --git a/src/fides/common/cache/__init__.py b/src/fides/common/cache/__init__.py index 6b24f4828f6..96d84d4041e 100644 --- a/src/fides/common/cache/__init__.py +++ b/src/fides/common/cache/__init__.py @@ -6,15 +6,8 @@ DSRCacheStore wraps that with DSR-specific key naming (dsr:{id}:{part}) and index-backed list/clear with lazy migration for legacy keys. 
-Usage: - with get_dsr_cache_store() as store: - store.write_identity(privacy_request_id, "email", "user@example.com") - store.clear(privacy_request_id) """ -from contextlib import contextmanager -from typing import Iterator - from fides.common.cache.dsr_store import ( DSR_KEY_PREFIX, DSRCacheStore, diff --git a/tests/common/cache/mock_redis.py b/tests/common/cache/mock_redis.py new file mode 100644 index 00000000000..ab49ea4f113 --- /dev/null +++ b/tests/common/cache/mock_redis.py @@ -0,0 +1,129 @@ +""" +Shared in-memory Redis mock for cache tests. + +Provides MockPipeline and MockRedis with pipeline(), ttl(), expire(), scan_iter(), +and the operations needed by RedisCacheManager and DSRCacheStore. +""" + +import fnmatch +from typing import Any, Iterator, List, Optional, Set, Union + + +class MockPipeline: + """In-memory pipeline that batches commands and executes atomically.""" + + def __init__(self, data: dict, sets: dict) -> None: + self._data = data + self._sets = sets + self._commands: list = [] + + def set(self, key: str, value: Any, ex: Optional[int] = None) -> "MockPipeline": + self._commands.append(("set", (key, value, ex))) + return self + + def sadd(self, key: str, member: str) -> "MockPipeline": + self._commands.append(("sadd", (key, member))) + return self + + def delete(self, *keys: str) -> "MockPipeline": + self._commands.append(("delete", keys)) + return self + + def srem(self, key: str, member: str) -> "MockPipeline": + self._commands.append(("srem", (key, member))) + return self + + def execute(self) -> list: + results = [] + for op, args in self._commands: + if op == "set": + key, value, _ = args + self._data[key] = value + results.append(True) + elif op == "sadd": + key, member = args + if key not in self._sets: + self._sets[key] = set() + self._sets[key].add(member) + results.append(1) + elif op == "delete": + for k in args: + self._data.pop(k, None) + self._sets.pop(k, None) + results.append(len(args)) + elif op == "srem": + key, member = args + if key in self._sets: + self._sets[key].discard(member) + if not self._sets[key]: + del self._sets[key] + results.append(1) + self._commands = [] + return results + + +class MockRedis: + """In-memory Redis mock for RedisCacheManager and DSRCacheStore tests.""" + + def __init__(self) -> None: + self._data: dict = {} + self._sets: dict = {} + self._ttl: dict = {} # key -> seconds until expiry (simplified; no decay) + + def get(self, key: str) -> Optional[Any]: + return self._data.get(key) + + def set(self, key: str, value: Any, ex: Optional[int] = None) -> bool: + self._data[key] = value + return True + + def delete(self, *keys: str) -> int: + count = 0 + for k in keys: + if k in self._data: + del self._data[k] + count += 1 + if k in self._sets: + del self._sets[k] + count += 1 + self._ttl.pop(k, None) + return count + + def sadd(self, key: str, member: str) -> int: + if key not in self._sets: + self._sets[key] = set() + self._sets[key].add(member) + return 1 + + def srem(self, key: str, member: str) -> int: + if key in self._sets: + self._sets[key].discard(member) + if not self._sets[key]: + del self._sets[key] + return 1 + return 0 + + def smembers(self, key: str) -> Set[Union[str, bytes]]: + return self._sets.get(key, set()).copy() + + def keys(self, pattern: str = "*") -> List[str]: + all_keys = set(self._data) | set(self._sets) + return [k for k in all_keys if fnmatch.fnmatch(k, pattern)] + + def scan_iter(self, match: str = "*", count: Optional[int] = None) -> Iterator[str]: + """Iterate over keys matching the pattern 
(used by DSRCacheStore.clear).""" + yield from self.keys(match) + + def ttl(self, key: str) -> int: + if key not in self._data and key not in self._sets: + return -2 + return self._ttl.get(key, -1) + + def expire(self, key: str, seconds: int) -> bool: + if key in self._data or key in self._sets: + self._ttl[key] = seconds + return True + return False + + def pipeline(self) -> MockPipeline: + return MockPipeline(self._data, self._sets) diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index 3bbbf80e323..9f5c60ceefe 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -11,50 +11,7 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager - - -class MockRedis: - """Minimal mock Redis for testing clear behavior.""" - - def __init__(self): - self._data = {} - self._sets = {} - - def set(self, key, value, ex=None): - self._data[key] = value - return True - - def get(self, key): - return self._data.get(key) - - def delete(self, *keys): - return sum( - 1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None) - ) - - def keys(self, pattern): - import fnmatch - - return [k for k in self._data if fnmatch.fnmatch(k, pattern)] - - def scan_iter(self, match="*", count=None): - return iter(self.keys(match)) - - def sadd(self, key, *members): - s = self._sets.setdefault(key, set()) - before = len(s) - s.update(members) - return len(s) - before - - def srem(self, key, *members): - if key not in self._sets: - return 0 - before = len(self._sets[key]) - self._sets[key].difference_update(members) - return before - len(self._sets[key]) - - def smembers(self, key): - return self._sets.get(key, set()).copy() +from tests.common.cache.mock_redis import MockRedis @pytest.mark.unit diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py index c5f41e297bc..88a0f4fd8e9 100644 --- a/tests/common/cache/test_dsr_store_migration.py +++ b/tests/common/cache/test_dsr_store_migration.py @@ -4,60 +4,14 @@ Verifies existing cached data (legacy format) is correctly read, migrated, and cleared. 
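+
+Legacy-to-new mapping exercised here (sketch; "pri_1" is a made-up id):
+    id-pri_1-identity-email    ->  dsr:pri_1:identity:email
+    id-pri_1-encryption-key    ->  dsr:pri_1:encryption:key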
""" -import fnmatch import uuid -from typing import Dict, List, Optional, Set, Union +from typing import List import pytest from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager - -RedisValue = Union[bytes, float, int, str] - - -class MockRedis: - """Mock Redis with minimal interface for DSRCacheStore.""" - - def __init__(self) -> None: - self._data: Dict[str, RedisValue] = {} - self._sets: Dict[str, Set[Union[str, bytes]]] = {} - - def get(self, key: str) -> Optional[Union[str, bytes]]: - val = self._data.get(key) - return val if isinstance(val, (str, bytes)) else str(val) if val else None - - def set(self, key: str, value: RedisValue, ex: Optional[int] = None) -> bool: - self._data[key] = value - return True - - def delete(self, *keys: str) -> int: - deleted = sum( - 1 for k in keys if self._data.pop(k, None) or self._sets.pop(k, None) - ) - return deleted - - def keys(self, pattern: str) -> List[str]: - return [k for k in self._data if fnmatch.fnmatch(k, pattern)] - - def scan_iter(self, match: str = "*", count: Optional[int] = None): - return iter(self.keys(match)) - - def sadd(self, key: str, *members: Union[str, bytes]) -> int: - s = self._sets.setdefault(key, set()) - before = len(s) - s.update(members) - return len(s) - before - - def srem(self, key: str, *members: Union[str, bytes]) -> int: - if key not in self._sets: - return 0 - before = len(self._sets[key]) - self._sets[key].difference_update(members) - return before - len(self._sets[key]) - - def smembers(self, key: str) -> Set[Union[str, bytes]]: - return self._sets.get(key, set()).copy() +from tests.common.cache.mock_redis import MockRedis # Test data factories diff --git a/tests/common/cache/test_manager.py b/tests/common/cache/test_manager.py index f90db5e215a..bda07f0ace0 100644 --- a/tests/common/cache/test_manager.py +++ b/tests/common/cache/test_manager.py @@ -1,125 +1,7 @@ -import fnmatch - import pytest from fides.common.cache.manager import INDEX_TTL_EXTRA_SECONDS, RedisCacheManager - - -class MockPipeline: - """In-memory pipeline that batches commands and executes atomically.""" - - def __init__(self, data: dict, sets: dict) -> None: - self._data = data - self._sets = sets - self._commands: list = [] - - def set(self, key: str, value, ex=None) -> "MockPipeline": - self._commands.append(("set", (key, value, ex))) - return self - - def sadd(self, key: str, member: str) -> "MockPipeline": - self._commands.append(("sadd", (key, member))) - return self - - def delete(self, *keys: str) -> "MockPipeline": - self._commands.append(("delete", keys)) - return self - - def srem(self, key: str, member: str) -> "MockPipeline": - self._commands.append(("srem", (key, member))) - return self - - def execute(self) -> list: - results = [] - for op, args in self._commands: - if op == "set": - key, value, _ = args - self._data[key] = value - results.append(True) - elif op == "sadd": - key, member = args - if key not in self._sets: - self._sets[key] = set() - self._sets[key].add(member) - results.append(1) - elif op == "delete": - for k in args: - self._data.pop(k, None) - self._sets.pop(k, None) - results.append(len(args)) - elif op == "srem": - key, member = args - if key in self._sets: - self._sets[key].discard(member) - if not self._sets[key]: - del self._sets[key] - results.append(1) - self._commands = [] - return results - - -class MockRedis: - """In-memory Redis mock for RedisCacheManager tests.""" - - def __init__(self) -> None: - self._data: dict = {} - self._sets: dict 
= {} - self._ttl: dict = {} # key -> seconds until expiry (simplified; no decay) - - def get(self, key: str): - return self._data.get(key) - - def set(self, key: str, value, ex=None) -> bool: - self._data[key] = value - return True - - def delete(self, *keys: str) -> int: - count = 0 - for k in keys: - if k in self._data: - del self._data[k] - count += 1 - if k in self._sets: - del self._sets[k] - count += 1 - self._ttl.pop(k, None) - return count - - def sadd(self, key: str, member: str) -> int: - if key not in self._sets: - self._sets[key] = set() - self._sets[key].add(member) - return 1 - - def srem(self, key: str, member: str) -> int: - if key in self._sets: - self._sets[key].discard(member) - if not self._sets[key]: - del self._sets[key] - return 1 - return 0 - - def smembers(self, key: str) -> set: - return self._sets.get(key, set()).copy() - - def keys(self, pattern: str = "*") -> list: - all_keys = set(self._data) | set(self._sets) - return [k for k in all_keys if fnmatch.fnmatch(k, pattern)] - - def ttl(self, key: str) -> int: - if key not in self._data and key not in self._sets: - return -2 - return self._ttl.get(key, -1) - - def expire(self, key: str, seconds: int) -> bool: - if key in self._data or key in self._sets: - self._ttl[key] = seconds - return True - return False - - def pipeline(self) -> MockPipeline: - return MockPipeline(self._data, self._sets) - +from tests.common.cache.mock_redis import MockPipeline, MockRedis # --- Fixtures --- From c0b5aee6475c8391dd51af1beb10b7426a295f82 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Tue, 24 Feb 2026 17:30:42 -0800 Subject: [PATCH 33/50] Fix mypy --- src/fides/common/cache/__init__.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/fides/common/cache/__init__.py b/src/fides/common/cache/__init__.py index 96d84d4041e..66441b42367 100644 --- a/src/fides/common/cache/__init__.py +++ b/src/fides/common/cache/__init__.py @@ -8,10 +8,8 @@ """ -from fides.common.cache.dsr_store import ( - DSR_KEY_PREFIX, - DSRCacheStore, -) +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.key_mapping import DSR_KEY_PREFIX from fides.common.cache.manager import ( INDEX_KEY_PREFIX, RedisCacheManager, From db882eb1312fbb09a823d31fbc3d578834513b9f Mon Sep 17 00:00:00 2001 From: John Ewart Date: Thu, 19 Mar 2026 12:33:34 -0700 Subject: [PATCH 34/50] Fix merge error --- src/fides/common/cache/__init__.py | 2 -- src/fides/common/cache/dsr_store.py | 4 +++- tests/common/cache/test_dsr_store.py | 7 ------- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/fides/common/cache/__init__.py b/src/fides/common/cache/__init__.py index a54ed9428ef..4757b8e8f66 100644 --- a/src/fides/common/cache/__init__.py +++ b/src/fides/common/cache/__init__.py @@ -15,11 +15,9 @@ DSR_KEY_PREFIX, DSRCacheStore, ) - from fides.common.cache.manager import ( INDEX_KEY_PREFIX, RedisCacheManager, ) - __all__ = ["INDEX_KEY_PREFIX", "RedisCacheManager", "DSR_KEY_PREFIX", "DSRCacheStore"] diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index e4a307bd88a..921dcee4094 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -18,9 +18,11 @@ from redis import Redis -from fides.common.cache.key_mapping import KeyMapper, DSR_KEY_PREFIX +from fides.common.cache.key_mapping import DSR_KEY_PREFIX, KeyMapper from fides.common.cache.manager import RedisCacheManager, RedisValue +__all__ = ["DSR_KEY_PREFIX", "DSRCacheStore"] + def _dsr_key(dsr_id: str, 
part: str) -> str: """Build the Redis key for a DSR cache part.""" diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py index 83483592623..0f302d93494 100644 --- a/tests/common/cache/test_dsr_store.py +++ b/tests/common/cache/test_dsr_store.py @@ -34,13 +34,6 @@ def set( self._data[key] = value return True - deleted_count = 0 - for key in keys: - if key in self._data: - del self._data[key] - deleted_count += 1 - return deleted_count - def keys(self, pattern: str) -> List[str]: """Glob-style: * matches any number of chars.""" return [k for k in self._data if fnmatch.fnmatch(k, pattern)] From 6965fb3d8147e4bc29b1e8f063bcf1a2bfac4303 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Thu, 19 Mar 2026 13:00:17 -0700 Subject: [PATCH 35/50] Fix another merge issue --- tests/common/cache/test_dsr_store.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py index 0f302d93494..228560cb9d2 100644 --- a/tests/common/cache/test_dsr_store.py +++ b/tests/common/cache/test_dsr_store.py @@ -34,6 +34,15 @@ def set( self._data[key] = value return True + def delete(self, *keys: str) -> int: + """Remove keys; returns count removed (redis-py compatible).""" + removed = 0 + for key in keys: + if key in self._data: + del self._data[key] + removed += 1 + return removed + def keys(self, pattern: str) -> List[str]: """Glob-style: * matches any number of chars.""" return [k for k in self._data if fnmatch.fnmatch(k, pattern)] From 1a3fdf6a19db674d9bd47b1c98b1f43309549cd8 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Thu, 19 Mar 2026 13:47:53 -0700 Subject: [PATCH 36/50] Add missing mock pipeline --- .../common/cache/test_dsr_store_migration.py | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py index c5f41e297bc..1a8daf8ea72 100644 --- a/tests/common/cache/test_dsr_store_migration.py +++ b/tests/common/cache/test_dsr_store_migration.py @@ -6,7 +6,7 @@ import fnmatch import uuid -from typing import Dict, List, Optional, Set, Union +from typing import Any, Callable, Dict, List, Optional, Set, Union import pytest @@ -16,6 +16,47 @@ RedisValue = Union[bytes, float, int, str] +class MockPipeline: + """Minimal Redis pipeline: buffers ops and runs them on execute().""" + + def __init__(self, redis: "MockRedis") -> None: + self._redis = redis + self._ops: List[Callable[[], Any]] = [] + + def set( + self, key: str, value: RedisValue, ex: Optional[int] = None + ) -> "MockPipeline": + def op() -> bool: + return self._redis.set(key, value, ex=ex) + + self._ops.append(op) + return self + + def sadd(self, key: str, *members: Union[str, bytes]) -> "MockPipeline": + def op() -> int: + return self._redis.sadd(key, *members) + + self._ops.append(op) + return self + + def delete(self, *keys: str) -> "MockPipeline": + def op() -> int: + return self._redis.delete(*keys) + + self._ops.append(op) + return self + + def srem(self, key: str, *members: Union[str, bytes]) -> "MockPipeline": + def op() -> int: + return self._redis.srem(key, *members) + + self._ops.append(op) + return self + + def execute(self) -> List[Any]: + return [op() for op in self._ops] + + class MockRedis: """Mock Redis with minimal interface for DSRCacheStore.""" @@ -59,6 +100,9 @@ def srem(self, key: str, *members: Union[str, bytes]) -> int: def smembers(self, key: str) -> Set[Union[str, bytes]]: return self._sets.get(key, 
set()).copy() + def pipeline(self) -> MockPipeline: + return MockPipeline(self) + # Test data factories def make_dsr_id() -> str: From 368d217584f5203ee7c77ca119525fb2745a4144 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Thu, 19 Mar 2026 14:41:50 -0700 Subject: [PATCH 37/50] Minor fixes --- src/fides/api/models/privacy_request/privacy_request.py | 1 - src/fides/api/util/cache.py | 2 +- tests/ops/service/privacy_request/test_request_service.py | 7 +++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index d2829ec88d3..64b53d5f4b3 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -119,7 +119,6 @@ from fides.api.util.identity_verification import IdentityVerificationMixin from fides.api.util.logger import Pii from fides.api.util.logger_context_utils import Contextualizable, LoggerContextKeys -from fides.common.cache import get_dsr_cache_store from fides.config import CONFIG from fides.service.attachment_service import AttachmentService diff --git a/src/fides/api/util/cache.py b/src/fides/api/util/cache.py index 937ea097019..33e540deb38 100644 --- a/src/fides/api/util/cache.py +++ b/src/fides/api/util/cache.py @@ -1,7 +1,7 @@ import json import os from contextlib import contextmanager -from typing import Any, Dict, List, Optional, Union, cast +from typing import Any, Dict, Generator, List, Optional, Union, cast from urllib.parse import unquote_to_bytes from loguru import logger diff --git a/tests/ops/service/privacy_request/test_request_service.py b/tests/ops/service/privacy_request/test_request_service.py index 2cbe0e28b08..fd61fe8376b 100644 --- a/tests/ops/service/privacy_request/test_request_service.py +++ b/tests/ops/service/privacy_request/test_request_service.py @@ -341,10 +341,9 @@ def very_short_request_task_expiration(): @pytest.fixture(scope="function") def very_short_redis_cache_expiration(): - original_value: float = CONFIG.redis.default_ttl_seconds - CONFIG.redis.default_ttl_seconds = ( - 0.01 # Set redis cache to expire very quickly for testing purposes - ) + original_value: int = CONFIG.redis.default_ttl_seconds + # Redis SET ex= must be int or timedelta (not float). Use 1s; tests already sleep 1s. 
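+    # e.g. with redis-py's argument validation: client.set("k", "v", ex=1) is
+    # accepted, while client.set("k", "v", ex=0.01) raises DataError.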
+ CONFIG.redis.default_ttl_seconds = 1 yield CONFIG CONFIG.redis.default_ttl_seconds = original_value From 1dde1de18187480677283218699c0531c10f0eb9 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Thu, 19 Mar 2026 16:57:18 -0700 Subject: [PATCH 38/50] Fix some merge-introduced issues --- .../models/privacy_request/privacy_request.py | 11 ++-- src/fides/api/tasks/encryption_utils.py | 16 ++--- src/fides/common/cache/__init__.py | 1 - .../cache/test_dsr_store_clear_integration.py | 1 - ...est_dsr_store_custom_fields_integration.py | 60 ++++++++++++++----- .../cache/test_dsr_store_drp_integration.py | 5 +- .../test_dsr_store_identity_integration.py | 23 ++++--- .../common/cache/test_dsr_store_migration.py | 2 +- .../test_privacy_request_endpoints.py | 15 +---- .../privacy_request/test_privacy_request.py | 15 ++--- 10 files changed, 88 insertions(+), 61 deletions(-) diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index 64b53d5f4b3..0862ea1398d 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -110,7 +110,6 @@ get_cache, get_drp_request_body_cache_key, get_dsr_cache_store, - get_encryption_cache_key, ) from fides.api.util.collection_util import Row from fides.api.util.constants import API_DATE_FORMAT @@ -685,9 +684,13 @@ def verify_identity(self, db: Session, provided_code: str) -> "PrivacyRequest": def get_cached_encryption_key(self) -> Optional[str]: """Gets the cached encryption key for this privacy request.""" - cache: FidesopsRedis = get_cache() - encryption_key = cache.get(get_encryption_cache_key(self.id, "key")) - return encryption_key + with get_dsr_cache_store() as store: + raw = store.get_encryption(self.id, "key") + if raw is None: + return None + if isinstance(raw, bytes): + return raw.decode(CONFIG.security.encoding) + return str(raw) def get_cached_task_id(self) -> Optional[str]: """Gets the cached task ID for this privacy request.""" diff --git a/src/fides/api/tasks/encryption_utils.py b/src/fides/api/tasks/encryption_utils.py index 9b892436db2..e69edc55534 100644 --- a/src/fides/api/tasks/encryption_utils.py +++ b/src/fides/api/tasks/encryption_utils.py @@ -2,7 +2,7 @@ from typing import Optional, Union from fides.api.cryptography.cryptographic_util import bytes_to_b64_str -from fides.api.util.cache import get_cache, get_encryption_cache_key +from fides.api.util.cache import get_dsr_cache_store from fides.api.util.encryption.aes_gcm_encryption_scheme import ( encrypt_to_bytes_verify_secrets_length, ) @@ -19,15 +19,17 @@ def encrypt_access_request_results(data: Union[str, bytes], request_id: str) -> Returns: str: The encrypted data as a string """ - cache = get_cache() - encryption_cache_key = get_encryption_cache_key( - privacy_request_id=request_id, - encryption_attr="key", - ) if isinstance(data, bytes): data = data.decode(CONFIG.security.encoding) - encryption_key: Optional[str] = cache.get(encryption_cache_key) + with get_dsr_cache_store() as store: + raw = store.get_encryption(request_id, "key") + if raw is None: + return data + if isinstance(raw, bytes): + encryption_key = raw.decode(CONFIG.security.encoding) + else: + encryption_key = str(raw) if not encryption_key: return data diff --git a/src/fides/common/cache/__init__.py b/src/fides/common/cache/__init__.py index 007b5d11935..66441b42367 100644 --- a/src/fides/common/cache/__init__.py +++ b/src/fides/common/cache/__init__.py @@ -10,7 +10,6 @@ from 
fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.key_mapping import DSR_KEY_PREFIX - from fides.common.cache.manager import ( INDEX_KEY_PREFIX, RedisCacheManager, diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index cf4d32f91fe..9f5c60ceefe 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -11,7 +11,6 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager - from tests.common.cache.mock_redis import MockRedis diff --git a/tests/common/cache/test_dsr_store_custom_fields_integration.py b/tests/common/cache/test_dsr_store_custom_fields_integration.py index 2506182ab2b..c009720769e 100644 --- a/tests/common/cache/test_dsr_store_custom_fields_integration.py +++ b/tests/common/cache/test_dsr_store_custom_fields_integration.py @@ -11,7 +11,6 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager - from tests.common.cache.mock_redis import MockRedis @@ -50,11 +49,18 @@ def test_cache_custom_fields_writes_all_fields(self, dsr_store, pr_id): dsr_store.cache_custom_fields(pr_id, custom_fields, expire_seconds=3600) # All keys written in new format - assert dsr_store._redis.get(f"dsr:{pr_id}:custom_field:department") == json.dumps("Engineering") - assert dsr_store._redis.get(f"dsr:{pr_id}:custom_field:employee_id") == json.dumps("E12345") - + assert dsr_store._redis.get( + f"dsr:{pr_id}:custom_field:department" + ) == json.dumps("Engineering") + assert dsr_store._redis.get( + f"dsr:{pr_id}:custom_field:employee_id" + ) == json.dumps("E12345") + # Legacy keys do NOT exist - assert dsr_store._redis.get(f"id-{pr_id}-custom-privacy-request-field-department") is None + assert ( + dsr_store._redis.get(f"id-{pr_id}-custom-privacy-request-field-department") + is None + ) def test_get_cached_custom_fields_reads_all_fields(self, dsr_store, pr_id): """get_cached_custom_fields reads all fields from new-format keys.""" @@ -69,11 +75,18 @@ def test_get_cached_custom_fields_reads_all_fields(self, dsr_store, pr_id): assert result["department"] == json.dumps("Engineering") assert result["employee_id"] == json.dumps("E12345") - def test_get_cached_custom_fields_migrates_legacy_keys(self, dsr_store, mock_redis, pr_id): + def test_get_cached_custom_fields_migrates_legacy_keys( + self, dsr_store, mock_redis, pr_id + ): """get_cached_custom_fields reads and migrates legacy keys on first access.""" # Write legacy format - mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-department", json.dumps("Engineering")) - mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-employee_id", json.dumps("E12345")) + mock_redis.set( + f"id-{pr_id}-custom-privacy-request-field-department", + json.dumps("Engineering"), + ) + mock_redis.set( + f"id-{pr_id}-custom-privacy-request-field-employee_id", json.dumps("E12345") + ) result = dsr_store.get_cached_custom_fields(pr_id) @@ -83,15 +96,23 @@ def test_get_cached_custom_fields_migrates_legacy_keys(self, dsr_store, mock_red # Legacy keys migrated to new format assert mock_redis.get(f"dsr:{pr_id}:custom_field:department") is not None - assert mock_redis.get(f"id-{pr_id}-custom-privacy-request-field-department") is None - - def test_has_cached_custom_fields_detects_both_formats(self, dsr_store, mock_redis, pr_id): + assert ( + 
mock_redis.get(f"id-{pr_id}-custom-privacy-request-field-department") + is None + ) + + def test_has_cached_custom_fields_detects_both_formats( + self, dsr_store, mock_redis, pr_id + ): """has_cached_custom_fields detects fields in both legacy and new formats.""" # Empty initially assert dsr_store.has_cached_custom_fields(pr_id) is False # Add legacy key - mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-department", json.dumps("Engineering")) + mock_redis.set( + f"id-{pr_id}-custom-privacy-request-field-department", + json.dumps("Engineering"), + ) assert dsr_store.has_cached_custom_fields(pr_id) is True # Clear and test new format @@ -105,10 +126,15 @@ class TestDSRCacheStoreEncryption: def test_write_encryption_writes_key(self, dsr_store, pr_id): """write_encryption writes encryption key to new-format key.""" - dsr_store.write_encryption(pr_id, "key", "test-encryption-key-12345", expire_seconds=3600) + dsr_store.write_encryption( + pr_id, "key", "test-encryption-key-12345", expire_seconds=3600 + ) + + assert ( + dsr_store._redis.get(f"dsr:{pr_id}:encryption:key") + == "test-encryption-key-12345" + ) - assert dsr_store._redis.get(f"dsr:{pr_id}:encryption:key") == "test-encryption-key-12345" - # Legacy key does NOT exist assert dsr_store._redis.get(f"id-{pr_id}-encryption-key") is None @@ -123,5 +149,7 @@ def test_get_encryption_migrates_legacy_key(self, dsr_store, mock_redis, pr_id): assert value == "test-encryption-key-12345" # Legacy key migrated - assert mock_redis.get(f"dsr:{pr_id}:encryption:key") == "test-encryption-key-12345" + assert ( + mock_redis.get(f"dsr:{pr_id}:encryption:key") == "test-encryption-key-12345" + ) assert mock_redis.get(f"id-{pr_id}-encryption-key") is None diff --git a/tests/common/cache/test_dsr_store_drp_integration.py b/tests/common/cache/test_dsr_store_drp_integration.py index 63af17a9884..df5b4c6477e 100644 --- a/tests/common/cache/test_dsr_store_drp_integration.py +++ b/tests/common/cache/test_dsr_store_drp_integration.py @@ -7,9 +7,10 @@ - Automatic migration on read """ -import pytest import uuid -from typing import Dict, Any +from typing import Any, Dict + +import pytest from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager diff --git a/tests/common/cache/test_dsr_store_identity_integration.py b/tests/common/cache/test_dsr_store_identity_integration.py index 19f7d2c5b84..8c10123be0d 100644 --- a/tests/common/cache/test_dsr_store_identity_integration.py +++ b/tests/common/cache/test_dsr_store_identity_integration.py @@ -11,7 +11,6 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager - from tests.common.cache.mock_redis import MockRedis @@ -59,13 +58,19 @@ def test_cache_identity_data_writes_all_attributes(self, dsr_store, pr_id): dsr_store.cache_identity_data(pr_id, identity_data, expire_seconds=3600) # All keys written in new format - assert dsr_store._redis.get(f"dsr:{pr_id}:identity:email") == json.dumps("user@example.com") - assert dsr_store._redis.get(f"dsr:{pr_id}:identity:phone_number") == json.dumps("+1234567890") - + assert dsr_store._redis.get(f"dsr:{pr_id}:identity:email") == json.dumps( + "user@example.com" + ) + assert dsr_store._redis.get(f"dsr:{pr_id}:identity:phone_number") == json.dumps( + "+1234567890" + ) + # Legacy keys do NOT exist assert dsr_store._redis.get(f"id-{pr_id}-identity-email") is None - def test_get_cached_identity_data_reads_all_attributes(self, dsr_store, pr_id, identity_data): + def 
test_get_cached_identity_data_reads_all_attributes( + self, dsr_store, pr_id, identity_data + ): """get_cached_identity_data reads all identity attributes from new-format keys.""" # Write via store encoded_data = {k: json.dumps(v) for k, v in identity_data.items()} @@ -76,7 +81,9 @@ def test_get_cached_identity_data_reads_all_attributes(self, dsr_store, pr_id, i assert result["email"] == json.dumps("user@example.com") assert result["phone_number"] == json.dumps("+1234567890") - def test_get_cached_identity_data_migrates_legacy_keys(self, dsr_store, mock_redis, pr_id, identity_data): + def test_get_cached_identity_data_migrates_legacy_keys( + self, dsr_store, mock_redis, pr_id, identity_data + ): """get_cached_identity_data reads and migrates legacy keys on first access.""" # Write legacy format with JSON encoding for key, value in identity_data.items(): @@ -92,7 +99,9 @@ def test_get_cached_identity_data_migrates_legacy_keys(self, dsr_store, mock_red assert mock_redis.get(f"dsr:{pr_id}:identity:email") is not None assert mock_redis.get(f"id-{pr_id}-identity-email") is None - def test_has_cached_identity_data_detects_both_formats(self, dsr_store, mock_redis, pr_id): + def test_has_cached_identity_data_detects_both_formats( + self, dsr_store, mock_redis, pr_id + ): """has_cached_identity_data detects identity data in both legacy and new formats.""" # Empty initially assert dsr_store.has_cached_identity_data(pr_id) is False diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py index 9e23dc28cc2..4e35de7f12d 100644 --- a/tests/common/cache/test_dsr_store_migration.py +++ b/tests/common/cache/test_dsr_store_migration.py @@ -11,7 +11,7 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import MockRedis, MockPipeline +from tests.common.cache.mock_redis import MockPipeline, MockRedis # Test data factories diff --git a/tests/ops/api/v1/endpoints/privacy_request/test_privacy_request_endpoints.py b/tests/ops/api/v1/endpoints/privacy_request/test_privacy_request_endpoints.py index db8381e1d5a..b56311719a6 100644 --- a/tests/ops/api/v1/endpoints/privacy_request/test_privacy_request_endpoints.py +++ b/tests/ops/api/v1/endpoints/privacy_request/test_privacy_request_endpoints.py @@ -67,7 +67,6 @@ LabeledIdentity, ) from fides.api.tasks import DSR_QUEUE_NAME, MESSAGING_QUEUE_NAME -from fides.api.util.cache import get_encryption_cache_key from fides.api.util.data_category import get_user_data_categories from fides.api.util.encryption.secrets_util import SecretsUtil from fides.api.util.fuzzy_search_utils import ( @@ -785,7 +784,6 @@ def test_create_privacy_request_caches_encryption_keys( db, api_client: TestClient, policy, - cache, ): identity = {"email": "test@example.com"} data = [ @@ -801,11 +799,7 @@ def test_create_privacy_request_caches_encryption_keys( response_data = resp.json()["succeeded"] assert len(response_data) == 1 pr = PrivacyRequest.get(db=db, object_id=response_data[0]["id"]) - encryption_key = get_encryption_cache_key( - privacy_request_id=pr.id, - encryption_attr="key", - ) - assert cache.get(encryption_key) == "test--encryption" + assert pr.get_cached_encryption_key() == "test--encryption" pr.delete(db=db) assert run_access_request_mock.called @@ -8032,7 +8026,6 @@ def test_create_privacy_request_caches_encryption_keys( generate_auth_header, api_client: TestClient, policy, - cache, ): identity = {"email": "test@example.com"} data = [ 
@@ -8049,11 +8042,7 @@ def test_create_privacy_request_caches_encryption_keys( response_data = resp.json()["succeeded"] assert len(response_data) == 1 pr = PrivacyRequest.get(db=db, object_id=response_data[0]["id"]) - encryption_key = get_encryption_cache_key( - privacy_request_id=pr.id, - encryption_attr="key", - ) - assert cache.get(encryption_key) == "test--encryption" + assert pr.get_cached_encryption_key() == "test--encryption" assert run_access_request_mock.called def test_create_privacy_request_no_identities( diff --git a/tests/ops/models/privacy_request/test_privacy_request.py b/tests/ops/models/privacy_request/test_privacy_request.py index 6891d6aee16..fcc62872e01 100644 --- a/tests/ops/models/privacy_request/test_privacy_request.py +++ b/tests/ops/models/privacy_request/test_privacy_request.py @@ -47,6 +47,7 @@ FidesopsRedis, get_cache, get_custom_privacy_request_field_cache_key, + get_dsr_cache_store, get_identity_cache_key, ) from fides.api.util.constants import API_DATE_FORMAT @@ -263,28 +264,23 @@ def test_delete_privacy_request_removes_cached_data( def test_cache_identity_fallback_to_db( db: Session, privacy_request_with_email_identity: PrivacyRequest, - cache: FidesopsRedis, loguru_caplog, ) -> None: identity = privacy_request_with_email_identity.get_persisted_identity() privacy_request_with_email_identity.cache_identity(identity) - key = get_identity_cache_key( - privacy_request_id=privacy_request_with_email_identity.id, - identity_attribute="email", - ) cached_identity_data = ( privacy_request_with_email_identity.get_cached_identity_data() ) assert cached_identity_data != {} - cache.delete(key) - assert cache.get(key) is None + with get_dsr_cache_store() as store: + store.delete(privacy_request_with_email_identity.id, "identity:email") assert ( privacy_request_with_email_identity.get_cached_identity_data() == cached_identity_data ) assert ( f"Cache miss for request {privacy_request_with_email_identity.id}, falling back to DB" - in loguru_caplog.messages[-1] + in loguru_caplog.text ) @@ -312,7 +308,7 @@ def test_cache_identity_fallback_to_db_no_persisted_identity( assert privacy_request.get_cached_identity_data() == {} assert ( f"Cache miss for request {privacy_request.id}, falling back to DB" - in loguru_caplog.messages[-1] + in loguru_caplog.text ) @@ -1332,6 +1328,7 @@ def test_old_cache_can_be_read(self, privacy_request): We need to make sure we can still read these old values using the new `get_cached_identity_data` function. 
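+
+        (Cached values are cleared up front so the legacy-format write below
+        is the only cache state the new read path sees.)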
""" + privacy_request.clear_cached_values() def cache_identity(identity: Identity, privacy_request_id: str) -> None: """Old function for caching identity""" From 2551f78fe5cb3e3e5e043968b63c4a2eca625866 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Thu, 19 Mar 2026 17:31:07 -0700 Subject: [PATCH 39/50] Fix some typing issues --- .../api/models/privacy_request/privacy_request.py | 2 +- src/fides/common/cache/manager.py | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index 0862ea1398d..034c0656caa 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -709,7 +709,7 @@ def cache_drp_request_body(self, drp_request_body: DrpPrivacyRequestCreate) -> N drp_request_body_dict: Dict[str, Any] = dict(drp_request_body) # Serialize complex objects to repr format for storage - serialized_body = {} + serialized_body: Dict[str, Any] = {} for key, value in drp_request_body_dict.items(): if value is not None: # Handle nested dict/objects diff --git a/src/fides/common/cache/manager.py b/src/fides/common/cache/manager.py index 8895c567c1a..8b560ea3486 100644 --- a/src/fides/common/cache/manager.py +++ b/src/fides/common/cache/manager.py @@ -6,9 +6,7 @@ __idx:{index_prefix}; members are the actual cache key names. """ -from typing import List, Optional, Union - -from redis import Redis +from typing import Any, List, Optional, Union # Redis key prefix for index sets. Index key = INDEX_KEY_PREFIX + index_prefix. INDEX_KEY_PREFIX = "__idx:" @@ -31,10 +29,10 @@ class RedisCacheManager: set/delete helpers). """ - def __init__(self, redis_client: Redis) -> None: + def __init__(self, redis_client: Any) -> None: """ Args: - redis_client: Any Redis client (e.g. FidesopsRedis from get_cache()). + redis_client: redis.Redis, RedisCluster, or FidesopsRedis (delegates to underlying client). 
""" self._redis = redis_client @@ -131,6 +129,6 @@ def delete_key_and_remove_from_index( pipe.execute() @property - def redis(self) -> Redis: + def redis(self) -> Any: """Access the underlying Redis client for operations not on the manager.""" return self._redis From 21b88cddd94b20c62e5926ae37ddfd4daef6b9b3 Mon Sep 17 00:00:00 2001 From: John Ewart Date: Thu, 19 Mar 2026 18:05:33 -0700 Subject: [PATCH 40/50] Update some tests, add some very specific additional tests to simulate deploying this in the middle of DSRs being processed --- .../api/models/privacy_request/consent.py | 3 + .../models/privacy_request/privacy_request.py | 1 - src/fides/common/cache/dsr_store.py | 18 +- tests/common/cache/mock_redis.py | 13 + ...test_dsr_store_production_compatibility.py | 362 ++++++++++++++++++ .../api/v1/endpoints/test_drp_endpoints.py | 92 ++--- .../privacy_request/test_privacy_request.py | 7 +- tests/ops/tasks/test_encryption_utils.py | 28 +- .../ops/test_helpers/cache_secrets_helper.py | 18 +- 9 files changed, 458 insertions(+), 84 deletions(-) create mode 100644 tests/common/cache/test_dsr_store_production_compatibility.py diff --git a/src/fides/api/models/privacy_request/consent.py b/src/fides/api/models/privacy_request/consent.py index 3bec2a35a28..1688b0bc0a2 100644 --- a/src/fides/api/models/privacy_request/consent.py +++ b/src/fides/api/models/privacy_request/consent.py @@ -96,6 +96,9 @@ class ConsentRequest(IdentityVerificationMixin, Base): privacy_request = relationship("PrivacyRequest") def get_cached_identity_data(self) -> Dict[str, Any]: + # TODO: Remove this method - it's dead code (never called). + # If identity data is needed, use privacy_request.get_cached_identity_data() + # when privacy_request_id exists, or get_persisted_identity() from provided_identity. """Retrieves any identity data pertaining to this request from the cache.""" cache: FidesopsRedis = get_cache() keys = cache.get_keys_by_prefix(f"id-{self.id}-identity-") diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index 034c0656caa..b6ff9b7ec2a 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -108,7 +108,6 @@ FidesopsRedis, get_async_task_tracking_cache_key, get_cache, - get_drp_request_body_cache_key, get_dsr_cache_store, ) from fides.api.util.collection_util import Row diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index bc042405fd6..ef321ff638b 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -251,6 +251,15 @@ def get_cached_identity_data(self, dsr_id: str) -> Dict[str, Any]: # Filter for identity keys (both new and legacy formats) identity_keys = [k for k in all_keys if ":identity:" in k or "-identity-" in k] + + # Also scan for legacy identity keys directly, since get_all_keys() may return early + # if an index exists (e.g., when other fields like encryption are cached) + legacy_identity_prefix = f"id-{dsr_id}-identity-" + legacy_keys = list(self._redis.scan_iter(match=f"{legacy_identity_prefix}*", count=500)) + # Add legacy keys that aren't already in identity_keys + for legacy_key in legacy_keys: + if legacy_key not in identity_keys: + identity_keys.append(legacy_key) for key in identity_keys: # Extract attribute name from key @@ -275,7 +284,14 @@ def has_cached_identity_data(self, dsr_id: str) -> bool: Returns True if any identity keys exist (legacy or new format). 
""" all_keys = self.get_all_keys(dsr_id) - return any(":identity:" in k or "-identity-" in k for k in all_keys) + has_identity = any(":identity:" in k or "-identity-" in k for k in all_keys) + if has_identity: + return True + # Also check for legacy identity keys directly, since get_all_keys() may return early + # if an index exists (e.g., when other fields like encryption are cached) + legacy_identity_prefix = f"id-{dsr_id}-identity-" + legacy_keys = list(self._redis.scan_iter(match=f"{legacy_identity_prefix}*", count=500)) + return len(legacy_keys) > 0 # --- Convenience: encryption --- diff --git a/tests/common/cache/mock_redis.py b/tests/common/cache/mock_redis.py index ab49ea4f113..27931af0774 100644 --- a/tests/common/cache/mock_redis.py +++ b/tests/common/cache/mock_redis.py @@ -125,5 +125,18 @@ def expire(self, key: str, seconds: int) -> bool: return True return False + + def ping(self) -> bool: + """Mock ping - always returns True.""" + return True + + def get_keys_by_prefix(self, prefix: str) -> List[str]: + """Get all keys matching the prefix.""" + return [k for k in self.keys() if k.startswith(prefix)] + + def set_with_autoexpire(self, key: str, value: Any, ex: Optional[int] = None) -> bool: + """Set a key with optional expiration.""" + return self.set(key, value, ex=ex) + def pipeline(self) -> MockPipeline: return MockPipeline(self._data, self._sets) diff --git a/tests/common/cache/test_dsr_store_production_compatibility.py b/tests/common/cache/test_dsr_store_production_compatibility.py new file mode 100644 index 00000000000..554002e12d9 --- /dev/null +++ b/tests/common/cache/test_dsr_store_production_compatibility.py @@ -0,0 +1,362 @@ +""" +Production compatibility tests for DSR cache migration. + +These tests simulate a production deployment scenario where: +1. Old code has cached DSR data using legacy key formats (id-{id}-*) +2. New code is deployed and must read/process those in-flight requests +3. New code continues to work correctly with legacy keys + +This validates that the migration won't break production requests that are +already in-flight when the new code is deployed. +""" + +import json +import uuid +from unittest.mock import MagicMock, patch + +import pytest + +# PrivacyRequest imported inside tests to ensure get_cache is patched +from fides.api.schemas.redis_cache import Identity +from fides.api.util.cache import ( + FidesopsRedis, + get_cache, + get_drp_request_body_cache_key, + get_encryption_cache_key, + get_identity_cache_key, +) +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager +from tests.common.cache.mock_redis import MockRedis + + +@pytest.fixture +def mock_redis(): + """Shared MockRedis instance.""" + return MockRedis() + + +@pytest.fixture +def pr_id(): + """Generate unique privacy request ID.""" + return f"pri_{uuid.uuid4()}" + + +@pytest.mark.unit +class TestProductionCompatibilityLegacyKeys: + """ + Test that new code can read and process privacy requests that were + cached by old code using legacy key formats. + """ + + def simulate_legacy_cache_write( + self, cache: FidesopsRedis, pr_id: str, identity: Identity, encryption_key: str + ) -> None: + """ + Simulate how old code would cache data - using direct cache.set_with_autoexpire + with legacy key formats. 
+ """ + # Legacy identity caching + identity_dict = identity.labeled_dict() + for key, value in identity_dict.items(): + if value is not None: + if isinstance(value, dict): + # LabeledIdentity - encode as JSON + cache.set_with_autoexpire( + get_identity_cache_key(pr_id, key), json.dumps(value) + ) + else: + cache.set_with_autoexpire( + get_identity_cache_key(pr_id, key), value + ) + + # Legacy encryption key caching + cache.set_with_autoexpire( + get_encryption_cache_key(pr_id, "key"), encryption_key + ) + + def test_privacy_request_reads_legacy_identity_during_processing( + self, mock_redis, pr_id + ): + """ + Production scenario: Privacy request was created and cached by old code. + New code reads identity during request processing. + """ + # Simulate old code caching identity + identity = Identity(email="user@example.com", phone_number="+1234567890") + encryption_key = "test-encryption-key-12345" + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + cache = get_cache() + self.simulate_legacy_cache_write(cache, pr_id, identity, encryption_key) + + # Simulate PrivacyRequest instance (minimal mock) + pr = MagicMock() + pr.id = pr_id + + # Import PrivacyRequest inside patch context + from fides.api.models.privacy_request import PrivacyRequest + + # New code reads cached identity - must be within patch context + identity_data = PrivacyRequest.get_cached_identity_data(pr) + + # Should successfully read from legacy keys + assert identity_data["email"] == "user@example.com" + assert identity_data["phone_number"] == "+1234567890" + + # Legacy keys should be migrated to new format + from fides.api.util.cache import get_dsr_cache_store + + with get_dsr_cache_store() as store: + assert store.get_identity(pr_id, "email") == "user@example.com" + # Legacy key should be deleted after migration + assert mock_redis.get(get_identity_cache_key(pr_id, "email")) is None + + def test_privacy_request_reads_legacy_encryption_during_processing( + self, mock_redis, pr_id + ): + """ + Production scenario: Encryption key was cached by old code. + New code reads encryption key during request processing. + """ + encryption_key = "legacy-encryption-key-16b" # 16 bytes + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + cache = get_cache() + cache.set_with_autoexpire( + get_encryption_cache_key(pr_id, "key"), encryption_key + ) + + # Import PrivacyRequest inside patch context + from fides.api.models.privacy_request import PrivacyRequest + + # New code reads encryption key + pr = MagicMock() + pr.id = pr_id + cached_key = PrivacyRequest.get_cached_encryption_key(pr) + + assert cached_key == encryption_key + + # Legacy key should be migrated + from fides.api.util.cache import get_dsr_cache_store + + with get_dsr_cache_store() as store: + assert store.get_encryption(pr_id, "key") == encryption_key + assert ( + mock_redis.get(get_encryption_cache_key(pr_id, "key")) is None + ) + + def test_encryption_utils_reads_legacy_encryption_key( + self, mock_redis, pr_id + ): + """ + Production scenario: Encryption key cached by old code. + encrypt_access_request_results reads it during processing. 
+ """ + encryption_key = "0123456789abcdef" # 16 bytes + test_data = "sensitive data to encrypt" + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + cache = get_cache() + cache.set_with_autoexpire( + get_encryption_cache_key(pr_id, "key"), encryption_key + ) + + # New code encrypts data using cached key + from fides.api.tasks.encryption_utils import encrypt_access_request_results + + encrypted = encrypt_access_request_results(test_data, pr_id) + + # Should successfully encrypt (result is base64 string) + assert isinstance(encrypted, str) + assert len(encrypted) > 0 + assert encrypted != test_data # Should be encrypted, not plaintext + + def test_mixed_legacy_and_new_keys_same_request(self, mock_redis, pr_id): + """ + Production scenario: Some fields cached by old code, some by new code + (e.g., request started before deployment, continued after). + """ + # Old code cached identity + identity = Identity(email="legacy@example.com") + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + cache = get_cache() + cache.set_with_autoexpire( + get_identity_cache_key(pr_id, "email"), identity.email + ) + + # New code caches encryption + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + from fides.api.util.cache import get_dsr_cache_store + + with get_dsr_cache_store() as store: + store.write_encryption(pr_id, "key", "new-encryption-key") + + # Both should be readable + pr = MagicMock() + pr.id = pr_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + from fides.api.models.privacy_request import PrivacyRequest + + # get_cached_identity_data should find and migrate the legacy key + identity_data = PrivacyRequest.get_cached_identity_data(pr) + # After migration, the data should be available in the returned dict + assert identity_data["email"] == "legacy@example.com" + + encryption_key = PrivacyRequest.get_cached_encryption_key(pr) + assert encryption_key == "new-encryption-key" + + # Verify migration happened - legacy key should be deleted + assert mock_redis.get(get_identity_cache_key(pr_id, "email")) is None + + def test_legacy_drp_request_body_readable(self, mock_redis, pr_id): + """ + Production scenario: DRP request body cached by old code. + New code reads it during processing. 
+ """ + # Simulate old code caching DRP body + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + cache = get_cache() + cache.set_with_autoexpire( + get_drp_request_body_cache_key(pr_id, "meta"), + "DrpMeta(version='0.5')", + ) + cache.set_with_autoexpire( + get_drp_request_body_cache_key(pr_id, "regime"), "ccpa" + ) + + # New code reads DRP body + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + from fides.api.util.cache import get_dsr_cache_store + + with get_dsr_cache_store() as store: + meta = store.get_drp(pr_id, "meta") + regime = store.get_drp(pr_id, "regime") + + assert meta == "DrpMeta(version='0.5')" + assert regime == "ccpa" + + # Legacy keys should be migrated + assert ( + mock_redis.get(get_drp_request_body_cache_key(pr_id, "meta")) + is None + ) + + def test_legacy_custom_fields_readable(self, mock_redis, pr_id): + """ + Production scenario: Custom fields cached by old code. + New code reads them during processing. + """ + from fides.api.util.cache import ( + get_custom_privacy_request_field_cache_key, + ) + + # Simulate old code caching custom fields + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + cache = get_cache() + cache.set_with_autoexpire( + get_custom_privacy_request_field_cache_key(pr_id, "department"), + json.dumps("Engineering"), + ) + + # New code reads custom fields + pr = MagicMock() + pr.id = pr_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + from fides.api.models.privacy_request import PrivacyRequest + + custom_fields = PrivacyRequest.get_cached_custom_privacy_request_fields(pr) + + assert custom_fields["department"] == "Engineering" + + def test_concurrent_legacy_and_new_requests(self, mock_redis): + """ + Production scenario: Multiple requests in flight - some with legacy keys, + some with new keys. Verify isolation and correct reads. 
+ """ + pr1_id = f"pri_{uuid.uuid4()}" + pr2_id = f"pri_{uuid.uuid4()}" + + # PR1: cached by old code (legacy keys) + identity1 = Identity(email="legacy1@example.com") + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + cache = get_cache() + cache.set_with_autoexpire( + get_identity_cache_key(pr1_id, "email"), identity1.email + ) + + # PR2: cached by new code (new keys) + identity2 = Identity(email="new2@example.com") + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + from fides.api.util.cache import get_dsr_cache_store + + with get_dsr_cache_store() as store: + store.cache_identity_data( + pr2_id, + {"email": identity2.email}, + expire_seconds=3600, + ) + + # Both should be readable correctly + pr1 = MagicMock() + pr1.id = pr1_id + pr2 = MagicMock() + pr2.id = pr2_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + from fides.api.models.privacy_request import PrivacyRequest + + data1 = PrivacyRequest.get_cached_identity_data(pr1) + data2 = PrivacyRequest.get_cached_identity_data(pr2) + + assert data1["email"] == "legacy1@example.com" + assert data2["email"] == "new2@example.com" + + def test_legacy_keys_migrated_on_first_read_not_on_write( + self, mock_redis, pr_id + ): + """ + Production scenario: Legacy keys exist. New code writes additional data. + Legacy keys should only migrate on read, not interfere with new writes. + """ + # Old code cached identity + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + cache = get_cache() + cache.set_with_autoexpire( + get_identity_cache_key(pr_id, "email"), "legacy@example.com" + ) + + # New code writes encryption (shouldn't trigger migration) + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + from fides.api.util.cache import get_dsr_cache_store + + with get_dsr_cache_store() as store: + store.write_encryption(pr_id, "key", "new-key") + + # Legacy identity key should still exist (not migrated yet) + assert mock_redis.get(get_identity_cache_key(pr_id, "email")) == "legacy@example.com" + + # Reading identity should trigger migration + pr = MagicMock() + pr.id = pr_id + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + from fides.api.models.privacy_request import PrivacyRequest + from fides.api.util.cache import get_dsr_cache_store + + # get_cached_identity_data calls get_identity which should trigger migration + PrivacyRequest.get_cached_identity_data(pr) + + # Verify migration happened by checking the store directly + with get_dsr_cache_store() as store: + # The new key should exist + assert store.get_identity(pr_id, "email") == "legacy@example.com" + + # Now legacy key should be migrated (deleted) + assert mock_redis.get(get_identity_cache_key(pr_id, "email")) is None + + with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with get_dsr_cache_store() as store: + assert store.get_identity(pr_id, "email") == "legacy@example.com" diff --git a/tests/ops/api/v1/endpoints/test_drp_endpoints.py b/tests/ops/api/v1/endpoints/test_drp_endpoints.py index 7c6a15f37e6..862f341bf65 100644 --- 
a/tests/ops/api/v1/endpoints/test_drp_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_drp_endpoints.py @@ -18,7 +18,7 @@ PrivacyRequestDRPStatus, PrivacyRequestStatus, ) -from fides.api.util.cache import cache_task_tracking_key, get_drp_request_body_cache_key +from fides.api.util.cache import cache_task_tracking_key, get_dsr_cache_store from fides.common.scope_registry import ( POLICY_READ, PRIVACY_REQUEST_READ, @@ -50,7 +50,6 @@ def test_create_drp_privacy_request( db, api_client: TestClient, policy_drp_action, - cache, ): TEST_EMAIL = "test@example.com" TEST_PHONE_NUMBER = "+12345678910" @@ -76,26 +75,15 @@ def test_create_drp_privacy_request( pr = PrivacyRequest.get(db=db, object_id=response_data["request_id"]) # test appropriate data is cached - meta_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="meta", - ) - assert cache.get(meta_key) == "DrpMeta(version='0.5')" - regime_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="regime", - ) - assert cache.get(regime_key) == "ccpa" - exercise_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="exercise", - ) - assert cache.get(exercise_key) == "['access']" - identity_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="identity", - ) - assert cache.get(identity_key) == encoded_identity + with get_dsr_cache_store() as store: + meta_value = store.get_drp(pr.id, "meta") + assert meta_value == "DrpMeta(version='0.5')" + regime_value = store.get_drp(pr.id, "regime") + assert regime_value == "ccpa" + exercise_value = store.get_drp(pr.id, "exercise") + assert exercise_value == "['access']" + identity_value = store.get_drp(pr.id, "identity") + assert identity_value == encoded_identity assert pr.get_cached_identity_data()["email"] == identity["email"] persisted_identity = pr.get_persisted_identity() @@ -115,7 +103,6 @@ def test_create_drp_privacy_request_unsupported_identity_props( db, api_client: TestClient, policy_drp_action, - cache, ): identity = {"email": "test@example.com", "address": "something"} encoded_identity: str = jwt.encode( @@ -136,26 +123,15 @@ def test_create_drp_privacy_request_unsupported_identity_props( pr = PrivacyRequest.get(db=db, object_id=response_data["request_id"]) # test appropriate data is cached - meta_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="meta", - ) - assert cache.get(meta_key) == "DrpMeta(version='0.5')" - regime_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="regime", - ) - assert cache.get(regime_key) == "ccpa" - exercise_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="exercise", - ) - assert cache.get(exercise_key) == "['access']" - identity_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="identity", - ) - assert cache.get(identity_key) == encoded_identity + with get_dsr_cache_store() as store: + meta_value = store.get_drp(pr.id, "meta") + assert meta_value == "DrpMeta(version='0.5')" + regime_value = store.get_drp(pr.id, "regime") + assert regime_value == "ccpa" + exercise_value = store.get_drp(pr.id, "exercise") + assert exercise_value == "['access']" + identity_value = store.get_drp(pr.id, "identity") + assert identity_value == encoded_identity assert pr.get_cached_identity_data()["email"] == identity["email"] assert "address" not in pr.get_cached_identity_data().keys() @@ -305,7 +281,6 @@ def 
test_create_drp_privacy_request_error_notification( url, db, api_client: TestClient, - cache, policy_drp_action, ): TEST_EMAIL = "test@example.com" @@ -357,26 +332,15 @@ def test_create_drp_privacy_request_error_notification( pr = PrivacyRequest.get(db=db, object_id=response_data["request_id"]) # test appropriate data is cached - meta_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="meta", - ) - assert cache.get(meta_key) == "DrpMeta(version='0.5')" - regime_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="regime", - ) - assert cache.get(regime_key) == "ccpa" - exercise_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="exercise", - ) - assert cache.get(exercise_key) == "['access']" - identity_key = get_drp_request_body_cache_key( - privacy_request_id=pr.id, - identity_attribute="identity", - ) - assert cache.get(identity_key) == encoded_identity + with get_dsr_cache_store() as store: + meta_value = store.get_drp(pr.id, "meta") + assert meta_value == "DrpMeta(version='0.5')" + regime_value = store.get_drp(pr.id, "regime") + assert regime_value == "ccpa" + exercise_value = store.get_drp(pr.id, "exercise") + assert exercise_value == "['access']" + identity_value = store.get_drp(pr.id, "identity") + assert identity_value == encoded_identity assert pr.get_cached_identity_data()["email"] == identity["email"] persisted_identity = pr.get_persisted_identity() diff --git a/tests/ops/models/privacy_request/test_privacy_request.py b/tests/ops/models/privacy_request/test_privacy_request.py index fcc62872e01..4958cb435bc 100644 --- a/tests/ops/models/privacy_request/test_privacy_request.py +++ b/tests/ops/models/privacy_request/test_privacy_request.py @@ -258,7 +258,7 @@ def test_delete_privacy_request_removes_cached_data( privacy_request.delete(db) from_db = PrivacyRequest.get(db=db, object_id=privacy_request.id) assert from_db is None - assert cache.get(key) is None + # privacy_request.delete() calls clear_cached_values(), so cache is already cleared def test_cache_identity_fallback_to_db( @@ -337,8 +337,9 @@ def test_custom_privacy_request_fields_fallback_to_db( privacy_request.get_cached_custom_privacy_request_fields() ) assert cached_custom_privacy_request_fields is not None - cache.delete(key) - assert cache.get(key) is None + # Delete using DSR store to clear the cached custom field + with get_dsr_cache_store() as store: + store.delete(privacy_request.id, f"custom_field:{custom_privacy_request_field.label}") assert ( privacy_request.get_cached_custom_privacy_request_fields() == cached_custom_privacy_request_fields diff --git a/tests/ops/tasks/test_encryption_utils.py b/tests/ops/tasks/test_encryption_utils.py index 71648068319..c79b91c607e 100644 --- a/tests/ops/tasks/test_encryption_utils.py +++ b/tests/ops/tasks/test_encryption_utils.py @@ -7,29 +7,29 @@ @pytest.fixture def mock_cache(): - with patch("fides.api.tasks.encryption_utils.get_cache") as mock_get_cache: - cache = MagicMock() - mock_get_cache.return_value = cache - yield cache + with patch("fides.api.tasks.encryption_utils.get_dsr_cache_store") as mock_get_store: + store = MagicMock() + mock_get_store.return_value.__enter__.return_value = store + yield store def test_encrypt_access_request_results_no_encryption_key(mock_cache): """Test that data is returned unencrypted when no encryption key is found in cache.""" - mock_cache.get.return_value = None + mock_cache.get_encryption.return_value = None test_data = "test_data" 
request_id = "test_request_id" result = encrypt_access_request_results(test_data, request_id) assert result == test_data - mock_cache.get.assert_called_once() + mock_cache.get_encryption.assert_called_once_with(request_id, "key") def test_encrypt_access_request_results_with_encryption_key(mock_cache): """Test that data is encrypted when encryption key is found in cache.""" # Use a 16-byte key (128 bits) for AES-GCM encryption_key = "0123456789abcdef" # 16 bytes - mock_cache.get.return_value = encryption_key + mock_cache.get_encryption.return_value = encryption_key test_data = "test_data" request_id = "test_request_id" @@ -38,13 +38,13 @@ def test_encrypt_access_request_results_with_encryption_key(mock_cache): # The result should be a base64 encoded string containing the nonce and encrypted data assert isinstance(result, str) assert len(result) > 0 - mock_cache.get.assert_called_once() + mock_cache.get_encryption.assert_called_once_with(request_id, "key") def test_encrypt_access_request_results_with_bytes_input(mock_cache): """Test that bytes input is properly handled and encrypted.""" encryption_key = "0123456789abcdef" # 16 bytes - mock_cache.get.return_value = encryption_key + mock_cache.get_encryption.return_value = encryption_key test_data = b"test_data" request_id = "test_request_id" @@ -52,13 +52,13 @@ def test_encrypt_access_request_results_with_bytes_input(mock_cache): assert isinstance(result, str) assert len(result) > 0 - mock_cache.get.assert_called_once() + mock_cache.get_encryption.assert_called_once_with(request_id, "key") def test_encrypt_access_request_results_empty_data(mock_cache): """Test that empty data is handled correctly.""" encryption_key = "0123456789abcdef" # 16 bytes - mock_cache.get.return_value = encryption_key + mock_cache.get_encryption.return_value = encryption_key test_data = "" request_id = "test_request_id" @@ -66,13 +66,13 @@ def test_encrypt_access_request_results_empty_data(mock_cache): assert isinstance(result, str) assert len(result) > 0 - mock_cache.get.assert_called_once() + mock_cache.get_encryption.assert_called_once_with(request_id, "key") def test_encrypt_access_request_results_special_characters(mock_cache): """Test that data with special characters is properly encrypted.""" encryption_key = "0123456789abcdef" # 16 bytes - mock_cache.get.return_value = encryption_key + mock_cache.get_encryption.return_value = encryption_key test_data = "test_data!@#$%^&*()_+" request_id = "test_request_id" @@ -80,4 +80,4 @@ def test_encrypt_access_request_results_special_characters(mock_cache): assert isinstance(result, str) assert len(result) > 0 - mock_cache.get.assert_called_once() + mock_cache.get_encryption.assert_called_once_with(request_id, "key") diff --git a/tests/ops/test_helpers/cache_secrets_helper.py b/tests/ops/test_helpers/cache_secrets_helper.py index c0694d98657..74260b46873 100644 --- a/tests/ops/test_helpers/cache_secrets_helper.py +++ b/tests/ops/test_helpers/cache_secrets_helper.py @@ -23,6 +23,22 @@ def clear_cache_identities(request_id: str) -> None: """Testing helper just removes some cached identities from the Privacy Request for testing. Some of our Privacy Request fixtures automatically cache identities - + this clears them using the DSR cache store. Handles both new and legacy key formats. 
""" + from fides.api.util.cache import get_cache, get_dsr_cache_store, get_identity_cache_key + cache: FidesopsRedis = get_cache() - cache.delete_keys_by_prefix(f"id-{request_id}-identity-") + + # First, try to get identity data which will migrate any legacy keys + with get_dsr_cache_store() as store: + identity_data = store.get_cached_identity_data(request_id) + # Delete all identity attributes found + for attr in identity_data.keys(): + store.delete(request_id, f"identity:{attr}") + + # Also scan for any remaining legacy identity keys and delete them + legacy_keys = cache.get_keys_by_prefix(f"id-{request_id}-identity-") + for legacy_key in legacy_keys: + # Extract attribute name and delete via store + attr = legacy_key.split("-")[-1] + store.delete(request_id, f"identity:{attr}") From 005017f0b5b0e29eb31020ef233807bbd9f69f2a Mon Sep 17 00:00:00 2001 From: John Ewart Date: Thu, 19 Mar 2026 18:15:21 -0700 Subject: [PATCH 41/50] Formatting fixes --- src/fides/common/cache/dsr_store.py | 10 +- tests/common/cache/mock_redis.py | 5 +- ...test_dsr_store_production_compatibility.py | 122 ++++++++++++------ .../privacy_request/test_privacy_request.py | 4 +- tests/ops/tasks/test_encryption_utils.py | 4 +- .../ops/test_helpers/cache_secrets_helper.py | 10 +- 6 files changed, 107 insertions(+), 48 deletions(-) diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index ef321ff638b..107c4338196 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -251,11 +251,13 @@ def get_cached_identity_data(self, dsr_id: str) -> Dict[str, Any]: # Filter for identity keys (both new and legacy formats) identity_keys = [k for k in all_keys if ":identity:" in k or "-identity-" in k] - + # Also scan for legacy identity keys directly, since get_all_keys() may return early # if an index exists (e.g., when other fields like encryption are cached) legacy_identity_prefix = f"id-{dsr_id}-identity-" - legacy_keys = list(self._redis.scan_iter(match=f"{legacy_identity_prefix}*", count=500)) + legacy_keys = list( + self._redis.scan_iter(match=f"{legacy_identity_prefix}*", count=500) + ) # Add legacy keys that aren't already in identity_keys for legacy_key in legacy_keys: if legacy_key not in identity_keys: @@ -290,7 +292,9 @@ def has_cached_identity_data(self, dsr_id: str) -> bool: # Also check for legacy identity keys directly, since get_all_keys() may return early # if an index exists (e.g., when other fields like encryption are cached) legacy_identity_prefix = f"id-{dsr_id}-identity-" - legacy_keys = list(self._redis.scan_iter(match=f"{legacy_identity_prefix}*", count=500)) + legacy_keys = list( + self._redis.scan_iter(match=f"{legacy_identity_prefix}*", count=500) + ) return len(legacy_keys) > 0 # --- Convenience: encryption --- diff --git a/tests/common/cache/mock_redis.py b/tests/common/cache/mock_redis.py index 27931af0774..480e1a9598e 100644 --- a/tests/common/cache/mock_redis.py +++ b/tests/common/cache/mock_redis.py @@ -125,7 +125,6 @@ def expire(self, key: str, seconds: int) -> bool: return True return False - def ping(self) -> bool: """Mock ping - always returns True.""" return True @@ -134,7 +133,9 @@ def get_keys_by_prefix(self, prefix: str) -> List[str]: """Get all keys matching the prefix.""" return [k for k in self.keys() if k.startswith(prefix)] - def set_with_autoexpire(self, key: str, value: Any, ex: Optional[int] = None) -> bool: + def set_with_autoexpire( + self, key: str, value: Any, ex: Optional[int] = None + ) -> bool: """Set a key 
with optional expiration.""" return self.set(key, value, ex=ex) diff --git a/tests/common/cache/test_dsr_store_production_compatibility.py b/tests/common/cache/test_dsr_store_production_compatibility.py index 554002e12d9..82b12fef3e8 100644 --- a/tests/common/cache/test_dsr_store_production_compatibility.py +++ b/tests/common/cache/test_dsr_store_production_compatibility.py @@ -66,9 +66,7 @@ def simulate_legacy_cache_write( get_identity_cache_key(pr_id, key), json.dumps(value) ) else: - cache.set_with_autoexpire( - get_identity_cache_key(pr_id, key), value - ) + cache.set_with_autoexpire(get_identity_cache_key(pr_id, key), value) # Legacy encryption key caching cache.set_with_autoexpire( @@ -86,7 +84,10 @@ def test_privacy_request_reads_legacy_identity_during_processing( identity = Identity(email="user@example.com", phone_number="+1234567890") encryption_key = "test-encryption-key-12345" - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): cache = get_cache() self.simulate_legacy_cache_write(cache, pr_id, identity, encryption_key) @@ -96,7 +97,7 @@ def test_privacy_request_reads_legacy_identity_during_processing( # Import PrivacyRequest inside patch context from fides.api.models.privacy_request import PrivacyRequest - + # New code reads cached identity - must be within patch context identity_data = PrivacyRequest.get_cached_identity_data(pr) @@ -121,7 +122,10 @@ def test_privacy_request_reads_legacy_encryption_during_processing( """ encryption_key = "legacy-encryption-key-16b" # 16 bytes - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): cache = get_cache() cache.set_with_autoexpire( get_encryption_cache_key(pr_id, "key"), encryption_key @@ -129,7 +133,7 @@ def test_privacy_request_reads_legacy_encryption_during_processing( # Import PrivacyRequest inside patch context from fides.api.models.privacy_request import PrivacyRequest - + # New code reads encryption key pr = MagicMock() pr.id = pr_id @@ -142,13 +146,9 @@ def test_privacy_request_reads_legacy_encryption_during_processing( with get_dsr_cache_store() as store: assert store.get_encryption(pr_id, "key") == encryption_key - assert ( - mock_redis.get(get_encryption_cache_key(pr_id, "key")) is None - ) + assert mock_redis.get(get_encryption_cache_key(pr_id, "key")) is None - def test_encryption_utils_reads_legacy_encryption_key( - self, mock_redis, pr_id - ): + def test_encryption_utils_reads_legacy_encryption_key(self, mock_redis, pr_id): """ Production scenario: Encryption key cached by old code. encrypt_access_request_results reads it during processing. 
@@ -156,7 +156,10 @@ def test_encryption_utils_reads_legacy_encryption_key( encryption_key = "0123456789abcdef" # 16 bytes test_data = "sensitive data to encrypt" - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): cache = get_cache() cache.set_with_autoexpire( get_encryption_cache_key(pr_id, "key"), encryption_key @@ -179,14 +182,20 @@ def test_mixed_legacy_and_new_keys_same_request(self, mock_redis, pr_id): """ # Old code cached identity identity = Identity(email="legacy@example.com") - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): cache = get_cache() cache.set_with_autoexpire( get_identity_cache_key(pr_id, "email"), identity.email ) # New code caches encryption - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): from fides.api.util.cache import get_dsr_cache_store with get_dsr_cache_store() as store: @@ -196,9 +205,12 @@ def test_mixed_legacy_and_new_keys_same_request(self, mock_redis, pr_id): pr = MagicMock() pr.id = pr_id - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): from fides.api.models.privacy_request import PrivacyRequest - + # get_cached_identity_data should find and migrate the legacy key identity_data = PrivacyRequest.get_cached_identity_data(pr) # After migration, the data should be available in the returned dict @@ -206,7 +218,7 @@ def test_mixed_legacy_and_new_keys_same_request(self, mock_redis, pr_id): encryption_key = PrivacyRequest.get_cached_encryption_key(pr) assert encryption_key == "new-encryption-key" - + # Verify migration happened - legacy key should be deleted assert mock_redis.get(get_identity_cache_key(pr_id, "email")) is None @@ -216,7 +228,10 @@ def test_legacy_drp_request_body_readable(self, mock_redis, pr_id): New code reads it during processing. 
""" # Simulate old code caching DRP body - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): cache = get_cache() cache.set_with_autoexpire( get_drp_request_body_cache_key(pr_id, "meta"), @@ -227,7 +242,10 @@ def test_legacy_drp_request_body_readable(self, mock_redis, pr_id): ) # New code reads DRP body - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): from fides.api.util.cache import get_dsr_cache_store with get_dsr_cache_store() as store: @@ -253,7 +271,10 @@ def test_legacy_custom_fields_readable(self, mock_redis, pr_id): ) # Simulate old code caching custom fields - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): cache = get_cache() cache.set_with_autoexpire( get_custom_privacy_request_field_cache_key(pr_id, "department"), @@ -264,9 +285,12 @@ def test_legacy_custom_fields_readable(self, mock_redis, pr_id): pr = MagicMock() pr.id = pr_id - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): from fides.api.models.privacy_request import PrivacyRequest - + custom_fields = PrivacyRequest.get_cached_custom_privacy_request_fields(pr) assert custom_fields["department"] == "Engineering" @@ -281,7 +305,10 @@ def test_concurrent_legacy_and_new_requests(self, mock_redis): # PR1: cached by old code (legacy keys) identity1 = Identity(email="legacy1@example.com") - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): cache = get_cache() cache.set_with_autoexpire( get_identity_cache_key(pr1_id, "email"), identity1.email @@ -289,7 +316,10 @@ def test_concurrent_legacy_and_new_requests(self, mock_redis): # PR2: cached by new code (new keys) identity2 = Identity(email="new2@example.com") - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): from fides.api.util.cache import get_dsr_cache_store with get_dsr_cache_store() as store: @@ -305,50 +335,63 @@ def test_concurrent_legacy_and_new_requests(self, mock_redis): pr2 = MagicMock() pr2.id = pr2_id - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): from fides.api.models.privacy_request import PrivacyRequest - + data1 = PrivacyRequest.get_cached_identity_data(pr1) data2 = 
PrivacyRequest.get_cached_identity_data(pr2) assert data1["email"] == "legacy1@example.com" assert data2["email"] == "new2@example.com" - def test_legacy_keys_migrated_on_first_read_not_on_write( - self, mock_redis, pr_id - ): + def test_legacy_keys_migrated_on_first_read_not_on_write(self, mock_redis, pr_id): """ Production scenario: Legacy keys exist. New code writes additional data. Legacy keys should only migrate on read, not interfere with new writes. """ # Old code cached identity - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): cache = get_cache() cache.set_with_autoexpire( get_identity_cache_key(pr_id, "email"), "legacy@example.com" ) # New code writes encryption (shouldn't trigger migration) - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): from fides.api.util.cache import get_dsr_cache_store with get_dsr_cache_store() as store: store.write_encryption(pr_id, "key", "new-key") # Legacy identity key should still exist (not migrated yet) - assert mock_redis.get(get_identity_cache_key(pr_id, "email")) == "legacy@example.com" + assert ( + mock_redis.get(get_identity_cache_key(pr_id, "email")) + == "legacy@example.com" + ) # Reading identity should trigger migration pr = MagicMock() pr.id = pr_id - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): from fides.api.models.privacy_request import PrivacyRequest from fides.api.util.cache import get_dsr_cache_store - + # get_cached_identity_data calls get_identity which should trigger migration PrivacyRequest.get_cached_identity_data(pr) - + # Verify migration happened by checking the store directly with get_dsr_cache_store() as store: # The new key should exist @@ -357,6 +400,9 @@ def test_legacy_keys_migrated_on_first_read_not_on_write( # Now legacy key should be migrated (deleted) assert mock_redis.get(get_identity_cache_key(pr_id, "email")) is None - with patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis): + with ( + patch("fides.api.util.cache.get_cache", return_value=mock_redis), + patch("fides.api.util.cache._connection", mock_redis), + ): with get_dsr_cache_store() as store: assert store.get_identity(pr_id, "email") == "legacy@example.com" diff --git a/tests/ops/models/privacy_request/test_privacy_request.py b/tests/ops/models/privacy_request/test_privacy_request.py index 4958cb435bc..d086dd89993 100644 --- a/tests/ops/models/privacy_request/test_privacy_request.py +++ b/tests/ops/models/privacy_request/test_privacy_request.py @@ -339,7 +339,9 @@ def test_custom_privacy_request_fields_fallback_to_db( assert cached_custom_privacy_request_fields is not None # Delete using DSR store to clear the cached custom field with get_dsr_cache_store() as store: - store.delete(privacy_request.id, f"custom_field:{custom_privacy_request_field.label}") + store.delete( + privacy_request.id, f"custom_field:{custom_privacy_request_field.label}" + ) 
assert ( privacy_request.get_cached_custom_privacy_request_fields() == cached_custom_privacy_request_fields diff --git a/tests/ops/tasks/test_encryption_utils.py b/tests/ops/tasks/test_encryption_utils.py index c79b91c607e..6d3d6c42870 100644 --- a/tests/ops/tasks/test_encryption_utils.py +++ b/tests/ops/tasks/test_encryption_utils.py @@ -7,7 +7,9 @@ @pytest.fixture def mock_cache(): - with patch("fides.api.tasks.encryption_utils.get_dsr_cache_store") as mock_get_store: + with patch( + "fides.api.tasks.encryption_utils.get_dsr_cache_store" + ) as mock_get_store: store = MagicMock() mock_get_store.return_value.__enter__.return_value = store yield store diff --git a/tests/ops/test_helpers/cache_secrets_helper.py b/tests/ops/test_helpers/cache_secrets_helper.py index 74260b46873..736e8982704 100644 --- a/tests/ops/test_helpers/cache_secrets_helper.py +++ b/tests/ops/test_helpers/cache_secrets_helper.py @@ -25,17 +25,21 @@ def clear_cache_identities(request_id: str) -> None: Some of our Privacy Request fixtures automatically cache identities - this clears them using the DSR cache store. Handles both new and legacy key formats. """ - from fides.api.util.cache import get_cache, get_dsr_cache_store, get_identity_cache_key + from fides.api.util.cache import ( + get_cache, + get_dsr_cache_store, + get_identity_cache_key, + ) cache: FidesopsRedis = get_cache() - + # First, try to get identity data which will migrate any legacy keys with get_dsr_cache_store() as store: identity_data = store.get_cached_identity_data(request_id) # Delete all identity attributes found for attr in identity_data.keys(): store.delete(request_id, f"identity:{attr}") - + # Also scan for any remaining legacy identity keys and delete them legacy_keys = cache.get_keys_by_prefix(f"id-{request_id}-identity-") for legacy_key in legacy_keys: From c56d7aa23dbbc0fb23c311327cc948f21172c73c Mon Sep 17 00:00:00 2001 From: John Ewart Date: Thu, 19 Mar 2026 19:04:27 -0700 Subject: [PATCH 42/50] Add changelog --- changelog/7708-adopt-structured-cache-for-dsrs.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelog/7708-adopt-structured-cache-for-dsrs.yaml diff --git a/changelog/7708-adopt-structured-cache-for-dsrs.yaml b/changelog/7708-adopt-structured-cache-for-dsrs.yaml new file mode 100644 index 00000000000..b1dad88aa26 --- /dev/null +++ b/changelog/7708-adopt-structured-cache-for-dsrs.yaml @@ -0,0 +1,4 @@ +type: Changed +description: Migrated DSR workflows to use structured caching mechanism with secondary index, ensuring backward compatibility with legacy cache keys for in-flight requests during deployment. +pr: 7708 +labels: [] From bb75535e10855eec1b39ebabfb09e3534f35d74e Mon Sep 17 00:00:00 2001 From: Jade Wibbels Date: Fri, 20 Mar 2026 11:40:54 -0600 Subject: [PATCH 43/50] Fix 3 TLA+-confirmed concurrency bugs in DSRCacheStore, replace hand-written Redis mocks with autospec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug 1 (get_all_keys): Index no longer masks legacy keys. Uses a __migrated:{dsr_id} flag (24h TTL) to skip SCAN after confirming no legacy stragglers remain. Filters internal keys from SCAN results. Bug 2 (get_with_legacy): Re-checks new key after double-miss to handle the race where another reader migrates between the two GETs. Bug 3 (clear): Second SCAN pass catches keys written by concurrent migrations. Invalidates migration flag so future reads re-scan. 
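Illustrative sketch of the Bug 2 interleaving and its fix, condensed from
the get_with_legacy() change below (redis_client stands in for the store's
self._redis; key names are simplified for exposition):

    def get_with_migration(redis_client, new_key, legacy_key):
        val = redis_client.get(new_key)
        if val is not None:
            return val
        val = redis_client.get(legacy_key)
        if val is None:
            # A concurrent reader may have migrated the value
            # (SET new_key / DEL legacy_key) between our two GETs,
            # so a double miss does not mean the value is gone.
            # Re-check the new key before reporting a miss.
            return redis_client.get(new_key)
        # Legacy hit: copy forward, then retire the legacy key.
        redis_client.set(new_key, val)
        redis_client.delete(legacy_key)
        return val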
Removes ad-hoc scan_iter workarounds from get_cached_identity_data and has_cached_identity_data that were papering over Bug 1. Replaces hand-written MockRedis/MockPipeline/InMemoryRedis with create_mock_redis() factory using create_autospec(redis.Redis) — method signatures are now validated against the real Redis client, preventing the missing-method class of bugs (exists/setex were missing before). Co-Authored-By: Claude Opus 4.6 --- src/fides/common/cache/dsr_store.py | 94 +++--- tests/common/cache/mock_redis.py | 288 ++++++++++-------- tests/common/cache/test_dsr_store.py | 165 ++-------- .../cache/test_dsr_store_clear_integration.py | 10 +- ...est_dsr_store_custom_fields_integration.py | 6 +- .../cache/test_dsr_store_drp_integration.py | 5 +- .../test_dsr_store_identity_integration.py | 6 +- .../common/cache/test_dsr_store_migration.py | 5 +- ...test_dsr_store_production_compatibility.py | 8 +- tests/common/cache/test_manager.py | 46 +-- 10 files changed, 290 insertions(+), 343 deletions(-) diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index 107c4338196..79500cd22e2 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -14,7 +14,7 @@ later if we want to avoid index consistency concerns. """ -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, Optional, Union from redis import Redis @@ -91,12 +91,14 @@ def get_with_legacy( Get value for part; if missing, try legacy_key. If found in legacy only and migrate_legacy_on_read, copy to new key, delete legacy, add to index. """ - val = self._redis.get(_dsr_key(dsr_id, part)) + new_key = _dsr_key(dsr_id, part) + val = self._redis.get(new_key) if val is not None: return val val = self._redis.get(legacy_key) if val is None: - return None + # Re-check: another reader may have migrated between our two GETs + return self._redis.get(new_key) if self._migrate_on_read: self.set(dsr_id, part, val) self._redis.delete(legacy_key) @@ -252,17 +254,6 @@ def get_cached_identity_data(self, dsr_id: str) -> Dict[str, Any]: # Filter for identity keys (both new and legacy formats) identity_keys = [k for k in all_keys if ":identity:" in k or "-identity-" in k] - # Also scan for legacy identity keys directly, since get_all_keys() may return early - # if an index exists (e.g., when other fields like encryption are cached) - legacy_identity_prefix = f"id-{dsr_id}-identity-" - legacy_keys = list( - self._redis.scan_iter(match=f"{legacy_identity_prefix}*", count=500) - ) - # Add legacy keys that aren't already in identity_keys - for legacy_key in legacy_keys: - if legacy_key not in identity_keys: - identity_keys.append(legacy_key) - for key in identity_keys: # Extract attribute name from key # New format: dsr:{id}:identity:{attr} @@ -286,16 +277,7 @@ def has_cached_identity_data(self, dsr_id: str) -> bool: Returns True if any identity keys exist (legacy or new format). 
""" all_keys = self.get_all_keys(dsr_id) - has_identity = any(":identity:" in k or "-identity-" in k for k in all_keys) - if has_identity: - return True - # Also check for legacy identity keys directly, since get_all_keys() may return early - # if an index exists (e.g., when other fields like encryption are cached) - legacy_identity_prefix = f"id-{dsr_id}-identity-" - legacy_keys = list( - self._redis.scan_iter(match=f"{legacy_identity_prefix}*", count=500) - ) - return len(legacy_keys) > 0 + return any(":identity:" in k or "-identity-" in k for k in all_keys) # --- Convenience: encryption --- @@ -441,41 +423,67 @@ def get_retry_count(self, dsr_id: str) -> Optional[Union[str, bytes]]: part = "retry_count" return self.get_with_legacy(dsr_id, part, KeyMapper.retry_count(dsr_id)[1]) - # --- List / clear (unchanged) --- + # --- List / clear --- - def get_all_keys(self, dsr_id: str) -> List[str]: + def get_all_keys(self, dsr_id: str) -> list[str]: """ Return all cache keys for this DSR. - Uses the index first; if empty, falls back to SCAN for legacy keys - and optionally backfills the index. + + Uses the index first. If a migration flag confirms no legacy keys remain, + returns index contents directly. Otherwise, does a one-time SCAN to find + legacy stragglers, backfills them into the index, and sets the migration + flag so future calls skip the SCAN. """ index_prefix = _dsr_index_prefix(dsr_id) keys = self._manager.get_keys_by_index(index_prefix) - if keys: + + # If we've already confirmed no legacy keys remain, index is authoritative + migration_key = f"__migrated:{dsr_id}" + if keys and self._redis.exists(migration_key): return keys - legacy_keys = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) - if not legacy_keys: + + # SCAN for legacy keys (one-time per DSR until migration confirmed) + # Filter out internal keys (__migrated:, __idx:) that match the SCAN pattern + legacy_keys = [ + k + for k in self._redis.scan_iter(match=f"*{dsr_id}*", count=500) + if not k.startswith("__migrated:") and not k.startswith("__idx:") + ] + indexed = set(keys) + legacy_set = set(legacy_keys) + all_keys = list(indexed | legacy_set) if keys else legacy_keys + + if not all_keys: return [] + if self._backfill: for k in legacy_keys: - self._manager.add_key_to_index(index_prefix, k) - return list(legacy_keys) + if k not in indexed: + self._manager.add_key_to_index(index_prefix, k) + + # If index existed and no legacy keys found outside it, mark as migrated + if keys and not (legacy_set - indexed): + self._redis.setex(migration_key, 86400, "1") # 24h TTL + + return all_keys def clear(self, dsr_id: str) -> None: """ Delete all cache keys for this DSR and remove the index. Always uses SCAN to find all keys (both indexed and legacy) to ensure - complete cleanup in mixed-key scenarios. + complete cleanup in mixed-key scenarios. Does a second SCAN pass to + catch keys written by concurrent migrations between the first SCAN + and DELETE. 
""" - # Use SCAN to find ALL keys (indexed + legacy) - all_keys_via_scan = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) - + all_keys = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) index_prefix = _dsr_index_prefix(dsr_id) - - # Delete all found keys in batch - if all_keys_via_scan: - self._redis.delete(*all_keys_via_scan) - - # Delete the index itself + if all_keys: + self._redis.delete(*all_keys) self._manager.delete_index(index_prefix) + # Invalidate migration flag so future reads re-scan + self._redis.delete(f"__migrated:{dsr_id}") + # Second pass: catch keys written by concurrent migrations + stragglers = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) + if stragglers: + self._redis.delete(*stragglers) diff --git a/tests/common/cache/mock_redis.py b/tests/common/cache/mock_redis.py index 480e1a9598e..17771fb6c75 100644 --- a/tests/common/cache/mock_redis.py +++ b/tests/common/cache/mock_redis.py @@ -1,143 +1,189 @@ """ -Shared in-memory Redis mock for cache tests. +Autospec'd Redis mock with in-memory backing store for cache tests. -Provides MockPipeline and MockRedis with pipeline(), ttl(), expire(), scan_iter(), -and the operations needed by RedisCacheManager and DSRCacheStore. +Uses ``create_autospec(redis.Redis)`` so that: +- Method signatures are validated against the real Redis client +- Missing methods surface as clear errors, not silent misbehavior +- New Redis methods used in production code are auto-available """ import fnmatch -from typing import Any, Iterator, List, Optional, Set, Union - - -class MockPipeline: - """In-memory pipeline that batches commands and executes atomically.""" - - def __init__(self, data: dict, sets: dict) -> None: - self._data = data - self._sets = sets - self._commands: list = [] - - def set(self, key: str, value: Any, ex: Optional[int] = None) -> "MockPipeline": - self._commands.append(("set", (key, value, ex))) - return self - - def sadd(self, key: str, member: str) -> "MockPipeline": - self._commands.append(("sadd", (key, member))) - return self - - def delete(self, *keys: str) -> "MockPipeline": - self._commands.append(("delete", keys)) - return self - - def srem(self, key: str, member: str) -> "MockPipeline": - self._commands.append(("srem", (key, member))) - return self - - def execute(self) -> list: - results = [] - for op, args in self._commands: - if op == "set": - key, value, _ = args - self._data[key] = value - results.append(True) - elif op == "sadd": - key, member = args - if key not in self._sets: - self._sets[key] = set() - self._sets[key].add(member) - results.append(1) - elif op == "delete": - for k in args: - self._data.pop(k, None) - self._sets.pop(k, None) - results.append(len(args)) - elif op == "srem": - key, member = args - if key in self._sets: - self._sets[key].discard(member) - if not self._sets[key]: - del self._sets[key] - results.append(1) - self._commands = [] - return results - - -class MockRedis: - """In-memory Redis mock for RedisCacheManager and DSRCacheStore tests.""" - - def __init__(self) -> None: - self._data: dict = {} - self._sets: dict = {} - self._ttl: dict = {} # key -> seconds until expiry (simplified; no decay) - - def get(self, key: str) -> Optional[Any]: - return self._data.get(key) - - def set(self, key: str, value: Any, ex: Optional[int] = None) -> bool: - self._data[key] = value +from typing import Any +from unittest.mock import MagicMock, create_autospec + +import redis as redis_lib + +__all__ = ["create_mock_redis"] + + +def create_mock_redis() -> 
MagicMock: + """ + Create an autospec'd ``redis.Redis`` mock with in-memory state. + + The mock validates method signatures against real ``redis.Redis``, + while providing stateful in-memory behavior via side_effects. + + Internal state is accessible for test assertions:: + + mock._data -- dict of string keys to values + mock._sets -- dict of set keys to set[str] + mock._ttls -- dict of keys to TTL seconds + """ + mock = create_autospec(redis_lib.Redis, instance=True) + + _data: dict[str, Any] = {} + _sets: dict[str, set[str]] = {} + _ttls: dict[str, int] = {} + + # Expose state for test assertions + mock._data = _data + mock._sets = _sets + mock._ttls = _ttls + + # --- Core Redis methods --- + + def _get(name): + return _data.get(name) + + def _set(name, value, ex=None, **kwargs): + _data[name] = value + if ex is not None: + _ttls[name] = ex return True - def delete(self, *keys: str) -> int: + def _setex(name, time, value): + _data[name] = value + _ttls[name] = time + return True + + def _delete(*names): count = 0 - for k in keys: - if k in self._data: - del self._data[k] + for n in names: + if n in _data: + del _data[n] count += 1 - if k in self._sets: - del self._sets[k] + if n in _sets: + del _sets[n] count += 1 - self._ttl.pop(k, None) + _ttls.pop(n, None) return count - def sadd(self, key: str, member: str) -> int: - if key not in self._sets: - self._sets[key] = set() - self._sets[key].add(member) - return 1 - - def srem(self, key: str, member: str) -> int: - if key in self._sets: - self._sets[key].discard(member) - if not self._sets[key]: - del self._sets[key] - return 1 - return 0 - - def smembers(self, key: str) -> Set[Union[str, bytes]]: - return self._sets.get(key, set()).copy() - - def keys(self, pattern: str = "*") -> List[str]: - all_keys = set(self._data) | set(self._sets) + def _exists(*names): + return sum(1 for n in names if n in _data or n in _sets) + + def _sadd(name, *values): + _sets.setdefault(name, set()).update(values) + return len(values) + + def _srem(name, *values): + if name in _sets: + for v in values: + _sets[name].discard(v) + if not _sets[name]: + del _sets[name] + return len(values) + + def _smembers(name): + return _sets.get(name, set()).copy() + + def _keys(pattern="*"): + all_keys = set(_data) | set(_sets) return [k for k in all_keys if fnmatch.fnmatch(k, pattern)] - def scan_iter(self, match: str = "*", count: Optional[int] = None) -> Iterator[str]: - """Iterate over keys matching the pattern (used by DSRCacheStore.clear).""" - yield from self.keys(match) + def _scan_iter(match="*", count=None, **kwargs): + return iter(_keys(match)) - def ttl(self, key: str) -> int: - if key not in self._data and key not in self._sets: + def _ttl_fn(name): + if name not in _data and name not in _sets: return -2 - return self._ttl.get(key, -1) + return _ttls.get(name, -1) - def expire(self, key: str, seconds: int) -> bool: - if key in self._data or key in self._sets: - self._ttl[key] = seconds + def _expire(name, time): + if name in _data or name in _sets: + _ttls[name] = time return True return False - def ping(self) -> bool: - """Mock ping - always returns True.""" + def _ping(**kwargs): return True - def get_keys_by_prefix(self, prefix: str) -> List[str]: - """Get all keys matching the prefix.""" - return [k for k in self.keys() if k.startswith(prefix)] - - def set_with_autoexpire( - self, key: str, value: Any, ex: Optional[int] = None - ) -> bool: - """Set a key with optional expiration.""" - return self.set(key, value, ex=ex) - - def pipeline(self) -> MockPipeline: 
- return MockPipeline(self._data, self._sets) + mock.get.side_effect = _get + mock.set.side_effect = _set + mock.setex.side_effect = _setex + mock.delete.side_effect = _delete + mock.exists.side_effect = _exists + mock.sadd.side_effect = _sadd + mock.srem.side_effect = _srem + mock.smembers.side_effect = _smembers + mock.keys.side_effect = _keys + mock.scan_iter.side_effect = _scan_iter + mock.ttl.side_effect = _ttl_fn + mock.expire.side_effect = _expire + mock.ping.side_effect = _ping + + # --- Pipeline --- + + def _make_pipeline(**kwargs): + pipe = MagicMock() + commands: list = [] + + def pipe_set(name, value, ex=None, **kw): + commands.append(("set", name, value, ex)) + return pipe + + def pipe_sadd(name, *values): + commands.append(("sadd", name, values)) + return pipe + + def pipe_delete(*names): + commands.append(("delete", names)) + return pipe + + def pipe_srem(name, *values): + commands.append(("srem", name, values)) + return pipe + + def pipe_execute(**kw): + results = [] + for cmd in commands: + if cmd[0] == "set": + _data[cmd[1]] = cmd[2] + if cmd[3] is not None: + _ttls[cmd[1]] = cmd[3] + results.append(True) + elif cmd[0] == "sadd": + _sets.setdefault(cmd[1], set()).update(cmd[2]) + results.append(len(cmd[2])) + elif cmd[0] == "delete": + for k in cmd[1]: + _data.pop(k, None) + _sets.pop(k, None) + _ttls.pop(k, None) + results.append(len(cmd[1])) + elif cmd[0] == "srem": + for v in cmd[2]: + if cmd[1] in _sets: + _sets[cmd[1]].discard(v) + results.append(len(cmd[2])) + commands.clear() + return results + + pipe.set.side_effect = pipe_set + pipe.sadd.side_effect = pipe_sadd + pipe.delete.side_effect = pipe_delete + pipe.srem.side_effect = pipe_srem + pipe.execute.side_effect = pipe_execute + return pipe + + mock.pipeline.side_effect = _make_pipeline + + # --- FidesopsRedis-specific methods (used in production compatibility tests) --- + + mock.set_with_autoexpire = MagicMock( + side_effect=lambda key, value, ex=None: _set(key, value, ex=ex) + ) + mock.get_keys_by_prefix = MagicMock( + side_effect=lambda prefix: [k for k in _keys() if k.startswith(prefix)] + ) + + return mock diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py index 228560cb9d2..e5a7afda5ba 100644 --- a/tests/common/cache/test_dsr_store.py +++ b/tests/common/cache/test_dsr_store.py @@ -1,113 +1,28 @@ """ -Tests for DSRCacheStore using an in-memory RedisCacheManager (dict + set). +Tests for DSRCacheStore using an autospec'd Redis mock. No real Redis required. 
""" -import fnmatch -from typing import Any, Dict, List, Optional, Set, Union - import pytest from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager +from tests.common.cache.mock_redis import create_mock_redis -RedisValue = Union[bytes, float, int, str] - - -class InMemoryRedis: - """Minimal Redis-like interface: get, set, delete, keys (glob pattern).""" - - def __init__(self) -> None: - self._data: Dict[str, RedisValue] = {} - - def get(self, key: str) -> Optional[Union[str, bytes]]: - val = self._data.get(key) - if val is None: - return None - return val if isinstance(val, (str, bytes)) else str(val) - - def set( - self, - key: str, - value: RedisValue, - ex: Optional[int] = None, - ) -> Optional[bool]: - self._data[key] = value - return True - - def delete(self, *keys: str) -> int: - """Remove keys; returns count removed (redis-py compatible).""" - removed = 0 - for key in keys: - if key in self._data: - del self._data[key] - removed += 1 - return removed - - def keys(self, pattern: str) -> List[str]: - """Glob-style: * matches any number of chars.""" - return [k for k in self._data if fnmatch.fnmatch(k, pattern)] - - def scan_iter(self, match: str = "*", count: Optional[int] = None): - """SCAN-compatible iterator; yields keys matching pattern (count ignored in-memory).""" - return iter(self.keys(match)) - - -class InMemoryRedisCacheManager: - """ - In-memory implementation of the RedisCacheManager interface: a dict for - key -> value and a dict of index_prefix -> set of keys for set_with_index. - """ - - def __init__(self) -> None: - self._redis = InMemoryRedis() - self._index: Dict[str, Set[str]] = {} - - def add_key_to_index(self, index_prefix: str, key: str) -> None: - self._index.setdefault(index_prefix, set()).add(key) - - def remove_key_from_index(self, index_prefix: str, key: str) -> None: - s = self._index.get(index_prefix) - if s is not None: - s.discard(key) - - def get_keys_by_index(self, index_prefix: str) -> List[str]: - return list(self._index.get(index_prefix, set())) - - def delete_index(self, index_prefix: str) -> None: - self._index.pop(index_prefix, None) - - def set_with_index( - self, - key: str, - value: RedisValue, - index_prefix: str, - expire_seconds: Optional[int] = None, - ) -> Optional[bool]: - result = self._redis.set(key, value, ex=expire_seconds) - self.add_key_to_index(index_prefix, key) - return result - - def delete_key_and_remove_from_index( - self, - key: str, - index_prefix: str, - ) -> None: - self._redis.delete(key) - self.remove_key_from_index(index_prefix, key) - @property - def redis(self) -> InMemoryRedis: - return self._redis +@pytest.fixture +def mock_redis(): + return create_mock_redis() @pytest.fixture -def in_memory_manager() -> InMemoryRedisCacheManager: - return InMemoryRedisCacheManager() +def manager(mock_redis) -> RedisCacheManager: + return RedisCacheManager(mock_redis) @pytest.fixture -def dsr_store(in_memory_manager: InMemoryRedisCacheManager) -> DSRCacheStore: - return DSRCacheStore(in_memory_manager) +def dsr_store(manager: RedisCacheManager) -> DSRCacheStore: + return DSRCacheStore(manager) @pytest.mark.unit @@ -122,16 +37,14 @@ def test_get_missing_returns_none(self, dsr_store: DSRCacheStore) -> None: assert dsr_store.get("pr-1", "identity:email") is None def test_set_with_index_registers_key_in_index( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + self, dsr_store: DSRCacheStore, mock_redis ) -> None: dsr_store.set("pr-1", 
"custom_field:foo", "bar") - keys = in_memory_manager.get_keys_by_index("dsr:pr-1") + keys = mock_redis.smembers("__idx:dsr:pr-1") assert "dsr:pr-1:custom_field:foo" in keys assert len(keys) == 1 - def test_get_all_keys_returns_indexed_keys( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager - ) -> None: + def test_get_all_keys_returns_indexed_keys(self, dsr_store: DSRCacheStore) -> None: dsr_store.write_custom_field("pr-1", "f1", "v1") dsr_store.write_identity("pr-1", "email", "e@x.com") keys = dsr_store.get_all_keys("pr-1") @@ -140,9 +53,7 @@ def test_get_all_keys_returns_indexed_keys( "dsr:pr-1:identity:email", } - def test_clear_removes_all_keys_and_index( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager - ) -> None: + def test_clear_removes_all_keys_and_index(self, dsr_store: DSRCacheStore) -> None: dsr_store.write_custom_field("pr-1", "f1", "v1") dsr_store.write_identity("pr-1", "email", "e@x.com") dsr_store.clear("pr-1") @@ -150,31 +61,29 @@ def test_clear_removes_all_keys_and_index( assert dsr_store.get("pr-1", "custom_field:f1") is None assert dsr_store.get("pr-1", "identity:email") is None - def test_delete_removes_key_and_index_entry( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager - ) -> None: + def test_delete_removes_key_and_index_entry(self, dsr_store: DSRCacheStore) -> None: dsr_store.set("pr-1", "identity:email", "e@x.com") dsr_store.delete("pr-1", "identity:email") assert dsr_store.get("pr-1", "identity:email") is None assert "dsr:pr-1:identity:email" not in dsr_store.get_all_keys("pr-1") def test_get_with_legacy_reads_new_key_first( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + self, dsr_store: DSRCacheStore ) -> None: dsr_store.write_identity("pr-1", "email", "new@example.com") # Legacy key not set; should still get from new key assert dsr_store.get_identity("pr-1", "email") == "new@example.com" def test_get_with_legacy_migrates_from_legacy_key( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager + self, dsr_store: DSRCacheStore, mock_redis ) -> None: # Simulate legacy data only (no new key) - in_memory_manager.redis.set("id-pr-1-identity-email", "legacy@example.com") + mock_redis.set("id-pr-1-identity-email", "legacy@example.com") result = dsr_store.get_identity("pr-1", "email") assert result == "legacy@example.com" # After migrate: new key should exist and legacy should be gone assert dsr_store.get("pr-1", "identity:email") == "legacy@example.com" - assert in_memory_manager.redis.get("id-pr-1-identity-email") is None + assert mock_redis.get("id-pr-1-identity-email") is None def test_write_custom_field_and_get_custom_field( self, dsr_store: DSRCacheStore @@ -186,9 +95,7 @@ def test_convenience_async_execution(self, dsr_store: DSRCacheStore) -> None: dsr_store.write_async_execution("pr-1", "celery-task-id-xyz") assert dsr_store.get_async_execution("pr-1") == "celery-task-id-xyz" - def test_retry_count( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager - ) -> None: + def test_retry_count(self, dsr_store: DSRCacheStore, mock_redis) -> None: """Mirrors cache.py get/increment/reset_privacy_request_retry_count.""" assert dsr_store.get_retry_count("pr-1") is None dsr_store.write_retry_count("pr-1", "3", expire_seconds=86400) @@ -196,39 +103,31 @@ def test_retry_count( dsr_store.delete("pr-1", "retry_count") assert dsr_store.get_retry_count("pr-1") is None # Legacy key migration - 
in_memory_manager.redis.set("id-pr-2-privacy-request-retry-count", "1") + mock_redis.set("id-pr-2-privacy-request-retry-count", "1") assert dsr_store.get_retry_count("pr-2") == "1" - assert ( - in_memory_manager.redis.get("id-pr-2-privacy-request-retry-count") is None - ) + assert mock_redis.get("id-pr-2-privacy-request-retry-count") is None - def test_drp( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager - ) -> None: + def test_drp(self, dsr_store: DSRCacheStore, mock_redis) -> None: """Mirrors privacy_request.py DRP body cache (get_drp_request_body_cache_key).""" dsr_store.write_drp("pr-1", "address", "encrypted-body", expire_seconds=300) assert dsr_store.get_drp("pr-1", "address") == "encrypted-body" assert dsr_store.get_drp("pr-1", "email") is None # Legacy key migration - in_memory_manager.redis.set("id-pr-2-drp-email", "legacy-drp") + mock_redis.set("id-pr-2-drp-email", "legacy-drp") assert dsr_store.get_drp("pr-2", "email") == "legacy-drp" - assert in_memory_manager.redis.get("id-pr-2-drp-email") is None + assert mock_redis.get("id-pr-2-drp-email") is None - def test_encryption( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager - ) -> None: + def test_encryption(self, dsr_store: DSRCacheStore, mock_redis) -> None: """Mirrors privacy_request.py / encryption_utils.py encryption key cache.""" dsr_store.write_encryption("pr-1", "key", "enc-key-123", expire_seconds=3600) assert dsr_store.get_encryption("pr-1", "key") == "enc-key-123" assert dsr_store.get_encryption("pr-1", "other") is None # Legacy key migration - in_memory_manager.redis.set("id-pr-2-encryption-key", "legacy-enc") + mock_redis.set("id-pr-2-encryption-key", "legacy-enc") assert dsr_store.get_encryption("pr-2", "key") == "legacy-enc" - assert in_memory_manager.redis.get("id-pr-2-encryption-key") is None + assert mock_redis.get("id-pr-2-encryption-key") is None - def test_masking_secret( - self, dsr_store: DSRCacheStore, in_memory_manager: InMemoryRedisCacheManager - ) -> None: + def test_masking_secret(self, dsr_store: DSRCacheStore, mock_redis) -> None: """Mirrors secrets_util.get_masking_secret cache read (and write path).""" dsr_store.write_masking_secret( "pr-1", "hash", "salt", "encoded-secret", expire_seconds=600 @@ -236,10 +135,8 @@ def test_masking_secret( assert dsr_store.get_masking_secret("pr-1", "hash", "salt") == "encoded-secret" assert dsr_store.get_masking_secret("pr-1", "hash", "other") is None # Legacy key migration - in_memory_manager.redis.set( - "id-pr-2-masking-secret-hash-pepper", "legacy-masking" - ) + mock_redis.set("id-pr-2-masking-secret-hash-pepper", "legacy-masking") assert ( dsr_store.get_masking_secret("pr-2", "hash", "pepper") == "legacy-masking" ) - assert in_memory_manager.redis.get("id-pr-2-masking-secret-hash-pepper") is None + assert mock_redis.get("id-pr-2-masking-secret-hash-pepper") is None diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index 9f5c60ceefe..2508e828fb0 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -11,7 +11,7 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import MockRedis +from tests.common.cache.mock_redis import create_mock_redis @pytest.mark.unit @@ -20,7 +20,7 @@ class TestPrivacyRequestClearCachedValues: def 
test_clear_removes_legacy_keys(self): """clear_cached_values removes legacy cache keys.""" - mock_redis = MockRedis() + mock_redis = create_mock_redis() pr_id = f"test-pr-{uuid.uuid4()}" # Simulate legacy cached data @@ -44,7 +44,7 @@ def test_clear_removes_legacy_keys(self): def test_clear_removes_new_keys(self): """clear_cached_values removes new-format cache keys.""" - mock_redis = MockRedis() + mock_redis = create_mock_redis() pr_id = f"test-pr-{uuid.uuid4()}" # Simulate new cached data via store @@ -65,7 +65,7 @@ def test_clear_removes_new_keys(self): def test_clear_removes_mixed_keys(self): """clear_cached_values removes both legacy and new keys.""" - mock_redis = MockRedis() + mock_redis = create_mock_redis() pr_id = f"test-pr-{uuid.uuid4()}" # Mixed: legacy identity, new encryption @@ -89,7 +89,7 @@ def test_clear_removes_mixed_keys(self): def test_clear_removes_index(self): """clear_cached_values removes the DSR index.""" - mock_redis = MockRedis() + mock_redis = create_mock_redis() pr_id = f"test-pr-{uuid.uuid4()}" manager = RedisCacheManager(mock_redis) diff --git a/tests/common/cache/test_dsr_store_custom_fields_integration.py b/tests/common/cache/test_dsr_store_custom_fields_integration.py index c009720769e..08352af9766 100644 --- a/tests/common/cache/test_dsr_store_custom_fields_integration.py +++ b/tests/common/cache/test_dsr_store_custom_fields_integration.py @@ -11,13 +11,13 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import MockRedis +from tests.common.cache.mock_redis import create_mock_redis @pytest.fixture def mock_redis(): - """Shared MockRedis instance.""" - return MockRedis() + """Shared mock Redis instance.""" + return create_mock_redis() @pytest.fixture diff --git a/tests/common/cache/test_dsr_store_drp_integration.py b/tests/common/cache/test_dsr_store_drp_integration.py index df5b4c6477e..e5fe96302d4 100644 --- a/tests/common/cache/test_dsr_store_drp_integration.py +++ b/tests/common/cache/test_dsr_store_drp_integration.py @@ -8,19 +8,18 @@ """ import uuid -from typing import Any, Dict import pytest from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import MockRedis +from tests.common.cache.mock_redis import create_mock_redis @pytest.fixture def mock_redis(): """In-memory Redis mock for isolated testing.""" - return MockRedis() + return create_mock_redis() @pytest.fixture diff --git a/tests/common/cache/test_dsr_store_identity_integration.py b/tests/common/cache/test_dsr_store_identity_integration.py index 8c10123be0d..9ffd9ef53fd 100644 --- a/tests/common/cache/test_dsr_store_identity_integration.py +++ b/tests/common/cache/test_dsr_store_identity_integration.py @@ -11,13 +11,13 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import MockRedis +from tests.common.cache.mock_redis import create_mock_redis @pytest.fixture def mock_redis(): - """Shared MockRedis instance.""" - return MockRedis() + """Shared mock Redis instance.""" + return create_mock_redis() @pytest.fixture diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py index 4e35de7f12d..a786360f1b4 100644 --- a/tests/common/cache/test_dsr_store_migration.py +++ b/tests/common/cache/test_dsr_store_migration.py @@ -5,13 +5,12 @@ """ import uuid -from 
typing import Any, Callable, Dict, List, Optional, Set, Union import pytest from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import MockPipeline, MockRedis +from tests.common.cache.mock_redis import create_mock_redis # Test data factories @@ -34,7 +33,7 @@ def make_new_key(dsr_id: str, part: str) -> str: @pytest.fixture def mock_redis(): - return MockRedis() + return create_mock_redis() @pytest.fixture diff --git a/tests/common/cache/test_dsr_store_production_compatibility.py b/tests/common/cache/test_dsr_store_production_compatibility.py index 82b12fef3e8..c5f155c69cb 100644 --- a/tests/common/cache/test_dsr_store_production_compatibility.py +++ b/tests/common/cache/test_dsr_store_production_compatibility.py @@ -25,15 +25,13 @@ get_encryption_cache_key, get_identity_cache_key, ) -from fides.common.cache.dsr_store import DSRCacheStore -from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import MockRedis +from tests.common.cache.mock_redis import create_mock_redis @pytest.fixture def mock_redis(): - """Shared MockRedis instance.""" - return MockRedis() + """Shared mock Redis instance.""" + return create_mock_redis() @pytest.fixture diff --git a/tests/common/cache/test_manager.py b/tests/common/cache/test_manager.py index bda07f0ace0..770625aba78 100644 --- a/tests/common/cache/test_manager.py +++ b/tests/common/cache/test_manager.py @@ -1,18 +1,18 @@ import pytest from fides.common.cache.manager import INDEX_TTL_EXTRA_SECONDS, RedisCacheManager -from tests.common.cache.mock_redis import MockPipeline, MockRedis +from tests.common.cache.mock_redis import create_mock_redis # --- Fixtures --- @pytest.fixture -def mock_redis() -> MockRedis: - return MockRedis() +def mock_redis(): + return create_mock_redis() @pytest.fixture -def manager(mock_redis: MockRedis) -> RedisCacheManager: +def manager(mock_redis) -> RedisCacheManager: return RedisCacheManager(mock_redis) @@ -24,7 +24,7 @@ class TestRedisCacheManagerPipeline: """Tests for RedisCacheManager pipeline-based index operations.""" def test_set_with_index_uses_pipeline_and_returns_set_result( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """set_with_index stores key, adds to index, and returns SET result.""" result = manager.set_with_index("k1", "v1", "idx1") @@ -34,7 +34,7 @@ def test_set_with_index_uses_pipeline_and_returns_set_result( assert "k1" in mock_redis.smembers("__idx:idx1") def test_set_with_index_with_expiry( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """set_with_index with expire_seconds stores value and adds to index.""" result = manager.set_with_index("k2", "v2", "idx2", expire_seconds=60) @@ -44,7 +44,7 @@ def test_set_with_index_with_expiry( assert "k2" in mock_redis.smembers("__idx:idx2") def test_delete_key_and_remove_from_index_atomic( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """delete_key_and_remove_from_index removes key and index entry atomically.""" manager.set_with_index("k3", "v3", "idx3") @@ -57,7 +57,7 @@ def test_delete_key_and_remove_from_index_atomic( assert "k3" not in mock_redis.smembers("__idx:idx3") def test_delete_keys_by_index_batches_deletes( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> 
None: """delete_keys_by_index removes all indexed keys and the index in one pipeline.""" manager.set_with_index("k4a", "v4a", "idx4") @@ -72,7 +72,7 @@ def test_delete_keys_by_index_batches_deletes( assert mock_redis.smembers("__idx:idx4") == set() def test_delete_keys_by_index_empty_index( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """delete_keys_by_index on empty index deletes index set without error.""" manager.delete_keys_by_index("idx5") @@ -85,7 +85,7 @@ class TestRedisCacheManagerIndexOperations: """Tests for add_key_to_index, remove_key_from_index, get_keys_by_index, delete_index.""" def test_add_key_to_index_registers_key( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Ensure that add_key_to_index adds the key and creates the index set if it doesn't exist.""" manager.add_key_to_index("myidx", "cache_key_1") @@ -93,7 +93,7 @@ def test_add_key_to_index_registers_key( assert "cache_key_1" in mock_redis.smembers("__idx:myidx") def test_add_key_to_index_multiple_keys( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Ensure that add_key_to_index can add multiple keys to the same index.""" manager.add_key_to_index("idx6", "key_a") @@ -104,7 +104,7 @@ def test_add_key_to_index_multiple_keys( assert members == {"key_a", "key_b", "key_c"} def test_remove_key_from_index_idempotent( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Ensure that remove_key_from_index is idempotent and does not error when the specified key is not in the index.""" manager.set_with_index("key_a", "value_a", "idx6") @@ -125,7 +125,7 @@ def test_remove_key_from_index_idempotent( assert mock_redis.get("key_b") == "value_b" def test_remove_key_from_index_unregisters_key( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Ensure that remove_key_from_index removes a key from the index and does not remove other keys.""" manager.add_key_to_index("idx7", "keep") @@ -136,7 +136,7 @@ def test_remove_key_from_index_unregisters_key( assert mock_redis.smembers("__idx:idx7") == {"keep"} def test_remove_key_from_index_does_not_error_when_missing( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Ensure that remove_key_from_index does not error when the specified key is not in the index, and does not remove other keys.""" manager.add_key_to_index("idx8", "existing") @@ -146,7 +146,7 @@ def test_remove_key_from_index_does_not_error_when_missing( assert mock_redis.smembers("__idx:idx8") == {"existing"} def test_get_keys_by_index_returns_empty_for_missing_index( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Ensure that get_keys_by_index returns an empty list when the specified index does not exist.""" keys = manager.get_keys_by_index("never_used") @@ -154,7 +154,7 @@ def test_get_keys_by_index_returns_empty_for_missing_index( assert keys == [] def test_get_keys_by_index_returns_registered_keys( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Ensure get_keys_by_index returns all the keys in the index.""" manager.add_key_to_index("idx9", "k1") @@ -166,7 +166,7 @@ def 
test_get_keys_by_index_returns_registered_keys( assert len(keys) == 2 def test_delete_index_removes_index_set_only( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Ensure that delete_index removes the index set but NOT the data keys that are still in the cache.""" mock_redis.set("data_key_1", "value1") @@ -178,7 +178,7 @@ def test_delete_index_removes_index_set_only( assert mock_redis.get("data_key_1") == "value1" def test_delete_index_does_not_error_when_empty( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Ensure that delete_index does not error when the specified index does not exist.""" manager.delete_index("nonexistent_idx") @@ -189,7 +189,7 @@ class TestRedisCacheManagerIndexTTL: """Tests for optional index TTL (index_ttl_enabled).""" def test_index_ttl_disabled_by_default( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Without index_ttl_enabled, index has no TTL.""" manager.set_with_index("k", "v", "idx", expire_seconds=60) @@ -197,7 +197,7 @@ def test_index_ttl_disabled_by_default( assert mock_redis.ttl("__idx:idx") == -1 def test_index_ttl_applied_when_enabled( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """With index_ttl_enabled, index gets TTL matching key.""" manager.set_with_index( @@ -207,7 +207,7 @@ def test_index_ttl_applied_when_enabled( assert mock_redis.ttl("__idx:idx") == 120 + INDEX_TTL_EXTRA_SECONDS def test_index_ttl_extended_when_key_ttl_farther_out( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Index TTL is pushed out when adding key with longer TTL.""" manager.set_with_index( @@ -222,7 +222,7 @@ def test_index_ttl_extended_when_key_ttl_farther_out( assert mock_redis.ttl("__idx:idx") == 300 + INDEX_TTL_EXTRA_SECONDS def test_index_ttl_not_shortened_when_key_ttl_shorter( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """Index TTL is NOT shortened when adding key with shorter TTL.""" manager.set_with_index( @@ -237,7 +237,7 @@ def test_index_ttl_not_shortened_when_key_ttl_shorter( assert mock_redis.ttl("__idx:idx") == 300 + INDEX_TTL_EXTRA_SECONDS def test_index_ttl_ignored_when_no_expire_seconds( - self, manager: RedisCacheManager, mock_redis: MockRedis + self, manager: RedisCacheManager, mock_redis ) -> None: """index_ttl_enabled has no effect when expire_seconds is not set.""" manager.set_with_index("k", "v", "idx", index_ttl_enabled=True) From d3445a02ebde7ddeae1a38e7498f9cc29c5e172f Mon Sep 17 00:00:00 2001 From: Jade Wibbels Date: Fri, 20 Mar 2026 15:13:03 -0600 Subject: [PATCH 44/50] Review cleanup: deduplicate DSRCacheStore, remove context manager ceremony, improve tests - Extract _get_cached_by_type/_has_cached_by_type helpers in DSRCacheStore to eliminate triplicated scan-filter-parse pattern across custom fields, identity, and DRP methods - Replace no-op context manager get_dsr_cache_store() with plain function, removing unnecessary indentation from 11 production and 14 test callsites - Extract shared test fixtures (mock_redis, manager, dsr_store, pr_id, dsr_id) to tests/common/cache/conftest.py, removing duplication across 6 test files - Remove dead code in clear_cache_identities test helper (legacy scan after migration already 
deleted all legacy keys) - Replace 8 individual legacy compatibility tests with 2 comprehensive lifecycle tests that simulate full in-flight DSR processing across a deployment boundary - Move all local imports to module top level Co-Authored-By: Claude Opus 4.6 (1M context) --- .../models/privacy_request/privacy_request.py | 160 +++---- src/fides/api/tasks/encryption_utils.py | 4 +- src/fides/api/util/cache.py | 10 +- src/fides/common/cache/dsr_store.py | 122 ++--- tests/common/cache/conftest.py | 37 ++ tests/common/cache/test_dsr_store.py | 17 - .../cache/test_dsr_store_clear_integration.py | 9 +- ...est_dsr_store_custom_fields_integration.py | 24 - .../cache/test_dsr_store_drp_integration.py | 27 -- .../test_dsr_store_identity_integration.py | 23 - .../common/cache/test_dsr_store_migration.py | 16 - ...test_dsr_store_production_compatibility.py | 446 +++++------------- tests/common/cache/test_manager.py | 14 - .../api/v1/endpoints/test_drp_endpoints.py | 54 +-- .../privacy_request/test_privacy_request.py | 12 +- tests/ops/tasks/test_encryption_utils.py | 2 +- .../ops/test_helpers/cache_secrets_helper.py | 39 +- 17 files changed, 347 insertions(+), 669 deletions(-) diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index b6ff9b7ec2a..f668fd69a9b 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -466,8 +466,8 @@ def clear_cached_values(self) -> None: Clears all cached values associated with this privacy request from Redis. """ logger.info(f"Clearing cached values for privacy request {self.id}") - with get_dsr_cache_store() as store: - store.clear(self.id) + store = get_dsr_cache_store() + store.clear(self.id) def delete(self, db: Session) -> None: """ @@ -504,18 +504,18 @@ def cache_identity( identity_dict: Dict[str, Any] = identity.labeled_dict() - with get_dsr_cache_store() as store: - # Encode values for Redis storage - encoded_dict = { - key: FidesopsRedis.encode_obj(value) - for key, value in identity_dict.items() - if value is not None - } - store.cache_identity_data( - self.id, - encoded_dict, - expire_seconds=CONFIG.redis.default_ttl_seconds, - ) + store = get_dsr_cache_store() + # Encode values for Redis storage + encoded_dict = { + key: FidesopsRedis.encode_obj(value) + for key, value in identity_dict.items() + if value is not None + } + store.cache_identity_data( + self.id, + encoded_dict, + expire_seconds=CONFIG.redis.default_ttl_seconds, + ) def cache_custom_privacy_request_fields( self, @@ -531,18 +531,18 @@ def cache_custom_privacy_request_fields( return if CONFIG.execution.allow_custom_privacy_request_fields_in_request_execution: - with get_dsr_cache_store() as store: - # Encode values for Redis storage - encoded_fields = { - key: json.dumps(item.value, cls=CustomJSONEncoder) - for key, item in custom_privacy_request_fields.items() - if item is not None - } - store.cache_custom_fields( - self.id, - encoded_fields, - expire_seconds=CONFIG.redis.default_ttl_seconds, - ) + store = get_dsr_cache_store() + # Encode values for Redis storage + encoded_fields = { + key: json.dumps(item.value, cls=CustomJSONEncoder) + for key, item in custom_privacy_request_fields.items() + if item is not None + } + store.cache_custom_fields( + self.id, + encoded_fields, + expire_seconds=CONFIG.redis.default_ttl_seconds, + ) else: logger.info( "Custom fields from privacy request {}, but config setting 
'CONFIG.execution.allow_custom_privacy_request_fields_in_request_execution' is set to false and prevents their usage.", @@ -683,8 +683,8 @@ def verify_identity(self, db: Session, provided_code: str) -> "PrivacyRequest": def get_cached_encryption_key(self) -> Optional[str]: """Gets the cached encryption key for this privacy request.""" - with get_dsr_cache_store() as store: - raw = store.get_encryption(self.id, "key") + store = get_dsr_cache_store() + raw = store.get_encryption(self.id, "key") if raw is None: return None if isinstance(raw, bytes): @@ -717,25 +717,25 @@ def cache_drp_request_body(self, drp_request_body: DrpPrivacyRequestCreate) -> N else: serialized_body[key] = value - with get_dsr_cache_store() as store: - store.cache_drp_request_body( - self.id, - serialized_body, - expire_seconds=CONFIG.redis.default_ttl_seconds, - ) + store = get_dsr_cache_store() + store.cache_drp_request_body( + self.id, + serialized_body, + expire_seconds=CONFIG.redis.default_ttl_seconds, + ) def cache_encryption(self, encryption_key: Optional[str] = None) -> None: """Sets the encryption key in the Fides app cache if provided""" if not encryption_key: return - with get_dsr_cache_store() as store: - store.write_encryption( - self.id, - "key", - encryption_key, - expire_seconds=CONFIG.redis.default_ttl_seconds, - ) + store = get_dsr_cache_store() + store.write_encryption( + self.id, + "key", + encryption_key, + expire_seconds=CONFIG.redis.default_ttl_seconds, + ) def persist_masking_secrets( self, masking_secrets: List[MaskingSecretCache] @@ -758,57 +758,57 @@ def persist_masking_secrets( def verify_cache_for_identity_data(self) -> bool: """Verifies if the identity data is cached for this request""" - with get_dsr_cache_store() as store: - return store.has_cached_identity_data(self.id) + store = get_dsr_cache_store() + return store.has_cached_identity_data(self.id) def get_cached_identity_data(self) -> Dict[str, Any]: """Retrieves any identity data pertaining to this request from the cache""" - with get_dsr_cache_store() as store: + store = get_dsr_cache_store() + result = store.get_cached_identity_data(self.id) + + if not result: + logger.debug(f"Cache miss for request {self.id}, falling back to DB") + identity = self.get_persisted_identity() + self.cache_identity(identity) result = store.get_cached_identity_data(self.id) - if not result: - logger.debug(f"Cache miss for request {self.id}, falling back to DB") - identity = self.get_persisted_identity() - self.cache_identity(identity) - result = store.get_cached_identity_data(self.id) - - # Parse JSON values for backward compatibility - parsed_result: Dict[str, Any] = {} - for key, value in result.items(): - try: - # try parsing the value as JSON - parsed_result[key] = json.loads(value) - except json.JSONDecodeError: - # if parsing as JSON fails, assume it's a string. - # this is purely for backward compatibility: to ensure - # that identity data stored pre-2.34.0 in the "old" format - # can still be correctly retrieved from the cache. - parsed_result[key] = value + # Parse JSON values for backward compatibility + parsed_result: Dict[str, Any] = {} + for key, value in result.items(): + try: + # try parsing the value as JSON + parsed_result[key] = json.loads(value) + except json.JSONDecodeError: + # if parsing as JSON fails, assume it's a string. + # this is purely for backward compatibility: to ensure + # that identity data stored pre-2.34.0 in the "old" format + # can still be correctly retrieved from the cache. 
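+                    # For example (a sketch): a value cached as '"a@b.com"'
+                    # json-decodes to 'a@b.com' in the try branch above, while
+                    # a raw pre-2.34.0 value like 'a@b.com' is not valid JSON,
+                    # raises JSONDecodeError, and falls through here unchanged.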
+ parsed_result[key] = value return parsed_result def get_cached_custom_privacy_request_fields(self) -> Dict[str, Any]: """Retrieves any custom fields pertaining to this request from the cache""" - with get_dsr_cache_store() as store: - result = store.get_cached_custom_fields(self.id) + store = get_dsr_cache_store() + result = store.get_cached_custom_fields(self.id) - if not result: - logger.debug(f"Cache miss for request {self.id}, falling back to DB") - custom_privacy_request_fields = ( - self.get_persisted_custom_privacy_request_fields() - ) - self.cache_custom_privacy_request_fields( - { - key: CustomPrivacyRequestFieldSchema(**value) - for key, value in custom_privacy_request_fields.items() - } - ) - result = store.get_cached_custom_fields(self.id) + if not result: + logger.debug(f"Cache miss for request {self.id}, falling back to DB") + custom_privacy_request_fields = ( + self.get_persisted_custom_privacy_request_fields() + ) + self.cache_custom_privacy_request_fields( + { + key: CustomPrivacyRequestFieldSchema(**value) + for key, value in custom_privacy_request_fields.items() + } + ) + result = store.get_cached_custom_fields(self.id) - # Parse JSON values - parsed_result: Dict[str, Any] = {} - for key, value in result.items(): - parsed_result[key] = json.loads(value) + # Parse JSON values + parsed_result: Dict[str, Any] = {} + for key, value in result.items(): + parsed_result[key] = json.loads(value) return parsed_result diff --git a/src/fides/api/tasks/encryption_utils.py b/src/fides/api/tasks/encryption_utils.py index e69edc55534..08e456b9157 100644 --- a/src/fides/api/tasks/encryption_utils.py +++ b/src/fides/api/tasks/encryption_utils.py @@ -22,8 +22,8 @@ def encrypt_access_request_results(data: Union[str, bytes], request_id: str) -> if isinstance(data, bytes): data = data.decode(CONFIG.security.encoding) - with get_dsr_cache_store() as store: - raw = store.get_encryption(request_id, "key") + store = get_dsr_cache_store() + raw = store.get_encryption(request_id, "key") if raw is None: return data if isinstance(raw, bytes): diff --git a/src/fides/api/util/cache.py b/src/fides/api/util/cache.py index 33e540deb38..3cddc3d51d7 100644 --- a/src/fides/api/util/cache.py +++ b/src/fides/api/util/cache.py @@ -1,7 +1,6 @@ import json import os -from contextlib import contextmanager -from typing import Any, Dict, Generator, List, Optional, Union, cast +from typing import Any, Dict, List, Optional, Union, cast from urllib.parse import unquote_to_bytes from loguru import logger @@ -326,10 +325,9 @@ def get_redis_cache_manager() -> RedisCacheManager: return RedisCacheManager(get_cache()) -@contextmanager -def get_dsr_cache_store() -> Generator[DSRCacheStore, None, None]: - """Context manager yielding a DSRCacheStore for privacy request cache operations.""" - yield DSRCacheStore(get_redis_cache_manager()) +def get_dsr_cache_store() -> DSRCacheStore: + """Return a DSRCacheStore for privacy request cache operations.""" + return DSRCacheStore(get_redis_cache_manager()) def get_read_only_cache() -> FidesopsRedis: diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index 79500cd22e2..3132986813f 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -14,7 +14,7 @@ later if we want to avoid index consistency concerns. 
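For example, a minimal round trip through the store (a sketch; "pr_123" is a
hypothetical privacy request ID and redis_client is any connected client):

    store = DSRCacheStore(RedisCacheManager(redis_client))
    store.write_identity("pr_123", "email", "user@example.com")
    assert store.get_identity("pr_123", "email") == "user@example.com"
    store.clear("pr_123")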
""" -from typing import Any, Dict, Optional, Union +from typing import Any, Callable, Dict, Optional, Union from redis import Redis @@ -128,6 +128,37 @@ def delete(self, dsr_id: str, part: str) -> None: key = _dsr_key(dsr_id, part) self._manager.delete_key_and_remove_from_index(key, _dsr_index_prefix(dsr_id)) + # --- Shared get/has helpers --- + + def _get_cached_by_type( + self, + dsr_id: str, + new_infix: str, + legacy_infix: str, + getter: Callable[[str, str], Optional[Union[str, bytes]]], + ) -> Dict[str, Any]: + """Shared implementation for get_cached_custom_fields/identity_data/drp_request_body.""" + result: Dict[str, Any] = {} + for key in self.get_all_keys(dsr_id): + if new_infix in key: + field = key.split(":")[-1] + elif legacy_infix in key: + field = key.split("-")[-1] + else: + continue + value = getter(dsr_id, field) + if value: + result[field] = value + return result + + def _has_cached_by_type( + self, dsr_id: str, new_infix: str, legacy_infix: str + ) -> bool: + """Shared implementation for has_cached_* methods.""" + return any( + new_infix in k or legacy_infix in k for k in self.get_all_keys(dsr_id) + ) + # --- Convenience: custom privacy request fields --- def write_custom_field( @@ -171,31 +202,12 @@ def get_cached_custom_fields(self, dsr_id: str) -> Dict[str, Any]: Returns dict with custom field values. Automatically migrates legacy keys on read. Returns empty dict if no custom fields cached. """ - result: Dict[str, Any] = {} - all_keys = self.get_all_keys(dsr_id) - - # Filter for custom field keys (both new and legacy formats) - # New: dsr:{id}:custom_field:{key} - # Legacy: id-{id}-custom-privacy-request-field-{key} - custom_keys = [ - k - for k in all_keys - if ":custom_field:" in k or "-custom-privacy-request-field-" in k - ] - - for key in custom_keys: - # Extract field name from key - if ":custom_field:" in key: - field_key = key.split(":")[-1] - else: - # Legacy format - field_key = key.split("-")[-1] - - value = self.get_custom_field(dsr_id, field_key) - if value: - result[field_key] = value - - return result + return self._get_cached_by_type( + dsr_id, + ":custom_field:", + "-custom-privacy-request-field-", + self.get_custom_field, + ) def has_cached_custom_fields(self, dsr_id: str) -> bool: """ @@ -203,10 +215,8 @@ def has_cached_custom_fields(self, dsr_id: str) -> bool: Returns True if any custom field keys exist (legacy or new format). """ - all_keys = self.get_all_keys(dsr_id) - return any( - ":custom_field:" in k or "-custom-privacy-request-field-" in k - for k in all_keys + return self._has_cached_by_type( + dsr_id, ":custom_field:", "-custom-privacy-request-field-" ) # --- Convenience: identity --- @@ -248,27 +258,9 @@ def get_cached_identity_data(self, dsr_id: str) -> Dict[str, Any]: Returns dict with identity attributes. Automatically migrates legacy keys on read. Returns empty dict if no identity data cached. 
""" - result: Dict[str, Any] = {} - all_keys = self.get_all_keys(dsr_id) - - # Filter for identity keys (both new and legacy formats) - identity_keys = [k for k in all_keys if ":identity:" in k or "-identity-" in k] - - for key in identity_keys: - # Extract attribute name from key - # New format: dsr:{id}:identity:{attr} - # Legacy format: id-{id}-identity-{attr} - if ":identity:" in key: - attr = key.split(":")[-1] - else: - # Legacy format - attr = key.split("-")[-1] - - value = self.get_identity(dsr_id, attr) - if value: - result[attr] = value - - return result + return self._get_cached_by_type( + dsr_id, ":identity:", "-identity-", self.get_identity + ) def has_cached_identity_data(self, dsr_id: str) -> bool: """ @@ -276,8 +268,7 @@ def has_cached_identity_data(self, dsr_id: str) -> bool: Returns True if any identity keys exist (legacy or new format). """ - all_keys = self.get_all_keys(dsr_id) - return any(":identity:" in k or "-identity-" in k for k in all_keys) + return self._has_cached_by_type(dsr_id, ":identity:", "-identity-") # --- Convenience: encryption --- @@ -333,35 +324,14 @@ def get_cached_drp_request_body(self, dsr_id: str) -> Dict[str, Any]: Returns dict with DRP fields. Automatically migrates legacy keys on read. Returns empty dict if no DRP data cached. """ - result: Dict[str, Any] = {} - all_keys = self.get_all_keys(dsr_id) - - # Filter for DRP keys (both new and legacy formats) - drp_keys = [k for k in all_keys if ":drp:" in k or "-drp-" in k] - - for key in drp_keys: - # Extract field name from key - # New format: dsr:{id}:drp:{field} - # Legacy format: id-{id}-drp-{field} - if ":drp:" in key: - field = key.split(":")[-1] - else: - # Legacy format - field = key.split("-")[-1] - - value = self.get_drp(dsr_id, field) - if value: - result[field] = value - - return result + return self._get_cached_by_type(dsr_id, ":drp:", "-drp-", self.get_drp) def has_cached_drp_request_body(self, dsr_id: str) -> bool: """ Check if any DRP request body data is cached for this DSR. Checks both new and legacy key formats. """ - all_keys = self.get_all_keys(dsr_id) - return any(":drp:" in k or "-drp-" in k for k in all_keys) + return self._has_cached_by_type(dsr_id, ":drp:", "-drp-") # --- Convenience: masking secret --- diff --git a/tests/common/cache/conftest.py b/tests/common/cache/conftest.py index 3c9a57a610d..5c08fc2828f 100644 --- a/tests/common/cache/conftest.py +++ b/tests/common/cache/conftest.py @@ -3,10 +3,47 @@ real FastAPI app, DB, and Celery worker are not started when running only these tests. 
""" +import uuid from unittest.mock import MagicMock import pytest +from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager +from tests.common.cache.mock_redis import create_mock_redis + +# --- Shared cache test fixtures --- + + +@pytest.fixture +def mock_redis(): + """In-memory autospec'd Redis mock.""" + return create_mock_redis() + + +@pytest.fixture +def manager(mock_redis) -> RedisCacheManager: + """RedisCacheManager backed by mock Redis.""" + return RedisCacheManager(mock_redis) + + +@pytest.fixture +def dsr_store(manager: RedisCacheManager) -> DSRCacheStore: + """DSRCacheStore backed by mock Redis.""" + return DSRCacheStore(manager) + + +@pytest.fixture +def pr_id(): + """Generate unique privacy request ID.""" + return f"test-pr-{uuid.uuid4()}" + + +@pytest.fixture +def dsr_id(): + """Alias for pr_id used by migration tests.""" + return f"test-pr-{uuid.uuid4()}" + @pytest.fixture(scope="session") def test_client(): diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py index e5a7afda5ba..4a4a33660de 100644 --- a/tests/common/cache/test_dsr_store.py +++ b/tests/common/cache/test_dsr_store.py @@ -6,23 +6,6 @@ import pytest from fides.common.cache.dsr_store import DSRCacheStore -from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import create_mock_redis - - -@pytest.fixture -def mock_redis(): - return create_mock_redis() - - -@pytest.fixture -def manager(mock_redis) -> RedisCacheManager: - return RedisCacheManager(mock_redis) - - -@pytest.fixture -def dsr_store(manager: RedisCacheManager) -> DSRCacheStore: - return DSRCacheStore(manager) @pytest.mark.unit diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index 2508e828fb0..971fb6cbafa 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -9,6 +9,7 @@ import pytest +from fides.api.models.privacy_request.privacy_request import PrivacyRequest from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager from tests.common.cache.mock_redis import create_mock_redis @@ -35,8 +36,6 @@ def test_clear_removes_legacy_keys(self): # Patch get_cache in the api.util.cache module where get_dsr_cache_store calls it with patch("fides.api.util.cache.get_cache", return_value=mock_redis): # Import here to avoid app initialization - from fides.api.models.privacy_request.privacy_request import PrivacyRequest - PrivacyRequest.clear_cached_values(pr) # Verify all keys deleted @@ -57,8 +56,6 @@ def test_clear_removes_new_keys(self): pr.id = pr_id with patch("fides.api.util.cache.get_cache", return_value=mock_redis): - from fides.api.models.privacy_request.privacy_request import PrivacyRequest - PrivacyRequest.clear_cached_values(pr) assert len(mock_redis.keys(f"*{pr_id}*")) == 0 @@ -81,8 +78,6 @@ def test_clear_removes_mixed_keys(self): pr.id = pr_id with patch("fides.api.util.cache.get_cache", return_value=mock_redis): - from fides.api.models.privacy_request.privacy_request import PrivacyRequest - PrivacyRequest.clear_cached_values(pr) assert len(mock_redis.keys(f"*{pr_id}*")) == 0 @@ -103,8 +98,6 @@ def test_clear_removes_index(self): pr.id = pr_id with patch("fides.api.util.cache.get_cache", return_value=mock_redis): - from fides.api.models.privacy_request.privacy_request import PrivacyRequest - 
PrivacyRequest.clear_cached_values(pr) # Index should be deleted diff --git a/tests/common/cache/test_dsr_store_custom_fields_integration.py b/tests/common/cache/test_dsr_store_custom_fields_integration.py index 08352af9766..83f0aff65b1 100644 --- a/tests/common/cache/test_dsr_store_custom_fields_integration.py +++ b/tests/common/cache/test_dsr_store_custom_fields_integration.py @@ -5,33 +5,9 @@ """ import json -import uuid import pytest -from fides.common.cache.dsr_store import DSRCacheStore -from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import create_mock_redis - - -@pytest.fixture -def mock_redis(): - """Shared mock Redis instance.""" - return create_mock_redis() - - -@pytest.fixture -def dsr_store(mock_redis): - """DSRCacheStore backed by MockRedis.""" - return DSRCacheStore(RedisCacheManager(mock_redis)) - - -@pytest.fixture -def pr_id(): - """Generate unique privacy request ID.""" - return f"test-pr-{uuid.uuid4()}" - - # Mark all tests as unit tests pytestmark = pytest.mark.unit diff --git a/tests/common/cache/test_dsr_store_drp_integration.py b/tests/common/cache/test_dsr_store_drp_integration.py index e5fe96302d4..6a16d3fc7cd 100644 --- a/tests/common/cache/test_dsr_store_drp_integration.py +++ b/tests/common/cache/test_dsr_store_drp_integration.py @@ -7,33 +7,6 @@ - Automatic migration on read """ -import uuid - -import pytest - -from fides.common.cache.dsr_store import DSRCacheStore -from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import create_mock_redis - - -@pytest.fixture -def mock_redis(): - """In-memory Redis mock for isolated testing.""" - return create_mock_redis() - - -@pytest.fixture -def dsr_store(mock_redis): - """DSRCacheStore instance with mock Redis backend.""" - manager = RedisCacheManager(mock_redis) - return DSRCacheStore(manager) - - -@pytest.fixture -def pr_id(): - """Generate unique privacy request ID for each test.""" - return f"test-pr-{uuid.uuid4()}" - class TestDSRCacheStoreDRP: """Test DSRCacheStore DRP request body methods.""" diff --git a/tests/common/cache/test_dsr_store_identity_integration.py b/tests/common/cache/test_dsr_store_identity_integration.py index 9ffd9ef53fd..a82ac465da5 100644 --- a/tests/common/cache/test_dsr_store_identity_integration.py +++ b/tests/common/cache/test_dsr_store_identity_integration.py @@ -5,32 +5,9 @@ """ import json -import uuid import pytest -from fides.common.cache.dsr_store import DSRCacheStore -from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import create_mock_redis - - -@pytest.fixture -def mock_redis(): - """Shared mock Redis instance.""" - return create_mock_redis() - - -@pytest.fixture -def dsr_store(mock_redis): - """DSRCacheStore backed by MockRedis.""" - return DSRCacheStore(RedisCacheManager(mock_redis)) - - -@pytest.fixture -def pr_id(): - """Generate unique privacy request ID.""" - return f"test-pr-{uuid.uuid4()}" - @pytest.fixture def identity_data(): diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py index a786360f1b4..3b2ff127518 100644 --- a/tests/common/cache/test_dsr_store_migration.py +++ b/tests/common/cache/test_dsr_store_migration.py @@ -10,7 +10,6 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager -from tests.common.cache.mock_redis import create_mock_redis # Test data factories @@ -31,21 +30,6 @@ def make_new_key(dsr_id: str, part: str) -> 
str: return f"dsr:{dsr_id}:{part}" -@pytest.fixture -def mock_redis(): - return create_mock_redis() - -@pytest.fixture -def dsr_store(mock_redis): - return DSRCacheStore(RedisCacheManager(mock_redis)) - -@pytest.fixture -def dsr_id(): - return make_dsr_id() - @pytest.mark.unit class TestLegacyKeyMigration: """Test legacy key formats are readable and migrated correctly.""" diff --git a/tests/common/cache/test_dsr_store_production_compatibility.py b/tests/common/cache/test_dsr_store_production_compatibility.py index c5f155c69cb..2aa7d0e1cde 100644 --- a/tests/common/cache/test_dsr_store_production_compatibility.py +++ b/tests/common/cache/test_dsr_store_production_compatibility.py @@ -16,391 +16,201 @@ import pytest -# PrivacyRequest imported inside tests to ensure get_cache is patched -from fides.api.schemas.redis_cache import Identity +from fides.api.models.privacy_request import PrivacyRequest +from fides.api.tasks.encryption_utils import encrypt_access_request_results from fides.api.util.cache import ( - FidesopsRedis, get_cache, + get_custom_privacy_request_field_cache_key, get_drp_request_body_cache_key, + get_dsr_cache_store, get_encryption_cache_key, get_identity_cache_key, ) -from tests.common.cache.mock_redis import create_mock_redis - - -@pytest.fixture -def mock_redis(): - """Shared mock Redis instance.""" - return create_mock_redis() - - -@pytest.fixture -def pr_id(): - """Generate unique privacy request ID.""" - return f"pri_{uuid.uuid4()}" @pytest.mark.unit -class TestProductionCompatibilityLegacyKeys: +class TestInFlightDSRLifecycle: """ - Test that new code can read and process privacy requests that were - cached by old code using legacy key formats. + Simulate a full in-flight DSR that was cached by old code, then processed + and cleared by new code after a deployment. This is the "Steps to Confirm" + scenario from the PR: a volume of in-flight DSR processing, then upgrading + in the middle of it. """ - def simulate_legacy_cache_write( - self, cache: FidesopsRedis, pr_id: str, identity: Identity, encryption_key: str - ) -> None: - """ - Simulate how old code would cache data - using direct cache.set_with_autoexpire - with legacy key formats. + def test_full_lifecycle_legacy_request_processed_by_new_code(self, mock_redis): """ - # Legacy identity caching - identity_dict = identity.labeled_dict() - for key, value in identity_dict.items(): - if value is not None: - if isinstance(value, dict): - # LabeledIdentity - encode as JSON - cache.set_with_autoexpire( - get_identity_cache_key(pr_id, key), json.dumps(value) - ) - else: - cache.set_with_autoexpire(get_identity_cache_key(pr_id, key), value) - - # Legacy encryption key caching - cache.set_with_autoexpire( - get_encryption_cache_key(pr_id, "key"), encryption_key - ) - - def test_privacy_request_reads_legacy_identity_during_processing( - self, mock_redis, pr_id - ): + End-to-end: old code caches a complete DSR (identity, encryption, + custom fields, DRP body). New code reads everything, processes the + request, and clears the cache. """ - Production scenario: Privacy request was created and cached by old code. - New code reads identity during request processing. 
- """ - # Simulate old code caching identity - identity = Identity(email="user@example.com", phone_number="+1234567890") - encryption_key = "test-encryption-key-12345" + pr_id = f"pri_{uuid.uuid4()}" + # --- Phase 1: "Old code" caches a full DSR using legacy key format --- with ( patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis), ): cache = get_cache() - self.simulate_legacy_cache_write(cache, pr_id, identity, encryption_key) - - # Simulate PrivacyRequest instance (minimal mock) - pr = MagicMock() - pr.id = pr_id - - # Import PrivacyRequest inside patch context - from fides.api.models.privacy_request import PrivacyRequest - - # New code reads cached identity - must be within patch context - identity_data = PrivacyRequest.get_cached_identity_data(pr) - - # Should successfully read from legacy keys - assert identity_data["email"] == "user@example.com" - assert identity_data["phone_number"] == "+1234567890" - - # Legacy keys should be migrated to new format - from fides.api.util.cache import get_dsr_cache_store - - with get_dsr_cache_store() as store: - assert store.get_identity(pr_id, "email") == "user@example.com" - # Legacy key should be deleted after migration - assert mock_redis.get(get_identity_cache_key(pr_id, "email")) is None - - def test_privacy_request_reads_legacy_encryption_during_processing( - self, mock_redis, pr_id - ): - """ - Production scenario: Encryption key was cached by old code. - New code reads encryption key during request processing. - """ - encryption_key = "legacy-encryption-key-16b" # 16 bytes - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - cache = get_cache() + # Identity cache.set_with_autoexpire( - get_encryption_cache_key(pr_id, "key"), encryption_key + get_identity_cache_key(pr_id, "email"), json.dumps("user@example.com") ) - - # Import PrivacyRequest inside patch context - from fides.api.models.privacy_request import PrivacyRequest - - # New code reads encryption key - pr = MagicMock() - pr.id = pr_id - cached_key = PrivacyRequest.get_cached_encryption_key(pr) - - assert cached_key == encryption_key - - # Legacy key should be migrated - from fides.api.util.cache import get_dsr_cache_store - - with get_dsr_cache_store() as store: - assert store.get_encryption(pr_id, "key") == encryption_key - assert mock_redis.get(get_encryption_cache_key(pr_id, "key")) is None - - def test_encryption_utils_reads_legacy_encryption_key(self, mock_redis, pr_id): - """ - Production scenario: Encryption key cached by old code. - encrypt_access_request_results reads it during processing. 
- """ - encryption_key = "0123456789abcdef" # 16 bytes - test_data = "sensitive data to encrypt" - - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - cache = get_cache() cache.set_with_autoexpire( - get_encryption_cache_key(pr_id, "key"), encryption_key + get_identity_cache_key(pr_id, "phone_number"), + json.dumps("+1234567890"), ) - # New code encrypts data using cached key - from fides.api.tasks.encryption_utils import encrypt_access_request_results - - encrypted = encrypt_access_request_results(test_data, pr_id) - - # Should successfully encrypt (result is base64 string) - assert isinstance(encrypted, str) - assert len(encrypted) > 0 - assert encrypted != test_data # Should be encrypted, not plaintext - - def test_mixed_legacy_and_new_keys_same_request(self, mock_redis, pr_id): - """ - Production scenario: Some fields cached by old code, some by new code - (e.g., request started before deployment, continued after). - """ - # Old code cached identity - identity = Identity(email="legacy@example.com") - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - cache = get_cache() + # Encryption key cache.set_with_autoexpire( - get_identity_cache_key(pr_id, "email"), identity.email + get_encryption_cache_key(pr_id, "key"), "0123456789abcdef" ) - # New code caches encryption - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - from fides.api.util.cache import get_dsr_cache_store - - with get_dsr_cache_store() as store: - store.write_encryption(pr_id, "key", "new-encryption-key") - - # Both should be readable - pr = MagicMock() - pr.id = pr_id - - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - from fides.api.models.privacy_request import PrivacyRequest - - # get_cached_identity_data should find and migrate the legacy key - identity_data = PrivacyRequest.get_cached_identity_data(pr) - # After migration, the data should be available in the returned dict - assert identity_data["email"] == "legacy@example.com" - - encryption_key = PrivacyRequest.get_cached_encryption_key(pr) - assert encryption_key == "new-encryption-key" - - # Verify migration happened - legacy key should be deleted - assert mock_redis.get(get_identity_cache_key(pr_id, "email")) is None + # Custom fields + cache.set_with_autoexpire( + get_custom_privacy_request_field_cache_key(pr_id, "department"), + json.dumps("Engineering"), + ) + cache.set_with_autoexpire( + get_custom_privacy_request_field_cache_key(pr_id, "tenant_id"), + json.dumps("tenant-42"), + ) - def test_legacy_drp_request_body_readable(self, mock_redis, pr_id): - """ - Production scenario: DRP request body cached by old code. - New code reads it during processing. 
- """ - # Simulate old code caching DRP body - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - cache = get_cache() + # DRP body cache.set_with_autoexpire( - get_drp_request_body_cache_key(pr_id, "meta"), - "DrpMeta(version='0.5')", + get_drp_request_body_cache_key(pr_id, "meta"), "DrpMeta(version='0.5')" ) cache.set_with_autoexpire( get_drp_request_body_cache_key(pr_id, "regime"), "ccpa" ) - # New code reads DRP body + # Verify legacy keys exist before "deployment" + legacy_keys = [k for k in mock_redis.keys("*") if pr_id in k] + assert len(legacy_keys) == 7 # 2 identity + 1 encryption + 2 custom + 2 DRP + + # --- Phase 2: "New code deployed" — read everything via PrivacyRequest --- with ( patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis), ): - from fides.api.util.cache import get_dsr_cache_store - - with get_dsr_cache_store() as store: - meta = store.get_drp(pr_id, "meta") - regime = store.get_drp(pr_id, "regime") - - assert meta == "DrpMeta(version='0.5')" - assert regime == "ccpa" - - # Legacy keys should be migrated - assert ( - mock_redis.get(get_drp_request_body_cache_key(pr_id, "meta")) - is None - ) + pr = MagicMock() + pr.id = pr_id - def test_legacy_custom_fields_readable(self, mock_redis, pr_id): - """ - Production scenario: Custom fields cached by old code. - New code reads them during processing. - """ - from fides.api.util.cache import ( - get_custom_privacy_request_field_cache_key, - ) + # Read identity (triggers migration) + identity_data = PrivacyRequest.get_cached_identity_data(pr) + assert identity_data["email"] == "user@example.com" + assert identity_data["phone_number"] == "+1234567890" - # Simulate old code caching custom fields - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - cache = get_cache() - cache.set_with_autoexpire( - get_custom_privacy_request_field_cache_key(pr_id, "department"), - json.dumps("Engineering"), + # Read encryption key (triggers migration) + encryption_key = PrivacyRequest.get_cached_encryption_key(pr) + assert encryption_key == "0123456789abcdef" + + # Encrypt data using the cached key + encrypted = encrypt_access_request_results("sensitive data", pr_id) + assert encrypted != "sensitive data" # Actually encrypted + + # Read custom fields (triggers migration) + store = get_dsr_cache_store() + custom_fields = store.get_cached_custom_fields(pr_id) + assert custom_fields["department"] == json.dumps("Engineering") + assert custom_fields["tenant_id"] == json.dumps("tenant-42") + + # Read DRP body (triggers migration) + drp_body = store.get_cached_drp_request_body(pr_id) + assert drp_body["meta"] == "DrpMeta(version='0.5')" + assert drp_body["regime"] == "ccpa" + + # --- Phase 3: Verify migration happened --- + # All legacy keys should be gone + remaining_legacy = [ + k for k in mock_redis.keys("*") if k.startswith(f"id-{pr_id}") + ] + assert remaining_legacy == [], ( + f"Legacy keys not migrated: {remaining_legacy}" ) - # New code reads custom fields - pr = MagicMock() - pr.id = pr_id + # New-format keys should exist + assert store.get_identity(pr_id, "email") == json.dumps("user@example.com") + assert store.get_encryption(pr_id, "key") == "0123456789abcdef" - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - from 
fides.api.models.privacy_request import PrivacyRequest - - custom_fields = PrivacyRequest.get_cached_custom_privacy_request_fields(pr) + # --- Phase 4: "Request complete" — clear the cache --- + store.clear(pr_id) - assert custom_fields["department"] == "Engineering" + # Everything gone + all_keys = [k for k in mock_redis.keys("*") if pr_id in k] + assert all_keys == [], f"Keys remain after clear: {all_keys}" - def test_concurrent_legacy_and_new_requests(self, mock_redis): + def test_multiple_in_flight_requests_mixed_formats(self, mock_redis): """ - Production scenario: Multiple requests in flight - some with legacy keys, - some with new keys. Verify isolation and correct reads. + Simulate 3 concurrent requests: one fully legacy, one fully new, + one partially migrated. All should be independently readable and + clearable after "deployment". """ - pr1_id = f"pri_{uuid.uuid4()}" - pr2_id = f"pri_{uuid.uuid4()}" + legacy_id = f"pri_{uuid.uuid4()}" + new_id = f"pri_{uuid.uuid4()}" + mixed_id = f"pri_{uuid.uuid4()}" - # PR1: cached by old code (legacy keys) - identity1 = Identity(email="legacy1@example.com") with ( patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis), ): cache = get_cache() + + # Request 1: fully legacy cache.set_with_autoexpire( - get_identity_cache_key(pr1_id, "email"), identity1.email + get_identity_cache_key(legacy_id, "email"), + json.dumps("legacy@example.com"), ) - - # PR2: cached by new code (new keys) - identity2 = Identity(email="new2@example.com") - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - from fides.api.util.cache import get_dsr_cache_store - - with get_dsr_cache_store() as store: - store.cache_identity_data( - pr2_id, - {"email": identity2.email}, - expire_seconds=3600, - ) - - # Both should be readable correctly - pr1 = MagicMock() - pr1.id = pr1_id - pr2 = MagicMock() - pr2.id = pr2_id - - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - from fides.api.models.privacy_request import PrivacyRequest - - data1 = PrivacyRequest.get_cached_identity_data(pr1) - data2 = PrivacyRequest.get_cached_identity_data(pr2) - - assert data1["email"] == "legacy1@example.com" - assert data2["email"] == "new2@example.com" - - def test_legacy_keys_migrated_on_first_read_not_on_write(self, mock_redis, pr_id): - """ - Production scenario: Legacy keys exist. New code writes additional data. - Legacy keys should only migrate on read, not interfere with new writes. 
- """ - # Old code cached identity - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - cache = get_cache() cache.set_with_autoexpire( - get_identity_cache_key(pr_id, "email"), "legacy@example.com" + get_encryption_cache_key(legacy_id, "key"), "legacy-key-1234567" ) - # New code writes encryption (shouldn't trigger migration) - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - from fides.api.util.cache import get_dsr_cache_store - - with get_dsr_cache_store() as store: - store.write_encryption(pr_id, "key", "new-key") - - # Legacy identity key should still exist (not migrated yet) - assert ( - mock_redis.get(get_identity_cache_key(pr_id, "email")) - == "legacy@example.com" - ) + # Request 2: fully new format + store = get_dsr_cache_store() + store.write_identity(new_id, "email", json.dumps("new@example.com")) + store.write_encryption(new_id, "key", "new-key-123456789") - # Reading identity should trigger migration - pr = MagicMock() - pr.id = pr_id + # Request 3: mixed (legacy identity, new encryption) + cache.set_with_autoexpire( + get_identity_cache_key(mixed_id, "email"), + json.dumps("mixed@example.com"), + ) + store.write_encryption(mixed_id, "key", "mixed-key-12345678") + # --- "New code deployed" — read all three --- with ( patch("fides.api.util.cache.get_cache", return_value=mock_redis), patch("fides.api.util.cache._connection", mock_redis), ): - from fides.api.models.privacy_request import PrivacyRequest - from fides.api.util.cache import get_dsr_cache_store + for pr_id, expected_email, expected_key in [ + (legacy_id, "legacy@example.com", "legacy-key-1234567"), + (new_id, "new@example.com", "new-key-123456789"), + (mixed_id, "mixed@example.com", "mixed-key-12345678"), + ]: + pr = MagicMock() + pr.id = pr_id + identity = PrivacyRequest.get_cached_identity_data(pr) + assert identity["email"] == expected_email, f"Failed for {pr_id}" + enc_key = PrivacyRequest.get_cached_encryption_key(pr) + assert enc_key == expected_key, f"Failed for {pr_id}" - # get_cached_identity_data calls get_identity which should trigger migration - PrivacyRequest.get_cached_identity_data(pr) + # Clear one, others unaffected + store = get_dsr_cache_store() + store.clear(legacy_id) - # Verify migration happened by checking the store directly - with get_dsr_cache_store() as store: - # The new key should exist - assert store.get_identity(pr_id, "email") == "legacy@example.com" + pr_new = MagicMock() + pr_new.id = new_id + assert ( + PrivacyRequest.get_cached_identity_data(pr_new)["email"] + == "new@example.com" + ) - # Now legacy key should be migrated (deleted) - assert mock_redis.get(get_identity_cache_key(pr_id, "email")) is None + pr_mixed = MagicMock() + pr_mixed.id = mixed_id + assert ( + PrivacyRequest.get_cached_identity_data(pr_mixed)["email"] + == "mixed@example.com" + ) - with ( - patch("fides.api.util.cache.get_cache", return_value=mock_redis), - patch("fides.api.util.cache._connection", mock_redis), - ): - with get_dsr_cache_store() as store: - assert store.get_identity(pr_id, "email") == "legacy@example.com" + # Legacy request fully cleared + assert store.get_all_keys(legacy_id) == [] diff --git a/tests/common/cache/test_manager.py b/tests/common/cache/test_manager.py index 770625aba78..fee4e95da10 100644 --- a/tests/common/cache/test_manager.py +++ b/tests/common/cache/test_manager.py @@ -1,20 +1,6 @@ import pytest from 
fides.common.cache.manager import INDEX_TTL_EXTRA_SECONDS, RedisCacheManager -from tests.common.cache.mock_redis import create_mock_redis - -# --- Fixtures --- - - -@pytest.fixture -def mock_redis(): - return create_mock_redis() - - -@pytest.fixture -def manager(mock_redis) -> RedisCacheManager: - return RedisCacheManager(mock_redis) - # --- Tests --- diff --git a/tests/ops/api/v1/endpoints/test_drp_endpoints.py b/tests/ops/api/v1/endpoints/test_drp_endpoints.py index 862f341bf65..c89280fd575 100644 --- a/tests/ops/api/v1/endpoints/test_drp_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_drp_endpoints.py @@ -75,15 +75,15 @@ def test_create_drp_privacy_request( pr = PrivacyRequest.get(db=db, object_id=response_data["request_id"]) # test appropriate data is cached - with get_dsr_cache_store() as store: - meta_value = store.get_drp(pr.id, "meta") - assert meta_value == "DrpMeta(version='0.5')" - regime_value = store.get_drp(pr.id, "regime") - assert regime_value == "ccpa" - exercise_value = store.get_drp(pr.id, "exercise") - assert exercise_value == "['access']" - identity_value = store.get_drp(pr.id, "identity") - assert identity_value == encoded_identity + store = get_dsr_cache_store() + meta_value = store.get_drp(pr.id, "meta") + assert meta_value == "DrpMeta(version='0.5')" + regime_value = store.get_drp(pr.id, "regime") + assert regime_value == "ccpa" + exercise_value = store.get_drp(pr.id, "exercise") + assert exercise_value == "['access']" + identity_value = store.get_drp(pr.id, "identity") + assert identity_value == encoded_identity assert pr.get_cached_identity_data()["email"] == identity["email"] persisted_identity = pr.get_persisted_identity() @@ -123,15 +123,15 @@ def test_create_drp_privacy_request_unsupported_identity_props( pr = PrivacyRequest.get(db=db, object_id=response_data["request_id"]) # test appropriate data is cached - with get_dsr_cache_store() as store: - meta_value = store.get_drp(pr.id, "meta") - assert meta_value == "DrpMeta(version='0.5')" - regime_value = store.get_drp(pr.id, "regime") - assert regime_value == "ccpa" - exercise_value = store.get_drp(pr.id, "exercise") - assert exercise_value == "['access']" - identity_value = store.get_drp(pr.id, "identity") - assert identity_value == encoded_identity + store = get_dsr_cache_store() + meta_value = store.get_drp(pr.id, "meta") + assert meta_value == "DrpMeta(version='0.5')" + regime_value = store.get_drp(pr.id, "regime") + assert regime_value == "ccpa" + exercise_value = store.get_drp(pr.id, "exercise") + assert exercise_value == "['access']" + identity_value = store.get_drp(pr.id, "identity") + assert identity_value == encoded_identity assert pr.get_cached_identity_data()["email"] == identity["email"] assert "address" not in pr.get_cached_identity_data().keys() @@ -332,15 +332,15 @@ def test_create_drp_privacy_request_error_notification( pr = PrivacyRequest.get(db=db, object_id=response_data["request_id"]) # test appropriate data is cached - with get_dsr_cache_store() as store: - meta_value = store.get_drp(pr.id, "meta") - assert meta_value == "DrpMeta(version='0.5')" - regime_value = store.get_drp(pr.id, "regime") - assert regime_value == "ccpa" - exercise_value = store.get_drp(pr.id, "exercise") - assert exercise_value == "['access']" - identity_value = store.get_drp(pr.id, "identity") - assert identity_value == encoded_identity + store = get_dsr_cache_store() + meta_value = store.get_drp(pr.id, "meta") + assert meta_value == "DrpMeta(version='0.5')" + regime_value = store.get_drp(pr.id, "regime") + assert 
regime_value == "ccpa" + exercise_value = store.get_drp(pr.id, "exercise") + assert exercise_value == "['access']" + identity_value = store.get_drp(pr.id, "identity") + assert identity_value == encoded_identity assert pr.get_cached_identity_data()["email"] == identity["email"] persisted_identity = pr.get_persisted_identity() diff --git a/tests/ops/models/privacy_request/test_privacy_request.py b/tests/ops/models/privacy_request/test_privacy_request.py index d086dd89993..3c902b251b9 100644 --- a/tests/ops/models/privacy_request/test_privacy_request.py +++ b/tests/ops/models/privacy_request/test_privacy_request.py @@ -272,8 +272,8 @@ def test_cache_identity_fallback_to_db( privacy_request_with_email_identity.get_cached_identity_data() ) assert cached_identity_data != {} - with get_dsr_cache_store() as store: - store.delete(privacy_request_with_email_identity.id, "identity:email") + store = get_dsr_cache_store() + store.delete(privacy_request_with_email_identity.id, "identity:email") assert ( privacy_request_with_email_identity.get_cached_identity_data() == cached_identity_data @@ -338,10 +338,10 @@ def test_custom_privacy_request_fields_fallback_to_db( ) assert cached_custom_privacy_request_fields is not None # Delete using DSR store to clear the cached custom field - with get_dsr_cache_store() as store: - store.delete( - privacy_request.id, f"custom_field:{custom_privacy_request_field.label}" - ) + store = get_dsr_cache_store() + store.delete( + privacy_request.id, f"custom_field:{custom_privacy_request_field.label}" + ) assert ( privacy_request.get_cached_custom_privacy_request_fields() == cached_custom_privacy_request_fields diff --git a/tests/ops/tasks/test_encryption_utils.py b/tests/ops/tasks/test_encryption_utils.py index 6d3d6c42870..27d3d9c8636 100644 --- a/tests/ops/tasks/test_encryption_utils.py +++ b/tests/ops/tasks/test_encryption_utils.py @@ -11,7 +11,7 @@ def mock_cache(): "fides.api.tasks.encryption_utils.get_dsr_cache_store" ) as mock_get_store: store = MagicMock() - mock_get_store.return_value.__enter__.return_value = store + mock_get_store.return_value = store yield store diff --git a/tests/ops/test_helpers/cache_secrets_helper.py b/tests/ops/test_helpers/cache_secrets_helper.py index 736e8982704..9bec8da4d8a 100644 --- a/tests/ops/test_helpers/cache_secrets_helper.py +++ b/tests/ops/test_helpers/cache_secrets_helper.py @@ -1,5 +1,10 @@ from fides.api.schemas.masking.masking_secrets import MaskingSecretCache -from fides.api.util.cache import FidesopsRedis, get_cache, get_masking_secret_cache_key +from fides.api.util.cache import ( + FidesopsRedis, + get_cache, + get_dsr_cache_store, + get_masking_secret_cache_key, +) def cache_secret(masking_secret_cache: MaskingSecretCache, request_id: str) -> None: @@ -20,29 +25,15 @@ def clear_cache_secrets(request_id: str) -> None: def clear_cache_identities(request_id: str) -> None: - """Testing helper just removes some cached identities from the Privacy Request for testing. + """Testing helper that removes cached identities from the Privacy Request. Some of our Privacy Request fixtures automatically cache identities - - this clears them using the DSR cache store. Handles both new and legacy key formats. + this clears them using the DSR cache store. The get_cached_identity_data + call migrates any legacy keys before deletion. 
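+
+    For example (a sketch, with a hypothetical request ID):
+
+        # cached by old code:               id-pr_123-identity-email
+        # after get_cached_identity_data(): dsr:pr_123:identity:email
+        clear_cache_identities("pr_123")  # deletes the migrated key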
""" - from fides.api.util.cache import ( - get_cache, - get_dsr_cache_store, - get_identity_cache_key, - ) - - cache: FidesopsRedis = get_cache() - - # First, try to get identity data which will migrate any legacy keys - with get_dsr_cache_store() as store: - identity_data = store.get_cached_identity_data(request_id) - # Delete all identity attributes found - for attr in identity_data.keys(): - store.delete(request_id, f"identity:{attr}") - - # Also scan for any remaining legacy identity keys and delete them - legacy_keys = cache.get_keys_by_prefix(f"id-{request_id}-identity-") - for legacy_key in legacy_keys: - # Extract attribute name and delete via store - attr = legacy_key.split("-")[-1] - store.delete(request_id, f"identity:{attr}") + store = get_dsr_cache_store() + # get_cached_identity_data triggers migration (legacy → new), so all + # identity keys will be in new format after this call. + identity_data = store.get_cached_identity_data(request_id) + for attr in identity_data: + store.delete(request_id, f"identity:{attr}") From 9cb04d5baecca041dcb037e1315ae1e6e803ca32 Mon Sep 17 00:00:00 2001 From: Jade Wibbels Date: Mon, 23 Mar 2026 13:13:23 -0600 Subject: [PATCH 45/50] Address review feedback: fix hyphenated field extraction, remove dead code, add test marker - Fix _get_cached_by_type legacy field extraction to split on infix instead of "-", preserving hyphens in field names (pre-existing bug, caught by Greptile) - Remove dead ConsentRequest.get_cached_identity_data (zero callers in fides/fidesplus) - Add missing pytest.mark.unit to DRP integration tests - Add clarifying comment on falsy-value guard in _get_cached_by_type Co-Authored-By: Claude Opus 4.6 (1M context) --- src/fides/api/models/privacy_request/consent.py | 10 ---------- src/fides/common/cache/dsr_store.py | 4 ++-- tests/common/cache/test_dsr_store_drp_integration.py | 5 +++++ 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/fides/api/models/privacy_request/consent.py b/src/fides/api/models/privacy_request/consent.py index 1688b0bc0a2..95c3db7becd 100644 --- a/src/fides/api/models/privacy_request/consent.py +++ b/src/fides/api/models/privacy_request/consent.py @@ -20,7 +20,6 @@ CustomPrivacyRequestField as CustomPrivacyRequestFieldSchema, ) from fides.api.schemas.redis_cache import IdentityBase -from fides.api.util.cache import FidesopsRedis, get_cache from fides.api.util.identity_verification import IdentityVerificationMixin from fides.config import CONFIG @@ -95,15 +94,6 @@ class ConsentRequest(IdentityVerificationMixin, Base): privacy_request_id = Column(String, ForeignKey("privacyrequest.id"), nullable=True) privacy_request = relationship("PrivacyRequest") - def get_cached_identity_data(self) -> Dict[str, Any]: - # TODO: Remove this method - it's dead code (never called). - # If identity data is needed, use privacy_request.get_cached_identity_data() - # when privacy_request_id exists, or get_persisted_identity() from provided_identity. 
- """Retrieves any identity data pertaining to this request from the cache.""" - cache: FidesopsRedis = get_cache() - keys = cache.get_keys_by_prefix(f"id-{self.id}-identity-") - return {key.split("-")[-1]: cache.get(key) for key in keys} - def verify_identity( self, db: Session, diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index 3132986813f..b164da5c65a 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -143,11 +143,11 @@ def _get_cached_by_type( if new_infix in key: field = key.split(":")[-1] elif legacy_infix in key: - field = key.split("-")[-1] + field = key.split(legacy_infix, 1)[-1] else: continue value = getter(dsr_id, field) - if value: + if value: # Intentionally drops empty/falsy — matches legacy behavior result[field] = value return result diff --git a/tests/common/cache/test_dsr_store_drp_integration.py b/tests/common/cache/test_dsr_store_drp_integration.py index 6a16d3fc7cd..cc202bd325f 100644 --- a/tests/common/cache/test_dsr_store_drp_integration.py +++ b/tests/common/cache/test_dsr_store_drp_integration.py @@ -7,6 +7,11 @@ - Automatic migration on read """ +import pytest + +# Mark all tests as unit tests +pytestmark = pytest.mark.unit + class TestDSRCacheStoreDRP: """Test DSRCacheStore DRP request body methods.""" From 385290e97702c85310b4fb4b6e4362c8b6a6cf75 Mon Sep 17 00:00:00 2001 From: Jade Wibbels Date: Mon, 23 Mar 2026 17:36:25 -0600 Subject: [PATCH 46/50] Migrate async_execution cache path to DSRCacheStore cache_task_tracking_key() and all get_cached_task_id() callers now route through DSRCacheStore, which writes new-format keys (dsr:{id}:async_execution) and reads from both new and legacy formats via get_with_legacy(). This was the last DSR cache path still using the old id-{id}-async-execution key format directly. 
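
For reference, the read path now looks roughly like this (an illustrative
sketch, not code from this diff; privacy_request_id is a placeholder):

    store = get_dsr_cache_store()
    task_id = store.get_async_execution(privacy_request_id)
    # Internally: GET dsr:{id}:async_execution first; on a miss, fall back
    # to the legacy id-{id}-async-execution key, migrate the value to the
    # new-format key, and register it in the DSR key index.
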
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../api/models/privacy_request/privacy_request.py | 7 ++++--- .../api/models/privacy_request/request_task.py | 10 +++++----- .../api/service/privacy_request/request_service.py | 8 +++++--- src/fides/api/util/cache.py | 14 ++++++++++---- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index f668fd69a9b..678bb9943e0 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -106,7 +106,6 @@ from fides.api.tasks import celery_app from fides.api.util.cache import ( FidesopsRedis, - get_async_task_tracking_cache_key, get_cache, get_dsr_cache_store, ) @@ -693,8 +692,10 @@ def get_cached_encryption_key(self) -> Optional[str]: def get_cached_task_id(self) -> Optional[str]: """Gets the cached task ID for this privacy request.""" - cache: FidesopsRedis = get_cache() - task_id = cache.get(get_async_task_tracking_cache_key(self.id)) + store = get_dsr_cache_store() + task_id = store.get_async_execution(self.id) + if isinstance(task_id, bytes): + return task_id.decode(CONFIG.security.encoding) return task_id def get_async_execution_task(self) -> Optional[AsyncResult]: diff --git a/src/fides/api/models/privacy_request/request_task.py b/src/fides/api/models/privacy_request/request_task.py index 4cb4b95bbaa..913b5e737f9 100644 --- a/src/fides/api/models/privacy_request/request_task.py +++ b/src/fides/api/models/privacy_request/request_task.py @@ -28,10 +28,8 @@ from fides.api.schemas.base_class import FidesSchema from fides.api.schemas.policy import ActionType from fides.api.util.cache import ( - FidesopsRedis, celery_tasks_in_flight, - get_async_task_tracking_cache_key, - get_cache, + get_dsr_cache_store, ) from fides.api.util.collection_util import Row from fides.config import CONFIG @@ -247,8 +245,10 @@ def allowed_action_types(cls) -> List[str]: def get_cached_task_id(self) -> Optional[str]: """Gets the cached celery task ID for this request task.""" - cache: FidesopsRedis = get_cache() - task_id = cache.get(get_async_task_tracking_cache_key(self.id)) + store = get_dsr_cache_store() + task_id = store.get_async_execution(self.id) + if isinstance(task_id, bytes): + return task_id.decode("utf-8") return task_id def cleanup_external_storage(self) -> None: diff --git a/src/fides/api/service/privacy_request/request_service.py b/src/fides/api/service/privacy_request/request_service.py index 6ef4aa3d6ab..d3806467002 100644 --- a/src/fides/api/service/privacy_request/request_service.py +++ b/src/fides/api/service/privacy_request/request_service.py @@ -28,8 +28,8 @@ from fides.api.util.cache import ( FidesopsRedis, celery_tasks_in_flight, - get_async_task_tracking_cache_key, get_cache, + get_dsr_cache_store, get_privacy_request_retry_count, increment_privacy_request_retry_count, reset_privacy_request_retry_count, @@ -331,9 +331,11 @@ def get_cached_task_id(entity_id: str) -> Optional[str]: Raises Exception if cache operations fail, allowing callers to handle cache failures appropriately. 
""" - cache: FidesopsRedis = get_cache() try: - task_id = cache.get(get_async_task_tracking_cache_key(entity_id)) + store = get_dsr_cache_store() + task_id = store.get_async_execution(entity_id) + if isinstance(task_id, bytes): + return task_id.decode("utf-8") return task_id except Exception as exc: logger.error(f"Failed to get cached task ID for entity {entity_id}: {exc}") diff --git a/src/fides/api/util/cache.py b/src/fides/api/util/cache.py index 3cddc3d51d7..85677dcd304 100644 --- a/src/fides/api/util/cache.py +++ b/src/fides/api/util/cache.py @@ -418,6 +418,12 @@ def get_all_cache_keys_for_privacy_request(privacy_request_id: str) -> List[Any] def get_async_task_tracking_cache_key(privacy_request_id: str) -> str: + """Return the *legacy* Redis key for async-execution tracking. + + Prefer ``get_dsr_cache_store().get_async_execution()`` for reads and + ``cache_task_tracking_key()`` for writes — both route through the + DSRCacheStore which handles legacy fallback automatically. + """ return f"id-{privacy_request_id}-async-execution" @@ -434,12 +440,12 @@ def cache_task_tracking_key(request_id: str, celery_task_id: str) -> None: :return: None """ - cache: FidesopsRedis = get_cache() - try: - cache.set_with_autoexpire( - get_async_task_tracking_cache_key(request_id), + store = get_dsr_cache_store() + store.write_async_execution( + request_id, celery_task_id, + expire_seconds=CONFIG.redis.default_ttl_seconds, ) except DataError: logger.debug( From 7ca932b5368d6118c4d30ecba418da78b6ad7984 Mon Sep 17 00:00:00 2001 From: Jade Wibbels Date: Mon, 23 Mar 2026 17:50:03 -0600 Subject: [PATCH 47/50] Address PR review feedback from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename legacy_keys/legacy_set → scanned_keys/scanned_set in get_all_keys (variable included both new and legacy keys, name was misleading) - Add JSONDecodeError fallback in get_cached_custom_privacy_request_fields for consistency with get_cached_identity_data - Replace mock_redis._data.clear() with dsr_store.clear() in two tests to properly reset both data and index - Bump very_short_redis_cache_expiration TTL from 1s to 2s to reduce CI flakiness Co-Authored-By: Claude Opus 4.6 (1M context) --- .../api/models/privacy_request/privacy_request.py | 5 ++++- src/fides/common/cache/dsr_store.py | 14 +++++++------- .../test_dsr_store_custom_fields_integration.py | 2 +- .../cache/test_dsr_store_identity_integration.py | 2 +- .../privacy_request/test_request_service.py | 4 ++-- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py index 678bb9943e0..1b89690bd2f 100644 --- a/src/fides/api/models/privacy_request/privacy_request.py +++ b/src/fides/api/models/privacy_request/privacy_request.py @@ -809,7 +809,10 @@ def get_cached_custom_privacy_request_fields(self) -> Dict[str, Any]: # Parse JSON values parsed_result: Dict[str, Any] = {} for key, value in result.items(): - parsed_result[key] = json.loads(value) + try: + parsed_result[key] = json.loads(value) + except json.JSONDecodeError: + parsed_result[key] = value return parsed_result diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index b164da5c65a..16a918e5ab4 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -412,27 +412,27 @@ def get_all_keys(self, dsr_id: str) -> list[str]: if keys and self._redis.exists(migration_key): 
return keys - # SCAN for legacy keys (one-time per DSR until migration confirmed) + # SCAN for all keys (one-time per DSR until migration confirmed) # Filter out internal keys (__migrated:, __idx:) that match the SCAN pattern - legacy_keys = [ + scanned_keys = [ k for k in self._redis.scan_iter(match=f"*{dsr_id}*", count=500) if not k.startswith("__migrated:") and not k.startswith("__idx:") ] indexed = set(keys) - legacy_set = set(legacy_keys) - all_keys = list(indexed | legacy_set) if keys else legacy_keys + scanned_set = set(scanned_keys) + all_keys = list(indexed | scanned_set) if keys else scanned_keys if not all_keys: return [] if self._backfill: - for k in legacy_keys: + for k in scanned_keys: if k not in indexed: self._manager.add_key_to_index(index_prefix, k) - # If index existed and no legacy keys found outside it, mark as migrated - if keys and not (legacy_set - indexed): + # If index existed and no scanned keys found outside it, mark as migrated + if keys and not (scanned_set - indexed): self._redis.setex(migration_key, 86400, "1") # 24h TTL return all_keys diff --git a/tests/common/cache/test_dsr_store_custom_fields_integration.py b/tests/common/cache/test_dsr_store_custom_fields_integration.py index 83f0aff65b1..7c791b1424f 100644 --- a/tests/common/cache/test_dsr_store_custom_fields_integration.py +++ b/tests/common/cache/test_dsr_store_custom_fields_integration.py @@ -92,7 +92,7 @@ def test_has_cached_custom_fields_detects_both_formats( assert dsr_store.has_cached_custom_fields(pr_id) is True # Clear and test new format - mock_redis._data.clear() + dsr_store.clear(pr_id) dsr_store.write_custom_field(pr_id, "department", json.dumps("Engineering")) assert dsr_store.has_cached_custom_fields(pr_id) is True diff --git a/tests/common/cache/test_dsr_store_identity_integration.py b/tests/common/cache/test_dsr_store_identity_integration.py index a82ac465da5..b6de30dd2b6 100644 --- a/tests/common/cache/test_dsr_store_identity_integration.py +++ b/tests/common/cache/test_dsr_store_identity_integration.py @@ -88,6 +88,6 @@ def test_has_cached_identity_data_detects_both_formats( assert dsr_store.has_cached_identity_data(pr_id) is True # Clear and test new format - mock_redis._data.clear() + dsr_store.clear(pr_id) dsr_store.write_identity(pr_id, "email", json.dumps("test@example.com")) assert dsr_store.has_cached_identity_data(pr_id) is True diff --git a/tests/ops/service/privacy_request/test_request_service.py b/tests/ops/service/privacy_request/test_request_service.py index fd61fe8376b..d5c28e61616 100644 --- a/tests/ops/service/privacy_request/test_request_service.py +++ b/tests/ops/service/privacy_request/test_request_service.py @@ -342,8 +342,8 @@ def very_short_request_task_expiration(): @pytest.fixture(scope="function") def very_short_redis_cache_expiration(): original_value: int = CONFIG.redis.default_ttl_seconds - # Redis SET ex= must be int or timedelta (not float). Use 1s; tests already sleep 1s. - CONFIG.redis.default_ttl_seconds = 1 + # Redis SET ex= must be int or timedelta (not float). Use 2s to avoid flakiness on slow CI. 
+ CONFIG.redis.default_ttl_seconds = 2 yield CONFIG CONFIG.redis.default_ttl_seconds = original_value From 1d2b4501f57b5e7f1299cfc2464c405cce26d32a Mon Sep 17 00:00:00 2001 From: Jade Wibbels Date: Mon, 23 Mar 2026 18:02:28 -0600 Subject: [PATCH 48/50] Fix test failures from async_execution migration - test_cache_tracking_key_has_ttl: check new-format key (dsr:{id}:async_execution) with fallback to legacy key - test_customer_data_removed: bump sleep from 1s to 3s to exceed 2s TTL fixture - test_get_cached_task_id_cache_exception: mock get_dsr_cache_store instead of get_cache since get_cached_task_id now uses the store Co-Authored-By: Claude Opus 4.6 (1M context) --- .../privacy_request/test_request_service.py | 20 ++++++++++--------- tests/ops/util/test_cache.py | 7 ++++++- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/ops/service/privacy_request/test_request_service.py b/tests/ops/service/privacy_request/test_request_service.py index d5c28e61616..08ecebb6657 100644 --- a/tests/ops/service/privacy_request/test_request_service.py +++ b/tests/ops/service/privacy_request/test_request_service.py @@ -354,7 +354,7 @@ class TestRemoveSavedCustomerData: ) def test_no_request_tasks(self, db, privacy_request): assert not privacy_request.request_tasks.count() - time.sleep(1) + time.sleep(3) # Mainly asserting this runs without error remove_saved_dsr_data.delay().get() @@ -380,7 +380,7 @@ def test_privacy_request_incomplete(self, db, privacy_request): privacy_request.save(db) assert privacy_request.request_tasks.count() - time.sleep(1) + time.sleep(3) remove_saved_dsr_data.delay().get() @@ -408,7 +408,7 @@ def test_customer_data_removed_from_old_request_tasks_and_privacy_requests( privacy_request.save(db) assert privacy_request.request_tasks.count() - time.sleep(1) + time.sleep(3) remove_saved_dsr_data.delay().get() @@ -589,16 +589,18 @@ def test_get_cached_task_id_none_when_not_cached(self, privacy_request): result = get_cached_task_id(privacy_request.id) assert result is None - @mock.patch("fides.api.service.privacy_request.request_service.get_cache") + @mock.patch("fides.api.service.privacy_request.request_service.get_dsr_cache_store") @mock.patch("fides.api.service.privacy_request.request_service.logger") def test_get_cached_task_id_cache_exception( - self, mock_logger, mock_get_cache, privacy_request + self, mock_logger, mock_get_store, privacy_request ): """Test that function logs error and re-raises exceptions from cache operations.""" - # Mock cache to raise exception - mock_cache = mock.Mock() - mock_cache.get.side_effect = Exception("Redis connection failed") - mock_get_cache.return_value = mock_cache + # Mock store to raise exception on get_async_execution + mock_store = mock.Mock() + mock_store.get_async_execution.side_effect = Exception( + "Redis connection failed" + ) + mock_get_store.return_value = mock_store # Function should log error and re-raise exception with pytest.raises(Exception, match="Redis connection failed"): diff --git a/tests/ops/util/test_cache.py b/tests/ops/util/test_cache.py index 7914f9e90ce..b379646a055 100644 --- a/tests/ops/util/test_cache.py +++ b/tests/ops/util/test_cache.py @@ -209,7 +209,12 @@ def test_cache_tracking_key_has_ttl(self, privacy_request): cache_task_tracking_key(privacy_request.id, "test_1234") raw_cache = get_cache() - ttl = raw_cache.ttl(get_async_task_tracking_cache_key(privacy_request.id)) + # Check new-format key; fall back to legacy key for backward compat + new_key = f"dsr:{privacy_request.id}:async_execution" + 
legacy_key = get_async_task_tracking_cache_key(privacy_request.id)
+        ttl = raw_cache.ttl(new_key)
+        if ttl == -2:
+            ttl = raw_cache.ttl(legacy_key)
         assert ttl > 0
 
     def test_cache_tracking_key_request_task(self, request_task):

From bf4836314270536cdc084d9c27991140fe09f82c Mon Sep 17 00:00:00 2001
From: Jade Wibbels
Date: Tue, 24 Mar 2026 16:38:29 -0600
Subject: [PATCH 49/50] Address PR review feedback: DSRCacheStore API
 improvements

- Scope DSRCacheStore to a single DSR: dsr_id is now a constructor arg,
  removing it from all method signatures (per @erosselli)
- Make expire_seconds required on set()/write() and all convenience write
  methods to prevent accidental no-TTL writes (per @erosselli)
- Fix migrated key TTL: get_with_legacy now reads the legacy key's remaining
  TTL and propagates it to the new key, with a configurable
  default_ttl_seconds fallback (fixes a critical issue from code review)
- Unify encoding: request_task.py and request_service.py now use
  CONFIG.security.encoding instead of hardcoded "utf-8"
- Update all callers and tests for the new API

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .../models/privacy_request/privacy_request.py |  40 ++--
 .../models/privacy_request/request_task.py    |   6 +-
 .../privacy_request/request_service.py        |   6 +-
 src/fides/api/tasks/encryption_utils.py       |   4 +-
 src/fides/api/util/cache.py                   |  15 +-
 src/fides/common/cache/dsr_store.py           | 177 ++++++++----------
 tests/common/cache/conftest.py                |   4 +-
 tests/common/cache/test_dsr_store.py          | 113 +++++------
 .../cache/test_dsr_store_clear_integration.py |  18 +-
 ...est_dsr_store_custom_fields_integration.py |  65 ++++---
 .../cache/test_dsr_store_drp_integration.py   |  73 +++++---
 .../test_dsr_store_identity_integration.py    |  42 +++--
 .../common/cache/test_dsr_store_migration.py  |  75 +++++---
 ...test_dsr_store_production_compatibility.py |  29 +--
 .../api/v1/endpoints/test_drp_endpoints.py    |  30 +--
 .../privacy_request/test_privacy_request.py   |  10 +-
 .../ops/test_helpers/cache_secrets_helper.py  |   6 +-
 17 files changed, 375 insertions(+), 338 deletions(-)

diff --git a/src/fides/api/models/privacy_request/privacy_request.py b/src/fides/api/models/privacy_request/privacy_request.py
index 1b89690bd2f..013f187e19f 100644
--- a/src/fides/api/models/privacy_request/privacy_request.py
+++ b/src/fides/api/models/privacy_request/privacy_request.py
@@ -465,8 +465,8 @@ def clear_cached_values(self) -> None:
         """
         Clears all cached values associated with this privacy request from Redis.
""" logger.info(f"Clearing cached values for privacy request {self.id}") - store = get_dsr_cache_store() - store.clear(self.id) + store = get_dsr_cache_store(self.id) + store.clear() def delete(self, db: Session) -> None: """ @@ -503,7 +503,7 @@ def cache_identity( identity_dict: Dict[str, Any] = identity.labeled_dict() - store = get_dsr_cache_store() + store = get_dsr_cache_store(self.id) # Encode values for Redis storage encoded_dict = { key: FidesopsRedis.encode_obj(value) @@ -511,7 +511,6 @@ def cache_identity( if value is not None } store.cache_identity_data( - self.id, encoded_dict, expire_seconds=CONFIG.redis.default_ttl_seconds, ) @@ -530,7 +529,7 @@ def cache_custom_privacy_request_fields( return if CONFIG.execution.allow_custom_privacy_request_fields_in_request_execution: - store = get_dsr_cache_store() + store = get_dsr_cache_store(self.id) # Encode values for Redis storage encoded_fields = { key: json.dumps(item.value, cls=CustomJSONEncoder) @@ -538,7 +537,6 @@ def cache_custom_privacy_request_fields( if item is not None } store.cache_custom_fields( - self.id, encoded_fields, expire_seconds=CONFIG.redis.default_ttl_seconds, ) @@ -682,8 +680,8 @@ def verify_identity(self, db: Session, provided_code: str) -> "PrivacyRequest": def get_cached_encryption_key(self) -> Optional[str]: """Gets the cached encryption key for this privacy request.""" - store = get_dsr_cache_store() - raw = store.get_encryption(self.id, "key") + store = get_dsr_cache_store(self.id) + raw = store.get_encryption("key") if raw is None: return None if isinstance(raw, bytes): @@ -692,8 +690,8 @@ def get_cached_encryption_key(self) -> Optional[str]: def get_cached_task_id(self) -> Optional[str]: """Gets the cached task ID for this privacy request.""" - store = get_dsr_cache_store() - task_id = store.get_async_execution(self.id) + store = get_dsr_cache_store(self.id) + task_id = store.get_async_execution() if isinstance(task_id, bytes): return task_id.decode(CONFIG.security.encoding) return task_id @@ -718,9 +716,8 @@ def cache_drp_request_body(self, drp_request_body: DrpPrivacyRequestCreate) -> N else: serialized_body[key] = value - store = get_dsr_cache_store() + store = get_dsr_cache_store(self.id) store.cache_drp_request_body( - self.id, serialized_body, expire_seconds=CONFIG.redis.default_ttl_seconds, ) @@ -730,9 +727,8 @@ def cache_encryption(self, encryption_key: Optional[str] = None) -> None: if not encryption_key: return - store = get_dsr_cache_store() + store = get_dsr_cache_store(self.id) store.write_encryption( - self.id, "key", encryption_key, expire_seconds=CONFIG.redis.default_ttl_seconds, @@ -759,19 +755,19 @@ def persist_masking_secrets( def verify_cache_for_identity_data(self) -> bool: """Verifies if the identity data is cached for this request""" - store = get_dsr_cache_store() - return store.has_cached_identity_data(self.id) + store = get_dsr_cache_store(self.id) + return store.has_cached_identity_data() def get_cached_identity_data(self) -> Dict[str, Any]: """Retrieves any identity data pertaining to this request from the cache""" - store = get_dsr_cache_store() - result = store.get_cached_identity_data(self.id) + store = get_dsr_cache_store(self.id) + result = store.get_cached_identity_data() if not result: logger.debug(f"Cache miss for request {self.id}, falling back to DB") identity = self.get_persisted_identity() self.cache_identity(identity) - result = store.get_cached_identity_data(self.id) + result = store.get_cached_identity_data() # Parse JSON values for backward compatibility 
parsed_result: Dict[str, Any] = {} @@ -790,8 +786,8 @@ def get_cached_identity_data(self) -> Dict[str, Any]: def get_cached_custom_privacy_request_fields(self) -> Dict[str, Any]: """Retrieves any custom fields pertaining to this request from the cache""" - store = get_dsr_cache_store() - result = store.get_cached_custom_fields(self.id) + store = get_dsr_cache_store(self.id) + result = store.get_cached_custom_fields() if not result: logger.debug(f"Cache miss for request {self.id}, falling back to DB") @@ -804,7 +800,7 @@ def get_cached_custom_privacy_request_fields(self) -> Dict[str, Any]: for key, value in custom_privacy_request_fields.items() } ) - result = store.get_cached_custom_fields(self.id) + result = store.get_cached_custom_fields() # Parse JSON values parsed_result: Dict[str, Any] = {} diff --git a/src/fides/api/models/privacy_request/request_task.py b/src/fides/api/models/privacy_request/request_task.py index 913b5e737f9..10131993022 100644 --- a/src/fides/api/models/privacy_request/request_task.py +++ b/src/fides/api/models/privacy_request/request_task.py @@ -245,10 +245,10 @@ def allowed_action_types(cls) -> List[str]: def get_cached_task_id(self) -> Optional[str]: """Gets the cached celery task ID for this request task.""" - store = get_dsr_cache_store() - task_id = store.get_async_execution(self.id) + store = get_dsr_cache_store(self.id) + task_id = store.get_async_execution() if isinstance(task_id, bytes): - return task_id.decode("utf-8") + return task_id.decode(CONFIG.security.encoding) return task_id def cleanup_external_storage(self) -> None: diff --git a/src/fides/api/service/privacy_request/request_service.py b/src/fides/api/service/privacy_request/request_service.py index d3806467002..d2671d285b9 100644 --- a/src/fides/api/service/privacy_request/request_service.py +++ b/src/fides/api/service/privacy_request/request_service.py @@ -332,10 +332,10 @@ def get_cached_task_id(entity_id: str) -> Optional[str]: Raises Exception if cache operations fail, allowing callers to handle cache failures appropriately. 
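+
+    Usage (illustrative):
+        task_id = get_cached_task_id(privacy_request.id)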
""" try: - store = get_dsr_cache_store() - task_id = store.get_async_execution(entity_id) + store = get_dsr_cache_store(entity_id) + task_id = store.get_async_execution() if isinstance(task_id, bytes): - return task_id.decode("utf-8") + return task_id.decode(CONFIG.security.encoding) return task_id except Exception as exc: logger.error(f"Failed to get cached task ID for entity {entity_id}: {exc}") diff --git a/src/fides/api/tasks/encryption_utils.py b/src/fides/api/tasks/encryption_utils.py index 08e456b9157..060a055f875 100644 --- a/src/fides/api/tasks/encryption_utils.py +++ b/src/fides/api/tasks/encryption_utils.py @@ -22,8 +22,8 @@ def encrypt_access_request_results(data: Union[str, bytes], request_id: str) -> if isinstance(data, bytes): data = data.decode(CONFIG.security.encoding) - store = get_dsr_cache_store() - raw = store.get_encryption(request_id, "key") + store = get_dsr_cache_store(request_id) + raw = store.get_encryption("key") if raw is None: return data if isinstance(raw, bytes): diff --git a/src/fides/api/util/cache.py b/src/fides/api/util/cache.py index 85677dcd304..067ca796f0c 100644 --- a/src/fides/api/util/cache.py +++ b/src/fides/api/util/cache.py @@ -325,9 +325,13 @@ def get_redis_cache_manager() -> RedisCacheManager: return RedisCacheManager(get_cache()) -def get_dsr_cache_store() -> DSRCacheStore: - """Return a DSRCacheStore for privacy request cache operations.""" - return DSRCacheStore(get_redis_cache_manager()) +def get_dsr_cache_store(dsr_id: str) -> DSRCacheStore: + """Return a DSRCacheStore scoped to a single privacy request.""" + return DSRCacheStore( + dsr_id, + get_redis_cache_manager(), + default_ttl_seconds=CONFIG.redis.default_ttl_seconds, + ) def get_read_only_cache() -> FidesopsRedis: @@ -420,7 +424,7 @@ def get_all_cache_keys_for_privacy_request(privacy_request_id: str) -> List[Any] def get_async_task_tracking_cache_key(privacy_request_id: str) -> str: """Return the *legacy* Redis key for async-execution tracking. - Prefer ``get_dsr_cache_store().get_async_execution()`` for reads and + Prefer ``get_dsr_cache_store(dsr_id).get_async_execution()`` for reads and ``cache_task_tracking_key()`` for writes — both route through the DSRCacheStore which handles legacy fallback automatically. """ @@ -441,9 +445,8 @@ def cache_task_tracking_key(request_id: str, celery_task_id: str) -> None: """ try: - store = get_dsr_cache_store() + store = get_dsr_cache_store(request_id) store.write_async_execution( - request_id, celery_task_id, expire_seconds=CONFIG.redis.default_ttl_seconds, ) diff --git a/src/fides/common/cache/dsr_store.py b/src/fides/common/cache/dsr_store.py index 16a918e5ab4..35257d0207d 100644 --- a/src/fides/common/cache/dsr_store.py +++ b/src/fides/common/cache/dsr_store.py @@ -46,32 +46,38 @@ class DSRCacheStore: def __init__( self, + dsr_id: str, cache_manager: RedisCacheManager, *, + default_ttl_seconds: int = 3600, backfill_index_on_legacy_read: bool = True, migrate_legacy_on_read: bool = True, ) -> None: """ Args: + dsr_id: The privacy request ID this store is scoped to. cache_manager: RedisCacheManager (e.g. from get_redis_cache_manager()). + default_ttl_seconds: Fallback TTL for migrated keys when the legacy + key has no expiration. Default 3600s (1 hour). backfill_index_on_legacy_read: When listing keys and we fall back to KEYS for legacy keys, add those keys to the index. Default True. migrate_legacy_on_read: When a get finds value in legacy key only, write to new key, delete legacy key, add new key to index. Default True. 
""" + self._dsr_id = dsr_id self._manager = cache_manager self._redis: Redis = cache_manager.redis + self._default_ttl = default_ttl_seconds self._backfill = backfill_index_on_legacy_read self._migrate_on_read = migrate_legacy_on_read def write( self, - dsr_id: str, field_type: str, field_key: str, value: RedisValue, - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> Optional[bool]: """ Low-level write: set dsr:{dsr_id}:{field_type}:{field_key} and add to index. @@ -79,19 +85,19 @@ def write( stays in one place. """ part = f"{field_type}:{field_key}" if field_key else field_type - return self.set(dsr_id, part, value, expire_seconds) + return self.set(part, value, expire_seconds) def get_with_legacy( self, - dsr_id: str, part: str, legacy_key: str, ) -> Optional[Union[str, bytes]]: """ Get value for part; if missing, try legacy_key. If found in legacy only and migrate_legacy_on_read, copy to new key, delete legacy, add to index. + Propagates the legacy key's remaining TTL to the new key. """ - new_key = _dsr_key(dsr_id, part) + new_key = _dsr_key(self._dsr_id, part) val = self._redis.get(new_key) if val is not None: return val @@ -100,91 +106,85 @@ def get_with_legacy( # Re-check: another reader may have migrated between our two GETs return self._redis.get(new_key) if self._migrate_on_read: - self.set(dsr_id, part, val) + ttl = self._redis.ttl(legacy_key) + expire = ttl if ttl > 0 else self._default_ttl + self.set(part, val, expire) self._redis.delete(legacy_key) return val - def get(self, dsr_id: str, part: str) -> Optional[Union[str, bytes]]: + def get(self, part: str) -> Optional[Union[str, bytes]]: """Get a value for the given DSR and part. Returns None if missing.""" - return self._redis.get(_dsr_key(dsr_id, part)) + return self._redis.get(_dsr_key(self._dsr_id, part)) def set( self, - dsr_id: str, part: str, value: RedisValue, - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> Optional[bool]: """ Set a value for the given DSR and part. Registers the key in the DSR index. 
""" - key = _dsr_key(dsr_id, part) + key = _dsr_key(self._dsr_id, part) return self._manager.set_with_index( - key, value, _dsr_index_prefix(dsr_id), expire_seconds + key, value, _dsr_index_prefix(self._dsr_id), expire_seconds ) - def delete(self, dsr_id: str, part: str) -> None: + def delete(self, part: str) -> None: """Delete a single part and remove it from the DSR index.""" - key = _dsr_key(dsr_id, part) - self._manager.delete_key_and_remove_from_index(key, _dsr_index_prefix(dsr_id)) + key = _dsr_key(self._dsr_id, part) + self._manager.delete_key_and_remove_from_index( + key, _dsr_index_prefix(self._dsr_id) + ) # --- Shared get/has helpers --- def _get_cached_by_type( self, - dsr_id: str, new_infix: str, legacy_infix: str, - getter: Callable[[str, str], Optional[Union[str, bytes]]], + getter: Callable[[str], Optional[Union[str, bytes]]], ) -> Dict[str, Any]: """Shared implementation for get_cached_custom_fields/identity_data/drp_request_body.""" result: Dict[str, Any] = {} - for key in self.get_all_keys(dsr_id): + for key in self.get_all_keys(): if new_infix in key: field = key.split(":")[-1] elif legacy_infix in key: field = key.split(legacy_infix, 1)[-1] else: continue - value = getter(dsr_id, field) + value = getter(field) if value: # Intentionally drops empty/falsy — matches legacy behavior result[field] = value return result - def _has_cached_by_type( - self, dsr_id: str, new_infix: str, legacy_infix: str - ) -> bool: + def _has_cached_by_type(self, new_infix: str, legacy_infix: str) -> bool: """Shared implementation for has_cached_* methods.""" - return any( - new_infix in k or legacy_infix in k for k in self.get_all_keys(dsr_id) - ) + return any(new_infix in k or legacy_infix in k for k in self.get_all_keys()) # --- Convenience: custom privacy request fields --- def write_custom_field( self, - dsr_id: str, field_key: str, value: RedisValue, - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> Optional[bool]: """Write a custom privacy request field. New key: dsr:{id}:custom_field:{field_key}.""" - return self.write(dsr_id, "custom_field", field_key, value, expire_seconds) + return self.write("custom_field", field_key, value, expire_seconds) - def get_custom_field( - self, dsr_id: str, field_key: str - ) -> Optional[Union[str, bytes]]: + def get_custom_field(self, field_key: str) -> Optional[Union[str, bytes]]: """Get custom field; reads from legacy id-{id}-custom-privacy-request-field-{key} if needed.""" part = f"custom_field:{field_key}" return self.get_with_legacy( - dsr_id, part, KeyMapper.custom_field(dsr_id, field_key)[1] + part, KeyMapper.custom_field(self._dsr_id, field_key)[1] ) def cache_custom_fields( self, - dsr_id: str, custom_fields: Dict[str, Any], - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> None: """ Cache all custom privacy request fields for a DSR. @@ -193,9 +193,9 @@ def cache_custom_fields( """ for key, value in custom_fields.items(): if value is not None: - self.write_custom_field(dsr_id, key, value, expire_seconds) + self.write_custom_field(key, value, expire_seconds) - def get_cached_custom_fields(self, dsr_id: str) -> Dict[str, Any]: + def get_cached_custom_fields(self) -> Dict[str, Any]: """ Retrieve all cached custom fields for a DSR. @@ -203,44 +203,41 @@ def get_cached_custom_fields(self, dsr_id: str) -> Dict[str, Any]: Returns empty dict if no custom fields cached. 
""" return self._get_cached_by_type( - dsr_id, ":custom_field:", "-custom-privacy-request-field-", self.get_custom_field, ) - def has_cached_custom_fields(self, dsr_id: str) -> bool: + def has_cached_custom_fields(self) -> bool: """ Check if any custom fields are cached for this DSR. Returns True if any custom field keys exist (legacy or new format). """ return self._has_cached_by_type( - dsr_id, ":custom_field:", "-custom-privacy-request-field-" + ":custom_field:", "-custom-privacy-request-field-" ) # --- Convenience: identity --- def write_identity( self, - dsr_id: str, attr: str, value: RedisValue, - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> Optional[bool]: """Write an identity attribute. New key: dsr:{id}:identity:{attr}.""" - return self.write(dsr_id, "identity", attr, value, expire_seconds) + return self.write("identity", attr, value, expire_seconds) - def get_identity(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: + def get_identity(self, attr: str) -> Optional[Union[str, bytes]]: """Get identity attribute; reads from legacy id-{id}-identity-{attr} if needed.""" part = f"identity:{attr}" - return self.get_with_legacy(dsr_id, part, KeyMapper.identity(dsr_id, attr)[1]) + return self.get_with_legacy(part, KeyMapper.identity(self._dsr_id, attr)[1]) def cache_identity_data( self, - dsr_id: str, identity_dict: Dict[str, Any], - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> None: """ Cache all identity attributes for a DSR. @@ -249,66 +246,61 @@ def cache_identity_data( """ for key, value in identity_dict.items(): if value is not None: - self.write_identity(dsr_id, key, value, expire_seconds) + self.write_identity(key, value, expire_seconds) - def get_cached_identity_data(self, dsr_id: str) -> Dict[str, Any]: + def get_cached_identity_data(self) -> Dict[str, Any]: """ Retrieve all cached identity data for a DSR. Returns dict with identity attributes. Automatically migrates legacy keys on read. Returns empty dict if no identity data cached. """ - return self._get_cached_by_type( - dsr_id, ":identity:", "-identity-", self.get_identity - ) + return self._get_cached_by_type(":identity:", "-identity-", self.get_identity) - def has_cached_identity_data(self, dsr_id: str) -> bool: + def has_cached_identity_data(self) -> bool: """ Check if any identity data is cached for this DSR. Returns True if any identity keys exist (legacy or new format). """ - return self._has_cached_by_type(dsr_id, ":identity:", "-identity-") + return self._has_cached_by_type(":identity:", "-identity-") # --- Convenience: encryption --- def write_encryption( self, - dsr_id: str, attr: str, value: RedisValue, - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> Optional[bool]: """Write an encryption attribute. 
New key: dsr:{id}:encryption:{attr}.""" - return self.write(dsr_id, "encryption", attr, value, expire_seconds) + return self.write("encryption", attr, value, expire_seconds) - def get_encryption(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: + def get_encryption(self, attr: str) -> Optional[Union[str, bytes]]: """Get encryption attribute; reads from legacy id-{id}-encryption-{attr} if needed.""" part = f"encryption:{attr}" - return self.get_with_legacy(dsr_id, part, KeyMapper.encryption(dsr_id, attr)[1]) + return self.get_with_legacy(part, KeyMapper.encryption(self._dsr_id, attr)[1]) # --- Convenience: DRP request body --- def write_drp( self, - dsr_id: str, attr: str, value: RedisValue, - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> Optional[bool]: """Write DRP request body attribute. New key: dsr:{id}:drp:{attr}.""" - return self.write(dsr_id, "drp", attr, value, expire_seconds) + return self.write("drp", attr, value, expire_seconds) - def get_drp(self, dsr_id: str, attr: str) -> Optional[Union[str, bytes]]: + def get_drp(self, attr: str) -> Optional[Union[str, bytes]]: """Get DRP attribute; reads from legacy id-{id}-drp-{attr} if needed.""" part = f"drp:{attr}" - return self.get_with_legacy(dsr_id, part, KeyMapper.drp(dsr_id, attr)[1]) + return self.get_with_legacy(part, KeyMapper.drp(self._dsr_id, attr)[1]) def cache_drp_request_body( self, - dsr_id: str, drp_body: Dict[str, Any], - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> None: """ Cache all DRP request body fields for a DSR. @@ -316,86 +308,81 @@ def cache_drp_request_body( """ for key, value in drp_body.items(): if value is not None: - self.write_drp(dsr_id, key, value, expire_seconds) + self.write_drp(key, value, expire_seconds) - def get_cached_drp_request_body(self, dsr_id: str) -> Dict[str, Any]: + def get_cached_drp_request_body(self) -> Dict[str, Any]: """ Retrieve all cached DRP request body data for a DSR. Returns dict with DRP fields. Automatically migrates legacy keys on read. Returns empty dict if no DRP data cached. """ - return self._get_cached_by_type(dsr_id, ":drp:", "-drp-", self.get_drp) + return self._get_cached_by_type(":drp:", "-drp-", self.get_drp) - def has_cached_drp_request_body(self, dsr_id: str) -> bool: + def has_cached_drp_request_body(self) -> bool: """ Check if any DRP request body data is cached for this DSR. Checks both new and legacy key formats. """ - return self._has_cached_by_type(dsr_id, ":drp:", "-drp-") + return self._has_cached_by_type(":drp:", "-drp-") # --- Convenience: masking secret --- def write_masking_secret( self, - dsr_id: str, strategy: str, secret_type: str, value: RedisValue, - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> Optional[bool]: """Write masking secret. 
New key: dsr:{id}:masking_secret:{strategy}:{secret_type}.""" part = f"masking_secret:{strategy}:{secret_type}" - return self.set(dsr_id, part, value, expire_seconds) + return self.set(part, value, expire_seconds) def get_masking_secret( self, - dsr_id: str, strategy: str, secret_type: str, ) -> Optional[Union[str, bytes]]: """Get masking secret; reads from legacy id-{id}-masking-secret-{strategy}-{type} if needed.""" part = f"masking_secret:{strategy}:{secret_type}" return self.get_with_legacy( - dsr_id, part, - KeyMapper.masking_secret(dsr_id, strategy, secret_type)[1], + KeyMapper.masking_secret(self._dsr_id, strategy, secret_type)[1], ) # --- Convenience: async execution (single value per DSR) --- def write_async_execution( self, - dsr_id: str, value: RedisValue, - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> Optional[bool]: """Write async task id. New key: dsr:{id}:async_execution.""" - return self.write(dsr_id, "async_execution", "", value, expire_seconds) + return self.write("async_execution", "", value, expire_seconds) - def get_async_execution(self, dsr_id: str) -> Optional[Union[str, bytes]]: + def get_async_execution(self) -> Optional[Union[str, bytes]]: """Get async task id; reads from legacy id-{id}-async-execution if needed.""" part = "async_execution" - return self.get_with_legacy(dsr_id, part, KeyMapper.async_execution(dsr_id)[1]) + return self.get_with_legacy(part, KeyMapper.async_execution(self._dsr_id)[1]) # --- Convenience: retry count --- def write_retry_count( self, - dsr_id: str, value: RedisValue, - expire_seconds: Optional[int] = None, + expire_seconds: int, ) -> Optional[bool]: """Write privacy request retry count. New key: dsr:{id}:retry_count.""" - return self.write(dsr_id, "retry_count", "", value, expire_seconds) + return self.write("retry_count", "", value, expire_seconds) - def get_retry_count(self, dsr_id: str) -> Optional[Union[str, bytes]]: + def get_retry_count(self) -> Optional[Union[str, bytes]]: """Get retry count; reads from legacy id-{id}-privacy-request-retry-count if needed.""" part = "retry_count" - return self.get_with_legacy(dsr_id, part, KeyMapper.retry_count(dsr_id)[1]) + return self.get_with_legacy(part, KeyMapper.retry_count(self._dsr_id)[1]) # --- List / clear --- - def get_all_keys(self, dsr_id: str) -> list[str]: + def get_all_keys(self) -> list[str]: """ Return all cache keys for this DSR. @@ -404,11 +391,11 @@ def get_all_keys(self, dsr_id: str) -> list[str]: legacy stragglers, backfills them into the index, and sets the migration flag so future calls skip the SCAN. 
""" - index_prefix = _dsr_index_prefix(dsr_id) + index_prefix = _dsr_index_prefix(self._dsr_id) keys = self._manager.get_keys_by_index(index_prefix) # If we've already confirmed no legacy keys remain, index is authoritative - migration_key = f"__migrated:{dsr_id}" + migration_key = f"__migrated:{self._dsr_id}" if keys and self._redis.exists(migration_key): return keys @@ -416,7 +403,7 @@ def get_all_keys(self, dsr_id: str) -> list[str]: # Filter out internal keys (__migrated:, __idx:) that match the SCAN pattern scanned_keys = [ k - for k in self._redis.scan_iter(match=f"*{dsr_id}*", count=500) + for k in self._redis.scan_iter(match=f"*{self._dsr_id}*", count=500) if not k.startswith("__migrated:") and not k.startswith("__idx:") ] indexed = set(keys) @@ -437,7 +424,7 @@ def get_all_keys(self, dsr_id: str) -> list[str]: return all_keys - def clear(self, dsr_id: str) -> None: + def clear(self) -> None: """ Delete all cache keys for this DSR and remove the index. @@ -446,14 +433,14 @@ def clear(self, dsr_id: str) -> None: catch keys written by concurrent migrations between the first SCAN and DELETE. """ - all_keys = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) - index_prefix = _dsr_index_prefix(dsr_id) + all_keys = list(self._redis.scan_iter(match=f"*{self._dsr_id}*", count=500)) + index_prefix = _dsr_index_prefix(self._dsr_id) if all_keys: self._redis.delete(*all_keys) self._manager.delete_index(index_prefix) # Invalidate migration flag so future reads re-scan - self._redis.delete(f"__migrated:{dsr_id}") + self._redis.delete(f"__migrated:{self._dsr_id}") # Second pass: catch keys written by concurrent migrations - stragglers = list(self._redis.scan_iter(match=f"*{dsr_id}*", count=500)) + stragglers = list(self._redis.scan_iter(match=f"*{self._dsr_id}*", count=500)) if stragglers: self._redis.delete(*stragglers) diff --git a/tests/common/cache/conftest.py b/tests/common/cache/conftest.py index 5c08fc2828f..1ca2f11792e 100644 --- a/tests/common/cache/conftest.py +++ b/tests/common/cache/conftest.py @@ -29,8 +29,8 @@ def manager(mock_redis) -> RedisCacheManager: @pytest.fixture def dsr_store(manager: RedisCacheManager) -> DSRCacheStore: - """DSRCacheStore backed by mock Redis.""" - return DSRCacheStore(manager) + """DSRCacheStore backed by mock Redis, scoped to default 'pr-1' ID.""" + return DSRCacheStore("pr-1", manager) @pytest.fixture diff --git a/tests/common/cache/test_dsr_store.py b/tests/common/cache/test_dsr_store.py index 4a4a33660de..aa9890ad022 100644 --- a/tests/common/cache/test_dsr_store.py +++ b/tests/common/cache/test_dsr_store.py @@ -6,6 +6,9 @@ import pytest from fides.common.cache.dsr_store import DSRCacheStore +from fides.common.cache.manager import RedisCacheManager + +_TTL = 3600 # Test TTL @pytest.mark.unit @@ -13,113 +16,117 @@ class TestDSRCacheStoreWithInMemoryManager: """DSRCacheStore behavior with an in-memory RedisCacheManager.""" def test_set_and_get(self, dsr_store: DSRCacheStore) -> None: - dsr_store.set("pr-1", "identity:email", "user@example.com") - assert dsr_store.get("pr-1", "identity:email") == "user@example.com" + dsr_store.set("identity:email", "user@example.com", _TTL) + assert dsr_store.get("identity:email") == "user@example.com" def test_get_missing_returns_none(self, dsr_store: DSRCacheStore) -> None: - assert dsr_store.get("pr-1", "identity:email") is None + assert dsr_store.get("identity:email") is None def test_set_with_index_registers_key_in_index( self, dsr_store: DSRCacheStore, mock_redis ) -> None: - dsr_store.set("pr-1", 
"custom_field:foo", "bar") + dsr_store.set("custom_field:foo", "bar", _TTL) keys = mock_redis.smembers("__idx:dsr:pr-1") assert "dsr:pr-1:custom_field:foo" in keys assert len(keys) == 1 def test_get_all_keys_returns_indexed_keys(self, dsr_store: DSRCacheStore) -> None: - dsr_store.write_custom_field("pr-1", "f1", "v1") - dsr_store.write_identity("pr-1", "email", "e@x.com") - keys = dsr_store.get_all_keys("pr-1") + dsr_store.write_custom_field("f1", "v1", _TTL) + dsr_store.write_identity("email", "e@x.com", _TTL) + keys = dsr_store.get_all_keys() assert set(keys) == { "dsr:pr-1:custom_field:f1", "dsr:pr-1:identity:email", } def test_clear_removes_all_keys_and_index(self, dsr_store: DSRCacheStore) -> None: - dsr_store.write_custom_field("pr-1", "f1", "v1") - dsr_store.write_identity("pr-1", "email", "e@x.com") - dsr_store.clear("pr-1") - assert dsr_store.get_all_keys("pr-1") == [] - assert dsr_store.get("pr-1", "custom_field:f1") is None - assert dsr_store.get("pr-1", "identity:email") is None + dsr_store.write_custom_field("f1", "v1", _TTL) + dsr_store.write_identity("email", "e@x.com", _TTL) + dsr_store.clear() + assert dsr_store.get_all_keys() == [] + assert dsr_store.get("custom_field:f1") is None + assert dsr_store.get("identity:email") is None def test_delete_removes_key_and_index_entry(self, dsr_store: DSRCacheStore) -> None: - dsr_store.set("pr-1", "identity:email", "e@x.com") - dsr_store.delete("pr-1", "identity:email") - assert dsr_store.get("pr-1", "identity:email") is None - assert "dsr:pr-1:identity:email" not in dsr_store.get_all_keys("pr-1") + dsr_store.set("identity:email", "e@x.com", _TTL) + dsr_store.delete("identity:email") + assert dsr_store.get("identity:email") is None + assert "dsr:pr-1:identity:email" not in dsr_store.get_all_keys() def test_get_with_legacy_reads_new_key_first( self, dsr_store: DSRCacheStore ) -> None: - dsr_store.write_identity("pr-1", "email", "new@example.com") + dsr_store.write_identity("email", "new@example.com", _TTL) # Legacy key not set; should still get from new key - assert dsr_store.get_identity("pr-1", "email") == "new@example.com" + assert dsr_store.get_identity("email") == "new@example.com" def test_get_with_legacy_migrates_from_legacy_key( self, dsr_store: DSRCacheStore, mock_redis ) -> None: # Simulate legacy data only (no new key) mock_redis.set("id-pr-1-identity-email", "legacy@example.com") - result = dsr_store.get_identity("pr-1", "email") + result = dsr_store.get_identity("email") assert result == "legacy@example.com" # After migrate: new key should exist and legacy should be gone - assert dsr_store.get("pr-1", "identity:email") == "legacy@example.com" + assert dsr_store.get("identity:email") == "legacy@example.com" assert mock_redis.get("id-pr-1-identity-email") is None def test_write_custom_field_and_get_custom_field( self, dsr_store: DSRCacheStore ) -> None: - dsr_store.write_custom_field("pr-1", "my_field", "my_value") - assert dsr_store.get_custom_field("pr-1", "my_field") == "my_value" + dsr_store.write_custom_field("my_field", "my_value", _TTL) + assert dsr_store.get_custom_field("my_field") == "my_value" def test_convenience_async_execution(self, dsr_store: DSRCacheStore) -> None: - dsr_store.write_async_execution("pr-1", "celery-task-id-xyz") - assert dsr_store.get_async_execution("pr-1") == "celery-task-id-xyz" + dsr_store.write_async_execution("celery-task-id-xyz", _TTL) + assert dsr_store.get_async_execution() == "celery-task-id-xyz" - def test_retry_count(self, dsr_store: DSRCacheStore, mock_redis) -> None: + def 
test_retry_count(self, dsr_store: DSRCacheStore, mock_redis, manager) -> None: """Mirrors cache.py get/increment/reset_privacy_request_retry_count.""" - assert dsr_store.get_retry_count("pr-1") is None - dsr_store.write_retry_count("pr-1", "3", expire_seconds=86400) - assert dsr_store.get_retry_count("pr-1") == "3" - dsr_store.delete("pr-1", "retry_count") - assert dsr_store.get_retry_count("pr-1") is None - # Legacy key migration + assert dsr_store.get_retry_count() is None + dsr_store.write_retry_count("3", expire_seconds=86400) + assert dsr_store.get_retry_count() == "3" + dsr_store.delete("retry_count") + assert dsr_store.get_retry_count() is None + # Legacy key migration (different DSR) mock_redis.set("id-pr-2-privacy-request-retry-count", "1") - assert dsr_store.get_retry_count("pr-2") == "1" + store2 = DSRCacheStore("pr-2", manager) + assert store2.get_retry_count() == "1" assert mock_redis.get("id-pr-2-privacy-request-retry-count") is None - def test_drp(self, dsr_store: DSRCacheStore, mock_redis) -> None: + def test_drp(self, dsr_store: DSRCacheStore, mock_redis, manager) -> None: """Mirrors privacy_request.py DRP body cache (get_drp_request_body_cache_key).""" - dsr_store.write_drp("pr-1", "address", "encrypted-body", expire_seconds=300) - assert dsr_store.get_drp("pr-1", "address") == "encrypted-body" - assert dsr_store.get_drp("pr-1", "email") is None - # Legacy key migration + dsr_store.write_drp("address", "encrypted-body", expire_seconds=300) + assert dsr_store.get_drp("address") == "encrypted-body" + assert dsr_store.get_drp("email") is None + # Legacy key migration (different DSR) mock_redis.set("id-pr-2-drp-email", "legacy-drp") - assert dsr_store.get_drp("pr-2", "email") == "legacy-drp" + store2 = DSRCacheStore("pr-2", manager) + assert store2.get_drp("email") == "legacy-drp" assert mock_redis.get("id-pr-2-drp-email") is None - def test_encryption(self, dsr_store: DSRCacheStore, mock_redis) -> None: + def test_encryption(self, dsr_store: DSRCacheStore, mock_redis, manager) -> None: """Mirrors privacy_request.py / encryption_utils.py encryption key cache.""" - dsr_store.write_encryption("pr-1", "key", "enc-key-123", expire_seconds=3600) - assert dsr_store.get_encryption("pr-1", "key") == "enc-key-123" - assert dsr_store.get_encryption("pr-1", "other") is None - # Legacy key migration + dsr_store.write_encryption("key", "enc-key-123", expire_seconds=3600) + assert dsr_store.get_encryption("key") == "enc-key-123" + assert dsr_store.get_encryption("other") is None + # Legacy key migration (different DSR) mock_redis.set("id-pr-2-encryption-key", "legacy-enc") - assert dsr_store.get_encryption("pr-2", "key") == "legacy-enc" + store2 = DSRCacheStore("pr-2", manager) + assert store2.get_encryption("key") == "legacy-enc" assert mock_redis.get("id-pr-2-encryption-key") is None - def test_masking_secret(self, dsr_store: DSRCacheStore, mock_redis) -> None: + def test_masking_secret( + self, dsr_store: DSRCacheStore, mock_redis, manager + ) -> None: """Mirrors secrets_util.get_masking_secret cache read (and write path).""" dsr_store.write_masking_secret( - "pr-1", "hash", "salt", "encoded-secret", expire_seconds=600 + "hash", "salt", "encoded-secret", expire_seconds=600 ) - assert dsr_store.get_masking_secret("pr-1", "hash", "salt") == "encoded-secret" - assert dsr_store.get_masking_secret("pr-1", "hash", "other") is None - # Legacy key migration + assert dsr_store.get_masking_secret("hash", "salt") == "encoded-secret" + assert dsr_store.get_masking_secret("hash", "other") is None + 
# Legacy key migration (different DSR) mock_redis.set("id-pr-2-masking-secret-hash-pepper", "legacy-masking") - assert ( - dsr_store.get_masking_secret("pr-2", "hash", "pepper") == "legacy-masking" - ) + store2 = DSRCacheStore("pr-2", manager) + assert store2.get_masking_secret("hash", "pepper") == "legacy-masking" assert mock_redis.get("id-pr-2-masking-secret-hash-pepper") is None diff --git a/tests/common/cache/test_dsr_store_clear_integration.py b/tests/common/cache/test_dsr_store_clear_integration.py index 971fb6cbafa..090eaeb9b20 100644 --- a/tests/common/cache/test_dsr_store_clear_integration.py +++ b/tests/common/cache/test_dsr_store_clear_integration.py @@ -14,6 +14,8 @@ from fides.common.cache.manager import RedisCacheManager from tests.common.cache.mock_redis import create_mock_redis +_TTL = 3600 # Test TTL + @pytest.mark.unit class TestPrivacyRequestClearCachedValues: @@ -48,9 +50,9 @@ def test_clear_removes_new_keys(self): # Simulate new cached data via store manager = RedisCacheManager(mock_redis) - store = DSRCacheStore(manager) - store.write_identity(pr_id, "email", "test@example.com") - store.write_encryption(pr_id, "key", "encryption-key") + store = DSRCacheStore(pr_id, manager) + store.write_identity("email", "test@example.com", _TTL) + store.write_encryption("key", "encryption-key", _TTL) pr = MagicMock() pr.id = pr_id @@ -70,9 +72,9 @@ def test_clear_removes_mixed_keys(self): mock_redis.set(f"id-{pr_id}-custom-privacy-request-field-dept", "Engineering") manager = RedisCacheManager(mock_redis) - store = DSRCacheStore(manager) - store.write_encryption(pr_id, "key", "new-encryption-key") - store.write_async_execution(pr_id, "task-123") + store = DSRCacheStore(pr_id, manager) + store.write_encryption("key", "new-encryption-key", _TTL) + store.write_async_execution("task-123", _TTL) pr = MagicMock() pr.id = pr_id @@ -88,8 +90,8 @@ def test_clear_removes_index(self): pr_id = f"test-pr-{uuid.uuid4()}" manager = RedisCacheManager(mock_redis) - store = DSRCacheStore(manager) - store.write_identity(pr_id, "email", "test@example.com") + store = DSRCacheStore(pr_id, manager) + store.write_identity("email", "test@example.com", _TTL) # Verify index exists assert len(mock_redis.smembers(f"__idx:dsr:{pr_id}")) > 0 diff --git a/tests/common/cache/test_dsr_store_custom_fields_integration.py b/tests/common/cache/test_dsr_store_custom_fields_integration.py index 7c791b1424f..cbcf0cef06d 100644 --- a/tests/common/cache/test_dsr_store_custom_fields_integration.py +++ b/tests/common/cache/test_dsr_store_custom_fields_integration.py @@ -8,53 +8,60 @@ import pytest +from fides.common.cache.dsr_store import DSRCacheStore + # Mark all tests as unit tests pytestmark = pytest.mark.unit +_TTL = 3600 # Test TTL + class TestDSRCacheStoreCustomFields: """Test custom fields cache operations in DSRCacheStore.""" - def test_cache_custom_fields_writes_all_fields(self, dsr_store, pr_id): + def test_cache_custom_fields_writes_all_fields(self, manager, mock_redis, pr_id): """cache_custom_fields writes all fields to new-format keys.""" + store = DSRCacheStore(pr_id, manager) custom_fields = { "department": json.dumps("Engineering"), "employee_id": json.dumps("E12345"), } - dsr_store.cache_custom_fields(pr_id, custom_fields, expire_seconds=3600) + store.cache_custom_fields(custom_fields, expire_seconds=3600) # All keys written in new format - assert dsr_store._redis.get( - f"dsr:{pr_id}:custom_field:department" - ) == json.dumps("Engineering") - assert dsr_store._redis.get( - 
f"dsr:{pr_id}:custom_field:employee_id" - ) == json.dumps("E12345") + assert mock_redis.get(f"dsr:{pr_id}:custom_field:department") == json.dumps( + "Engineering" + ) + assert mock_redis.get(f"dsr:{pr_id}:custom_field:employee_id") == json.dumps( + "E12345" + ) # Legacy keys do NOT exist assert ( - dsr_store._redis.get(f"id-{pr_id}-custom-privacy-request-field-department") + mock_redis.get(f"id-{pr_id}-custom-privacy-request-field-department") is None ) - def test_get_cached_custom_fields_reads_all_fields(self, dsr_store, pr_id): + def test_get_cached_custom_fields_reads_all_fields(self, manager, pr_id): """get_cached_custom_fields reads all fields from new-format keys.""" + store = DSRCacheStore(pr_id, manager) custom_fields = { "department": json.dumps("Engineering"), "employee_id": json.dumps("E12345"), } - dsr_store.cache_custom_fields(pr_id, custom_fields) + store.cache_custom_fields(custom_fields, _TTL) - result = dsr_store.get_cached_custom_fields(pr_id) + result = store.get_cached_custom_fields() assert result["department"] == json.dumps("Engineering") assert result["employee_id"] == json.dumps("E12345") def test_get_cached_custom_fields_migrates_legacy_keys( - self, dsr_store, mock_redis, pr_id + self, manager, mock_redis, pr_id ): """get_cached_custom_fields reads and migrates legacy keys on first access.""" + store = DSRCacheStore(pr_id, manager) # Write legacy format mock_redis.set( f"id-{pr_id}-custom-privacy-request-field-department", @@ -64,7 +71,7 @@ def test_get_cached_custom_fields_migrates_legacy_keys( f"id-{pr_id}-custom-privacy-request-field-employee_id", json.dumps("E12345") ) - result = dsr_store.get_cached_custom_fields(pr_id) + result = store.get_cached_custom_fields() # Values are returned correctly assert result["department"] == json.dumps("Engineering") @@ -78,49 +85,49 @@ def test_get_cached_custom_fields_migrates_legacy_keys( ) def test_has_cached_custom_fields_detects_both_formats( - self, dsr_store, mock_redis, pr_id + self, manager, mock_redis, pr_id ): """has_cached_custom_fields detects fields in both legacy and new formats.""" + store = DSRCacheStore(pr_id, manager) # Empty initially - assert dsr_store.has_cached_custom_fields(pr_id) is False + assert store.has_cached_custom_fields() is False # Add legacy key mock_redis.set( f"id-{pr_id}-custom-privacy-request-field-department", json.dumps("Engineering"), ) - assert dsr_store.has_cached_custom_fields(pr_id) is True + assert store.has_cached_custom_fields() is True # Clear and test new format - dsr_store.clear(pr_id) - dsr_store.write_custom_field(pr_id, "department", json.dumps("Engineering")) - assert dsr_store.has_cached_custom_fields(pr_id) is True + store.clear() + store.write_custom_field("department", json.dumps("Engineering", _TTL)) + assert store.has_cached_custom_fields() is True class TestDSRCacheStoreEncryption: """Test encryption key cache operations in DSRCacheStore.""" - def test_write_encryption_writes_key(self, dsr_store, pr_id): + def test_write_encryption_writes_key(self, manager, mock_redis, pr_id): """write_encryption writes encryption key to new-format key.""" - dsr_store.write_encryption( - pr_id, "key", "test-encryption-key-12345", expire_seconds=3600 - ) + store = DSRCacheStore(pr_id, manager) + store.write_encryption("key", "test-encryption-key-12345", expire_seconds=3600) assert ( - dsr_store._redis.get(f"dsr:{pr_id}:encryption:key") - == "test-encryption-key-12345" + mock_redis.get(f"dsr:{pr_id}:encryption:key") == "test-encryption-key-12345" ) # Legacy key does NOT exist - 
assert dsr_store._redis.get(f"id-{pr_id}-encryption-key") is None + assert mock_redis.get(f"id-{pr_id}-encryption-key") is None - def test_get_encryption_migrates_legacy_key(self, dsr_store, mock_redis, pr_id): + def test_get_encryption_migrates_legacy_key(self, manager, mock_redis, pr_id): """get_encryption reads and migrates legacy encryption keys.""" + store = DSRCacheStore(pr_id, manager) # Write legacy format mock_redis.set(f"id-{pr_id}-encryption-key", "test-encryption-key-12345") # Read via store - value = dsr_store.get_encryption(pr_id, "key") + value = store.get_encryption("key") assert value == "test-encryption-key-12345" diff --git a/tests/common/cache/test_dsr_store_drp_integration.py b/tests/common/cache/test_dsr_store_drp_integration.py index cc202bd325f..0544bdb5411 100644 --- a/tests/common/cache/test_dsr_store_drp_integration.py +++ b/tests/common/cache/test_dsr_store_drp_integration.py @@ -9,15 +9,20 @@ import pytest +from fides.common.cache.dsr_store import DSRCacheStore + # Mark all tests as unit tests pytestmark = pytest.mark.unit +_TTL = 3600 # Test TTL + class TestDSRCacheStoreDRP: """Test DSRCacheStore DRP request body methods.""" - def test_cache_drp_request_body_writes_all_fields(self, dsr_store, pr_id): + def test_cache_drp_request_body_writes_all_fields(self, manager, pr_id): """cache_drp_request_body writes all fields to new-format keys.""" + store = DSRCacheStore(pr_id, manager) drp_body = { "meta": "metadata_value", "regime": "gdpr", @@ -25,39 +30,41 @@ def test_cache_drp_request_body_writes_all_fields(self, dsr_store, pr_id): "identity": '{"email": "user@example.com"}', } - dsr_store.cache_drp_request_body(pr_id, drp_body, expire_seconds=3600) + store.cache_drp_request_body(drp_body, expire_seconds=3600) # Verify all fields written to new format - assert dsr_store.get_drp(pr_id, "meta") == "metadata_value" - assert dsr_store.get_drp(pr_id, "regime") == "gdpr" - assert dsr_store.get_drp(pr_id, "exercise") == "access" - assert dsr_store.get_drp(pr_id, "identity") == '{"email": "user@example.com"}' + assert store.get_drp("meta") == "metadata_value" + assert store.get_drp("regime") == "gdpr" + assert store.get_drp("exercise") == "access" + assert store.get_drp("identity") == '{"email": "user@example.com"}' - def test_cache_drp_request_body_skips_none_values(self, dsr_store, pr_id): + def test_cache_drp_request_body_skips_none_values(self, manager, pr_id): """cache_drp_request_body skips None values.""" + store = DSRCacheStore(pr_id, manager) drp_body = { "meta": "metadata_value", "regime": None, "exercise": "access", } - dsr_store.cache_drp_request_body(pr_id, drp_body) + store.cache_drp_request_body(drp_body, _TTL) # Only non-None fields should be written - assert dsr_store.get_drp(pr_id, "meta") == "metadata_value" - assert dsr_store.get_drp(pr_id, "regime") is None - assert dsr_store.get_drp(pr_id, "exercise") == "access" + assert store.get_drp("meta") == "metadata_value" + assert store.get_drp("regime") is None + assert store.get_drp("exercise") == "access" - def test_get_cached_drp_request_body_reads_all_fields(self, dsr_store, pr_id): + def test_get_cached_drp_request_body_reads_all_fields(self, manager, pr_id): """get_cached_drp_request_body reads all fields from new-format keys.""" + store = DSRCacheStore(pr_id, manager) drp_body = { "meta": "metadata_value", "regime": "gdpr", "exercise": "access", } - dsr_store.cache_drp_request_body(pr_id, drp_body) + store.cache_drp_request_body(drp_body, _TTL) - result = dsr_store.get_cached_drp_request_body(pr_id) + 
result = store.get_cached_drp_request_body() assert result == { "meta": "metadata_value", @@ -66,14 +73,15 @@ def test_get_cached_drp_request_body_reads_all_fields(self, dsr_store, pr_id): } def test_get_cached_drp_request_body_migrates_legacy_keys( - self, dsr_store, mock_redis, pr_id + self, manager, mock_redis, pr_id ): """get_cached_drp_request_body reads and migrates legacy keys on first access.""" + store = DSRCacheStore(pr_id, manager) # Write legacy format directly mock_redis.set(f"id-{pr_id}-drp-meta", "legacy_metadata") mock_redis.set(f"id-{pr_id}-drp-regime", "ccpa") - result = dsr_store.get_cached_drp_request_body(pr_id) + result = store.get_cached_drp_request_body() assert result == { "meta": "legacy_metadata", @@ -87,51 +95,54 @@ def test_get_cached_drp_request_body_migrates_legacy_keys( assert mock_redis.get(f"id-{pr_id}-drp-regime") is None def test_has_cached_drp_request_body_detects_both_formats( - self, dsr_store, mock_redis, pr_id + self, manager, mock_redis, pr_id ): """has_cached_drp_request_body detects DRP data in both legacy and new formats.""" + store = DSRCacheStore(pr_id, manager) # Empty initially - assert dsr_store.has_cached_drp_request_body(pr_id) is False + assert store.has_cached_drp_request_body() is False # Write new format - dsr_store.write_drp(pr_id, "meta", "metadata") - assert dsr_store.has_cached_drp_request_body(pr_id) is True + store.write_drp("meta", "metadata", _TTL) + assert store.has_cached_drp_request_body() is True # Clear and test legacy format - dsr_store.clear(pr_id) - assert dsr_store.has_cached_drp_request_body(pr_id) is False + store.clear() + assert store.has_cached_drp_request_body() is False mock_redis.set(f"id-{pr_id}-drp-regime", "gdpr") - assert dsr_store.has_cached_drp_request_body(pr_id) is True + assert store.has_cached_drp_request_body() is True def test_get_cached_drp_request_body_returns_empty_dict_when_no_data( - self, dsr_store, pr_id + self, manager, pr_id ): """get_cached_drp_request_body returns empty dict when no DRP data cached.""" - result = dsr_store.get_cached_drp_request_body(pr_id) + store = DSRCacheStore(pr_id, manager) + result = store.get_cached_drp_request_body() assert result == {} - def test_drp_migration_then_new_writes(self, dsr_store, mock_redis, pr_id): + def test_drp_migration_then_new_writes(self, manager, mock_redis, pr_id): """After migrating legacy keys, new writes use indexed format.""" + store = DSRCacheStore(pr_id, manager) # Start with legacy keys mock_redis.set(f"id-{pr_id}-drp-meta", "legacy_metadata") # Read triggers migration - result1 = dsr_store.get_cached_drp_request_body(pr_id) + result1 = store.get_cached_drp_request_body() assert result1["meta"] == "legacy_metadata" # Now write new fields - should use indexed format - dsr_store.write_drp(pr_id, "regime", "gdpr") - dsr_store.write_drp(pr_id, "exercise", "access") + store.write_drp("regime", "gdpr", _TTL) + store.write_drp("exercise", "access", _TTL) # Read all - should get both migrated and new - result2 = dsr_store.get_cached_drp_request_body(pr_id) + result2 = store.get_cached_drp_request_body() assert result2["meta"] == "legacy_metadata" assert result2["regime"] == "gdpr" assert result2["exercise"] == "access" # Verify all keys are now indexed - all_keys = dsr_store.get_all_keys(pr_id) + all_keys = store.get_all_keys() assert f"dsr:{pr_id}:drp:meta" in all_keys assert f"dsr:{pr_id}:drp:regime" in all_keys assert f"dsr:{pr_id}:drp:exercise" in all_keys diff --git a/tests/common/cache/test_dsr_store_identity_integration.py 
b/tests/common/cache/test_dsr_store_identity_integration.py index b6de30dd2b6..a95943594ea 100644 --- a/tests/common/cache/test_dsr_store_identity_integration.py +++ b/tests/common/cache/test_dsr_store_identity_integration.py @@ -8,6 +8,8 @@ import pytest +from fides.common.cache.dsr_store import DSRCacheStore + @pytest.fixture def identity_data(): @@ -21,52 +23,59 @@ def identity_data(): # Mark all tests as unit tests pytestmark = pytest.mark.unit +_TTL = 3600 # Test TTL + class TestDSRCacheStoreIdentity: """Test identity cache operations in DSRCacheStore.""" - def test_cache_identity_data_writes_all_attributes(self, dsr_store, pr_id): + def test_cache_identity_data_writes_all_attributes( + self, manager, mock_redis, pr_id + ): """cache_identity_data writes all identity attributes to new-format keys.""" + store = DSRCacheStore(pr_id, manager) identity_data = { "email": json.dumps("user@example.com"), "phone_number": json.dumps("+1234567890"), } - dsr_store.cache_identity_data(pr_id, identity_data, expire_seconds=3600) + store.cache_identity_data(identity_data, expire_seconds=3600) # All keys written in new format - assert dsr_store._redis.get(f"dsr:{pr_id}:identity:email") == json.dumps( + assert mock_redis.get(f"dsr:{pr_id}:identity:email") == json.dumps( "user@example.com" ) - assert dsr_store._redis.get(f"dsr:{pr_id}:identity:phone_number") == json.dumps( + assert mock_redis.get(f"dsr:{pr_id}:identity:phone_number") == json.dumps( "+1234567890" ) # Legacy keys do NOT exist - assert dsr_store._redis.get(f"id-{pr_id}-identity-email") is None + assert mock_redis.get(f"id-{pr_id}-identity-email") is None def test_get_cached_identity_data_reads_all_attributes( - self, dsr_store, pr_id, identity_data + self, manager, pr_id, identity_data ): """get_cached_identity_data reads all identity attributes from new-format keys.""" + store = DSRCacheStore(pr_id, manager) # Write via store encoded_data = {k: json.dumps(v) for k, v in identity_data.items()} - dsr_store.cache_identity_data(pr_id, encoded_data) + store.cache_identity_data(encoded_data, _TTL) - result = dsr_store.get_cached_identity_data(pr_id) + result = store.get_cached_identity_data() assert result["email"] == json.dumps("user@example.com") assert result["phone_number"] == json.dumps("+1234567890") def test_get_cached_identity_data_migrates_legacy_keys( - self, dsr_store, mock_redis, pr_id, identity_data + self, manager, mock_redis, pr_id, identity_data ): """get_cached_identity_data reads and migrates legacy keys on first access.""" + store = DSRCacheStore(pr_id, manager) # Write legacy format with JSON encoding for key, value in identity_data.items(): mock_redis.set(f"id-{pr_id}-identity-{key}", json.dumps(value)) - result = dsr_store.get_cached_identity_data(pr_id) + result = store.get_cached_identity_data() # Values are returned correctly assert result["email"] == json.dumps("user@example.com") @@ -77,17 +86,18 @@ def test_get_cached_identity_data_migrates_legacy_keys( assert mock_redis.get(f"id-{pr_id}-identity-email") is None def test_has_cached_identity_data_detects_both_formats( - self, dsr_store, mock_redis, pr_id + self, manager, mock_redis, pr_id ): """has_cached_identity_data detects identity data in both legacy and new formats.""" + store = DSRCacheStore(pr_id, manager) # Empty initially - assert dsr_store.has_cached_identity_data(pr_id) is False + assert store.has_cached_identity_data() is False # Add legacy key mock_redis.set(f"id-{pr_id}-identity-email", json.dumps("test@example.com")) - assert 
dsr_store.has_cached_identity_data(pr_id) is True + assert store.has_cached_identity_data() is True # Clear and test new format - dsr_store.clear(pr_id) - dsr_store.write_identity(pr_id, "email", json.dumps("test@example.com")) - assert dsr_store.has_cached_identity_data(pr_id) is True + store.clear() + store.write_identity("email", json.dumps("test@example.com", _TTL)) + assert store.has_cached_identity_data() is True diff --git a/tests/common/cache/test_dsr_store_migration.py b/tests/common/cache/test_dsr_store_migration.py index 3b2ff127518..0088848c276 100644 --- a/tests/common/cache/test_dsr_store_migration.py +++ b/tests/common/cache/test_dsr_store_migration.py @@ -11,6 +11,8 @@ from fides.common.cache.dsr_store import DSRCacheStore from fides.common.cache.manager import RedisCacheManager +_TTL = 3600 # Test TTL + # Test data factories def make_dsr_id() -> str: @@ -47,27 +49,29 @@ class TestLegacyKeyMigration: ], ) def test_legacy_keys_readable( - self, mock_redis, dsr_store, dsr_id, field_type, getter, field_key, value + self, mock_redis, manager, dsr_id, field_type, getter, field_key, value ): """Legacy keys are readable via store convenience methods.""" + store = DSRCacheStore(dsr_id, manager) legacy_key = make_legacy_key(dsr_id, field_type, field_key) mock_redis.set(legacy_key, value) # Call appropriate getter if getter == "get_masking_secret": - result = dsr_store.get_masking_secret(dsr_id, "hash", field_key) + result = store.get_masking_secret("hash", field_key) elif field_key: - result = getattr(dsr_store, getter)(dsr_id, field_key) + result = getattr(store, getter)(field_key) else: - result = getattr(dsr_store, getter)(dsr_id) + result = getattr(store, getter)() assert result == value - def test_legacy_key_migrated_on_read(self, mock_redis, dsr_store, dsr_id): + def test_legacy_key_migrated_on_read(self, mock_redis, manager, dsr_id): """Legacy key is migrated to new format on first read.""" + store = DSRCacheStore(dsr_id, manager) mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "migrate@test.com") - email = dsr_store.get_identity(dsr_id, "email") + email = store.get_identity("email") assert email == "migrate@test.com" # New key exists, legacy deleted, index updated @@ -79,10 +83,11 @@ def test_legacy_key_migrated_on_read(self, mock_redis, dsr_store, dsr_id): f"__idx:dsr:{dsr_id}" ) - def test_new_writes_create_indexed_keys_only(self, mock_redis, dsr_store, dsr_id): + def test_new_writes_create_indexed_keys_only(self, mock_redis, manager, dsr_id): """New writes create new-format keys and index them; no legacy keys written.""" - dsr_store.write_identity(dsr_id, "email", "new@example.com") - dsr_store.write_custom_field(dsr_id, "department", "Sales") + store = DSRCacheStore(dsr_id, manager) + store.write_identity("email", "new@example.com", _TTL) + store.write_custom_field("department", "Sales", _TTL) assert ( mock_redis.get(make_new_key(dsr_id, "identity:email")) == "new@example.com" @@ -98,14 +103,15 @@ def test_new_writes_create_indexed_keys_only(self, mock_redis, dsr_store, dsr_id is None ) - def test_clear_removes_mixed_keys(self, mock_redis, dsr_store, dsr_id): + def test_clear_removes_mixed_keys(self, mock_redis, manager, dsr_id): """clear() removes both legacy and new keys using SCAN.""" + store = DSRCacheStore(dsr_id, manager) mock_redis.set(make_legacy_key(dsr_id, "identity", "email"), "legacy@test.com") mock_redis.set(make_legacy_key(dsr_id, "encryption", "key"), "legacy-key") - dsr_store.write_identity(dsr_id, "phone_number", "+1234567890") - 
dsr_store.write_custom_field(dsr_id, "department", "Engineering") + store.write_identity("phone_number", "+1234567890", _TTL) + store.write_custom_field("department", "Engineering", _TTL) - dsr_store.clear(dsr_id) + store.clear() assert len(mock_redis.keys(f"*{dsr_id}*")) == 0 @@ -117,9 +123,11 @@ def test_index_backfill(self, mock_redis, dsr_id): ) store = DSRCacheStore( - RedisCacheManager(mock_redis), backfill_index_on_legacy_read=True + dsr_id, + RedisCacheManager(mock_redis), + backfill_index_on_legacy_read=True, ) - keys = store.get_all_keys(dsr_id) + keys = store.get_all_keys() assert len(keys) == 2 assert len(mock_redis.smembers(f"__idx:dsr:{dsr_id}")) == 2 @@ -132,36 +140,41 @@ class TestMultipleRequestIsolation: def test_mixed_dsr_states(self, mock_redis): """Operations on one DSR don't affect others (legacy, new, mixed).""" dsr1, dsr2, dsr3 = make_dsr_id(), make_dsr_id(), make_dsr_id() - store = DSRCacheStore(RedisCacheManager(mock_redis)) + mgr = RedisCacheManager(mock_redis) + store1 = DSRCacheStore(dsr1, mgr) + store2 = DSRCacheStore(dsr2, mgr) + store3 = DSRCacheStore(dsr3, mgr) # DSR1: legacy, DSR2: new, DSR3: mixed mock_redis.set(make_legacy_key(dsr1, "identity", "email"), "dsr1@test.com") - store.write_identity(dsr2, "email", "dsr2@test.com") + store2.write_identity("email", "dsr2@test.com", _TTL) mock_redis.set(make_legacy_key(dsr3, "identity", "email"), "dsr3@test.com") - store.write_identity(dsr3, "phone_number", "+1234567890") + store3.write_identity("phone_number", "+1234567890", _TTL) # Verify all readable - assert store.get_identity(dsr1, "email") == "dsr1@test.com" - assert store.get_identity(dsr2, "email") == "dsr2@test.com" - assert store.get_identity(dsr3, "email") == "dsr3@test.com" - assert store.get_identity(dsr3, "phone_number") == "+1234567890" + assert store1.get_identity("email") == "dsr1@test.com" + assert store2.get_identity("email") == "dsr2@test.com" + assert store3.get_identity("email") == "dsr3@test.com" + assert store3.get_identity("phone_number") == "+1234567890" # Clear DSR2 doesn't affect others - store.clear(dsr2) - assert store.get_identity(dsr1, "email") == "dsr1@test.com" - assert store.get_identity(dsr3, "email") == "dsr3@test.com" - assert store.get_identity(dsr2, "email") is None - assert store.get_all_keys(dsr2) == [] + store2.clear() + assert store1.get_identity("email") == "dsr1@test.com" + assert store3.get_identity("email") == "dsr3@test.com" + assert store2.get_identity("email") is None + assert store2.get_all_keys() == [] def test_clear_isolation(self, mock_redis): """Clearing one DSR doesn't delete another's keys.""" dsr1, dsr2 = make_dsr_id(), make_dsr_id() - store = DSRCacheStore(RedisCacheManager(mock_redis)) + mgr = RedisCacheManager(mock_redis) + store1 = DSRCacheStore(dsr1, mgr) + store2 = DSRCacheStore(dsr2, mgr) - store.write_identity(dsr1, "email", "dsr1@test.com") - store.write_identity(dsr2, "email", "dsr2@test.com") + store1.write_identity("email", "dsr1@test.com", _TTL) + store2.write_identity("email", "dsr2@test.com", _TTL) - store.clear(dsr1) + store1.clear() assert mock_redis.get(make_new_key(dsr1, "identity:email")) is None assert mock_redis.get(make_new_key(dsr2, "identity:email")) == "dsr2@test.com" diff --git a/tests/common/cache/test_dsr_store_production_compatibility.py b/tests/common/cache/test_dsr_store_production_compatibility.py index 2aa7d0e1cde..504c72b3d50 100644 --- a/tests/common/cache/test_dsr_store_production_compatibility.py +++ b/tests/common/cache/test_dsr_store_production_compatibility.py @@ 
-27,6 +27,8 @@ get_identity_cache_key, ) +_TTL = 3600 # Test TTL + @pytest.mark.unit class TestInFlightDSRLifecycle: @@ -110,13 +112,13 @@ def test_full_lifecycle_legacy_request_processed_by_new_code(self, mock_redis): assert encrypted != "sensitive data" # Actually encrypted # Read custom fields (triggers migration) - store = get_dsr_cache_store() - custom_fields = store.get_cached_custom_fields(pr_id) + store = get_dsr_cache_store(pr_id) + custom_fields = store.get_cached_custom_fields() assert custom_fields["department"] == json.dumps("Engineering") assert custom_fields["tenant_id"] == json.dumps("tenant-42") # Read DRP body (triggers migration) - drp_body = store.get_cached_drp_request_body(pr_id) + drp_body = store.get_cached_drp_request_body() assert drp_body["meta"] == "DrpMeta(version='0.5')" assert drp_body["regime"] == "ccpa" @@ -130,11 +132,11 @@ def test_full_lifecycle_legacy_request_processed_by_new_code(self, mock_redis): ) # New-format keys should exist - assert store.get_identity(pr_id, "email") == json.dumps("user@example.com") - assert store.get_encryption(pr_id, "key") == "0123456789abcdef" + assert store.get_identity("email") == json.dumps("user@example.com") + assert store.get_encryption("key") == "0123456789abcdef" # --- Phase 4: "Request complete" — clear the cache --- - store.clear(pr_id) + store.clear() # Everything gone all_keys = [k for k in mock_redis.keys("*") if pr_id in k] @@ -166,16 +168,17 @@ def test_multiple_in_flight_requests_mixed_formats(self, mock_redis): ) # Request 2: fully new format - store = get_dsr_cache_store() - store.write_identity(new_id, "email", json.dumps("new@example.com")) - store.write_encryption(new_id, "key", "new-key-123456789") + store_new = get_dsr_cache_store(new_id) + store_new.write_identity("email", json.dumps("new@example.com"), _TTL) + store_new.write_encryption("key", "new-key-123456789", _TTL) # Request 3: mixed (legacy identity, new encryption) cache.set_with_autoexpire( get_identity_cache_key(mixed_id, "email"), json.dumps("mixed@example.com"), ) - store.write_encryption(mixed_id, "key", "mixed-key-12345678") + store_mixed = get_dsr_cache_store(mixed_id) + store_mixed.write_encryption("key", "mixed-key-12345678", _TTL) # --- "New code deployed" — read all three --- with ( @@ -195,8 +198,8 @@ def test_multiple_in_flight_requests_mixed_formats(self, mock_redis): assert enc_key == expected_key, f"Failed for {pr_id}" # Clear one, others unaffected - store = get_dsr_cache_store() - store.clear(legacy_id) + store_legacy = get_dsr_cache_store(legacy_id) + store_legacy.clear() pr_new = MagicMock() pr_new.id = new_id @@ -213,4 +216,4 @@ def test_multiple_in_flight_requests_mixed_formats(self, mock_redis): ) # Legacy request fully cleared - assert store.get_all_keys(legacy_id) == [] + assert store_legacy.get_all_keys() == [] diff --git a/tests/ops/api/v1/endpoints/test_drp_endpoints.py b/tests/ops/api/v1/endpoints/test_drp_endpoints.py index c89280fd575..f4ba2d7c78a 100644 --- a/tests/ops/api/v1/endpoints/test_drp_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_drp_endpoints.py @@ -75,14 +75,14 @@ def test_create_drp_privacy_request( pr = PrivacyRequest.get(db=db, object_id=response_data["request_id"]) # test appropriate data is cached - store = get_dsr_cache_store() - meta_value = store.get_drp(pr.id, "meta") + store = get_dsr_cache_store(pr.id) + meta_value = store.get_drp("meta") assert meta_value == "DrpMeta(version='0.5')" - regime_value = store.get_drp(pr.id, "regime") + regime_value = store.get_drp("regime") assert 
regime_value == "ccpa" - exercise_value = store.get_drp(pr.id, "exercise") + exercise_value = store.get_drp("exercise") assert exercise_value == "['access']" - identity_value = store.get_drp(pr.id, "identity") + identity_value = store.get_drp("identity") assert identity_value == encoded_identity assert pr.get_cached_identity_data()["email"] == identity["email"] @@ -123,14 +123,14 @@ def test_create_drp_privacy_request_unsupported_identity_props( pr = PrivacyRequest.get(db=db, object_id=response_data["request_id"]) # test appropriate data is cached - store = get_dsr_cache_store() - meta_value = store.get_drp(pr.id, "meta") + store = get_dsr_cache_store(pr.id) + meta_value = store.get_drp("meta") assert meta_value == "DrpMeta(version='0.5')" - regime_value = store.get_drp(pr.id, "regime") + regime_value = store.get_drp("regime") assert regime_value == "ccpa" - exercise_value = store.get_drp(pr.id, "exercise") + exercise_value = store.get_drp("exercise") assert exercise_value == "['access']" - identity_value = store.get_drp(pr.id, "identity") + identity_value = store.get_drp("identity") assert identity_value == encoded_identity assert pr.get_cached_identity_data()["email"] == identity["email"] assert "address" not in pr.get_cached_identity_data().keys() @@ -332,14 +332,14 @@ def test_create_drp_privacy_request_error_notification( pr = PrivacyRequest.get(db=db, object_id=response_data["request_id"]) # test appropriate data is cached - store = get_dsr_cache_store() - meta_value = store.get_drp(pr.id, "meta") + store = get_dsr_cache_store(pr.id) + meta_value = store.get_drp("meta") assert meta_value == "DrpMeta(version='0.5')" - regime_value = store.get_drp(pr.id, "regime") + regime_value = store.get_drp("regime") assert regime_value == "ccpa" - exercise_value = store.get_drp(pr.id, "exercise") + exercise_value = store.get_drp("exercise") assert exercise_value == "['access']" - identity_value = store.get_drp(pr.id, "identity") + identity_value = store.get_drp("identity") assert identity_value == encoded_identity assert pr.get_cached_identity_data()["email"] == identity["email"] diff --git a/tests/ops/models/privacy_request/test_privacy_request.py b/tests/ops/models/privacy_request/test_privacy_request.py index 3c902b251b9..801b5525745 100644 --- a/tests/ops/models/privacy_request/test_privacy_request.py +++ b/tests/ops/models/privacy_request/test_privacy_request.py @@ -272,8 +272,8 @@ def test_cache_identity_fallback_to_db( privacy_request_with_email_identity.get_cached_identity_data() ) assert cached_identity_data != {} - store = get_dsr_cache_store() - store.delete(privacy_request_with_email_identity.id, "identity:email") + store = get_dsr_cache_store(privacy_request_with_email_identity.id) + store.delete("identity:email") assert ( privacy_request_with_email_identity.get_cached_identity_data() == cached_identity_data @@ -338,10 +338,8 @@ def test_custom_privacy_request_fields_fallback_to_db( ) assert cached_custom_privacy_request_fields is not None # Delete using DSR store to clear the cached custom field - store = get_dsr_cache_store() - store.delete( - privacy_request.id, f"custom_field:{custom_privacy_request_field.label}" - ) + store = get_dsr_cache_store(privacy_request.id) + store.delete(f"custom_field:{custom_privacy_request_field.label}") assert ( privacy_request.get_cached_custom_privacy_request_fields() == cached_custom_privacy_request_fields diff --git a/tests/ops/test_helpers/cache_secrets_helper.py b/tests/ops/test_helpers/cache_secrets_helper.py index 9bec8da4d8a..b19639980a9 
100644 --- a/tests/ops/test_helpers/cache_secrets_helper.py +++ b/tests/ops/test_helpers/cache_secrets_helper.py @@ -31,9 +31,9 @@ def clear_cache_identities(request_id: str) -> None: this clears them using the DSR cache store. The get_cached_identity_data call migrates any legacy keys before deletion. """ - store = get_dsr_cache_store() + store = get_dsr_cache_store(request_id) # get_cached_identity_data triggers migration (legacy → new), so all # identity keys will be in new format after this call. - identity_data = store.get_cached_identity_data(request_id) + identity_data = store.get_cached_identity_data() for attr in identity_data: - store.delete(request_id, f"identity:{attr}") + store.delete(f"identity:{attr}") From 5df038ac282c57ae6931a84b44ee327b8cb2369b Mon Sep 17 00:00:00 2001 From: Jade Wibbels Date: Tue, 24 Mar 2026 18:14:02 -0600 Subject: [PATCH 50/50] Fix test failures from DSRCacheStore API refactor - Fix _TTL misplaced inside json.dumps() in custom fields and identity integration tests - Update encryption_utils test assertions: get_encryption() no longer takes request_id (it's a constructor arg now) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../cache/test_dsr_store_custom_fields_integration.py | 2 +- .../cache/test_dsr_store_identity_integration.py | 2 +- tests/ops/tasks/test_encryption_utils.py | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/common/cache/test_dsr_store_custom_fields_integration.py b/tests/common/cache/test_dsr_store_custom_fields_integration.py index cbcf0cef06d..7eba5994125 100644 --- a/tests/common/cache/test_dsr_store_custom_fields_integration.py +++ b/tests/common/cache/test_dsr_store_custom_fields_integration.py @@ -101,7 +101,7 @@ def test_has_cached_custom_fields_detects_both_formats( # Clear and test new format store.clear() - store.write_custom_field("department", json.dumps("Engineering", _TTL)) + store.write_custom_field("department", json.dumps("Engineering"), _TTL) assert store.has_cached_custom_fields() is True diff --git a/tests/common/cache/test_dsr_store_identity_integration.py b/tests/common/cache/test_dsr_store_identity_integration.py index a95943594ea..12e549acee2 100644 --- a/tests/common/cache/test_dsr_store_identity_integration.py +++ b/tests/common/cache/test_dsr_store_identity_integration.py @@ -99,5 +99,5 @@ def test_has_cached_identity_data_detects_both_formats( # Clear and test new format store.clear() - store.write_identity("email", json.dumps("test@example.com", _TTL)) + store.write_identity("email", json.dumps("test@example.com"), _TTL) assert store.has_cached_identity_data() is True diff --git a/tests/ops/tasks/test_encryption_utils.py b/tests/ops/tasks/test_encryption_utils.py index 27d3d9c8636..fb3bec87174 100644 --- a/tests/ops/tasks/test_encryption_utils.py +++ b/tests/ops/tasks/test_encryption_utils.py @@ -24,7 +24,7 @@ def test_encrypt_access_request_results_no_encryption_key(mock_cache): result = encrypt_access_request_results(test_data, request_id) assert result == test_data - mock_cache.get_encryption.assert_called_once_with(request_id, "key") + mock_cache.get_encryption.assert_called_once_with("key") def test_encrypt_access_request_results_with_encryption_key(mock_cache): @@ -40,7 +40,7 @@ def test_encrypt_access_request_results_with_encryption_key(mock_cache): # The result should be a base64 encoded string containing the nonce and encrypted data assert isinstance(result, str) assert len(result) > 0 - mock_cache.get_encryption.assert_called_once_with(request_id, "key") + 
mock_cache.get_encryption.assert_called_once_with("key") def test_encrypt_access_request_results_with_bytes_input(mock_cache): @@ -54,7 +54,7 @@ def test_encrypt_access_request_results_with_bytes_input(mock_cache): assert isinstance(result, str) assert len(result) > 0 - mock_cache.get_encryption.assert_called_once_with(request_id, "key") + mock_cache.get_encryption.assert_called_once_with("key") def test_encrypt_access_request_results_empty_data(mock_cache): @@ -68,7 +68,7 @@ def test_encrypt_access_request_results_empty_data(mock_cache): assert isinstance(result, str) assert len(result) > 0 - mock_cache.get_encryption.assert_called_once_with(request_id, "key") + mock_cache.get_encryption.assert_called_once_with("key") def test_encrypt_access_request_results_special_characters(mock_cache): @@ -82,4 +82,4 @@ def test_encrypt_access_request_results_special_characters(mock_cache): assert isinstance(result, str) assert len(result) > 0 - mock_cache.get_encryption.assert_called_once_with(request_id, "key") + mock_cache.get_encryption.assert_called_once_with("key")
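
A note on this final patch's one-line fixes: on current CPython, json.dumps accepts its options keyword-only, so the misplaced TTL in json.dumps("Engineering", _TTL) failed at call time with a TypeError rather than silently encoding anything; moving _TTL out to the write call's expire argument is the entire fix. A minimal repro, assuming CPython 3.x:

    import json

    # json.dumps("Engineering", 3600)  # TypeError: dumps() takes 1 positional argument
    payload = json.dumps("Engineering")                      # encode the value first
    # store.write_custom_field("department", payload, 3600)  # then pass the TTL separately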
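Taken together, the test updates in this series pin down the refactored DSRCacheStore surface even though the class body itself lands in an earlier patch: the privacy request id moves from a per-call argument to a constructor argument, writes take an explicit TTL, and get_dsr_cache_store(pr_id) now returns a store directly rather than acting as a context manager. A minimal usage sketch reconstructed from the test assertions; the Redis wiring, the example id, and the _TTL value are illustrative conventions from the tests, not library defaults:

    from redis import Redis

    from fides.common.cache.dsr_store import DSRCacheStore
    from fides.common.cache.manager import RedisCacheManager

    _TTL = 3600  # expiry in seconds, matching the tests' convention

    # The tests substitute a mock client; decode_responses keeps values as str.
    manager = RedisCacheManager(Redis(decode_responses=True))
    store = DSRCacheStore("pr-123", manager)  # id bound once, at construction

    store.write_identity("email", "user@example.com", _TTL)  # was write_identity(pr_id, ...)
    assert store.get_identity("email") == "user@example.com"

    store.clear()  # was clear(pr_id)
    assert store.get_all_keys() == []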
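The invariant every migration test asserts (a read that finds only a legacy key returns its value, the new-format key then exists and is registered under __idx:dsr:{id}, and the legacy key is deleted) reduces to a small read-through helper. The sketch below builds that read path on the RedisCacheManager operations from the first patch in the series; the function name and legacy-key plumbing are illustrative, and TTL carry-over plus the backfill_index_on_legacy_read / migrate_legacy_on_read switches are omitted:

    from typing import Optional

    from fides.common.cache.manager import RedisCacheManager


    def get_with_lazy_migration(
        manager: RedisCacheManager,
        privacy_request_id: str,
        part: str,        # new-format suffix, e.g. "identity:email"
        legacy_key: str,  # e.g. f"id-{privacy_request_id}-identity-email"
    ) -> Optional[str]:
        new_key = f"dsr:{privacy_request_id}:{part}"
        index_prefix = f"dsr:{privacy_request_id}"

        # Fast path: the key was already migrated (or written by new code).
        value = manager.redis.get(new_key)
        if value is not None:
            return value

        # Slow path: fall back to the pre-refactor key shape.
        value = manager.redis.get(legacy_key)
        if value is None:
            return None

        # Migrate on read: write the new key, register it in the index,
        # then drop the legacy key so the next read takes the fast path.
        manager.set_with_index(new_key, value, index_prefix)
        manager.redis.delete(legacy_key)
        return value

Migrating on read rather than with a one-shot backfill job is what lets requests created by old code survive a deploy mid-flight, which is exactly the scenario test_dsr_store_production_compatibility.py walks through.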