From 6578a9340d889247bac756fe6c4f288593d02d56 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Thu, 31 Jul 2025 09:26:48 +0200 Subject: [PATCH 01/29] Introduce cache provider --- asab/library/providers/cacahe.py | 99 ++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 asab/library/providers/cacahe.py diff --git a/asab/library/providers/cacahe.py b/asab/library/providers/cacahe.py new file mode 100644 index 000000000..7590b885a --- /dev/null +++ b/asab/library/providers/cacahe.py @@ -0,0 +1,99 @@ +import os +import logging +import hashlib + +from asab.library.providers.filesystem import FileSystemLibraryProvider +from asab.config import Config +from asab import exceptions + +L = logging.getLogger(__name__) + +class CacheLibraryProvider(FileSystemLibraryProvider): + """ + A read-only cache-backed wrapper that serves exclusively from the on-disk cache. + No fallback to upstream providers; cache misses return None or empty list. + + Under [library:cache].dir, each layer is exposed via: + @global//... + where layer_hash = sha256(uri). + + Usage (registering provider in LibraryService._create_library): + provider = CacheLibraryProvider(library, uri, layer) + + Consuming cached layers: + 1. Ensure LibraryCacheService has populated the cache and published 'library.cache.ready'. + 2. LibraryService will wrap each 'libsreg+' URI with this CacheLibraryProvider. + 3. To read an item: + ```python + async with library.open('/path/to/item') as stream: + if stream is None: + # cache miss or not found + return None + data = stream.read() + ``` + 4. To list contents of a directory: + ```python + items = await library.list('/path/to/directory/') + for item in items: + print(item.name) + ``` + + Layer directories on disk: + [cache_root]/@cache// # immutable snapshots + [cache_root]/@cache//.uri # original URI + [cache_root]/@cache//.unique_id # snapshot hash + [cache_root]/@global// # symlink to active snapshot + [cache_root]/@global//.uri + [cache_root]/@global//.unique_id + + Notes for GitLab Merge Request: + - This change introduces a CacheLibraryProvider to serve from disk-only cache. + - Ensure `[library:cache].dir` is documented in configuration reference. + - Update LibraryService to wrap `libsreg+` URIs with this provider instead of direct network fetch. + - Add integration test to verify cache lookup and symlink resolution. + - Confirm backward compatibility: providers other than `libsreg+` remain unaffected. + - Mention atomic symlink swap behavior in release notes. + """ + def __init__(self, library, uri, layer): + # Compute stable layer key + self.uri = uri + self.layer_hash = hashlib.sha256(uri.encode('utf-8')).hexdigest() + + # Locate configured cache root + cache_root = Config.get('library:cache', 'dir', fallback=None) + if not cache_root: + raise exceptions.LibraryConfigurationError( + "Missing [library:cache].dir configuration" + ) + + # Build path to on-disk cache for this layer + self.cache_dir = os.path.join(cache_root, '@global', self.layer_hash) + if not os.path.isdir(self.cache_dir): + L.warning("Cache directory not found for %s: %s", uri, self.cache_dir) + + # Initialize filesystem provider against cache path, skip ready event + cache_uri = 'file://' + self.cache_dir.rstrip('/') + super().__init__(library, cache_uri, layer, set_ready=False) + + # Override read/list to serve only from cache + async def read(self, path): + """ + Attempt to read from cache; return None on cache miss. + + :param path: Absolute path within the library (e.g. '/Templates/config.json') + :returns: File-like stream or None + """ + return await super().read(path) + + async def list(self, path): + """ + Attempt to list directory from cache; return empty list on cache miss. + + :param path: Absolute directory path (e.g. '/Templates/') + :returns: List of LibraryItem objects + """ + try: + return await super().list(path) + except Exception: + L.debug("Cache list miss or error: %s", path) + return [] From 11ccb2189212843b99dd551dc010083ed9b7fb93 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Thu, 31 Jul 2025 09:31:19 +0200 Subject: [PATCH 02/29] remove debug --- asab/library/providers/cacahe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asab/library/providers/cacahe.py b/asab/library/providers/cacahe.py index 7590b885a..db56b10cf 100644 --- a/asab/library/providers/cacahe.py +++ b/asab/library/providers/cacahe.py @@ -95,5 +95,5 @@ async def list(self, path): try: return await super().list(path) except Exception: - L.debug("Cache list miss or error: %s", path) + L.warning("Cache list miss or error: %s", path) return [] From 2cf5951515265984c033eb595752bdbfad76aba5 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Thu, 31 Jul 2025 10:46:48 +0200 Subject: [PATCH 03/29] Rename module --- asab/library/providers/{cacahe.py => cache.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename asab/library/providers/{cacahe.py => cache.py} (100%) diff --git a/asab/library/providers/cacahe.py b/asab/library/providers/cache.py similarity index 100% rename from asab/library/providers/cacahe.py rename to asab/library/providers/cache.py From ea17922c1f89125936060bfa750a888f60873337 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Thu, 31 Jul 2025 10:57:05 +0200 Subject: [PATCH 04/29] init cache service --- asab/library/service.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/asab/library/service.py b/asab/library/service.py index a0dcd5c5c..fb7ff23f0 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -131,9 +131,16 @@ def _create_library(self, path, layer): from .providers.git import GitLibraryProvider library_provider = GitLibraryProvider(self, path, layer) - elif path.startswith('libsreg+'): + elif path.startswith("libsreg+"): + # 1) register the on-disk cache provider first + from .providers.cache import CacheLibraryProvider + cache = CacheLibraryProvider(self, path, layer) + self.Libraries.append(cache) + + # 2) then register the real registry provider as a fallback from .providers.libsreg import LibsRegLibraryProvider - library_provider = LibsRegLibraryProvider(self, path, layer) + real = LibsRegLibraryProvider(self, path, layer) + self.Libraries.append(real) elif path == '' or path.startswith("#") or path.startswith(";"): # This is empty or commented line From a90c6674f6e42b77c256d4affe3e9a8ab4fe4e65 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Fri, 1 Aug 2025 07:51:53 +0200 Subject: [PATCH 05/29] Flake8 --- asab/library/providers/cache.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index db56b10cf..bd2000d01 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -8,6 +8,7 @@ L = logging.getLogger(__name__) + class CacheLibraryProvider(FileSystemLibraryProvider): """ A read-only cache-backed wrapper that serves exclusively from the on-disk cache. From 95ede91ebde89b99c21514ab07c4ac99545ddabc Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Fri, 1 Aug 2025 08:15:12 +0200 Subject: [PATCH 06/29] More changes --- asab/library/providers/cache.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index bd2000d01..52a7aeecf 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -67,6 +67,11 @@ def __init__(self, library, uri, layer): "Missing [library:cache].dir configuration" ) + library.App.PubSub.subscribe( + "library.cache.ready", + self._on_cache_ready + ) + # Build path to on-disk cache for this layer self.cache_dir = os.path.join(cache_root, '@global', self.layer_hash) if not os.path.isdir(self.cache_dir): @@ -76,6 +81,10 @@ def __init__(self, library, uri, layer): cache_uri = 'file://' + self.cache_dir.rstrip('/') super().__init__(library, cache_uri, layer, set_ready=False) + + async def _on_cache_ready(self, event, data): + await self._set_ready() + # Override read/list to serve only from cache async def read(self, path): """ From 60635d5aff221787783602324995476373209acb Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Fri, 1 Aug 2025 08:16:53 +0200 Subject: [PATCH 07/29] Remove notes --- asab/library/providers/cache.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index 52a7aeecf..d1b42b9b3 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -46,14 +46,6 @@ class CacheLibraryProvider(FileSystemLibraryProvider): [cache_root]/@global// # symlink to active snapshot [cache_root]/@global//.uri [cache_root]/@global//.unique_id - - Notes for GitLab Merge Request: - - This change introduces a CacheLibraryProvider to serve from disk-only cache. - - Ensure `[library:cache].dir` is documented in configuration reference. - - Update LibraryService to wrap `libsreg+` URIs with this provider instead of direct network fetch. - - Add integration test to verify cache lookup and symlink resolution. - - Confirm backward compatibility: providers other than `libsreg+` remain unaffected. - - Mention atomic symlink swap behavior in release notes. """ def __init__(self, library, uri, layer): # Compute stable layer key From 5fafca17be7de364fb87cc7846af739409f0d846 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Fri, 1 Aug 2025 10:40:22 +0200 Subject: [PATCH 08/29] More changes --- asab/library/providers/cache.py | 104 ++++++++++++-------------------- asab/library/service.py | 12 ++-- 2 files changed, 45 insertions(+), 71 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index d1b42b9b3..a7a5cb32a 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -4,98 +4,74 @@ from asab.library.providers.filesystem import FileSystemLibraryProvider from asab.config import Config -from asab import exceptions + +## L = logging.getLogger(__name__) +## + class CacheLibraryProvider(FileSystemLibraryProvider): """ - A read-only cache-backed wrapper that serves exclusively from the on-disk cache. - No fallback to upstream providers; cache misses return None or empty list. + A read-only cache-backed wrapper that serves from the on-disk cache, + and only on I/O errors (“hard” failures) will cascade to the real provider. Under [library:cache].dir, each layer is exposed via: @global//... where layer_hash = sha256(uri). - Usage (registering provider in LibraryService._create_library): - provider = CacheLibraryProvider(library, uri, layer) - - Consuming cached layers: - 1. Ensure LibraryCacheService has populated the cache and published 'library.cache.ready'. - 2. LibraryService will wrap each 'libsreg+' URI with this CacheLibraryProvider. - 3. To read an item: - ```python - async with library.open('/path/to/item') as stream: - if stream is None: - # cache miss or not found - return None - data = stream.read() - ``` - 4. To list contents of a directory: - ```python - items = await library.list('/path/to/directory/') - for item in items: - print(item.name) - ``` - - Layer directories on disk: - [cache_root]/@cache// # immutable snapshots - [cache_root]/@cache//.uri # original URI - [cache_root]/@cache//.unique_id # snapshot hash - [cache_root]/@global// # symlink to active snapshot - [cache_root]/@global//.uri - [cache_root]/@global//.unique_id + Usage (in LibraryService._create_library): + real = LibsRegLibraryProvider(...) + cache = CacheLibraryProvider(library, uri, layer, real) + self.Libraries.append(cache) + + On every read()/list(): + 1. Try cache + 2. If it throws, fall back to real.read()/real.list() """ - def __init__(self, library, uri, layer): - # Compute stable layer key + + def __init__(self, library, uri, layer, real_provider): + # Keep for fallback + self._real = real_provider self.uri = uri - self.layer_hash = hashlib.sha256(uri.encode('utf-8')).hexdigest() + self.layer_hash = hashlib.sha256(uri.encode("utf-8")).hexdigest() - # Locate configured cache root - cache_root = Config.get('library:cache', 'dir', fallback=None) + # 1) Validate cache root + cache_root = Config.get("library:cache", "dir", fallback=None) if not cache_root: - raise exceptions.LibraryConfigurationError( + raise Exception( "Missing [library:cache].dir configuration" ) + if not os.path.isdir(cache_root): + L.critical("Cache root %s not found, exiting.", cache_root) + raise SystemExit("Missing cache root") - library.App.PubSub.subscribe( - "library.cache.ready", - self._on_cache_ready - ) - - # Build path to on-disk cache for this layer - self.cache_dir = os.path.join(cache_root, '@global', self.layer_hash) + # 2) Build the cache URI + self.cache_dir = os.path.join(cache_root, "@global", self.layer_hash) if not os.path.isdir(self.cache_dir): L.warning("Cache directory not found for %s: %s", uri, self.cache_dir) - # Initialize filesystem provider against cache path, skip ready event - cache_uri = 'file://' + self.cache_dir.rstrip('/') + cache_uri = "file://" + self.cache_dir.rstrip("/") + # Initialize the filesystem provider *only* against the cache super().__init__(library, cache_uri, layer, set_ready=False) + # 3) Re-publish readiness when new snapshots arrive + library.App.PubSub.subscribe( + "library.cache.ready", + self._on_cache_ready + ) async def _on_cache_ready(self, event, data): await self._set_ready() - # Override read/list to serve only from cache async def read(self, path): - """ - Attempt to read from cache; return None on cache miss. - - :param path: Absolute path within the library (e.g. '/Templates/config.json') - :returns: File-like stream or None - """ + if not os.path.isdir(self.cache_dir): + # ← cache folder doesn’t exist → fall back + return await self._real.read(path) return await super().read(path) async def list(self, path): - """ - Attempt to list directory from cache; return empty list on cache miss. - - :param path: Absolute directory path (e.g. '/Templates/') - :returns: List of LibraryItem objects - """ - try: - return await super().list(path) - except Exception: - L.warning("Cache list miss or error: %s", path) - return [] + if not os.path.isdir(self.cache_dir): + return await self._real.list(path) + return await super().list(path) diff --git a/asab/library/service.py b/asab/library/service.py index fb7ff23f0..d9c931e06 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -132,15 +132,13 @@ def _create_library(self, path, layer): library_provider = GitLibraryProvider(self, path, layer) elif path.startswith("libsreg+"): - # 1) register the on-disk cache provider first - from .providers.cache import CacheLibraryProvider - cache = CacheLibraryProvider(self, path, layer) - self.Libraries.append(cache) - - # 2) then register the real registry provider as a fallback from .providers.libsreg import LibsRegLibraryProvider real = LibsRegLibraryProvider(self, path, layer) - self.Libraries.append(real) + from .providers.cache import CacheLibraryProvider + cache = CacheLibraryProvider(self, path, layer, real_provider=real) + # Only register the cache wrapper: + self.Libraries.append(cache) + return # <-- skip the generic append below elif path == '' or path.startswith("#") or path.startswith(";"): # This is empty or commented line From d473f7012e0e8959ae9ea5447de371d6d99ed30a Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Fri, 1 Aug 2025 10:49:29 +0200 Subject: [PATCH 09/29] Use format --- asab/library/providers/cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index a7a5cb32a..8bc780a96 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -44,13 +44,13 @@ def __init__(self, library, uri, layer, real_provider): "Missing [library:cache].dir configuration" ) if not os.path.isdir(cache_root): - L.critical("Cache root %s not found, exiting.", cache_root) + L.critical("Cache root '{}' not found, exiting.".format(cache_root)) raise SystemExit("Missing cache root") # 2) Build the cache URI self.cache_dir = os.path.join(cache_root, "@global", self.layer_hash) if not os.path.isdir(self.cache_dir): - L.warning("Cache directory not found for %s: %s", uri, self.cache_dir) + L.warning("Cache directory not found for '{}' '{}'.".format(uri, self.cache_dir)) cache_uri = "file://" + self.cache_dir.rstrip("/") # Initialize the filesystem provider *only* against the cache From f02030a973b3660ada923592aac186f819762072 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 5 Aug 2025 10:24:49 +0200 Subject: [PATCH 10/29] Change to cache service --- asab/library/providers/cache.py | 69 +++++++++++---------------------- asab/library/service.py | 16 +++++--- 2 files changed, 33 insertions(+), 52 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index 8bc780a96..b6e0e8797 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -5,73 +5,50 @@ from asab.library.providers.filesystem import FileSystemLibraryProvider from asab.config import Config -## - L = logging.getLogger(__name__) -## - - class CacheLibraryProvider(FileSystemLibraryProvider): """ - A read-only cache-backed wrapper that serves from the on-disk cache, - and only on I/O errors (“hard” failures) will cascade to the real provider. - - Under [library:cache].dir, each layer is exposed via: - @global//... - where layer_hash = sha256(uri). - - Usage (in LibraryService._create_library): - real = LibsRegLibraryProvider(...) - cache = CacheLibraryProvider(library, uri, layer, real) - self.Libraries.append(cache) + A read‐only cache wrapper that points at + [library:cache].dir/@global/. - On every read()/list(): - 1. Try cache - 2. If it throws, fall back to real.read()/real.list() + Any cache miss (missing dir or file) simply returns None + or raises KeyError so that LibraryService will fall through + to the next provider. """ - def __init__(self, library, uri, layer, real_provider): - # Keep for fallback - self._real = real_provider - self.uri = uri + def __init__(self, library, uri, layer): + # compute the symlink key self.layer_hash = hashlib.sha256(uri.encode("utf-8")).hexdigest() - # 1) Validate cache root + # resolve cache root cache_root = Config.get("library:cache", "dir", fallback=None) if not cache_root: - raise Exception( - "Missing [library:cache].dir configuration" - ) + raise RuntimeError("Missing [library:cache].dir configuration") if not os.path.isdir(cache_root): - L.critical("Cache root '{}' not found, exiting.".format(cache_root)) - raise SystemExit("Missing cache root") + L.critical("Cache root '{0}' not found, exiting.".format(cache_root)) + raise SystemExit(1) - # 2) Build the cache URI - self.cache_dir = os.path.join(cache_root, "@global", self.layer_hash) - if not os.path.isdir(self.cache_dir): - L.warning("Cache directory not found for '{}' '{}'.".format(uri, self.cache_dir)) + # point at the live snapshot + cache_dir = os.path.join(cache_root, "@global", self.layer_hash) + if not os.path.isdir(cache_dir): + L.warning("No cache snapshot for URI '{0}' at '{1}'.".format(uri, cache_dir)) - cache_uri = "file://" + self.cache_dir.rstrip("/") - # Initialize the filesystem provider *only* against the cache + # filesystem-style URI + cache_uri = "file://{0}".format(cache_dir.rstrip("/")) super().__init__(library, cache_uri, layer, set_ready=False) - # 3) Re-publish readiness when new snapshots arrive - library.App.PubSub.subscribe( - "library.cache.ready", - self._on_cache_ready - ) + # re-publish readiness when the cache updates + library.App.PubSub.subscribe("library.cache.ready", self._on_cache_ready) - async def _on_cache_ready(self, event, data): + async def _on_cache_ready(self, *args): + # once a new snapshot appears, mark ready await self._set_ready() async def read(self, path): - if not os.path.isdir(self.cache_dir): - # ← cache folder doesn’t exist → fall back - return await self._real.read(path) + # returns None on cache-miss → LibraryService will try next provider return await super().read(path) async def list(self, path): - if not os.path.isdir(self.cache_dir): - return await self._real.list(path) + # raises KeyError on missing directory → LibraryService will try next provider return await super().list(path) diff --git a/asab/library/service.py b/asab/library/service.py index d9c931e06..b8451b974 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -131,14 +131,18 @@ def _create_library(self, path, layer): from .providers.git import GitLibraryProvider library_provider = GitLibraryProvider(self, path, layer) - elif path.startswith("libsreg+"): - from .providers.libsreg import LibsRegLibraryProvider - real = LibsRegLibraryProvider(self, path, layer) + # LibsReg → cache first, then real + + elif path.startswith('libsreg+'): + # 1) on-disk cache wrapper from .providers.cache import CacheLibraryProvider - cache = CacheLibraryProvider(self, path, layer, real_provider=real) - # Only register the cache wrapper: + cache = CacheLibraryProvider(self, path, layer) self.Libraries.append(cache) - return # <-- skip the generic append below + # 2) registry fallback + from .providers.libsreg import LibsRegLibraryProvider + real = LibsRegLibraryProvider(self, path, layer) + self.Libraries.append(real) + return elif path == '' or path.startswith("#") or path.startswith(";"): # This is empty or commented line From 3f2af1cc9210f0be59c491dcd1991f57d41f7c7c Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 5 Aug 2025 10:43:18 +0200 Subject: [PATCH 11/29] some minor cleanup --- asab/library/providers/cache.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index b6e0e8797..9a4ae5e51 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -44,11 +44,3 @@ def __init__(self, library, uri, layer): async def _on_cache_ready(self, *args): # once a new snapshot appears, mark ready await self._set_ready() - - async def read(self, path): - # returns None on cache-miss → LibraryService will try next provider - return await super().read(path) - - async def list(self, path): - # raises KeyError on missing directory → LibraryService will try next provider - return await super().list(path) From 302872b0db3fc15d0f2841be131448256f2e07a5 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 5 Aug 2025 10:53:03 +0200 Subject: [PATCH 12/29] Fallback only when cache provider does not exist --- asab/library/providers/cache.py | 43 ++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index 9a4ae5e51..8db985a2b 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -12,16 +12,18 @@ class CacheLibraryProvider(FileSystemLibraryProvider): A read‐only cache wrapper that points at [library:cache].dir/@global/. - Any cache miss (missing dir or file) simply returns None - or raises KeyError so that LibraryService will fall through - to the next provider. + Any call to read()/list() will: + – if the cache directory doesn’t exist → fall back to real provider + – otherwise → serve from cache and let misses bubble up """ - def __init__(self, library, uri, layer): - # compute the symlink key + def __init__(self, library, uri, layer, real_provider): + # Keep for fallback + self._real = real_provider + # Compute layer‐key self.layer_hash = hashlib.sha256(uri.encode("utf-8")).hexdigest() - # resolve cache root + # Resolve cache root cache_root = Config.get("library:cache", "dir", fallback=None) if not cache_root: raise RuntimeError("Missing [library:cache].dir configuration") @@ -29,18 +31,31 @@ def __init__(self, library, uri, layer): L.critical("Cache root '{0}' not found, exiting.".format(cache_root)) raise SystemExit(1) - # point at the live snapshot - cache_dir = os.path.join(cache_root, "@global", self.layer_hash) - if not os.path.isdir(cache_dir): - L.warning("No cache snapshot for URI '{0}' at '{1}'.".format(uri, cache_dir)) + # Point at the *live* cache snapshot + self.cache_dir = os.path.join(cache_root, "@global", self.layer_hash) + if not os.path.isdir(self.cache_dir): + L.warning("No cache snapshot for URI '{0}' at '{1}'.".format(uri, self.cache_dir)) - # filesystem-style URI - cache_uri = "file://{0}".format(cache_dir.rstrip("/")) + # Filesystem‐style URI for the cache + cache_uri = "file://{0}".format(self.cache_dir.rstrip("/")) super().__init__(library, cache_uri, layer, set_ready=False) - # re-publish readiness when the cache updates + # Whenever the cache service signals ready, re‐publish readiness library.App.PubSub.subscribe("library.cache.ready", self._on_cache_ready) async def _on_cache_ready(self, *args): - # once a new snapshot appears, mark ready await self._set_ready() + + async def read(self, path): + # FALLBACK only if the entire cache folder is gone + if not os.path.isdir(self.cache_dir): + return await self._real.read(path) + # otherwise serve from cache (None on missing file → next provider) + return await super().read(path) + + async def list(self, path): + # FALLBACK only if the entire cache folder is gone + if not os.path.isdir(self.cache_dir): + return await self._real.list(path) + # otherwise serve from cache (KeyError on missing dir → next provider) + return await super().list(path) From bc368ce469e61a5d3ab46504bcd2482426d7b7fe Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 5 Aug 2025 10:57:20 +0200 Subject: [PATCH 13/29] Call real providers inside cache providers --- asab/library/service.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/asab/library/service.py b/asab/library/service.py index b8451b974..3ff27f35a 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -134,14 +134,14 @@ def _create_library(self, path, layer): # LibsReg → cache first, then real elif path.startswith('libsreg+'): - # 1) on-disk cache wrapper - from .providers.cache import CacheLibraryProvider - cache = CacheLibraryProvider(self, path, layer) - self.Libraries.append(cache) - # 2) registry fallback + # 1) Create the real registry provider from .providers.libsreg import LibsRegLibraryProvider real = LibsRegLibraryProvider(self, path, layer) - self.Libraries.append(real) + + # 2) Wrap it in your cache provider + from .providers.cache import CacheLibraryProvider + cache_wrapper = CacheLibraryProvider(self, path, layer, real_provider=real) + self.Libraries.append(cache_wrapper) return elif path == '' or path.startswith("#") or path.startswith(";"): From 5554df393c828e4512173478adc452756105d8a5 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 5 Aug 2025 14:36:46 +0200 Subject: [PATCH 14/29] More changes --- asab/library/providers/cache.py | 36 ++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index 8db985a2b..d1f73611e 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -5,8 +5,13 @@ from asab.library.providers.filesystem import FileSystemLibraryProvider from asab.config import Config +## + L = logging.getLogger(__name__) +## + + class CacheLibraryProvider(FileSystemLibraryProvider): """ A read‐only cache wrapper that points at @@ -46,16 +51,27 @@ def __init__(self, library, uri, layer, real_provider): async def _on_cache_ready(self, *args): await self._set_ready() + def _cache_live(self): + # True once @global/ exists + return os.path.isdir(self.cache_dir) + async def read(self, path): - # FALLBACK only if the entire cache folder is gone - if not os.path.isdir(self.cache_dir): - return await self._real.read(path) - # otherwise serve from cache (None on missing file → next provider) - return await super().read(path) + if self._cache_live(): + return await super().read(path) + return await self._real.read(path) async def list(self, path): - # FALLBACK only if the entire cache folder is gone - if not os.path.isdir(self.cache_dir): - return await self._real.list(path) - # otherwise serve from cache (KeyError on missing dir → next provider) - return await super().list(path) + if self._cache_live(): + return await super().list(path) + return await self._real.list(path) + + async def find(self, path): + if self._cache_live(): + return await super().find(path) + return await self._real.find(path) + + async def subscribe(self, path, target=None): + # subscribe *only* where we actually expect changes + if self._cache_live(): + return await super().subscribe(path, target) + return await self._real.subscribe(path, target) From 1e1eade95404e0c4f7d3c4adb21fa91629694548 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 5 Aug 2025 15:10:32 +0200 Subject: [PATCH 15/29] More changes --- asab/library/providers/cache.py | 119 +++++++++++++++----------------- asab/library/service.py | 45 ++++++------ 2 files changed, 77 insertions(+), 87 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index d1f73611e..f2d126784 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -9,69 +9,64 @@ L = logging.getLogger(__name__) + ## class CacheLibraryProvider(FileSystemLibraryProvider): - """ - A read‐only cache wrapper that points at - [library:cache].dir/@global/. - - Any call to read()/list() will: - – if the cache directory doesn’t exist → fall back to real provider - – otherwise → serve from cache and let misses bubble up - """ - - def __init__(self, library, uri, layer, real_provider): - # Keep for fallback - self._real = real_provider - # Compute layer‐key - self.layer_hash = hashlib.sha256(uri.encode("utf-8")).hexdigest() - - # Resolve cache root - cache_root = Config.get("library:cache", "dir", fallback=None) - if not cache_root: - raise RuntimeError("Missing [library:cache].dir configuration") - if not os.path.isdir(cache_root): - L.critical("Cache root '{0}' not found, exiting.".format(cache_root)) - raise SystemExit(1) - - # Point at the *live* cache snapshot - self.cache_dir = os.path.join(cache_root, "@global", self.layer_hash) - if not os.path.isdir(self.cache_dir): - L.warning("No cache snapshot for URI '{0}' at '{1}'.".format(uri, self.cache_dir)) - - # Filesystem‐style URI for the cache - cache_uri = "file://{0}".format(self.cache_dir.rstrip("/")) - super().__init__(library, cache_uri, layer, set_ready=False) - - # Whenever the cache service signals ready, re‐publish readiness - library.App.PubSub.subscribe("library.cache.ready", self._on_cache_ready) - - async def _on_cache_ready(self, *args): - await self._set_ready() - - def _cache_live(self): - # True once @global/ exists - return os.path.isdir(self.cache_dir) - - async def read(self, path): - if self._cache_live(): - return await super().read(path) - return await self._real.read(path) - - async def list(self, path): - if self._cache_live(): - return await super().list(path) - return await self._real.list(path) - - async def find(self, path): - if self._cache_live(): - return await super().find(path) - return await self._real.find(path) - - async def subscribe(self, path, target=None): - # subscribe *only* where we actually expect changes - if self._cache_live(): - return await super().subscribe(path, target) - return await self._real.subscribe(path, target) + """ + A read‐only cache wrapper that points at + [library:cache].dir/@global/. + + Any call to read()/list() will serve from cache if present, + or return None if the cache is missing. + """ + + def __init__(self, library, uri, layer): + # Compute layer hash INSIDE the provider + self.layer_hash = hashlib.sha256(uri.encode("utf-8")).hexdigest() + + # Resolve cache root + cache_root = Config.get("library:cache", "dir", fallback=None) + if not cache_root: + raise RuntimeError("Missing [library:cache].dir configuration") + if not os.path.isdir(cache_root): + L.critical("Cache root '{}' not found, exiting.".format(cache_root)) + raise SystemExit(1) + + # Compose live cache snapshot directory + self.cache_dir = os.path.join(cache_root, "@global", self.layer_hash) + if not os.path.isdir(self.cache_dir): + L.warning( + "No cache snapshot for URI '{}' at '{}'.".format(uri, self.cache_dir) + ) + + cache_uri = "file://{}".format(self.cache_dir.rstrip("/")) + super().__init__(library, cache_uri, layer, set_ready=False) + library.App.PubSub.subscribe("library.cache.ready", self._on_cache_ready) + + async def _on_cache_ready(self, *args): + await self._set_ready() + + def _cache_live(self): + return os.path.isdir(self.cache_dir) + + async def read(self, path): + if self._cache_live(): + return await super().read(path) + return None + + async def list(self, path): + if self._cache_live(): + return await super().list(path) + return None + + async def find(self, path): + if self._cache_live(): + return await super().find(path) + return None + + async def subscribe(self, path, target=None): + if self._cache_live(): + return await super().subscribe(path, target) + return None diff --git a/asab/library/service.py b/asab/library/service.py index 3ff27f35a..275022f10 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -114,45 +114,40 @@ async def _on_tick60(self, message_type): await self._read_disabled() def _create_library(self, path, layer): - library_provider = None + # Handle libsreg+ URIs (which may be comma-separated) + if path.startswith('libsreg+'): + uris = path[7:].split(',') # Remove 'libsreg+' prefix, split by comma + for uri in uris: + full_uri = 'libsreg+{}'.format(uri.strip()) + from .providers.cache import CacheLibraryProvider + provider = CacheLibraryProvider(self, full_uri, layer) + if provider._cache_live(): + self.Libraries.append(provider) + else: + from .providers.libsreg import LibsRegLibraryProvider + real_provider = LibsRegLibraryProvider(self, full_uri, layer) + self.Libraries.append(real_provider) + return + if path.startswith('zk://') or path.startswith('zookeeper://'): from .providers.zookeeper import ZooKeeperLibraryProvider - library_provider = ZooKeeperLibraryProvider(self, path, layer) - + provider = ZooKeeperLibraryProvider(self, path, layer) elif path.startswith('./') or path.startswith('/') or path.startswith('file://'): from .providers.filesystem import FileSystemLibraryProvider - library_provider = FileSystemLibraryProvider(self, path, layer) - + provider = FileSystemLibraryProvider(self, path, layer) elif path.startswith('azure+https://'): from .providers.azurestorage import AzureStorageLibraryProvider - library_provider = AzureStorageLibraryProvider(self, path, layer) - + provider = AzureStorageLibraryProvider(self, path, layer) elif path.startswith('git+'): from .providers.git import GitLibraryProvider - library_provider = GitLibraryProvider(self, path, layer) - - # LibsReg → cache first, then real - - elif path.startswith('libsreg+'): - # 1) Create the real registry provider - from .providers.libsreg import LibsRegLibraryProvider - real = LibsRegLibraryProvider(self, path, layer) - - # 2) Wrap it in your cache provider - from .providers.cache import CacheLibraryProvider - cache_wrapper = CacheLibraryProvider(self, path, layer, real_provider=real) - self.Libraries.append(cache_wrapper) - return - + provider = GitLibraryProvider(self, path, layer) elif path == '' or path.startswith("#") or path.startswith(";"): - # This is empty or commented line return - else: L.error("Incorrect/unknown provider for '{}'".format(path)) raise SystemExit("Exit due to a critical configuration error.") - self.Libraries.append(library_provider) + self.Libraries.append(provider) def is_ready(self) -> bool: """ From 66f5e5333635b391f75e2a5ce7ac2cb23c1650d3 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Thu, 7 Aug 2025 08:25:59 +0200 Subject: [PATCH 16/29] More changes to cache provider --- asab/library/providers/cache.py | 76 ++++++++++------ asab/library/service.py | 151 +++++++++++++++++++++++--------- 2 files changed, 159 insertions(+), 68 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index f2d126784..43942cfdf 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -2,49 +2,73 @@ import logging import hashlib -from asab.library.providers.filesystem import FileSystemLibraryProvider from asab.config import Config - -## +from asab.library.providers.filesystem import FileSystemLibraryProvider L = logging.getLogger(__name__) -## - - class CacheLibraryProvider(FileSystemLibraryProvider): """ - A read‐only cache wrapper that points at + A read-only cache wrapper that points at [library:cache].dir/@global/. - Any call to read()/list() will serve from cache if present, - or return None if the cache is missing. + Any call to read()/list()/find()/subscribe() will serve from cache if present, + or raise KeyError if the cache is missing to trigger fallback. """ def __init__(self, library, uri, layer): - # Compute layer hash INSIDE the provider - self.layer_hash = hashlib.sha256(uri.encode("utf-8")).hexdigest() + # 1) Compute the exact same layer_hash your LibraryCacheService wrote + master_hash = hashlib.sha256(uri.encode("utf-8")).hexdigest() - # Resolve cache root + # 2) Locate the symlink under @global cache_root = Config.get("library:cache", "dir", fallback=None) if not cache_root: raise RuntimeError("Missing [library:cache].dir configuration") + global_link = os.path.join(cache_root, "@global", master_hash) + + # 3) Resolve link target, handling absolute and relative (from global_link's dir) + if os.path.islink(global_link): + target = os.readlink(global_link) + if os.path.isabs(target): + # absolute symlink + resolved = target + else: + # relative to the directory containing the symlink + base = os.path.dirname(global_link) + resolved = os.path.join(base, target) + cache_dir = os.path.realpath(resolved) + + elif os.path.isdir(global_link): + # someone recreated the dir instead of symlink + cache_dir = global_link + + else: + # not yet created → point at the expected symlink path + cache_dir = global_link + + # 4) Remember for _cache_live() + self.layer_hash = master_hash + self.cache_dir = cache_dir + + # 5) Sanity‐check cache_root exists if not os.path.isdir(cache_root): L.critical("Cache root '{}' not found, exiting.".format(cache_root)) raise SystemExit(1) - # Compose live cache snapshot directory - self.cache_dir = os.path.join(cache_root, "@global", self.layer_hash) + # 6) Warn if no snapshot directory is present yet if not os.path.isdir(self.cache_dir): L.warning( "No cache snapshot for URI '{}' at '{}'.".format(uri, self.cache_dir) ) + # 7) Delegate to FileSystemLibraryProvider cache_uri = "file://{}".format(self.cache_dir.rstrip("/")) super().__init__(library, cache_uri, layer, set_ready=False) library.App.PubSub.subscribe("library.cache.ready", self._on_cache_ready) + + async def _on_cache_ready(self, *args): await self._set_ready() @@ -52,21 +76,21 @@ def _cache_live(self): return os.path.isdir(self.cache_dir) async def read(self, path): - if self._cache_live(): - return await super().read(path) - return None + if not self._cache_live(): + raise KeyError("No cache for '{}'".format(path)) + return await super().read(path) async def list(self, path): - if self._cache_live(): - return await super().list(path) - return None + if not self._cache_live(): + raise KeyError("No cache for '{}'".format(path)) + return await super().list(path) async def find(self, path): - if self._cache_live(): - return await super().find(path) - return None + if not self._cache_live(): + raise KeyError("No cache for '{}'".format(path)) + return await super().find(path) async def subscribe(self, path, target=None): - if self._cache_live(): - return await super().subscribe(path, target) - return None + if not self._cache_live(): + raise KeyError("No cache for '{}'".format(path)) + return await super().subscribe(path, target) diff --git a/asab/library/service.py b/asab/library/service.py index 275022f10..d856cacc1 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -83,6 +83,7 @@ def __init__( super().__init__(app, service_name) self.Libraries: list[LibraryProviderABC] = [] + self.CacheLibraries: list[LibraryProviderABC] = [] # cache-first self.Disabled: dict = {} self.DisabledPaths: list = [] @@ -116,38 +117,47 @@ async def _on_tick60(self, message_type): def _create_library(self, path, layer): # Handle libsreg+ URIs (which may be comma-separated) if path.startswith('libsreg+'): - uris = path[7:].split(',') # Remove 'libsreg+' prefix, split by comma - for uri in uris: - full_uri = 'libsreg+{}'.format(uri.strip()) - from .providers.cache import CacheLibraryProvider - provider = CacheLibraryProvider(self, full_uri, layer) - if provider._cache_live(): - self.Libraries.append(provider) - else: - from .providers.libsreg import LibsRegLibraryProvider - real_provider = LibsRegLibraryProvider(self, full_uri, layer) - self.Libraries.append(real_provider) + from .providers.cache import CacheLibraryProvider + cachep = CacheLibraryProvider(self, path, layer) + # always register the cache wrapper (even if no snapshot yet) + self.CacheLibraries.append(cachep) + + from .providers.libsreg import LibsRegLibraryProvider + realp = LibsRegLibraryProvider(self, path, layer) + self.Libraries.append(realp) return + # ZooKeeper (no cache support) if path.startswith('zk://') or path.startswith('zookeeper://'): from .providers.zookeeper import ZooKeeperLibraryProvider provider = ZooKeeperLibraryProvider(self, path, layer) + self.Libraries.append(provider) + + # Filesystem (no cache support) elif path.startswith('./') or path.startswith('/') or path.startswith('file://'): from .providers.filesystem import FileSystemLibraryProvider provider = FileSystemLibraryProvider(self, path, layer) + self.Libraries.append(provider) + + # Azure Storage elif path.startswith('azure+https://'): from .providers.azurestorage import AzureStorageLibraryProvider provider = AzureStorageLibraryProvider(self, path, layer) + self.Libraries.append(provider) + + # Git elif path.startswith('git+'): from .providers.git import GitLibraryProvider provider = GitLibraryProvider(self, path, layer) - elif path == '' or path.startswith("#") or path.startswith(";"): + self.Libraries.append(provider) + + # comments or blanks + elif not path or path[0] in ('#', ';'): return - else: - L.error("Incorrect/unknown provider for '{}'".format(path)) - raise SystemExit("Exit due to a critical configuration error.") - self.Libraries.append(provider) + else: + L.error("Incorrect provider for '{}'".format(path)) + raise SystemExit(1) def is_ready(self) -> bool: """ @@ -194,6 +204,20 @@ async def find(self, path: str) -> typing.List[str]: """ _validate_path_item(path) + # 1) cache‐first phase + try: + results = [] + for library in self.CacheLibraries: + found = await library.find(path) + if found: + results.extend(found) + if results: + return results + except KeyError: + # cache miss → skip to live + pass + + # 2) live fallback (your original logic) results = [] for library in self.Libraries: found_files = await library.find(path) @@ -227,17 +251,26 @@ async def read(self, path: str) -> typing.Optional[typing.IO]: LogObsolete.warning("Method 'LibraryService.read()' is obsolete. Use 'LibraryService.open()' method instead.") _validate_path_item(path) + # 1) global disable check if self.check_disabled(path): return None - for library in self.Libraries: - itemio = await library.read(path) - if itemio is None: - continue - return itemio - - return None + # 2) cache-first + try: + # this will raise KeyError as soon as any cache provider is “missing” + for cachep in self.CacheLibraries: + itemio = await cachep.read(path) + if itemio is not None: + return itemio + except KeyError: + # cache miss → fall back + pass + # 3) live fallback + for livep in self.Libraries: + itemio = await livep.read(path) + if itemio is not None: + return itemio @contextlib.asynccontextmanager async def open(self, path: str): @@ -259,15 +292,27 @@ async def open(self, path: str): # Same functionality as in read() method itemio = None - disabled = self.check_disabled(path) - if not disabled: - for library in self.Libraries: - itemio = await library.read(path) + + # 2) cache-first + try: + for cachep in self.CacheLibraries: + itemio = await cachep.read(path) + if itemio is not None: + break + except KeyError: + # cache miss → clear and fall back + itemio = None + + # 3) live fallback if nothing in cache + if itemio is None: + for livep in self.Libraries: + itemio = await livep.read(path) if itemio is not None: break + # 4) yield & close if itemio is None: - yield itemio + yield None else: try: yield itemio @@ -304,28 +349,35 @@ async def list(self, path: str = "/", recursive: bool = False) -> typing.List[Li _validate_path_directory(path) - # List requested level using all available providers - items = await self._list(path, providers=self.Libraries) + # cache-first lookup using try/except + try: + print(self.CacheLibraries) + items = await self._list(path, providers=self.CacheLibraries) + breakpoint() + print(items) + except KeyError: + # cache miss: fall back immediately to live providers + items = await self._list(path, providers=self.Libraries) + else: + # if cache returned empty list, also fall back + if not items: + items = await self._list(path, providers=self.Libraries) + # recursive expansion if recursive: - # If recursive scan is requested, then iterate thru list of items - # find 'dir' types there and list them. - # Output of this list is attached to the list for recursive scan - # and also to the final output - recitems = list(items[:]) - - while len(recitems) > 0: - + recitems = list(items) + while recitems: item = recitems.pop(0) if item.type != 'dir': continue - child_items = await self._list(item.name, providers=item.providers) - items.extend(child_items) - recitems.extend(child_items) + child = await self._list(item.name, providers=item.providers) + items.extend(child) + recitems.extend(child) return items + async def _list(self, path, providers): """ Lists items from all providers, merging items with the same name, @@ -342,7 +394,11 @@ async def _list(self, path, providers): unique_items: dict[str, LibraryItem] = {} # Launch tasks to list items from each provider. - tasks = [(self.Libraries.index(provider), asyncio.create_task(provider.list(path))) for provider in providers] + # Launch tasks to list items from each provider. + tasks = [ + (provider.Layer, asyncio.create_task(provider.list(path))) + for provider in providers + ] for outer_layer, task in tasks: try: @@ -735,6 +791,17 @@ def on_library_change(self, message, provider, path): path=path, ) + # 1) cache-first phase + try: + for provider in self.CacheLibraries: + await provider.subscribe(path, target) + # if cache subscribe succeeded without KeyError, skip live fallback + continue + except KeyError: + # cache miss → fall back to live + pass + + # 2) live fallback for provider in self.Libraries: await provider.subscribe(path, target) From 2e7d311192c3b3612895761516029c5b92d52806 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Thu, 7 Aug 2025 08:59:19 +0200 Subject: [PATCH 17/29] Update library service --- asab/library/service.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/asab/library/service.py b/asab/library/service.py index d856cacc1..f3883f78b 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -312,7 +312,7 @@ async def open(self, path: str): # 4) yield & close if itemio is None: - yield None + yield itemio else: try: yield itemio @@ -351,10 +351,7 @@ async def list(self, path: str = "/", recursive: bool = False) -> typing.List[Li # cache-first lookup using try/except try: - print(self.CacheLibraries) items = await self._list(path, providers=self.CacheLibraries) - breakpoint() - print(items) except KeyError: # cache miss: fall back immediately to live providers items = await self._list(path, providers=self.Libraries) From cd662e80eebe0123059d35335c39e53d1568cc09 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 12 Aug 2025 08:37:46 +0200 Subject: [PATCH 18/29] Change from code review --- asab/library/service.py | 103 ++++++++++------------------------------ 1 file changed, 26 insertions(+), 77 deletions(-) diff --git a/asab/library/service.py b/asab/library/service.py index f3883f78b..37159b27c 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -83,7 +83,6 @@ def __init__( super().__init__(app, service_name) self.Libraries: list[LibraryProviderABC] = [] - self.CacheLibraries: list[LibraryProviderABC] = [] # cache-first self.Disabled: dict = {} self.DisabledPaths: list = [] @@ -120,8 +119,7 @@ def _create_library(self, path, layer): from .providers.cache import CacheLibraryProvider cachep = CacheLibraryProvider(self, path, layer) # always register the cache wrapper (even if no snapshot yet) - self.CacheLibraries.append(cachep) - + self.Libraries.append(cachep) from .providers.libsreg import LibsRegLibraryProvider realp = LibsRegLibraryProvider(self, path, layer) self.Libraries.append(realp) @@ -204,20 +202,6 @@ async def find(self, path: str) -> typing.List[str]: """ _validate_path_item(path) - # 1) cache‐first phase - try: - results = [] - for library in self.CacheLibraries: - found = await library.find(path) - if found: - results.extend(found) - if results: - return results - except KeyError: - # cache miss → skip to live - pass - - # 2) live fallback (your original logic) results = [] for library in self.Libraries: found_files = await library.find(path) @@ -251,26 +235,17 @@ async def read(self, path: str) -> typing.Optional[typing.IO]: LogObsolete.warning("Method 'LibraryService.read()' is obsolete. Use 'LibraryService.open()' method instead.") _validate_path_item(path) - # 1) global disable check if self.check_disabled(path): return None - # 2) cache-first - try: - # this will raise KeyError as soon as any cache provider is “missing” - for cachep in self.CacheLibraries: - itemio = await cachep.read(path) - if itemio is not None: - return itemio - except KeyError: - # cache miss → fall back - pass + for library in self.Libraries: + itemio = await library.read(path) + if itemio is None: + continue + return itemio + + return None - # 3) live fallback - for livep in self.Libraries: - itemio = await livep.read(path) - if itemio is not None: - return itemio @contextlib.asynccontextmanager async def open(self, path: str): @@ -292,25 +267,13 @@ async def open(self, path: str): # Same functionality as in read() method itemio = None - - # 2) cache-first - try: - for cachep in self.CacheLibraries: - itemio = await cachep.read(path) - if itemio is not None: - break - except KeyError: - # cache miss → clear and fall back - itemio = None - - # 3) live fallback if nothing in cache - if itemio is None: - for livep in self.Libraries: - itemio = await livep.read(path) + disabled = self.check_disabled(path) + if not disabled: + for library in self.Libraries: + itemio = await library.read(path) if itemio is not None: break - # 4) yield & close if itemio is None: yield itemio else: @@ -349,28 +312,25 @@ async def list(self, path: str = "/", recursive: bool = False) -> typing.List[Li _validate_path_directory(path) - # cache-first lookup using try/except - try: - items = await self._list(path, providers=self.CacheLibraries) - except KeyError: - # cache miss: fall back immediately to live providers - items = await self._list(path, providers=self.Libraries) - else: - # if cache returned empty list, also fall back - if not items: - items = await self._list(path, providers=self.Libraries) + # List requested level using all available providers + items = await self._list(path, providers=self.Libraries) - # recursive expansion if recursive: - recitems = list(items) - while recitems: + # If recursive scan is requested, then iterate thru list of items + # find 'dir' types there and list them. + # Output of this list is attached to the list for recursive scan + # and also to the final output + recitems = list(items[:]) + + while len(recitems) > 0: + item = recitems.pop(0) if item.type != 'dir': continue - child = await self._list(item.name, providers=item.providers) - items.extend(child) - recitems.extend(child) + child_items = await self._list(item.name, providers=item.providers) + items.extend(child_items) + recitems.extend(child_items) return items @@ -648,7 +608,7 @@ async def get_item_metadata(self, path: str) -> typing.Optional[dict]: Retrieve metadata for a specific file in the library, including its `target`. Args: - path (str): The absolute path of the file to retrieve metadata for. + path (str): The absolute pƒath of the file to retrieve metadata for. Must start with '/' and include a filename with an extension. Returns: @@ -788,17 +748,6 @@ def on_library_change(self, message, provider, path): path=path, ) - # 1) cache-first phase - try: - for provider in self.CacheLibraries: - await provider.subscribe(path, target) - # if cache subscribe succeeded without KeyError, skip live fallback - continue - except KeyError: - # cache miss → fall back to live - pass - - # 2) live fallback for provider in self.Libraries: await provider.subscribe(path, target) From 85fa0a8a55d908fbc4cb8d690cb2807ac3d75ebd Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 12 Aug 2025 08:53:49 +0200 Subject: [PATCH 19/29] Typo --- asab/library/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asab/library/service.py b/asab/library/service.py index 515984baa..416793776 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -616,7 +616,7 @@ async def get_item_metadata(self, path: str) -> typing.Optional[dict]: Retrieve metadata for a specific file in the library, including its `target`. Args: - path (str): The absolute pƒath of the file to retrieve metadata for. + path (str): The absolute path of the file to retrieve metadata for. Must start with '/' and include a filename with an extension. Returns: From 903a03920e67918fb0aabb6122ef020f2a93dafd Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 12 Aug 2025 08:54:48 +0200 Subject: [PATCH 20/29] Restore comment --- asab/library/service.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/asab/library/service.py b/asab/library/service.py index 416793776..bc1da30dd 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -357,11 +357,7 @@ async def _list(self, path, providers): unique_items: dict[str, LibraryItem] = {} # Launch tasks to list items from each provider. - # Launch tasks to list items from each provider. - tasks = [ - (provider.Layer, asyncio.create_task(provider.list(path))) - for provider in providers - ] + tasks = [(self.Libraries.index(provider), asyncio.create_task(provider.list(path))) for provider in providers] for outer_layer, task in tasks: try: From 5012fbcf2c629828d022e9bc5104c9eb02a30553 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Thu, 18 Sep 2025 10:35:31 +0200 Subject: [PATCH 21/29] Minor fix --- asab/library/providers/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index 43942cfdf..14a73f17e 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -65,10 +65,10 @@ def __init__(self, library, uri, layer): # 7) Delegate to FileSystemLibraryProvider cache_uri = "file://{}".format(self.cache_dir.rstrip("/")) super().__init__(library, cache_uri, layer, set_ready=False) + library.App.TaskService.schedule(self._set_ready()) library.App.PubSub.subscribe("library.cache.ready", self._on_cache_ready) - async def _on_cache_ready(self, *args): await self._set_ready() From fd63bd0578ac2620b9cd895a73039d405d025b78 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Mon, 23 Feb 2026 08:41:48 +0100 Subject: [PATCH 22/29] Update to cache provider --- asab/library/providers/cache.py | 48 +++++++++++++-------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index 14a73f17e..0d41773a8 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -27,52 +27,42 @@ def __init__(self, library, uri, layer): raise RuntimeError("Missing [library:cache].dir configuration") global_link = os.path.join(cache_root, "@global", master_hash) - # 3) Resolve link target, handling absolute and relative (from global_link's dir) - if os.path.islink(global_link): - target = os.readlink(global_link) - if os.path.isabs(target): - # absolute symlink - resolved = target - else: - # relative to the directory containing the symlink - base = os.path.dirname(global_link) - resolved = os.path.join(base, target) - cache_dir = os.path.realpath(resolved) - - elif os.path.isdir(global_link): - # someone recreated the dir instead of symlink - cache_dir = global_link - - else: - # not yet created → point at the expected symlink path - cache_dir = global_link - - # 4) Remember for _cache_live() + # 3) Remember for _cache_live() self.layer_hash = master_hash - self.cache_dir = cache_dir + self.cache_dir = global_link - # 5) Sanity‐check cache_root exists + # 4) Sanity-check cache_root exists if not os.path.isdir(cache_root): L.critical("Cache root '{}' not found, exiting.".format(cache_root)) raise SystemExit(1) - # 6) Warn if no snapshot directory is present yet - if not os.path.isdir(self.cache_dir): + # 5) Warn if no snapshot directory is present yet + if not self._cache_live(): L.warning( "No cache snapshot for URI '{}' at '{}'.".format(uri, self.cache_dir) ) - # 7) Delegate to FileSystemLibraryProvider + # 6) Delegate to FileSystemLibraryProvider cache_uri = "file://{}".format(self.cache_dir.rstrip("/")) super().__init__(library, cache_uri, layer, set_ready=False) - library.App.TaskService.schedule(self._set_ready()) - library.App.PubSub.subscribe("library.cache.ready", self._on_cache_ready) + library.App.TaskService.schedule(self._set_ready(self._cache_live())) + library.App.PubSub.subscribe("library.cache.ready!", self._on_cache_ready) + library.App.PubSub.subscribe("library.cache.not_ready!", self._on_cache_not_ready) async def _on_cache_ready(self, *args): - await self._set_ready() + live = self._cache_live() + if live and not self.IsReady: + await self._set_ready(True) + + async def _on_cache_not_ready(self, *args): + live = self._cache_live() + if (not live) and self.IsReady: + await self._set_ready(False) def _cache_live(self): + if os.path.islink(self.cache_dir): + return os.path.isdir(os.path.realpath(self.cache_dir)) return os.path.isdir(self.cache_dir) async def read(self, path): From 1623c02dd90d5fd0aa0059c29a71f1a7fb22f63d Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Mon, 23 Feb 2026 13:00:41 +0100 Subject: [PATCH 23/29] Make cache provider fallback-safe on cache miss (return None/[]/no-op instead of KeyError) --- asab/library/providers/cache.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index 0d41773a8..40cb7b10a 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -13,8 +13,8 @@ class CacheLibraryProvider(FileSystemLibraryProvider): A read-only cache wrapper that points at [library:cache].dir/@global/. - Any call to read()/list()/find()/subscribe() will serve from cache if present, - or raise KeyError if the cache is missing to trigger fallback. + Any call to read()/list()/find()/subscribe() will serve from cache if present. + When cache is missing, provider methods return fallback-safe empty values. """ def __init__(self, library, uri, layer): @@ -67,20 +67,20 @@ def _cache_live(self): async def read(self, path): if not self._cache_live(): - raise KeyError("No cache for '{}'".format(path)) + return None return await super().read(path) async def list(self, path): if not self._cache_live(): - raise KeyError("No cache for '{}'".format(path)) + return [] return await super().list(path) async def find(self, path): if not self._cache_live(): - raise KeyError("No cache for '{}'".format(path)) + return [] return await super().find(path) async def subscribe(self, path, target=None): if not self._cache_live(): - raise KeyError("No cache for '{}'".format(path)) + return None return await super().subscribe(path, target) From 64daa5444dc9ce28feed6ecc5d819f3432fa60cc Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Mon, 23 Feb 2026 13:12:48 +0100 Subject: [PATCH 24/29] Add debug message --- asab/library/providers/cache.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index 40cb7b10a..f8e3664ec 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -68,6 +68,14 @@ def _cache_live(self): async def read(self, path): if not self._cache_live(): return None + L.warning( + "Cache hit", + struct_data={ + "path": path, + "provider": self.__class__.__name__, + "base": getattr(self, "global_link", getattr(self, "cache_dir", None)), + }, + ) return await super().read(path) async def list(self, path): From 3617df97ea54152a77ff817bbc653ce6cc2746d0 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 24 Feb 2026 06:00:10 +0100 Subject: [PATCH 25/29] Add debug message after read for precise hit --- asab/library/providers/cache.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index f8e3664ec..3c8c6fe6d 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -68,15 +68,17 @@ def _cache_live(self): async def read(self, path): if not self._cache_live(): return None - L.warning( - "Cache hit", + + itemio = await super().read(path) + L.debug( + "Cache read %s", + "hit" if itemio is not None else "miss", struct_data={ "path": path, - "provider": self.__class__.__name__, - "base": getattr(self, "global_link", getattr(self, "cache_dir", None)), + "base": self.cache_dir, }, ) - return await super().read(path) + return itemio async def list(self, path): if not self._cache_live(): From 40269eb2e80d515d9840e15487813000d0e24582 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Tue, 24 Feb 2026 06:26:37 +0100 Subject: [PATCH 26/29] Add debug message after read for precise hit: warning --- asab/library/providers/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asab/library/providers/cache.py b/asab/library/providers/cache.py index 3c8c6fe6d..b0603236f 100644 --- a/asab/library/providers/cache.py +++ b/asab/library/providers/cache.py @@ -70,7 +70,7 @@ async def read(self, path): return None itemio = await super().read(path) - L.debug( + L.warning( "Cache read %s", "hit" if itemio is not None else "miss", struct_data={ From f3e0a90f02d8d49e7b5fa33591af7c2389a92416 Mon Sep 17 00:00:00 2001 From: Mithun Shivashankar Date: Thu, 2 Apr 2026 10:39:04 +0200 Subject: [PATCH 27/29] Wire git provider --- asab/library/service.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/asab/library/service.py b/asab/library/service.py index fee35bd2b..204fe0bd8 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -118,7 +118,8 @@ async def _on_tick60(self, message_type): await self._read_favorites() def _create_library(self, path, layer): - # Handle libsreg+ URIs (which may be comma-separated) + # Handle cacheable remote providers by inserting a published cache layer + # ahead of the real provider implementation. if path.startswith('libsreg+'): from .providers.cache import CacheLibraryProvider cachep = CacheLibraryProvider(self, path, layer) @@ -128,6 +129,14 @@ def _create_library(self, path, layer): realp = LibsRegLibraryProvider(self, path, layer) self.Libraries.append(realp) return + elif path.startswith('git+'): + from .providers.cache import CacheLibraryProvider + cachep = CacheLibraryProvider(self, path, layer) + self.Libraries.append(cachep) + from .providers.git import GitLibraryProvider + realp = GitLibraryProvider(self, path, layer) + self.Libraries.append(realp) + return # ZooKeeper (no cache support) if path.startswith('zk://') or path.startswith('zookeeper://'): @@ -147,12 +156,6 @@ def _create_library(self, path, layer): provider = AzureStorageLibraryProvider(self, path, layer) self.Libraries.append(provider) - # Git - elif path.startswith('git+'): - from .providers.git import GitLibraryProvider - provider = GitLibraryProvider(self, path, layer) - self.Libraries.append(provider) - # comments or blanks elif not path or path[0] in ('#', ';'): return From 97b0143430afa430d5af826a5df7ba774021f9d3 Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Thu, 2 Apr 2026 12:22:04 +0200 Subject: [PATCH 28/29] Better wrapping. --- asab/library/service.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/asab/library/service.py b/asab/library/service.py index 204fe0bd8..b63bbe648 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -117,25 +117,29 @@ async def _on_tick60(self, message_type): await self._read_disabled() await self._read_favorites() + def _create_library(self, path, layer): - # Handle cacheable remote providers by inserting a published cache layer - # ahead of the real provider implementation. + if path.startswith('libsreg+'): - from .providers.cache import CacheLibraryProvider - cachep = CacheLibraryProvider(self, path, layer) - # always register the cache wrapper (even if no snapshot yet) - self.Libraries.append(cachep) from .providers.libsreg import LibsRegLibraryProvider realp = LibsRegLibraryProvider(self, path, layer) - self.Libraries.append(realp) + if 'library:cache' in Config: + from .providers.cache import CacheLibraryProvider + cachep = CacheLibraryProvider(self, path, layer) + self.Libraries.append(cachep) + else: + self.Libraries.append(realp) return + elif path.startswith('git+'): - from .providers.cache import CacheLibraryProvider - cachep = CacheLibraryProvider(self, path, layer) - self.Libraries.append(cachep) from .providers.git import GitLibraryProvider realp = GitLibraryProvider(self, path, layer) - self.Libraries.append(realp) + if 'library:cache' in Config: + from .providers.cache import CacheLibraryProvider + cachep = CacheLibraryProvider(self, path, layer) + self.Libraries.append(cachep) + else: + self.Libraries.append(realp) return # ZooKeeper (no cache support) From 3ea77bfb9a6cdfb4a94c6e56a42fa864c4d82d0a Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Thu, 2 Apr 2026 21:26:47 +0200 Subject: [PATCH 29/29] Add a logging of the ERRNO for inotify_init --- asab/library/providers/filesystem.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/asab/library/providers/filesystem.py b/asab/library/providers/filesystem.py index b2d97e10b..86237bd34 100644 --- a/asab/library/providers/filesystem.py +++ b/asab/library/providers/filesystem.py @@ -1,5 +1,6 @@ import io import os +import ctypes import os.path import stat import glob @@ -68,8 +69,11 @@ def __init__(self, library, path, layer, *, set_ready=True): if inotify_init is not None: init = inotify_init() if init == -1: + err = ctypes.get_errno() L.warning( - "Subscribing to library changes in filesystem provider is not available. Inotify was not initialized.") + "Subscribing to library changes in filesystem provider is not available. Inotify was not initialized.", + struct_data={'errno': err}, + ) self.FD = None else: self.FD = init