-
Notifications
You must be signed in to change notification settings - Fork 7
Introduce item and directory subscriptions for library providers #741
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
255f835
5fb49cb
6acf8b0
758176b
7740b03
1baa58e
2bfc495
6d417d4
ee47411
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -81,7 +81,9 @@ def __init__(self, library, path, layer, *, set_ready=True): | |
| self.DisabledFilePath = os.path.join(self.BasePath, '.disabled.yaml') | ||
|
|
||
| self.AggrEvents = [] | ||
| self.WDs = {} # wd -> (subscribed_path, child_path) | ||
| self.WDs = {} # wd -> watched directory path | ||
| self.WDSubscriptions = {} # wd -> list of subscriptions attached to the watched directory | ||
| self.WatchedPaths = {} # watched directory path -> wd | ||
|
|
||
| def _current_tenant_id(self): | ||
| try: | ||
|
|
@@ -382,74 +384,267 @@ def _list(self, path: str): | |
|
|
||
| async def subscribe(self, path, target: typing.Union[str, tuple, None] = None): | ||
| """ | ||
| Subscribe to filesystem changes under `path`. | ||
|
|
||
| Note: | ||
| `target` is accepted for API compatibility; current filesystem implementation | ||
| watches the resolved path without target-specific scoping. | ||
| Translate a logical library subscription into one or more filesystem watches. | ||
|
|
||
| Directories are expanded recursively into watched directory boundaries, | ||
| while items are implemented by watching their parent directory and later | ||
| filtering low-level events by child filename. | ||
|
|
||
| Examples: | ||
| ``subscribe("/Schemas/")`` | ||
| Recursively watches ``<BasePath>/Schemas`` and its existing | ||
| subdirectories. Any later change under that subtree publishes | ||
| ``/Schemas/``. | ||
|
|
||
| ``subscribe("/Correlations/Microsoft/Windows/Account Created.yaml")`` | ||
| Watches the parent directory | ||
| ``<BasePath>/Correlations/Microsoft/Windows`` and publishes the | ||
| item path only when the low-level event name is | ||
| ``"Account Created.yaml"``. | ||
| """ | ||
| if not os.path.isdir(self.BasePath + path): | ||
| return | ||
| if self.FD is None: | ||
| L.warning("Cannot subscribe to changes in the filesystem layer of the library: '{}'".format(self.BasePath)) | ||
| return | ||
| self._subscribe_recursive(path, path) | ||
|
|
||
| for actual_path in self._iter_subscription_paths(path, target): | ||
| if os.path.isdir(actual_path): | ||
| self._subscribe_recursive( | ||
| subscription=self._build_directory_subscription(path, actual_path), | ||
| watched_path=actual_path, | ||
| ) | ||
| elif os.path.isfile(actual_path): | ||
| self._subscribe_item( | ||
| subscription=self._build_item_subscription(path, actual_path), | ||
| ) | ||
|
|
||
| def _subscribe_recursive(self, subscribed_path, path_to_be_listed): | ||
| binary = (self.BasePath + path_to_be_listed).encode() | ||
| wd = inotify_add_watch(self.FD, binary, IN_ALL_EVENTS) | ||
| if wd == -1: | ||
| L.error("Error in inotify_add_watch") | ||
|
|
||
| def _iter_subscription_paths(self, path, target: typing.Union[str, tuple, None]): | ||
| """ | ||
| Resolve a logical subscription path into concrete filesystem paths. | ||
|
|
||
| The returned paths are scope-specific filesystem locations under the | ||
| global, tenant, or personal library trees. | ||
|
|
||
| Examples: | ||
| ``("/Schemas/", "global")`` -> ``["<BasePath>/Schemas"]`` | ||
| ``("/Schemas/", ("tenant", "acme"))`` -> | ||
| ``["<BasePath>/.tenants/acme/Schemas"]`` | ||
| """ | ||
| if target in {None, "global"}: | ||
| return [self.build_path(path, tenant_specific=False)] | ||
|
|
||
| if target == "tenant": | ||
| return [self.build_path(path, tenant_specific=True, tenant=tenant) for tenant in self._get_tenants()] | ||
|
Comment on lines
+436
to
+437
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Line 410 subscribes only tenants present at subscription time. New tenants created later under 🤖 Prompt for AI Agents |
||
|
|
||
| if isinstance(target, tuple) and len(target) == 2 and target[0] == "tenant": | ||
| return [self.build_path(path, tenant_specific=True, tenant=target[1])] | ||
|
Comment on lines
+439
to
+440
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Validate tenant ID before path construction Line 413 passes Suggested fix+ def _validate_scope_component(self, value: str, name: str) -> str:
+ if not isinstance(value, str) or value in {"", ".", ".."}:
+ raise ValueError("Invalid {} identifier".format(name))
+ if os.sep in value or (os.altsep and os.altsep in value):
+ raise ValueError("Invalid {} identifier".format(name))
+ return value
@@
if isinstance(target, tuple) and len(target) == 2 and target[0] == "tenant":
- return [self.build_path(path, tenant_specific=True, tenant=target[1])]
+ tenant_id = self._validate_scope_component(target[1], "tenant")
+ return [self.build_path(path, tenant_specific=True, tenant=tenant_id)]🤖 Prompt for AI Agents |
||
|
|
||
| if target == "personal": | ||
| tenant_id = self._current_tenant_id() | ||
| cred_id = self._current_credentials_id() | ||
| personal_path = self._personal_path(path, tenant_id, cred_id) | ||
| return [personal_path] if personal_path is not None else [] | ||
|
|
||
|
Comment on lines
+442
to
+447
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Line 416–419 resolves only current tenant+credentials, but API semantics describe Suggested direction- def _iter_subscription_paths(self, path, target: typing.Union[str, tuple, None]):
+ async def _iter_subscription_paths(self, path, target: typing.Union[str, tuple, None]):
@@
- if target == "personal":
- tenant_id = self._current_tenant_id()
- cred_id = self._current_credentials_id()
- personal_path = self._personal_path(path, tenant_id, cred_id)
- return [personal_path] if personal_path is not None else []
+ if target == "personal":
+ paths = []
+ for tenant_id, cred_id in await self._get_personal_scopes():
+ personal_path = self._personal_path(path, tenant_id, cred_id)
+ if personal_path is not None:
+ paths.append(personal_path)
+ return pathsAnd in - for actual_path in self._iter_subscription_paths(path, target):
+ for actual_path in await self._iter_subscription_paths(path, target):🤖 Prompt for AI Agents |
||
| if isinstance(target, tuple) and len(target) == 2 and target[0] == "personal": | ||
| tenant_id = self._current_tenant_id() | ||
| personal_path = self._personal_path(path, tenant_id, target[1]) | ||
| return [personal_path] if personal_path is not None else [] | ||
|
|
||
| raise ValueError("Unexpected target: {!r}".format(target)) | ||
|
|
||
|
|
||
| def _get_tenants(self) -> typing.List[str]: | ||
| root = os.path.join(self.BasePath, ".tenants") | ||
| if not os.path.isdir(root): | ||
| return [] | ||
|
|
||
| tenants = [] | ||
| for tenant in os.listdir(root): | ||
| tenant_path = os.path.join(root, tenant) | ||
| if tenant.startswith(".") or not os.path.isdir(tenant_path): | ||
| continue | ||
| tenants.append(tenant) | ||
| return tenants | ||
|
|
||
|
|
||
| def _build_directory_subscription(self, publish_path, actual_path): | ||
| """Create a descriptor for a directory-style logical subscription.""" | ||
| return { | ||
| "kind": "dir", | ||
| "publish_path": publish_path, | ||
| "item_name": None, | ||
| "key": ("dir", publish_path, actual_path), | ||
| } | ||
|
|
||
|
|
||
| def _build_item_subscription(self, publish_path, actual_path): | ||
| """Create a descriptor for an item-style logical subscription.""" | ||
| return { | ||
| "kind": "item", | ||
| "publish_path": publish_path, | ||
| "item_name": os.path.basename(actual_path), | ||
| "key": ("item", publish_path, actual_path), | ||
| } | ||
|
|
||
|
|
||
| def _subscribe_item(self, subscription): | ||
| """ | ||
| Attach an item subscription to its parent directory watch. | ||
|
|
||
| Inotify delivers child-name events relative to watched directories, so | ||
| exact item semantics are recovered later by comparing the event name with | ||
| the descriptor's ``item_name``. | ||
|
|
||
| Example: | ||
| For ``/Correlations/Microsoft/Windows/Account Created.yaml`` the | ||
| provider watches ``.../Windows`` and later checks whether the decoded | ||
| inotify child name equals ``"Account Created.yaml"``. | ||
| """ | ||
| parent_path = os.path.dirname(subscription["key"][2]) | ||
| wd = self._ensure_watch(parent_path) | ||
| if wd is None: | ||
| return | ||
| self.WDs[wd] = (subscribed_path, path_to_be_listed) | ||
| self._attach_subscription(wd, subscription) | ||
|
|
||
|
|
||
| def _subscribe_recursive(self, subscription, watched_path): | ||
| """ | ||
| Ensure a directory subscription covers ``watched_path`` and all descendants. | ||
|
|
||
| Filesystem recursion is not native to inotify, so the provider expands the | ||
| current directory tree in user space by registering one watch per existing | ||
| subdirectory. | ||
|
|
||
| Example: | ||
| A subscription for ``/Schemas/`` results in watches for | ||
| ``.../Schemas``, ``.../Schemas/nested``, ``.../Schemas/nested/deeper``, | ||
| and so on for the current subtree snapshot. | ||
| """ | ||
| wd = self._ensure_watch(watched_path) | ||
| if wd is None: | ||
| return | ||
| self._attach_subscription(wd, subscription) | ||
|
|
||
| try: | ||
| items = self._list(path_to_be_listed) | ||
| except KeyError: | ||
| entries = list(os.scandir(watched_path)) | ||
| except FileNotFoundError: | ||
| # subscribing to non-existing directory is silent | ||
| return | ||
|
|
||
| for item in items: | ||
| if item.type == "dir": | ||
| self._subscribe_recursive(subscribed_path, item.name) | ||
| for entry in entries: | ||
| if entry.name.startswith(".") or not entry.is_dir(follow_symlinks=False): | ||
| continue | ||
| self._subscribe_recursive(subscription, entry.path) | ||
|
|
||
|
|
||
| def _ensure_watch(self, watched_path): | ||
| """Create or reuse the inotify watch for one concrete directory path.""" | ||
| wd = self.WatchedPaths.get(watched_path) | ||
| if wd is not None: | ||
| return wd | ||
|
|
||
| binary = watched_path.encode() | ||
| wd = inotify_add_watch(self.FD, binary, IN_ALL_EVENTS) | ||
| if wd == -1: | ||
| L.error("Error in inotify_add_watch") | ||
| return None | ||
|
|
||
| self.WatchedPaths[watched_path] = wd | ||
| self.WDs[wd] = watched_path | ||
| self.WDSubscriptions.setdefault(wd, []) | ||
| return wd | ||
|
|
||
|
|
||
| def _attach_subscription(self, wd, subscription): | ||
| """Attach a logical subscription descriptor to one watched directory.""" | ||
| subscriptions = self.WDSubscriptions.setdefault(wd, []) | ||
| if any(existing["key"] == subscription["key"] for existing in subscriptions): | ||
| return | ||
| subscriptions.append(subscription) | ||
|
|
||
|
|
||
| def _remove_watch(self, wd): | ||
| watched_path = self.WDs.pop(wd, None) | ||
| self.WDSubscriptions.pop(wd, None) | ||
| if watched_path is not None: | ||
| self.WatchedPaths.pop(watched_path, None) | ||
|
|
||
|
|
||
| def _on_inotify_read(self): | ||
| """ | ||
| Read and decode one batch of packed ``inotify_event`` records. | ||
|
|
||
| A single ``os.read`` call may return many concatenated records, so the | ||
| buffer is walked record by record and each decoded event is handed to the | ||
| provider-level event interpreter. | ||
| """ | ||
| data = os.read(self.FD, 64 * 1024) | ||
|
|
||
| pos = 0 | ||
| while pos < len(data): | ||
| wd, mask, cookie, namesize = struct.unpack_from(EVENT_FMT, data, pos) | ||
| pos += EVENT_SIZE + namesize | ||
| name = (data[pos - namesize: pos].split(b'\x00', 1)[0]).decode() | ||
| self._handle_inotify_event(wd, mask, name) | ||
|
|
||
| if mask & IN_ISDIR == IN_ISDIR and ((mask & IN_CREATE == IN_CREATE) or (mask & IN_MOVED_TO == IN_MOVED_TO)): | ||
| subscribed_path, child_path = self.WDs[wd] | ||
| self._subscribe_recursive(subscribed_path, "/".join([child_path, name])) | ||
| self.AggrTimer.restart(0.2) | ||
|
|
||
| if mask & IN_IGNORED == IN_IGNORED: | ||
| # cleanup | ||
| del self.WDs[wd] | ||
| continue | ||
|
|
||
| name = (data[pos - namesize: pos].split(b'\x00', 1)[0]).decode() | ||
| def _handle_inotify_event(self, wd, mask, name): | ||
| """ | ||
| Interpret one decoded inotify event against attached subscription descriptors. | ||
|
|
||
| full_path = os.path.join(self.BasePath, name) | ||
| if os.path.normpath(full_path) == os.path.normpath(self.DisabledFilePath): | ||
| self.App.TaskService.schedule(self.Library._read_disabled(publish_changes=True)) | ||
| Newly created or moved-in directories are expanded into fresh recursive | ||
| watches so directory subscriptions keep covering the subtree as it grows. | ||
|
|
||
| self.AggrTimer.restart(0.2) | ||
| Example: | ||
| If a watched ``.../Schemas`` directory reports a new child directory | ||
| named ``nested``, the provider adds recursive watches under | ||
| ``.../Schemas/nested`` before aggregating the logical change | ||
| publication. | ||
| """ | ||
| watched_path = self.WDs.get(wd) | ||
| subscriptions = list(self.WDSubscriptions.get(wd, ())) | ||
|
|
||
| if mask & IN_ISDIR == IN_ISDIR and ((mask & IN_CREATE == IN_CREATE) or (mask & IN_MOVED_TO == IN_MOVED_TO)): | ||
| # Extend recursive coverage when a new directory appears under an | ||
| # already subscribed directory subtree. | ||
| child_path = os.path.join(watched_path, name) if watched_path is not None else None | ||
| if child_path is not None: | ||
| for subscription in subscriptions: | ||
| if subscription["kind"] != "dir": | ||
| continue | ||
| self._subscribe_recursive(subscription, child_path) | ||
|
|
||
| self.AggrEvents.append((subscriptions, name)) | ||
|
|
||
| full_path = os.path.join(watched_path, name) if watched_path is not None and len(name) > 0 else watched_path | ||
| if full_path is not None and os.path.normpath(full_path) == os.path.normpath(self.DisabledFilePath): | ||
| self.App.TaskService.schedule(self.Library._read_disabled(publish_changes=True)) | ||
|
|
||
| if mask & IN_IGNORED == IN_IGNORED: | ||
| self._remove_watch(wd) | ||
|
|
||
|
|
||
| async def _on_aggr_timer(self): | ||
| """ | ||
| Collapse low-level inotify bursts into deduplicated logical publications. | ||
|
|
||
| This keeps filesystem notification multiplicity from leaking into the | ||
| ``Library.change!`` contract. | ||
|
|
||
| Example: | ||
| Several low-level write-related events for | ||
| ``Account Created.yaml`` collapse into one logical publication of | ||
| ``/Correlations/Microsoft/Windows/Account Created.yaml``. | ||
| """ | ||
| to_advertise = set() | ||
| for wd, mask, cookie, name in self.AggrEvents: | ||
| # When wathed directory is being removed, more than one inotify events are being produced. | ||
| # When IN_IGNORED event occurs, respective wd is removed from self.WDs, | ||
| # but some other events (like IN_DELETE_SELF) get to this point, without having its reference in self.WDs. | ||
| subscribed_path, _ = self.WDs.get(wd, (None, None)) | ||
| to_advertise.add(subscribed_path) | ||
| for subscriptions, name in self.AggrEvents: | ||
| for subscription in subscriptions: | ||
| if subscription["kind"] == "dir": | ||
| to_advertise.add(subscription["publish_path"]) | ||
| elif name == subscription["item_name"]: | ||
| to_advertise.add(subscription["publish_path"]) | ||
| self.AggrEvents.clear() | ||
|
|
||
| for path in to_advertise: | ||
|
|
@@ -504,3 +699,7 @@ async def finalize(self, app): | |
| if self.FD is not None: | ||
| self.App.Loop.remove_reader(self.FD) | ||
| os.close(self.FD) | ||
| self.AggrEvents.clear() | ||
| self.WDs.clear() | ||
| self.WDSubscriptions.clear() | ||
| self.WatchedPaths.clear() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_is_item_subscription()can misclassify dotted directories as itemsLine 70 removes trailing
/before classification, so paths like/.tenants/or/foo.bar/are treated as items. That can route directory subscriptions into wrong handling.Suggested fix
📝 Committable suggestion
🤖 Prompt for AI Agents