Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
952199d
WIP: add glob pattern support for --import-module
Dec 11, 2025
b8d6f62
Implemented tests
Dec 13, 2025
e1d0d17
Refactor fixtures to use asset templates
Dec 16, 2025
ba08efe
refactor: use singledispatch for test path utilities
Dec 19, 2025
fbf6cb0
refactor and extend glob pattern matching with advanced features
Dec 26, 2025
559b393
Feature, Add Pattern model
TheLazzziest Jan 1, 2026
7f35f75
Feature, Add ModuleFinder
TheLazzziest Jan 1, 2026
e6afae3
Feature, Fix validation issues
TheLazzziest Jan 1, 2026
8c3a769
Feature, Reimplment module lookup block. Update tests
TheLazzziest Jan 1, 2026
2a0fc66
Feature, Add pre-commit to optional dependencies
TheLazzziest Jan 1, 2026
eabad47
Feature, Improve the method description for state processing
TheLazzziest Jan 2, 2026
408019b
Feature, Replace pool executor with a separate thread
TheLazzziest Jan 2, 2026
de0a598
Feature, Add validation rules for pattern masks
TheLazzziest Jan 2, 2026
8eb7a61
Feature, Add base.py module to the namespace case
TheLazzziest Jan 2, 2026
2156ce1
Feature, Fix validation tests for patterns. Add tests for get_graph_s…
TheLazzziest Jan 2, 2026
c5d99f2
Feature, Remove do_import
TheLazzziest Jan 2, 2026
7d0716e
Merge branch 'main' into feature/glob-pattern-support
Houston56 Jan 13, 2026
ca98f29
Separate graph building from serialization and add dynamic comparison
Jan 14, 2026
26a903e
fix format
Jan 14, 2026
04c83e8
Merge remote-tracking branch 'origin/feature/glob-pattern-support' in…
Houston56 Jan 14, 2026
ce11e2d
Merge remote-tracking branch 'upstream/main' into feature/glob-patter…
Houston56 Jan 18, 2026
c26352d
Fix mypy type errors
Houston56 Jan 18, 2026
85da90a
Refactor: make module imports single-threaded
Houston56 Jan 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions paracelsus/finders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
from collections import deque
from dataclasses import dataclass
from pathlib import Path
from typing import Generator, Optional, Set


@dataclass(frozen=True, eq=True)
class GlobNode:
    """A single pattern segment in a singly linked chain of segments.

    Equality and hashing are based on object identity rather than on field
    values, so two nodes carrying the same ``pattern`` text are still
    distinct entries in sets and dict keys.
    """

    # Glob text of this segment (e.g. "*", "**", "v[12]").
    pattern: str
    # The segment that follows this one; ``None`` marks the end of the chain.
    next: Optional["GlobNode"] = None

    @staticmethod
    def nodify(tokens: list[str]) -> Optional["GlobNode"]:
        """Link ``tokens`` into a chain and return its first node.

        The chain is assembled back to front so that every node can be
        created with its successor already known (instances are frozen).
        Returns ``None`` when ``tokens`` is empty.
        """
        successor: Optional["GlobNode"] = None
        for token in reversed(tokens):
            successor = GlobNode(pattern=token, next=successor)
        return successor

    def __hash__(self) -> int:
        # Identity hash, consistent with the identity-based __eq__ below.
        return id(self)

    def __eq__(self, value: object) -> bool:
        # Only the very same object compares equal; an identical object is
        # necessarily a GlobNode, so no separate type check is required.
        return value is self

    @property
    def is_final(self) -> bool:
        """``True`` when no further segment follows this node."""
        has_successor = self.next is not None
        return not has_successor

    def __repr__(self) -> str:
        return "Node({})".format(self.pattern)


@dataclass(frozen=True)
class SearchState:
    """Immutable traversal cursor: a filesystem location paired with the
    pattern node that is to be applied at that location."""

    # Where in the filesystem the cursor currently sits.
    path: Path
    # Which pattern segment is matched against the entries of ``path``.
    node: GlobNode


class ModuleFinder:
    """Breadth-first finder for Python modules matching a segmented glob pattern.

    The pattern is given as a list of segments (presumably the dotted pattern
    split on ``.`` by the caller, e.g. ``["example", "*", "models"]``) and is
    walked against the directory tree below ``root``.

    Attributes:
        root: Directory whose entries are matched against the first segment.
        head: First node of the linked pattern, or ``None`` when ``segments``
            was empty.
        queue: Pending search states of the search currently in progress.
        visited: States already expanded by the search currently in progress.
    """

    def __init__(self, root: Path, segments: list[str]) -> None:
        self.root = root
        self.head = GlobNode.nodify(segments)

        self.queue: deque[SearchState] = deque()
        # De-duplicates identical (path, node) states, e.g. the ones produced
        # by redundant '**' expansions. Paths are compared as given (they are
        # not resolved), so symlink aliases of a directory stay distinct.
        self.visited: Set[SearchState] = set()

    def find(self) -> Generator[Path, None, None]:
        """
        Finds all modules that match the glob-like pattern for Python modules.

        Supports patterns like:
        - example.*.models
        - example.fo?.models
        - example.*.*.models
        - example.**.api.*.models
        - example.api.v[12].models
        - example.api.v[0-9].models
        - example.api.v[!1].models

        Every call starts a fresh search, so the finder can be reused; running
        two searches of the same instance interleaved is not supported because
        they share ``queue`` and ``visited``.

        Yields:
            Paths of matching module files and package directories, in
            breadth-first order (entries of one directory in sorted order).
        """
        if self.head is None:
            return

        # Drop whatever a previous (possibly abandoned) run left behind;
        # without this a second call would find every state already visited
        # and yield nothing.
        self.queue.clear()
        self.visited.clear()
        self.queue.append(SearchState(self.root, self.head))

        while self.queue:
            state = self.queue.popleft()

            # Distinct routes to the same state are redundant.
            if state in self.visited:
                continue
            self.visited.add(state)

            yield from self._process_state(state)

    def _process_state(self, state: "SearchState") -> Generator[Path, None, None]:
        """
        Expand one search state.

        Follow-up states are appended to ``self.queue``; entries that match
        the final pattern segment and are valid modules are yielded.
        """
        node = state.node
        path = state.path

        # === 1. Recursive wildcard (**) ===
        if node.pattern == "**":
            # Branch A: '**' matches zero directories.
            # Advance to the next segment, stay at the same path.
            if node.next is not None:
                self.queue.append(SearchState(path, node.next))

            # Branch B: '**' consumes one more directory level.
            # Keep the same segment, descend into every sub-directory.
            for child in self._safe_iterdir(path):
                if child.is_dir():
                    self.queue.append(SearchState(child, node))
            return

        # === 2. Ordinary segment ===
        for child in self._safe_iterdir(path):
            child_is_dir = child.is_dir()  # stat once, reused below

            if child_is_dir:
                is_match = child.match(node.pattern)
            elif child.is_file():
                # Files are matched on their stem so 'models' finds models.py.
                is_match = Path(child.stem).match(node.pattern)
            else:
                is_match = False

            if not is_match:
                continue

            if node.is_final:
                if self._is_valid_module(child):
                    yield child
            elif child_is_dir:
                # More segments remain: only directories can be descended into.
                self.queue.append(SearchState(child, node.next))

    def _safe_iterdir(self, path: Path) -> Generator[Path, None, None]:
        """
        Yield the entries of ``path`` in sorted order.

        Yields nothing when ``path`` is not a directory, cannot be read, or
        disappears while it is being listed. Sorting makes the discovery
        order reproducible, since ``Path.iterdir`` itself gives no ordering
        guarantee.
        """
        try:
            if not path.is_dir():
                return
            # Listing is completed inside the ``try`` so that errors raised
            # while reading the directory are handled here.
            entries = sorted(path.iterdir())
        except (PermissionError, FileNotFoundError, NotADirectoryError):
            # Best effort: unreadable or vanished directories are skipped.
            return
        yield from entries

    def _is_valid_module(self, path: Path) -> bool:
        """
        Determines if a path is a valid python module.
        1. File: my_module.py (but not __init__.py)
        2. Package: my_package/ containing __init__.py
        3. PEP 420 namespace package: a directory without __init__.py whose
           name is a valid identifier.
        """
        name = path.name

        # Ignore cache and hidden entries.
        if name == "__pycache__" or name.startswith("."):
            return False

        if path.is_file():
            return path.suffix == ".py" and name != "__init__.py"

        if path.is_dir():
            return (path / "__init__.py").exists() or name.isidentifier()

        return False
Loading