Rust-powered filesystem toolkit for Python. Fast recursive directory listing, parallel file hashing, bulk copy/move with progress, cross-platform file watching, file deduplication, content search, directory diff/sync, snapshots, disk usage, and batch rename.
pip install pyfs_watcher

From source:

pip install maturin
maturin develop

import pyfs_watcher
# Streaming iterator
for entry in pyfs_watcher.walk("/data", file_type="file", glob_pattern="*.py"):
    print(entry.path, entry.file_size)

# Bulk collect (faster when you need all results)
entries = pyfs_watcher.walk_collect("/data", max_depth=3, sort=True, skip_hidden=True)

# Single file
result = pyfs_watcher.hash_file("large.iso", algorithm="blake3")
print(result.hash_hex)
# Parallel batch hashing
results = pyfs_watcher.hash_files(paths, algorithm="blake3", callback=lambda r: print(r.path))

def on_progress(p):
    pct = p.bytes_copied / p.total_bytes * 100
    print(f"{pct:.0f}% - {p.current_file}")

pyfs_watcher.copy_files(sources, "/dest", progress_callback=on_progress)
pyfs_watcher.move_files(sources, "/dest")  # rename if same fs, copy+delete otherwise

# Sync
with pyfs_watcher.FileWatcher("/data", debounce_ms=500, ignore_patterns=["*.tmp"]) as w:
    for changes in w:
        for c in changes:
            print(c.path, c.change_type)  # "created", "modified", "deleted"

# Async
async for changes in pyfs_watcher.async_watch("/data"):
    for c in changes:
        print(c.path, c.change_type)

groups = pyfs_watcher.find_duplicates(
["/photos", "/backup"],
min_size=1024,
progress_callback=lambda stage, done, total: print(f"{stage}: {done}/{total}"),
)
for g in groups:
print(f"{g.file_size}B x {len(g.paths)} copies = {g.wasted_bytes}B wasted")# Find all files containing "TODO" in Python files
results = pyfs_watcher.search("/project", r"TODO", glob_pattern="*.py")
for r in results:
    for m in r.matches:
        print(f"  {r.path}:{m.line_number}: {m.line_text.strip()}")

# Streaming mode
for r in pyfs_watcher.search_iter("/project", r"FIXME"):
    print(r.path, r.match_count)

diff = pyfs_watcher.diff_dirs("/original", "/copy", detect_moves=True)
print(f"Added: {len(diff.added)}, Removed: {len(diff.removed)}, "
f"Modified: {len(diff.modified)}, Moved: {len(diff.moved)}")result = pyfs_watcher.sync("/source", "/backup", delete_extra=True)
print(f"Copied: {len(result.copied)}, Deleted: {len(result.deleted)}, "
f"Skipped: {len(result.skipped)}")
# Preview changes without writing
result = pyfs_watcher.sync("/source", "/backup", dry_run=True)# Take a snapshot
snap = pyfs_watcher.snapshot("/important_data")
snap.save("baseline.json")
# Later, verify nothing changed
result = pyfs_watcher.verify("baseline.json")
if not result.ok:
    for c in result.modified:
        print(f"Modified: {c.path}")
    for c in result.removed:
        print(f"Removed: {c.path}")

usage = pyfs_watcher.disk_usage("/data")
print(f"Total: {usage.total_size:,} bytes in {usage.total_files} files")
for child in usage.children[:5]: # top 5 largest
print(f" {child.path}: {child.size:,} bytes")# Preview renames (dry_run=True by default)
result = pyfs_watcher.bulk_rename("/photos", r"IMG_(\d+)", r"photo_\1")
for entry in result.renamed:
    print(f"  {entry.old_name} -> {entry.new_name}")

# Apply renames
result = pyfs_watcher.bulk_rename("/photos", r"IMG_(\d+)", r"photo_\1", dry_run=False)

# Undo if needed
result.undo()

All functions raise typed exceptions inheriting from FsWatcherError:

- WalkError — directory walk failures
- HashError — hashing failures
- CopyError — copy/move failures
- WatchError — file watching failures
- SearchError — content search failures
- DirDiffError — directory diff failures
- SyncError — sync failures
- SnapshotError — snapshot/verify failures
- DiskUsageError — disk usage failures
- RenameError — bulk rename failures
Standard FileNotFoundError and PermissionError are raised for I/O errors.
# Setup
uv venv && source .venv/bin/activate
uv pip install maturin pytest pytest-asyncio pytest-timeout
# Build
maturin develop
# Test
cargo test # Rust tests
pytest tests/ # Python tests
# Benchmark
python benches/bench_walk.py
python benches/bench_hash.py

- Rust + PyO3 for Python bindings
- jwalk for parallel directory traversal
- BLAKE3/SHA-256 for hashing with rayon parallelism
- notify + debouncer for cross-platform file watching
- Staged dedup pipeline: size grouping -> partial hash -> full hash
- regex crate for parallel content search
- serde/serde_json for snapshot serialization
- chrono for timestamps