From fe74af58cef315f992ef181a4ec2b7b83d10ccff Mon Sep 17 00:00:00 2001
From: UladzislauK-Writer
Date: Tue, 23 Dec 2025 11:52:16 +0100
Subject: [PATCH] feat: log project hash on changes

---
 src/writer/app_runner.py         |  80 +++++++++++++++-
 tests/backend/test_app_runner.py | 156 ++++++++++++++++++++++++++++++-
 2 files changed, 234 insertions(+), 2 deletions(-)

diff --git a/src/writer/app_runner.py b/src/writer/app_runner.py
index 7f331882d..ade4d8b77 100644
--- a/src/writer/app_runner.py
+++ b/src/writer/app_runner.py
@@ -1,5 +1,6 @@
 import asyncio
 import concurrent.futures
+import hashlib
 import importlib.util
 import io
 import logging
@@ -8,6 +9,7 @@
 import multiprocessing.connection
 import multiprocessing.synchronize
 import os
+import pathlib
 import shutil
 import signal
 import subprocess
@@ -16,7 +18,7 @@
 import threading
 import zipfile
 from types import ModuleType
-from typing import Any, Callable, Dict, List, Optional, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
 
 import watchdog.events
 from pydantic import ValidationError
@@ -691,6 +693,76 @@ def on_any_event(self, event) -> None:
         return self.update_callback()
 
 
+class ProjectHashLogHandler(watchdog.events.PatternMatchingEventHandler):
+    def __init__(self, app_path: str, wf_project_context: WfProjectContext, patterns: List[str]):
+        super().__init__(patterns=patterns)
+        self.wf_project_context = wf_project_context
+
+        for file in pathlib.Path(app_path).rglob("*"):
+            if file.is_dir():
+                continue
+            file_hash = hashlib.md5()
+            try:
+                with open(file, 'rb') as f:
+                    while chunk := f.read(8192):
+                        file_hash.update(chunk)
+                self.wf_project_context.file_hashes[str(file.absolute())] = file_hash.hexdigest()
+            except Exception as e:
+                logging.warning(f"Failed to hash {file}: {e}")
+
+        self.project_hash = ""
+        self._log_hash()
+
+    def _calculate_project_hash(self) -> str:
+        project_hash = hashlib.md5()
+        for filename in sorted(self.wf_project_context.file_hashes.keys()):
+            file_hash = self.wf_project_context.file_hashes[filename]
+            project_hash.update(bytes.fromhex(file_hash))
+        return project_hash.hexdigest()
+
+    def _process_file(self, file_path) -> None:
+        try:
+            file_hash = hashlib.md5()
+            with open(file_path, 'rb') as f:
+                while chunk := f.read(8192):
+                    file_hash.update(chunk)
+            self.wf_project_context.file_hashes[file_path] = file_hash.hexdigest()
+            self._log_hash()
+        except FileNotFoundError:
+            return
+        except Exception as e:
+            logging.warning(f"Failed to hash {file_path}: {e}")
+
+    def _log_hash(self) -> None:
+        previous_project_hash = self.project_hash
+        self.project_hash = self._calculate_project_hash()
+        if previous_project_hash != self.project_hash:
+            logging.debug(f"Project hash: {self.project_hash}")
+
+    def on_modified(self, event: Union[watchdog.events.DirModifiedEvent, watchdog.events.FileModifiedEvent]):
+        if not event.is_directory:
+            self._process_file(event.src_path)
+
+    def on_created(self, event: Union[watchdog.events.DirCreatedEvent, watchdog.events.FileCreatedEvent]):
+        if not event.is_directory:
+            self._process_file(event.src_path)
+        else:
+            for sub_event in watchdog.events.generate_sub_created_events(event.src_path):
+                self.on_created(sub_event)
+
+    def on_moved(self, event: Union[watchdog.events.DirMovedEvent, watchdog.events.FileMovedEvent]):
+        if not event.is_directory:
+            self.wf_project_context.file_hashes.pop(event.src_path, None)
+            self._process_file(event.dest_path)
+        else:
+            for sub_event in watchdog.events.generate_sub_moved_events(event.src_path, event.dest_path):
+                self.on_moved(sub_event)
+
+    def on_deleted(self, event: Union[watchdog.events.DirDeletedEvent, watchdog.events.FileDeletedEvent]):
+        if not event.is_directory:
+            self.wf_project_context.file_hashes.pop(event.src_path, None)
+            self._log_hash()
+
 class ThreadSafeAsyncEvent(asyncio.Event):
     """Asyncio event adapted to be thread-safe."""
 
@@ -824,6 +896,12 @@ def _start_fs_observer(self):
             path=self.app_path,
             recursive=True,
         )
+        if logging.getLogger().isEnabledFor(logging.DEBUG):
+            self.observer.schedule(
+                ProjectHashLogHandler(self.app_path, self.wf_project_context, patterns=["*"]),
+                path=self.app_path,
+                recursive=True,
+            )
         # See _install_requirements docstring for info
         # self.observer.schedule(
         #     FileEventHandler(self._install_requirements, patterns=["requirements.txt"]),
diff --git a/tests/backend/test_app_runner.py b/tests/backend/test_app_runner.py
index e7a6ef8c9..8be1245bc 100644
--- a/tests/backend/test_app_runner.py
+++ b/tests/backend/test_app_runner.py
@@ -1,8 +1,11 @@
 import asyncio
+import hashlib
 import threading
+from types import SimpleNamespace
 
 import pytest
-from writer.app_runner import AppRunner
+import watchdog.events
+from writer.app_runner import AppRunner, ProjectHashLogHandler
 from writer.ss_types import (
     EventRequest,
     InitSessionRequest,
@@ -345,3 +348,154 @@ async def test_handle_event_should_return_result_of_event_handler_execution(
 
     # Then
     assert res.payload.result["result"] is not None
+
+
+
+@pytest.fixture
+def wf_project_context():
+    return SimpleNamespace(file_hashes={})
+
+
+@pytest.fixture
+def sample_app(tmp_path):
+    """
+    Creates a directory structure:
+    app/
+        a.txt
+        sub/
+            b.txt
+    """
+    app = tmp_path / "app"
+    app.mkdir()
+
+    (app / "a.txt").write_text("hello")
+    sub = app / "sub"
+    sub.mkdir()
+    (sub / "b.txt").write_text("world")
+
+    return app
+
+
+class TestProjectHashLogHandler:
+    @staticmethod
+    def md5_of_bytes(data: bytes) -> str:
+        h = hashlib.md5()
+        h.update(data)
+        return h.hexdigest()
+
+    def test_initial_hashing(self, sample_app, wf_project_context):
+        handler = ProjectHashLogHandler(
+            app_path=str(sample_app),
+            wf_project_context=wf_project_context,
+            patterns=["*"],
+        )
+
+        expected = {
+            str((sample_app / "a.txt").absolute()): self.md5_of_bytes(b"hello"),
+            str((sample_app / "sub" / "b.txt").absolute()): self.md5_of_bytes(b"world"),
+        }
+
+        assert wf_project_context.file_hashes == expected
+        assert handler.project_hash == handler._calculate_project_hash()
+
+
+    def test_project_hash_is_order_independent(self, sample_app, wf_project_context):
+        handler = ProjectHashLogHandler(
+            app_path=str(sample_app),
+            wf_project_context=wf_project_context,
+            patterns=["*"],
+        )
+
+        original_hash = handler.project_hash
+
+        # Reinsert hashes in reverse order
+        items = list(wf_project_context.file_hashes.items())
+        wf_project_context.file_hashes.clear()
+        for k, v in reversed(items):
+            wf_project_context.file_hashes[k] = v
+
+        assert handler._calculate_project_hash() == original_hash
+
+
+    def test_on_modified_updates_file_hash(self, sample_app, wf_project_context):
+        handler = ProjectHashLogHandler(
+            app_path=str(sample_app),
+            wf_project_context=wf_project_context,
+            patterns=["*"],
+        )
+
+        file_path = sample_app / "a.txt"
+        file_path.write_text("changed")
+
+        event = watchdog.events.FileModifiedEvent(str(file_path))
+        handler.on_modified(event)
+
+        assert wf_project_context.file_hashes[str(file_path)] == self.md5_of_bytes(b"changed")
+
+
+    def test_on_created_adds_file(self, sample_app, wf_project_context):
+        handler = ProjectHashLogHandler(
+            app_path=str(sample_app),
+            wf_project_context=wf_project_context,
+            patterns=["*"],
+        )
+
+        new_file = sample_app / "new.txt"
+        new_file.write_text("new")
+
+        event = watchdog.events.FileCreatedEvent(str(new_file))
+        handler.on_created(event)
+
+        assert str(new_file) in wf_project_context.file_hashes
+        assert wf_project_context.file_hashes[str(new_file)] == self.md5_of_bytes(b"new")
+
+
+    def test_on_deleted_removes_file(self, sample_app, wf_project_context):
+        handler = ProjectHashLogHandler(
+            app_path=str(sample_app),
+            wf_project_context=wf_project_context,
+            patterns=["*"],
+        )
+
+        file_path = sample_app / "a.txt"
+        event = watchdog.events.FileDeletedEvent(str(file_path))
+
+        handler.on_deleted(event)
+
+        assert str(file_path) not in wf_project_context.file_hashes
+
+
+    def test_on_moved_updates_hashes(self, sample_app, wf_project_context):
+        handler = ProjectHashLogHandler(
+            app_path=str(sample_app),
+            wf_project_context=wf_project_context,
+            patterns=["*"],
+        )
+
+        src = sample_app / "a.txt"
+        dest = sample_app / "a_renamed.txt"
+        src.rename(dest)
+
+        event = watchdog.events.FileMovedEvent(
+            src_path=str(src),
+            dest_path=str(dest),
+        )
+
+        handler.on_moved(event)
+
+        assert str(src) not in wf_project_context.file_hashes
+        assert str(dest) in wf_project_context.file_hashes
+        assert wf_project_context.file_hashes[str(dest)] == self.md5_of_bytes(b"hello")
+
+
+    def test_process_file_missing_is_ignored(self, sample_app, wf_project_context):
+        handler = ProjectHashLogHandler(
+            app_path=str(sample_app),
+            wf_project_context=wf_project_context,
+            patterns=["*"],
+        )
+
+        missing_file = sample_app / "missing.txt"
+
+        handler._process_file(str(missing_file))
+        assert str(missing_file) not in wf_project_context.file_hashes
\ No newline at end of file
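
Reviewer note (not part of the patch): the logged value is an MD5 digest over
the per-file MD5 digests, combined in sorted-filename order, so it does not
depend on the order in which filesystem events arrive. The handler is only
scheduled when the root logger is enabled for DEBUG, so run the app with
logging.basicConfig(level=logging.DEBUG) (or equivalent) to see the
"Project hash: ..." lines. Below is a minimal standalone sketch that recomputes
the same digest for a project directory; compute_project_hash is an
illustrative helper name, not something the patch adds.

    import hashlib
    import pathlib
    import sys

    def compute_project_hash(app_path: str) -> str:
        # Per-file MD5s keyed by absolute path, mirroring ProjectHashLogHandler.__init__.
        file_hashes = {}
        for file in pathlib.Path(app_path).rglob("*"):
            if file.is_dir():
                continue
            file_hash = hashlib.md5()
            with open(file, "rb") as f:
                while chunk := f.read(8192):
                    file_hash.update(chunk)
            file_hashes[str(file.absolute())] = file_hash.hexdigest()
        # Combine the digests in sorted-filename order, mirroring _calculate_project_hash.
        project_hash = hashlib.md5()
        for filename in sorted(file_hashes):
            project_hash.update(bytes.fromhex(file_hashes[filename]))
        return project_hash.hexdigest()

    if __name__ == "__main__":
        print(compute_project_hash(sys.argv[1]))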