From b88604f3334bb903a12a17c3a5578d5cc8255e65 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Thu, 1 Jan 2026 13:36:58 +0100 Subject: [PATCH 1/3] Implement a portable lockfile. --- CHANGELOG.md | 4 +- docs/source/how_to_guides/index.md | 1 + docs/source/how_to_guides/portability.md | 32 +++ docs/source/reference_guides/configuration.md | 11 +- docs/source/reference_guides/index.md | 1 + docs/source/reference_guides/lockfile.md | 90 ++++++ docs/source/tutorials/making_tasks_persist.md | 2 +- pyproject.toml | 1 + src/_pytask/console.py | 20 +- src/_pytask/execute.py | 16 +- src/_pytask/lockfile.py | 270 ++++++++++++++++++ src/_pytask/persist.py | 13 +- src/_pytask/pluginmanager.py | 1 + src/_pytask/state.py | 94 ++++++ tests/conftest.py | 2 +- tests/test_execute.py | 8 +- tests/test_lockfile.py | 43 +++ uv.lock | 73 +++++ 18 files changed, 660 insertions(+), 22 deletions(-) create mode 100644 docs/source/how_to_guides/portability.md create mode 100644 docs/source/reference_guides/lockfile.md create mode 100644 src/_pytask/lockfile.py create mode 100644 src/_pytask/state.py create mode 100644 tests/test_lockfile.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 080bef0bb..8cf021593 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,9 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and ## Unreleased -- Nothing yet. +- {issue}`735` adds the `pytask.lock` lockfile as the primary state backend with a + portable format, documentation, and a one-run SQLite fallback when no lockfile + exists. ## 0.5.8 - 2025-12-30 diff --git a/docs/source/how_to_guides/index.md b/docs/source/how_to_guides/index.md index d01ecae9c..f41a02476 100644 --- a/docs/source/how_to_guides/index.md +++ b/docs/source/how_to_guides/index.md @@ -13,6 +13,7 @@ maxdepth: 1 --- migrating_from_scripts_to_pytask interfaces_for_dependencies_products +portability remote_files functional_interface capture_warnings diff --git a/docs/source/how_to_guides/portability.md b/docs/source/how_to_guides/portability.md new file mode 100644 index 000000000..031b5edda --- /dev/null +++ b/docs/source/how_to_guides/portability.md @@ -0,0 +1,32 @@ +# Portability + +This guide explains how to keep pytask state portable across machines. + +## Two Portability Concerns + +1. **Portable IDs** + + - The lockfile stores task and node IDs. + - IDs must be project‑relative and stable across machines. + - pytask builds these IDs from the project root; no action required for most users. + +1. **Portable State Values** + + - `state.value` is opaque and comes from `PNode.state()` / `PTask.state()`. + - Content hashes are portable; timestamps or absolute paths are not. + - Custom nodes should avoid machine‑specific paths in `state()`. + +## Tips + +- Commit `pytask.lock` to your repository. If you ship the repository together with the + build artifacts (for example, a zipped project folder including `pytask.lock` and the + produced files), you can move it to another machine and runs will skip recomputation. +- Prefer file content hashes over timestamps for custom nodes. +- For `PythonNode` values that are not natively stable, provide a custom hash function. +- If inputs live outside the project root, IDs will include `..` segments to remain + relative; this is expected. + +## Legacy SQLite + +SQLite is the old state format. It is used only when no lockfile exists, and the +lockfile is written during that run. Subsequent runs rely on the lockfile. diff --git a/docs/source/reference_guides/configuration.md b/docs/source/reference_guides/configuration.md index 2f809ce82..0f062f419 100644 --- a/docs/source/reference_guides/configuration.md +++ b/docs/source/reference_guides/configuration.md @@ -44,11 +44,12 @@ are welcome to also support macOS. ````{confval} database_url -pytask uses a database to keep track of tasks, products, and dependencies over runs. By -default, it will create an SQLite database in the project's root directory called -`.pytask/pytask.sqlite3`. If you want to use a different name or a different dialect -[supported by sqlalchemy](https://docs.sqlalchemy.org/en/latest/core/engines.html#backend-specific-urls), -use either {option}`pytask build --database-url` or `database_url` in the config. +SQLite is the legacy state format. pytask now uses `pytask.lock` as the primary state +backend and only consults the database when no lockfile exists. During that first run, +the lockfile is written and subsequent runs use the lockfile only. + +The `database_url` option remains for backwards compatibility and controls the legacy +database location and dialect ([supported by sqlalchemy](https://docs.sqlalchemy.org/en/latest/core/engines.html#backend-specific-urls)). ```toml database_url = "sqlite:///.pytask/pytask.sqlite3" diff --git a/docs/source/reference_guides/index.md b/docs/source/reference_guides/index.md index e3b85fa7d..adcada161 100644 --- a/docs/source/reference_guides/index.md +++ b/docs/source/reference_guides/index.md @@ -9,6 +9,7 @@ maxdepth: 1 --- command_line_interface configuration +lockfile hookspecs api ``` diff --git a/docs/source/reference_guides/lockfile.md b/docs/source/reference_guides/lockfile.md new file mode 100644 index 000000000..47fc7c9a6 --- /dev/null +++ b/docs/source/reference_guides/lockfile.md @@ -0,0 +1,90 @@ +# The Lock File + +`pytask.lock` is the default state backend. It stores task state in a portable, +git-friendly format so runs can be resumed or shared across machines. + +```{note} +SQLite is the legacy format. It is still read when no lockfile exists, and a lockfile +is written during that first run. Subsequent runs use the lockfile only. +``` + +## Example + +```toml +# This file is automatically @generated by pytask. +# It is not intended for manual editing. + +lock-version = "1.0" + +[[task]] +id = "src/tasks/data.py::task_clean_data" + +[task.state] +value = "f9e8d7c6..." + +[[task.depends_on]] +id = "data/raw/input.csv" + +[task.depends_on.state] +value = "e5f6g7h8..." + +[[task.produces]] +id = "data/processed/clean.parquet" + +[task.produces.state] +value = "m3n4o5p6..." +``` + +## Behavior + +On each run, pytask: + +1. Reads `pytask.lock` (if present). +1. Compares current dependency/product/task `state()` to stored `state.value`. +1. Skips tasks whose states match; runs the rest. +1. Updates `pytask.lock` after each completed task (atomic write). + +`pytask-parallel` uses a single coordinator to write the lock file, so writes are +serialized even when tasks execute in parallel. + +## Portability + +There are two portability concerns: + +1. **IDs**: Lockfile IDs must be project‑relative and stable across machines. +1. **State values**: `state.value` is opaque; portability depends on each node’s + `state()` implementation. Content hashes are portable; timestamps are not. + +## File Format Reference + +### Top-Level + +| Field | Required | Description | +| -------------- | -------- | ---------------------------------- | +| `lock-version` | Yes | Schema version (currently `"1.0"`) | + +### Task Entry + +| Field | Required | Description | +| ------- | -------- | -------------------------------------------- | +| `id` | Yes | Portable task identifier | +| `state` | Yes | State dictionary with a single `value` field | + +### Dependency/Product Entry + +| Field | Required | Description | +| ------- | -------- | -------------------------------------------- | +| `id` | Yes | Node identifier | +| `state` | Yes | State dictionary with a single `value` field | + +### State Dictionary + +| Field | Required | Description | +| ------- | -------- | ------------------- | +| `value` | Yes | Opaque state string | + +## Version Compatibility + +- **Upgrade**: newer pytask upgrades old lock files in memory and writes the new format + on the next update. +- **Downgrade**: older pytask errors with a clear upgrade message. diff --git a/docs/source/tutorials/making_tasks_persist.md b/docs/source/tutorials/making_tasks_persist.md index ae1e16361..6a5e1a59f 100644 --- a/docs/source/tutorials/making_tasks_persist.md +++ b/docs/source/tutorials/making_tasks_persist.md @@ -9,7 +9,7 @@ In this case, you can apply the {func}`@pytask.mark.persist =1.3.0", "rich>=13.8.0", "sqlalchemy>=2.0.31", + "msgspec[toml]>=0.18.6", 'tomli>=1; python_version < "3.11"', 'typing-extensions>=4.8.0; python_version < "3.11"', "universal-pathlib>=0.2.2", diff --git a/src/_pytask/console.py b/src/_pytask/console.py index 8451ff669..f22b5ab21 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -111,10 +111,26 @@ def render_to_string( example, render warnings with colors or text in exceptions. """ - buffer = console.render(renderable) + render_console = console + if not strip_styles and console.no_color and console.color_system is not None: + theme: Theme | None + try: + theme = Theme(console._theme_stack._entries[-1]) # type: ignore[attr-defined] + except (AttributeError, IndexError, TypeError): + theme = None + render_console = Console( + color_system=console.color_system, + force_terminal=True, + width=console.width, + no_color=False, + markup=getattr(console, "_markup", True), + theme=theme, + ) + + buffer = render_console.render(renderable) if strip_styles: buffer = Segment.strip_styles(buffer) - return console._render_buffer(buffer) + return render_console._render_buffer(buffer) def format_task_name(task: PTask, editor_url_scheme: str) -> Text: diff --git a/src/_pytask/execute.py b/src/_pytask/execute.py index 167efdcf2..9b3fd8c3c 100644 --- a/src/_pytask/execute.py +++ b/src/_pytask/execute.py @@ -20,9 +20,6 @@ from _pytask.dag_utils import TopologicalSorter from _pytask.dag_utils import descending_tasks from _pytask.dag_utils import node_and_neighbors -from _pytask.database_utils import get_node_change_info -from _pytask.database_utils import has_node_changed -from _pytask.database_utils import update_states_in_database from _pytask.exceptions import ExecutionError from _pytask.exceptions import NodeLoadError from _pytask.exceptions import NodeNotFoundError @@ -46,6 +43,9 @@ from _pytask.pluginmanager import hookimpl from _pytask.provisional_utils import collect_provisional_products from _pytask.reports import ExecutionReport +from _pytask.state import get_node_change_info +from _pytask.state import has_node_changed +from _pytask.state import update_states from _pytask.traceback import remove_traceback_from_exc_info from _pytask.tree_util import tree_leaves from _pytask.tree_util import tree_map @@ -196,7 +196,7 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C # Check if node changed and collect detailed info if in explain mode if session.config["explain"]: has_changed, reason, details = get_node_change_info( - task=task, node=node, state=node_state + session=session, task=task, node=node, state=node_state ) if has_changed: needs_to_be_executed = True @@ -222,7 +222,9 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C ) ) else: - has_changed = has_node_changed(task=task, node=node, state=node_state) + has_changed = has_node_changed( + session=session, task=task, node=node, state=node_state + ) if has_changed: needs_to_be_executed = True @@ -232,6 +234,8 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C if not needs_to_be_executed: collect_provisional_products(session, task) + if not session.config["dry_run"] and not session.config["explain"]: + update_states(session, task) raise SkippedUnchanged # Create directory for product if it does not exist. Maybe this should be a `setup` @@ -326,7 +330,7 @@ def pytask_execute_task_process_report( task = report.task if report.outcome == TaskOutcome.SUCCESS: - update_states_in_database(session, task.signature) + update_states(session, task) elif report.exc_info and isinstance(report.exc_info[1], WouldBeExecuted): report.outcome = TaskOutcome.WOULD_BE_EXECUTED diff --git a/src/_pytask/lockfile.py b/src/_pytask/lockfile.py new file mode 100644 index 000000000..2d19bc717 --- /dev/null +++ b/src/_pytask/lockfile.py @@ -0,0 +1,270 @@ +"""Support for the pytask lock file.""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from dataclasses import field +from pathlib import Path +from typing import TYPE_CHECKING +from typing import Any + +import msgspec +from packaging.version import Version +from upath import UPath + +from _pytask.node_protocols import PNode +from _pytask.node_protocols import PPathNode +from _pytask.node_protocols import PTask +from _pytask.node_protocols import PTaskWithPath +from _pytask.nodes import PythonNode +from _pytask.pluginmanager import hookimpl + +if TYPE_CHECKING: + from _pytask.session import Session + +CURRENT_LOCKFILE_VERSION = "1.0" + + +class LockfileError(Exception): + """Raised when reading or writing a lockfile fails.""" + + +class LockfileVersionError(LockfileError): + """Raised when a lockfile version is not supported.""" + + +class _State(msgspec.Struct): + value: str + + +class _NodeEntry(msgspec.Struct): + id: str + state: _State + + +class _TaskEntry(msgspec.Struct): + id: str + state: _State + depends_on: list[_NodeEntry] = msgspec.field(default_factory=list) + produces: list[_NodeEntry] = msgspec.field(default_factory=list) + + +class _Lockfile(msgspec.Struct, forbid_unknown_fields=False): + lock_version: str = msgspec.field(name="lock-version") + task: list[_TaskEntry] = msgspec.field(default_factory=list) + + +def _relative_path(path: Path, root: Path) -> str: + if isinstance(path, UPath) and path.protocol: + return str(path) + try: + rel = os.path.relpath(path, root) + except ValueError: + return path.as_posix() + return Path(rel).as_posix() + + +def build_portable_task_id(task: PTask, root: Path) -> str: + if isinstance(task, PTaskWithPath): + base_name = getattr(task, "base_name", None) or task.name + return f"{_relative_path(task.path, root)}::{base_name}" + return task.name + + +def _build_portable_task_id_from_parts( + task_path: Path | None, task_name: str, root: Path +) -> str: + if task_path is None: + return task_name + return f"{_relative_path(task_path, root)}::{task_name}" + + +def build_portable_node_id(node: PNode, root: Path) -> str: + if isinstance(node, PythonNode) and node.node_info: + task_id = _build_portable_task_id_from_parts( + node.node_info.task_path, node.node_info.task_name, root + ) + node_id = f"{task_id}::{node.node_info.arg_name}" + if node.node_info.path: + suffix = "-".join(str(p) for p in node.node_info.path) + node_id = f"{node_id}::{suffix}" + return node_id + if isinstance(node, PPathNode): + return _relative_path(node.path, root) + return node.name + + +def read_lockfile(path: Path) -> _Lockfile | None: + if not path.exists(): + return None + + raw = msgspec.toml.decode(path.read_bytes()) + if not isinstance(raw, dict): + msg = "Lockfile has invalid format." + raise LockfileError(msg) + + version = raw.get("lock-version") + if not isinstance(version, str): + msg = "Lockfile is missing 'lock-version'." + raise LockfileError(msg) + + if Version(version) > Version(CURRENT_LOCKFILE_VERSION): + msg = ( + f"Unsupported lock-version {version!r}. " + f"Current version is {CURRENT_LOCKFILE_VERSION}." + ) + raise LockfileVersionError(msg) + + lockfile = msgspec.toml.decode(path.read_bytes(), type=_Lockfile) + + if Version(version) < Version(CURRENT_LOCKFILE_VERSION): + lockfile = _Lockfile( + lock_version=CURRENT_LOCKFILE_VERSION, + task=lockfile.task, + ) + return lockfile + + +def _normalize_lockfile(lockfile: _Lockfile) -> _Lockfile: + tasks = [] + for task in sorted(lockfile.task, key=lambda entry: entry.id): + depends_on = sorted(task.depends_on, key=lambda entry: entry.id) + produces = sorted(task.produces, key=lambda entry: entry.id) + tasks.append( + _TaskEntry( + id=task.id, + state=task.state, + depends_on=depends_on, + produces=produces, + ) + ) + return _Lockfile(lock_version=CURRENT_LOCKFILE_VERSION, task=tasks) + + +def write_lockfile(path: Path, lockfile: _Lockfile) -> None: + normalized = _normalize_lockfile(lockfile) + data = msgspec.toml.encode(normalized) + tmp = path.with_suffix(f"{path.suffix}.tmp") + tmp.write_bytes(data) + tmp.replace(path) + + +def _build_task_entry(session: Session, task: PTask, root: Path) -> _TaskEntry | None: + task_state = task.state() + if task_state is None: + return None + + dag = session.dag + predecessors = set(dag.predecessors(task.signature)) + successors = set(dag.successors(task.signature)) + + depends_on = [] + for node_signature in predecessors: + node = ( + dag.nodes[node_signature].get("task") or dag.nodes[node_signature]["node"] + ) + if not isinstance(node, (PNode, PTask)): + continue + state = node.state() + if state is None: + continue + node_id = ( + build_portable_task_id(node, root) + if isinstance(node, PTask) + else build_portable_node_id(node, root) + ) + depends_on.append(_NodeEntry(id=node_id, state=_State(state))) + + produces = [] + for node_signature in successors: + node = ( + dag.nodes[node_signature].get("task") or dag.nodes[node_signature]["node"] + ) + if not isinstance(node, (PNode, PTask)): + continue + state = node.state() + if state is None: + continue + node_id = ( + build_portable_task_id(node, root) + if isinstance(node, PTask) + else build_portable_node_id(node, root) + ) + produces.append(_NodeEntry(id=node_id, state=_State(state))) + + return _TaskEntry( + id=build_portable_task_id(task, root), + state=_State(task_state), + depends_on=depends_on, + produces=produces, + ) + + +@dataclass +class LockfileState: + path: Path + root: Path + use_lockfile_for_skip: bool + lockfile: _Lockfile + _task_index: dict[str, _TaskEntry] = field(init=False, default_factory=dict) + _node_index: dict[str, dict[str, str]] = field(init=False, default_factory=dict) + + def __post_init__(self) -> None: + self._rebuild_indexes() + + @classmethod + def from_path(cls, path: Path, root: Path) -> LockfileState: + existing = read_lockfile(path) + if existing is None: + lockfile = _Lockfile( + lock_version=CURRENT_LOCKFILE_VERSION, + task=[], + ) + return cls( + path=path, + root=root, + use_lockfile_for_skip=False, + lockfile=lockfile, + ) + return cls( + path=path, + root=root, + use_lockfile_for_skip=True, + lockfile=existing, + ) + + def _rebuild_indexes(self) -> None: + self._task_index = {task.id: task for task in self.lockfile.task} + self._node_index = {} + for task in self.lockfile.task: + nodes = {} + for entry in task.depends_on + task.produces: + nodes[entry.id] = entry.state.value + self._node_index[task.id] = nodes + + def get_task_entry(self, task_id: str) -> _TaskEntry | None: + return self._task_index.get(task_id) + + def get_node_state(self, task_id: str, node_id: str) -> str | None: + return self._node_index.get(task_id, {}).get(node_id) + + def update_task(self, session: Session, task: PTask) -> None: + entry = _build_task_entry(session, task, self.root) + if entry is None: + return + self._task_index[entry.id] = entry + self.lockfile = _Lockfile( + lock_version=CURRENT_LOCKFILE_VERSION, + task=list(self._task_index.values()), + ) + self._rebuild_indexes() + write_lockfile(self.path, self.lockfile) + + +@hookimpl +def pytask_post_parse(config: dict[str, Any]) -> None: + """Initialize the lockfile state.""" + path = config["root"] / "pytask.lock" + config["lockfile_path"] = path + config["lockfile_state"] = LockfileState.from_path(path, config["root"]) diff --git a/src/_pytask/persist.py b/src/_pytask/persist.py index 9bd567b4a..40d958b32 100644 --- a/src/_pytask/persist.py +++ b/src/_pytask/persist.py @@ -6,13 +6,14 @@ from typing import Any from _pytask.dag_utils import node_and_neighbors -from _pytask.database_utils import has_node_changed -from _pytask.database_utils import update_states_in_database +from _pytask.database_utils import update_states_in_database as _db_update_states from _pytask.mark_utils import has_mark from _pytask.outcomes import Persisted from _pytask.outcomes import TaskOutcome from _pytask.pluginmanager import hookimpl from _pytask.provisional_utils import collect_provisional_products +from _pytask.state import has_node_changed +from _pytask.state import update_states if TYPE_CHECKING: from _pytask.node_protocols import PTask @@ -20,6 +21,11 @@ from _pytask.session import Session +def update_states_in_database(session: Session, task_signature: str) -> None: + """Compatibility wrapper for older callers/tests.""" + _db_update_states(session, task_signature) + + @hookimpl def pytask_parse_config(config: dict[str, Any]) -> None: """Add the marker to the configuration.""" @@ -52,6 +58,7 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: if all_nodes_exist: any_node_changed = any( has_node_changed( + session=session, task=task, node=session.dag.nodes[name].get("task") or session.dag.nodes[name]["node"], @@ -79,6 +86,6 @@ def pytask_execute_task_process_report( """ if report.exc_info and isinstance(report.exc_info[1], Persisted): report.outcome = TaskOutcome.PERSISTENCE - update_states_in_database(session, report.task.signature) + update_states(session, report.task) return True return None diff --git a/src/_pytask/pluginmanager.py b/src/_pytask/pluginmanager.py index 2a7ef4a69..c493c006a 100644 --- a/src/_pytask/pluginmanager.py +++ b/src/_pytask/pluginmanager.py @@ -53,6 +53,7 @@ def pytask_add_hooks(pm: PluginManager) -> None: "_pytask.provisional", "_pytask.execute", "_pytask.live", + "_pytask.lockfile", "_pytask.logging", "_pytask.mark", "_pytask.nodes", diff --git a/src/_pytask/state.py b/src/_pytask/state.py new file mode 100644 index 000000000..9c8928d2e --- /dev/null +++ b/src/_pytask/state.py @@ -0,0 +1,94 @@ +"""State handling for lockfile and database backends.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from _pytask.database_utils import get_node_change_info as _db_get_node_change_info +from _pytask.database_utils import has_node_changed as _db_has_node_changed +from _pytask.database_utils import update_states_in_database as _db_update_states +from _pytask.lockfile import LockfileState +from _pytask.lockfile import build_portable_node_id +from _pytask.lockfile import build_portable_task_id +from _pytask.node_protocols import PTask + +if TYPE_CHECKING: + from _pytask.node_protocols import PNode + from _pytask.session import Session + + +def _get_lockfile_state(session: Session) -> LockfileState | None: + return session.config.get("lockfile_state") + + +def has_node_changed( + session: Session, task: PTask, node: PTask | PNode, state: str | None +) -> bool: + lockfile_state = _get_lockfile_state(session) + if lockfile_state and lockfile_state.use_lockfile_for_skip: + if state is None: + return True + task_id = build_portable_task_id(task, lockfile_state.root) + if node is task or ( + hasattr(node, "signature") and node.signature == task.signature + ): + entry = lockfile_state.get_task_entry(task_id) + if entry is None: + return True + return state != entry.state.value + node_id = ( + build_portable_task_id(node, lockfile_state.root) + if isinstance(node, PTask) + else build_portable_node_id(node, lockfile_state.root) + ) + stored_state = lockfile_state.get_node_state(task_id, node_id) + if stored_state is None: + return True + return state != stored_state + return _db_has_node_changed(task=task, node=node, state=state) + + +def get_node_change_info( + session: Session, task: PTask, node: PTask | PNode, state: str | None +) -> tuple[bool, str, dict[str, str]]: + lockfile_state = _get_lockfile_state(session) + if not (lockfile_state and lockfile_state.use_lockfile_for_skip): + return _db_get_node_change_info(task=task, node=node, state=state) + + details: dict[str, str] = {} + if state is None: + return True, "missing", details + + task_id = build_portable_task_id(task, lockfile_state.root) + is_task = node is task or ( + hasattr(node, "signature") and node.signature == task.signature + ) + if is_task: + entry = lockfile_state.get_task_entry(task_id) + if entry is None: + return True, "not_in_db", details + stored_state = entry.state.value + else: + node_id = ( + build_portable_task_id(node, lockfile_state.root) + if isinstance(node, PTask) + else build_portable_node_id(node, lockfile_state.root) + ) + stored_state = lockfile_state.get_node_state(task_id, node_id) + if stored_state is None: + return True, "not_in_db", details + + if state != stored_state: + details["old_hash"] = stored_state + details["new_hash"] = state + return True, "changed", details + return False, "unchanged", details + + +def update_states(session: Session, task: PTask) -> None: + if session.dag is None: + return + lockfile_state = _get_lockfile_state(session) + if lockfile_state is not None: + lockfile_state.update_task(session, task) + _db_update_states(session, task.signature) diff --git a/tests/conftest.py b/tests/conftest.py index e0b45db31..027f32da0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,7 +44,7 @@ def _remove_variable_info_from_output(data: str, path: Any) -> str: # noqa: ARG # Remove dynamic versions. index_root = next(i for i, line in enumerate(lines) if line.startswith("Root:")) - new_info_line = " ".join(lines[1:index_root]) + new_info_line = " ".join(line.strip() for line in lines[1:index_root]) for platform in ("linux", "win32", "darwin"): new_info_line = new_info_line.replace(platform, "") pattern = re.compile(version.VERSION_PATTERN, flags=re.IGNORECASE | re.VERBOSE) diff --git a/tests/test_execute.py b/tests/test_execute.py index 6e76db2de..918e4419a 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -606,6 +606,7 @@ def create_file( result = subprocess.run( (sys.executable, tmp_path.joinpath("task_module.py").as_posix()), check=False, + cwd=tmp_path, ) assert result.returncode == ExitCode.OK assert tmp_path.joinpath("file.txt").read_text() == "This is the text." @@ -634,7 +635,8 @@ def task2() -> None: pass """ tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) result = run_in_subprocess( - (sys.executable, tmp_path.joinpath("task_module.py").as_posix()) + (sys.executable, tmp_path.joinpath("task_module.py").as_posix()), + cwd=tmp_path, ) assert result.exit_code == ExitCode.OK @@ -951,11 +953,11 @@ def func(path): produces={"path": PathNode(path=tmp_path / "out.txt")}, ) - session = build(tasks=[task]) + session = build(tasks=[task], paths=tmp_path) assert session.exit_code == ExitCode.OK assert tmp_path.joinpath("out.txt").exists() - session = build(tasks=task) + session = build(tasks=task, paths=tmp_path) assert session.exit_code == ExitCode.OK diff --git a/tests/test_lockfile.py b/tests/test_lockfile.py new file mode 100644 index 000000000..cb9b43c39 --- /dev/null +++ b/tests/test_lockfile.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import textwrap + +import pytest + +from _pytask.lockfile import CURRENT_LOCKFILE_VERSION +from _pytask.lockfile import LockfileVersionError +from _pytask.lockfile import read_lockfile + + +def test_lockfile_upgrades_older_version(tmp_path): + path = tmp_path / "pytask.lock" + path.write_text( + textwrap.dedent( + """ + lock-version = "0.9" + task = [] + """ + ).strip() + + "\n" + ) + + lockfile = read_lockfile(path) + + assert lockfile is not None + assert lockfile.lock_version == CURRENT_LOCKFILE_VERSION + + +def test_lockfile_rejects_newer_version(tmp_path): + path = tmp_path / "pytask.lock" + path.write_text( + textwrap.dedent( + """ + lock-version = "9.0" + task = [] + """ + ).strip() + + "\n" + ) + + with pytest.raises(LockfileVersionError): + read_lockfile(path) diff --git a/uv.lock b/uv.lock index da73057c4..a41fadd49 100644 --- a/uv.lock +++ b/uv.lock @@ -1913,6 +1913,68 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b6/bc/8bd826dd03e022153bfa1766dcdec4976d6c818865ed54223d71f07862b3/msgpack-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:bce7d9e614a04d0883af0b3d4d501171fbfca038f12c77fa838d9f198147a23f", size = 75140, upload-time = "2024-09-10T04:24:31.288Z" }, ] +[[package]] +name = "msgspec" +version = "0.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/9c/bfbd12955a49180cbd234c5d29ec6f74fe641698f0cd9df154a854fc8a15/msgspec-0.20.0.tar.gz", hash = "sha256:692349e588fde322875f8d3025ac01689fead5901e7fb18d6870a44519d62a29", size = 317862, upload-time = "2025-11-24T03:56:28.934Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/5e/151883ba2047cca9db8ed2f86186b054ad200bc231352df15b0c1dd75b1f/msgspec-0.20.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:23a6ec2a3b5038c233b04740a545856a068bc5cb8db184ff493a58e08c994fbf", size = 195191, upload-time = "2025-11-24T03:55:08.549Z" }, + { url = "https://files.pythonhosted.org/packages/50/88/a795647672f547c983eff0823b82aaa35db922c767e1b3693e2dcf96678d/msgspec-0.20.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cde2c41ed3eaaef6146365cb0d69580078a19f974c6cb8165cc5dcd5734f573e", size = 188513, upload-time = "2025-11-24T03:55:10.008Z" }, + { url = "https://files.pythonhosted.org/packages/4b/91/eb0abb0e0de142066cebfe546dc9140c5972ea824aa6ff507ad0b6a126ac/msgspec-0.20.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5da0daa782f95d364f0d95962faed01e218732aa1aa6cad56b25a5d2092e75a4", size = 216370, upload-time = "2025-11-24T03:55:11.566Z" }, + { url = "https://files.pythonhosted.org/packages/15/2a/48e41d9ef0a24b1c6e67cbd94a676799e0561bfbc163be1aaaff5ca853f5/msgspec-0.20.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9369d5266144bef91be2940a3821e03e51a93c9080fde3ef72728c3f0a3a8bb7", size = 222653, upload-time = "2025-11-24T03:55:13.159Z" }, + { url = "https://files.pythonhosted.org/packages/90/c9/14b825df203d980f82a623450d5f39e7f7a09e6e256c52b498ea8f29d923/msgspec-0.20.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:90fb865b306ca92c03964a5f3d0cd9eb1adda14f7e5ac7943efd159719ea9f10", size = 222337, upload-time = "2025-11-24T03:55:14.777Z" }, + { url = "https://files.pythonhosted.org/packages/8b/d7/39a5c3ddd294f587d6fb8efccc8361b6aa5089974015054071e665c9d24b/msgspec-0.20.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e8112cd48b67dfc0cfa49fc812b6ce7eb37499e1d95b9575061683f3428975d3", size = 225565, upload-time = "2025-11-24T03:55:16.4Z" }, + { url = "https://files.pythonhosted.org/packages/98/bd/5db3c14d675ee12842afb9b70c94c64f2c873f31198c46cbfcd7dffafab0/msgspec-0.20.0-cp310-cp310-win_amd64.whl", hash = "sha256:666b966d503df5dc27287675f525a56b6e66a2b8e8ccd2877b0c01328f19ae6c", size = 188412, upload-time = "2025-11-24T03:55:17.747Z" }, + { url = "https://files.pythonhosted.org/packages/76/c7/06cc218bc0c86f0c6c6f34f7eeea6cfb8b835070e8031e3b0ef00f6c7c69/msgspec-0.20.0-cp310-cp310-win_arm64.whl", hash = "sha256:099e3e85cd5b238f2669621be65f0728169b8c7cb7ab07f6137b02dc7feea781", size = 173951, upload-time = "2025-11-24T03:55:19.335Z" }, + { url = "https://files.pythonhosted.org/packages/03/59/fdcb3af72f750a8de2bcf39d62ada70b5eb17b06d7f63860e0a679cb656b/msgspec-0.20.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:09e0efbf1ac641fedb1d5496c59507c2f0dc62a052189ee62c763e0aae217520", size = 193345, upload-time = "2025-11-24T03:55:20.613Z" }, + { url = "https://files.pythonhosted.org/packages/5a/15/3c225610da9f02505d37d69a77f4a2e7daae2a125f99d638df211ba84e59/msgspec-0.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23ee3787142e48f5ee746b2909ce1b76e2949fbe0f97f9f6e70879f06c218b54", size = 186867, upload-time = "2025-11-24T03:55:22.4Z" }, + { url = "https://files.pythonhosted.org/packages/81/36/13ab0c547e283bf172f45491edfdea0e2cecb26ae61e3a7b1ae6058b326d/msgspec-0.20.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:81f4ac6f0363407ac0465eff5c7d4d18f26870e00674f8fcb336d898a1e36854", size = 215351, upload-time = "2025-11-24T03:55:23.958Z" }, + { url = "https://files.pythonhosted.org/packages/6b/96/5c095b940de3aa6b43a71ec76275ac3537b21bd45c7499b5a17a429110fa/msgspec-0.20.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb4d873f24ae18cd1334f4e37a178ed46c9d186437733351267e0a269bdf7e53", size = 219896, upload-time = "2025-11-24T03:55:25.356Z" }, + { url = "https://files.pythonhosted.org/packages/98/7a/81a7b5f01af300761087b114dafa20fb97aed7184d33aab64d48874eb187/msgspec-0.20.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b92b8334427b8393b520c24ff53b70f326f79acf5f74adb94fd361bcff8a1d4e", size = 220389, upload-time = "2025-11-24T03:55:26.99Z" }, + { url = "https://files.pythonhosted.org/packages/70/c0/3d0cce27db9a9912421273d49eab79ce01ecd2fed1a2f1b74af9b445f33c/msgspec-0.20.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:562c44b047c05cc0384e006fae7a5e715740215c799429e0d7e3e5adf324285a", size = 223348, upload-time = "2025-11-24T03:55:28.311Z" }, + { url = "https://files.pythonhosted.org/packages/89/5e/406b7d578926b68790e390d83a1165a9bfc2d95612a1a9c1c4d5c72ea815/msgspec-0.20.0-cp311-cp311-win_amd64.whl", hash = "sha256:d1dcc93a3ce3d3195985bfff18a48274d0b5ffbc96fa1c5b89da6f0d9af81b29", size = 188713, upload-time = "2025-11-24T03:55:29.553Z" }, + { url = "https://files.pythonhosted.org/packages/47/87/14fe2316624ceedf76a9e94d714d194cbcb699720b210ff189f89ca4efd7/msgspec-0.20.0-cp311-cp311-win_arm64.whl", hash = "sha256:aa387aa330d2e4bd69995f66ea8fdc87099ddeedf6fdb232993c6a67711e7520", size = 174229, upload-time = "2025-11-24T03:55:31.107Z" }, + { url = "https://files.pythonhosted.org/packages/d9/6f/1e25eee957e58e3afb2a44b94fa95e06cebc4c236193ed0de3012fff1e19/msgspec-0.20.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2aba22e2e302e9231e85edc24f27ba1f524d43c223ef5765bd8624c7df9ec0a5", size = 196391, upload-time = "2025-11-24T03:55:32.677Z" }, + { url = "https://files.pythonhosted.org/packages/7f/ee/af51d090ada641d4b264992a486435ba3ef5b5634bc27e6eb002f71cef7d/msgspec-0.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:716284f898ab2547fedd72a93bb940375de9fbfe77538f05779632dc34afdfde", size = 188644, upload-time = "2025-11-24T03:55:33.934Z" }, + { url = "https://files.pythonhosted.org/packages/49/d6/9709ee093b7742362c2934bfb1bbe791a1e09bed3ea5d8a18ce552fbfd73/msgspec-0.20.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:558ed73315efa51b1538fa8f1d3b22c8c5ff6d9a2a62eff87d25829b94fc5054", size = 218852, upload-time = "2025-11-24T03:55:35.575Z" }, + { url = "https://files.pythonhosted.org/packages/5c/a2/488517a43ccf5a4b6b6eca6dd4ede0bd82b043d1539dd6bb908a19f8efd3/msgspec-0.20.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:509ac1362a1d53aa66798c9b9fd76872d7faa30fcf89b2fba3bcbfd559d56eb0", size = 224937, upload-time = "2025-11-24T03:55:36.859Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e8/49b832808aa23b85d4f090d1d2e48a4e3834871415031ed7c5fe48723156/msgspec-0.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1353c2c93423602e7dea1aa4c92f3391fdfc25ff40e0bacf81d34dbc68adb870", size = 222858, upload-time = "2025-11-24T03:55:38.187Z" }, + { url = "https://files.pythonhosted.org/packages/9f/56/1dc2fa53685dca9c3f243a6cbecd34e856858354e455b77f47ebd76cf5bf/msgspec-0.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb33b5eb5adb3c33d749684471c6a165468395d7aa02d8867c15103b81e1da3e", size = 227248, upload-time = "2025-11-24T03:55:39.496Z" }, + { url = "https://files.pythonhosted.org/packages/5a/51/aba940212c23b32eedce752896205912c2668472ed5b205fc33da28a6509/msgspec-0.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:fb1d934e435dd3a2b8cf4bbf47a8757100b4a1cfdc2afdf227541199885cdacb", size = 190024, upload-time = "2025-11-24T03:55:40.829Z" }, + { url = "https://files.pythonhosted.org/packages/41/ad/3b9f259d94f183daa9764fef33fdc7010f7ecffc29af977044fa47440a83/msgspec-0.20.0-cp312-cp312-win_arm64.whl", hash = "sha256:00648b1e19cf01b2be45444ba9dc961bd4c056ffb15706651e64e5d6ec6197b7", size = 175390, upload-time = "2025-11-24T03:55:42.05Z" }, + { url = "https://files.pythonhosted.org/packages/8a/d1/b902d38b6e5ba3bdddbec469bba388d647f960aeed7b5b3623a8debe8a76/msgspec-0.20.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9c1ff8db03be7598b50dd4b4a478d6fe93faae3bd54f4f17aa004d0e46c14c46", size = 196463, upload-time = "2025-11-24T03:55:43.405Z" }, + { url = "https://files.pythonhosted.org/packages/57/b6/eff0305961a1d9447ec2b02f8c73c8946f22564d302a504185b730c9a761/msgspec-0.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f6532369ece217fd37c5ebcfd7e981f2615628c21121b7b2df9d3adcf2fd69b8", size = 188650, upload-time = "2025-11-24T03:55:44.761Z" }, + { url = "https://files.pythonhosted.org/packages/99/93/f2ec1ae1de51d3fdee998a1ede6b2c089453a2ee82b5c1b361ed9095064a/msgspec-0.20.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9a1697da2f85a751ac3cc6a97fceb8e937fc670947183fb2268edaf4016d1ee", size = 218834, upload-time = "2025-11-24T03:55:46.441Z" }, + { url = "https://files.pythonhosted.org/packages/28/83/36557b04cfdc317ed8a525c4993b23e43a8fbcddaddd78619112ca07138c/msgspec-0.20.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7fac7e9c92eddcd24c19d9e5f6249760941485dff97802461ae7c995a2450111", size = 224917, upload-time = "2025-11-24T03:55:48.06Z" }, + { url = "https://files.pythonhosted.org/packages/8f/56/362037a1ed5be0b88aced59272442c4b40065c659700f4b195a7f4d0ac88/msgspec-0.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f953a66f2a3eb8d5ea64768445e2bb301d97609db052628c3e1bcb7d87192a9f", size = 222821, upload-time = "2025-11-24T03:55:49.388Z" }, + { url = "https://files.pythonhosted.org/packages/92/75/fa2370ec341cedf663731ab7042e177b3742645c5dd4f64dc96bd9f18a6b/msgspec-0.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:247af0313ae64a066d3aea7ba98840f6681ccbf5c90ba9c7d17f3e39dbba679c", size = 227227, upload-time = "2025-11-24T03:55:51.125Z" }, + { url = "https://files.pythonhosted.org/packages/f1/25/5e8080fe0117f799b1b68008dc29a65862077296b92550632de015128579/msgspec-0.20.0-cp313-cp313-win_amd64.whl", hash = "sha256:67d5e4dfad52832017018d30a462604c80561aa62a9d548fc2bd4e430b66a352", size = 189966, upload-time = "2025-11-24T03:55:52.458Z" }, + { url = "https://files.pythonhosted.org/packages/79/b6/63363422153937d40e1cb349c5081338401f8529a5a4e216865decd981bf/msgspec-0.20.0-cp313-cp313-win_arm64.whl", hash = "sha256:91a52578226708b63a9a13de287b1ec3ed1123e4a088b198143860c087770458", size = 175378, upload-time = "2025-11-24T03:55:53.721Z" }, + { url = "https://files.pythonhosted.org/packages/bb/18/62dc13ab0260c7d741dda8dc7f481495b93ac9168cd887dda5929880eef8/msgspec-0.20.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:eead16538db1b3f7ec6e3ed1f6f7c5dec67e90f76e76b610e1ffb5671815633a", size = 196407, upload-time = "2025-11-24T03:55:55.001Z" }, + { url = "https://files.pythonhosted.org/packages/dd/1d/b9949e4ad6953e9f9a142c7997b2f7390c81e03e93570c7c33caf65d27e1/msgspec-0.20.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:703c3bb47bf47801627fb1438f106adbfa2998fe586696d1324586a375fca238", size = 188889, upload-time = "2025-11-24T03:55:56.311Z" }, + { url = "https://files.pythonhosted.org/packages/1e/19/f8bb2dc0f1bfe46cc7d2b6b61c5e9b5a46c62298e8f4d03bbe499c926180/msgspec-0.20.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6cdb227dc585fb109305cee0fd304c2896f02af93ecf50a9c84ee54ee67dbb42", size = 219691, upload-time = "2025-11-24T03:55:57.908Z" }, + { url = "https://files.pythonhosted.org/packages/b8/8e/6b17e43f6eb9369d9858ee32c97959fcd515628a1df376af96c11606cf70/msgspec-0.20.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27d35044dd8818ac1bd0fedb2feb4fbdff4e3508dd7c5d14316a12a2d96a0de0", size = 224918, upload-time = "2025-11-24T03:55:59.322Z" }, + { url = "https://files.pythonhosted.org/packages/1c/db/0e833a177db1a4484797adba7f429d4242585980b90882cc38709e1b62df/msgspec-0.20.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b4296393a29ee42dd25947981c65506fd4ad39beaf816f614146fa0c5a6c91ae", size = 223436, upload-time = "2025-11-24T03:56:00.716Z" }, + { url = "https://files.pythonhosted.org/packages/c3/30/d2ee787f4c918fd2b123441d49a7707ae9015e0e8e1ab51aa7967a97b90e/msgspec-0.20.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:205fbdadd0d8d861d71c8f3399fe1a82a2caf4467bc8ff9a626df34c12176980", size = 227190, upload-time = "2025-11-24T03:56:02.371Z" }, + { url = "https://files.pythonhosted.org/packages/ff/37/9c4b58ff11d890d788e700b827db2366f4d11b3313bf136780da7017278b/msgspec-0.20.0-cp314-cp314-win_amd64.whl", hash = "sha256:7dfebc94fe7d3feec6bc6c9df4f7e9eccc1160bb5b811fbf3e3a56899e398a6b", size = 193950, upload-time = "2025-11-24T03:56:03.668Z" }, + { url = "https://files.pythonhosted.org/packages/e9/4e/cab707bf2fa57408e2934e5197fc3560079db34a1e3cd2675ff2e47e07de/msgspec-0.20.0-cp314-cp314-win_arm64.whl", hash = "sha256:2ad6ae36e4a602b24b4bf4eaf8ab5a441fec03e1f1b5931beca8ebda68f53fc0", size = 179018, upload-time = "2025-11-24T03:56:05.038Z" }, + { url = "https://files.pythonhosted.org/packages/4c/06/3da3fc9aaa55618a8f43eb9052453cfe01f82930bca3af8cea63a89f3a11/msgspec-0.20.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f84703e0e6ef025663dd1de828ca028774797b8155e070e795c548f76dde65d5", size = 200389, upload-time = "2025-11-24T03:56:06.375Z" }, + { url = "https://files.pythonhosted.org/packages/83/3b/cc4270a5ceab40dfe1d1745856951b0a24fd16ac8539a66ed3004a60c91e/msgspec-0.20.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7c83fc24dd09cf1275934ff300e3951b3adc5573f0657a643515cc16c7dee131", size = 193198, upload-time = "2025-11-24T03:56:07.742Z" }, + { url = "https://files.pythonhosted.org/packages/cd/ae/4c7905ac53830c8e3c06fdd60e3cdcfedc0bbc993872d1549b84ea21a1bd/msgspec-0.20.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f13ccb1c335a124e80c4562573b9b90f01ea9521a1a87f7576c2e281d547f56", size = 225973, upload-time = "2025-11-24T03:56:09.18Z" }, + { url = "https://files.pythonhosted.org/packages/d9/da/032abac1de4d0678d99eaeadb1323bd9d247f4711c012404ba77ed6f15ca/msgspec-0.20.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:17c2b5ca19f19306fc83c96d85e606d2cc107e0caeea85066b5389f664e04846", size = 229509, upload-time = "2025-11-24T03:56:10.898Z" }, + { url = "https://files.pythonhosted.org/packages/69/52/fdc7bdb7057a166f309e0b44929e584319e625aaba4771b60912a9321ccd/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d931709355edabf66c2dd1a756b2d658593e79882bc81aae5964969d5a291b63", size = 230434, upload-time = "2025-11-24T03:56:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/cb/fe/1dfd5f512b26b53043884e4f34710c73e294e7cc54278c3fe28380e42c37/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:565f915d2e540e8a0c93a01ff67f50aebe1f7e22798c6a25873f9fda8d1325f8", size = 231758, upload-time = "2025-11-24T03:56:13.765Z" }, + { url = "https://files.pythonhosted.org/packages/97/f6/9ba7121b8e0c4e0beee49575d1dbc804e2e72467692f0428cf39ceba1ea5/msgspec-0.20.0-cp314-cp314t-win_amd64.whl", hash = "sha256:726f3e6c3c323f283f6021ebb6c8ccf58d7cd7baa67b93d73bfbe9a15c34ab8d", size = 206540, upload-time = "2025-11-24T03:56:15.029Z" }, + { url = "https://files.pythonhosted.org/packages/c8/3e/c5187de84bb2c2ca334ab163fcacf19a23ebb1d876c837f81a1b324a15bf/msgspec-0.20.0-cp314-cp314t-win_arm64.whl", hash = "sha256:93f23528edc51d9f686808a361728e903d6f2be55c901d6f5c92e44c6d546bfc", size = 183011, upload-time = "2025-11-24T03:56:16.442Z" }, +] + +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli-w" }, +] + [[package]] name = "multidict" version = "6.4.4" @@ -2657,6 +2719,7 @@ dependencies = [ { name = "attrs" }, { name = "click" }, { name = "click-default-group" }, + { name = "msgspec", extra = ["toml"] }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "optree" }, @@ -2717,6 +2780,7 @@ requires-dist = [ { name = "attrs", specifier = ">=21.3.0" }, { name = "click", specifier = ">=8.1.8,!=8.2.0" }, { name = "click-default-group", specifier = ">=1.2.4" }, + { name = "msgspec", extras = ["toml"], specifier = ">=0.18.6" }, { name = "networkx", specifier = ">=2.4.0" }, { name = "optree", specifier = ">=0.9.0" }, { name = "packaging", specifier = ">=23.0.0" }, @@ -3703,6 +3767,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, ] +[[package]] +name = "tomli-w" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/75/241269d1da26b624c0d5e110e8149093c759b7a286138f4efd61a60e75fe/tomli_w-1.2.0.tar.gz", hash = "sha256:2dd14fac5a47c27be9cd4c976af5a12d87fb1f0b4512f81d69cce3b35ae25021", size = 7184, upload-time = "2025-01-15T12:07:24.262Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/18/c86eb8e0202e32dd3df50d43d7ff9854f8e0603945ff398974c1d91ac1ef/tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90", size = 6675, upload-time = "2025-01-15T12:07:22.074Z" }, +] + [[package]] name = "toolz" version = "1.0.0" From c74d896bf1ab0ecf775f63d9276ae395f3fe49bf Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Thu, 1 Jan 2026 13:39:19 +0100 Subject: [PATCH 2/3] Fix typing issue. --- src/_pytask/console.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/_pytask/console.py b/src/_pytask/console.py index f22b5ab21..3c4b91e7a 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -115,11 +115,11 @@ def render_to_string( if not strip_styles and console.no_color and console.color_system is not None: theme: Theme | None try: - theme = Theme(console._theme_stack._entries[-1]) # type: ignore[attr-defined] + theme = Theme(console._theme_stack._entries[-1]) except (AttributeError, IndexError, TypeError): theme = None render_console = Console( - color_system=console.color_system, + color_system=console.color_system, # type: ignore[invalid-argument-type] force_terminal=True, width=console.width, no_color=False, From 55957567773ef63f1161d0d73fdf788ded8cb607 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Thu, 1 Jan 2026 14:40:20 +0100 Subject: [PATCH 3/3] Implement a flag. --- docs/source/how_to_guides/portability.md | 17 ++++- docs/source/reference_guides/lockfile.md | 68 +++++++++---------- src/_pytask/build.py | 10 +++ src/_pytask/lockfile.py | 83 +++++++++++++++--------- src/_pytask/state.py | 4 +- tests/test_lockfile.py | 44 +++++++++++-- 6 files changed, 149 insertions(+), 77 deletions(-) diff --git a/docs/source/how_to_guides/portability.md b/docs/source/how_to_guides/portability.md index 031b5edda..6bc2f85ca 100644 --- a/docs/source/how_to_guides/portability.md +++ b/docs/source/how_to_guides/portability.md @@ -12,7 +12,7 @@ This guide explains how to keep pytask state portable across machines. 1. **Portable State Values** - - `state.value` is opaque and comes from `PNode.state()` / `PTask.state()`. + - `state` is opaque and comes from `PNode.state()` / `PTask.state()`. - Content hashes are portable; timestamps or absolute paths are not. - Custom nodes should avoid machine‑specific paths in `state()`. @@ -26,6 +26,21 @@ This guide explains how to keep pytask state portable across machines. - If inputs live outside the project root, IDs will include `..` segments to remain relative; this is expected. +## Cleaning Up the Lockfile + +`pytask.lock` is updated incrementally. Entries are only replaced when the corresponding +tasks run. If tasks are removed or renamed, their old entries remain as stale data and +are ignored. + +To clean up stale entries without deleting the file, run: + +``` +pytask build --clean-lockfile +``` + +This rewrites the lockfile after a successful build with only the currently collected +tasks and their current state values. + ## Legacy SQLite SQLite is the old state format. It is used only when no lockfile exists, and the diff --git a/docs/source/reference_guides/lockfile.md b/docs/source/reference_guides/lockfile.md index 47fc7c9a6..316427e51 100644 --- a/docs/source/reference_guides/lockfile.md +++ b/docs/source/reference_guides/lockfile.md @@ -14,25 +14,17 @@ is written during that first run. Subsequent runs use the lockfile only. # This file is automatically @generated by pytask. # It is not intended for manual editing. -lock-version = "1.0" +lock-version = "1" [[task]] id = "src/tasks/data.py::task_clean_data" +state = "f9e8d7c6..." -[task.state] -value = "f9e8d7c6..." +[task.depends_on] +"data/raw/input.csv" = "e5f6g7h8..." -[[task.depends_on]] -id = "data/raw/input.csv" - -[task.depends_on.state] -value = "e5f6g7h8..." - -[[task.produces]] -id = "data/processed/clean.parquet" - -[task.produces.state] -value = "m3n4o5p6..." +[task.produces] +"data/processed/clean.parquet" = "m3n4o5p6..." ``` ## Behavior @@ -40,7 +32,7 @@ value = "m3n4o5p6..." On each run, pytask: 1. Reads `pytask.lock` (if present). -1. Compares current dependency/product/task `state()` to stored `state.value`. +1. Compares current dependency/product/task `state()` to stored `state`. 1. Skips tasks whose states match; runs the rest. 1. Updates `pytask.lock` after each completed task (atomic write). @@ -52,39 +44,43 @@ serialized even when tasks execute in parallel. There are two portability concerns: 1. **IDs**: Lockfile IDs must be project‑relative and stable across machines. -1. **State values**: `state.value` is opaque; portability depends on each node’s - `state()` implementation. Content hashes are portable; timestamps are not. +1. **State values**: `state` is opaque; portability depends on each node’s `state()` + implementation. Content hashes are portable; timestamps are not. + +## Maintenance + +Use `pytask build --clean-lockfile` to rewrite `pytask.lock` with only currently +collected tasks. The rewrite happens after a successful build and recomputes current +state values without executing tasks again. ## File Format Reference ### Top-Level -| Field | Required | Description | -| -------------- | -------- | ---------------------------------- | -| `lock-version` | Yes | Schema version (currently `"1.0"`) | +| Field | Required | Description | +| -------------- | -------- | -------------------------------- | +| `lock-version` | Yes | Schema version (currently `"1"`) | ### Task Entry -| Field | Required | Description | -| ------- | -------- | -------------------------------------------- | -| `id` | Yes | Portable task identifier | -| `state` | Yes | State dictionary with a single `value` field | +| Field | Required | Description | +| ------------ | -------- | ----------------------------- | +| `id` | Yes | Portable task identifier | +| `state` | Yes | Opaque state string | +| `depends_on` | No | Mapping from node id to state | +| `produces` | No | Mapping from node id to state | ### Dependency/Product Entry -| Field | Required | Description | -| ------- | -------- | -------------------------------------------- | -| `id` | Yes | Node identifier | -| `state` | Yes | State dictionary with a single `value` field | +Node entries are stored as key-value pairs inside `depends_on` and `produces`, where the +key is the node id and the value is the node state string. -### State Dictionary +## Version Compatibility -| Field | Required | Description | -| ------- | -------- | ------------------- | -| `value` | Yes | Opaque state string | +Only lock-version `"1"` is supported. Older or newer versions error with a clear upgrade +message. -## Version Compatibility +## Implementation Notes -- **Upgrade**: newer pytask upgrades old lock files in memory and writes the new format - on the next update. -- **Downgrade**: older pytask errors with a clear upgrade message. +- The lockfile is encoded/decoded with `msgspec`’s TOML support. +- Writes are atomic: pytask writes a temporary file and replaces `pytask.lock`. diff --git a/src/_pytask/build.py b/src/_pytask/build.py index 83b4c3bdf..4208600bb 100644 --- a/src/_pytask/build.py +++ b/src/_pytask/build.py @@ -75,6 +75,7 @@ def build( # noqa: C901, PLR0912, PLR0913, PLR0915 debug_pytask: bool = False, disable_warnings: bool = False, dry_run: bool = False, + clean_lockfile: bool = False, editor_url_scheme: Literal["no_link", "file", "vscode", "pycharm"] # noqa: PYI051 | str = "file", explain: bool = False, @@ -124,6 +125,8 @@ def build( # noqa: C901, PLR0912, PLR0913, PLR0915 Whether warnings should be disabled and not displayed. dry_run Whether a dry-run should be performed that shows which tasks need to be rerun. + clean_lockfile + Whether the lockfile should be rewritten to only include collected tasks. editor_url_scheme An url scheme that allows to click on task names, node names and filenames and jump right into you preferred editor to the right line. @@ -192,6 +195,7 @@ def build( # noqa: C901, PLR0912, PLR0913, PLR0915 "debug_pytask": debug_pytask, "disable_warnings": disable_warnings, "dry_run": dry_run, + "clean_lockfile": clean_lockfile, "editor_url_scheme": editor_url_scheme, "explain": explain, "expression": expression, @@ -341,6 +345,12 @@ def build( # noqa: C901, PLR0912, PLR0913, PLR0915 default=False, help="Execute a task even if it succeeded successfully before.", ) +@click.option( + "--clean-lockfile", + is_flag=True, + default=False, + help="Rewrite the lockfile with only currently collected tasks.", +) @click.option( "--explain", is_flag=True, diff --git a/src/_pytask/lockfile.py b/src/_pytask/lockfile.py index 2d19bc717..5dc8f9c37 100644 --- a/src/_pytask/lockfile.py +++ b/src/_pytask/lockfile.py @@ -18,12 +18,13 @@ from _pytask.node_protocols import PTask from _pytask.node_protocols import PTaskWithPath from _pytask.nodes import PythonNode +from _pytask.outcomes import ExitCode from _pytask.pluginmanager import hookimpl if TYPE_CHECKING: from _pytask.session import Session -CURRENT_LOCKFILE_VERSION = "1.0" +CURRENT_LOCKFILE_VERSION = "1" class LockfileError(Exception): @@ -34,20 +35,11 @@ class LockfileVersionError(LockfileError): """Raised when a lockfile version is not supported.""" -class _State(msgspec.Struct): - value: str - - -class _NodeEntry(msgspec.Struct): - id: str - state: _State - - class _TaskEntry(msgspec.Struct): id: str - state: _State - depends_on: list[_NodeEntry] = msgspec.field(default_factory=list) - produces: list[_NodeEntry] = msgspec.field(default_factory=list) + state: str + depends_on: dict[str, str] = msgspec.field(default_factory=dict) + produces: dict[str, str] = msgspec.field(default_factory=dict) class _Lockfile(msgspec.Struct, forbid_unknown_fields=False): @@ -109,28 +101,25 @@ def read_lockfile(path: Path) -> _Lockfile | None: msg = "Lockfile is missing 'lock-version'." raise LockfileError(msg) - if Version(version) > Version(CURRENT_LOCKFILE_VERSION): + if Version(version) != Version(CURRENT_LOCKFILE_VERSION): msg = ( f"Unsupported lock-version {version!r}. " f"Current version is {CURRENT_LOCKFILE_VERSION}." ) raise LockfileVersionError(msg) - lockfile = msgspec.toml.decode(path.read_bytes(), type=_Lockfile) - - if Version(version) < Version(CURRENT_LOCKFILE_VERSION): - lockfile = _Lockfile( - lock_version=CURRENT_LOCKFILE_VERSION, - task=lockfile.task, - ) - return lockfile + try: + return msgspec.toml.decode(path.read_bytes(), type=_Lockfile) + except msgspec.DecodeError: + msg = "Lockfile has invalid format." + raise LockfileError(msg) from None def _normalize_lockfile(lockfile: _Lockfile) -> _Lockfile: tasks = [] for task in sorted(lockfile.task, key=lambda entry: entry.id): - depends_on = sorted(task.depends_on, key=lambda entry: entry.id) - produces = sorted(task.produces, key=lambda entry: entry.id) + depends_on = {key: task.depends_on[key] for key in sorted(task.depends_on)} + produces = {key: task.produces[key] for key in sorted(task.produces)} tasks.append( _TaskEntry( id=task.id, @@ -159,7 +148,7 @@ def _build_task_entry(session: Session, task: PTask, root: Path) -> _TaskEntry | predecessors = set(dag.predecessors(task.signature)) successors = set(dag.successors(task.signature)) - depends_on = [] + depends_on: dict[str, str] = {} for node_signature in predecessors: node = ( dag.nodes[node_signature].get("task") or dag.nodes[node_signature]["node"] @@ -174,9 +163,9 @@ def _build_task_entry(session: Session, task: PTask, root: Path) -> _TaskEntry | if isinstance(node, PTask) else build_portable_node_id(node, root) ) - depends_on.append(_NodeEntry(id=node_id, state=_State(state))) + depends_on[node_id] = state - produces = [] + produces: dict[str, str] = {} for node_signature in successors: node = ( dag.nodes[node_signature].get("task") or dag.nodes[node_signature]["node"] @@ -191,11 +180,11 @@ def _build_task_entry(session: Session, task: PTask, root: Path) -> _TaskEntry | if isinstance(node, PTask) else build_portable_node_id(node, root) ) - produces.append(_NodeEntry(id=node_id, state=_State(state))) + produces[node_id] = state return _TaskEntry( id=build_portable_task_id(task, root), - state=_State(task_state), + state=task_state, depends_on=depends_on, produces=produces, ) @@ -238,9 +227,7 @@ def _rebuild_indexes(self) -> None: self._task_index = {task.id: task for task in self.lockfile.task} self._node_index = {} for task in self.lockfile.task: - nodes = {} - for entry in task.depends_on + task.produces: - nodes[entry.id] = entry.state.value + nodes = {**task.depends_on, **task.produces} self._node_index[task.id] = nodes def get_task_entry(self, task_id: str) -> _TaskEntry | None: @@ -261,6 +248,21 @@ def update_task(self, session: Session, task: PTask) -> None: self._rebuild_indexes() write_lockfile(self.path, self.lockfile) + def rebuild_from_session(self, session: Session) -> None: + if session.dag is None: + return + tasks = [] + for task in session.tasks: + entry = _build_task_entry(session, task, self.root) + if entry is not None: + tasks.append(entry) + self.lockfile = _Lockfile( + lock_version=CURRENT_LOCKFILE_VERSION, + task=tasks, + ) + self._rebuild_indexes() + write_lockfile(self.path, self.lockfile) + @hookimpl def pytask_post_parse(config: dict[str, Any]) -> None: @@ -268,3 +270,20 @@ def pytask_post_parse(config: dict[str, Any]) -> None: path = config["root"] / "pytask.lock" config["lockfile_path"] = path config["lockfile_state"] = LockfileState.from_path(path, config["root"]) + + +@hookimpl +def pytask_unconfigure(session: Session) -> None: + """Optionally rewrite the lockfile to drop stale entries.""" + if session.config.get("command") != "build": + return + if not session.config.get("clean_lockfile"): + return + if session.config.get("dry_run"): + return + if session.exit_code != ExitCode.OK: + return + lockfile_state = session.config.get("lockfile_state") + if lockfile_state is None: + return + lockfile_state.rebuild_from_session(session) diff --git a/src/_pytask/state.py b/src/_pytask/state.py index 9c8928d2e..5b05c1731 100644 --- a/src/_pytask/state.py +++ b/src/_pytask/state.py @@ -35,7 +35,7 @@ def has_node_changed( entry = lockfile_state.get_task_entry(task_id) if entry is None: return True - return state != entry.state.value + return state != entry.state node_id = ( build_portable_task_id(node, lockfile_state.root) if isinstance(node, PTask) @@ -67,7 +67,7 @@ def get_node_change_info( entry = lockfile_state.get_task_entry(task_id) if entry is None: return True, "not_in_db", details - stored_state = entry.state.value + stored_state = entry.state else: node_id = ( build_portable_task_id(node, lockfile_state.root) diff --git a/tests/test_lockfile.py b/tests/test_lockfile.py index cb9b43c39..6d7d29e35 100644 --- a/tests/test_lockfile.py +++ b/tests/test_lockfile.py @@ -4,12 +4,15 @@ import pytest -from _pytask.lockfile import CURRENT_LOCKFILE_VERSION from _pytask.lockfile import LockfileVersionError from _pytask.lockfile import read_lockfile +from pytask import ExitCode +from pytask import PathNode +from pytask import TaskWithoutPath +from pytask import build -def test_lockfile_upgrades_older_version(tmp_path): +def test_lockfile_rejects_older_version(tmp_path): path = tmp_path / "pytask.lock" path.write_text( textwrap.dedent( @@ -21,10 +24,8 @@ def test_lockfile_upgrades_older_version(tmp_path): + "\n" ) - lockfile = read_lockfile(path) - - assert lockfile is not None - assert lockfile.lock_version == CURRENT_LOCKFILE_VERSION + with pytest.raises(LockfileVersionError): + read_lockfile(path) def test_lockfile_rejects_newer_version(tmp_path): @@ -41,3 +42,34 @@ def test_lockfile_rejects_newer_version(tmp_path): with pytest.raises(LockfileVersionError): read_lockfile(path) + + +def test_clean_lockfile_removes_stale_entries(tmp_path): + def func_first(path): + path.touch() + + def func_second(path): + path.touch() + + task_first = TaskWithoutPath( + name="task_first", + function=func_first, + produces={"path": PathNode(path=tmp_path / "first.txt")}, + ) + task_second = TaskWithoutPath( + name="task_second", + function=func_second, + produces={"path": PathNode(path=tmp_path / "second.txt")}, + ) + + session = build(tasks=[task_first, task_second], paths=tmp_path) + assert session.exit_code == ExitCode.OK + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + assert {entry.id for entry in lockfile.task} == {"task_first", "task_second"} + + session = build(tasks=[task_first], paths=tmp_path, clean_lockfile=True) + assert session.exit_code == ExitCode.OK + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + assert {entry.id for entry in lockfile.task} == {"task_first"}