diff --git a/Makefile b/Makefile index 530a02d..28b9c0b 100644 --- a/Makefile +++ b/Makefile @@ -52,32 +52,17 @@ test: # Generate expected test outputs from test inputs generate-test-outputs: - @rm -rf tests/outputs_basic - @uv run efemel process "**/*.py" --cwd tests/inputs/basic --out tests/outputs/basic - - @rm -rf tests/outputs_with_imports - @uv run efemel process "*.py" --cwd tests/inputs/with_imports --out tests/outputs/with_imports --env prod - - @rm -rf tests/outputs_basic_flattened - @uv run efemel process "**/*.py" --cwd tests/inputs/basic --out tests/outputs/flattened --flatten - - @rm -rf tests/outputs_basic_with_hooks - @uv run efemel process "**/*.py" --cwd tests/inputs/basic --out tests/outputs/with_hooks --hooks tests/hooks/before_after/output_filename.py - - @rm -rf tests/outputs/with_hooks_dir - @uv run efemel process "**/*.py" --cwd tests/inputs/basic --out tests/outputs/with_hooks_dir --hooks tests/hooks/multiple - - @rm -rf tests/outputs/process_data_pick - @uv run efemel process "**/*.py" --cwd tests/inputs/process_data --out tests/outputs/process_data_pick --pick user_data - - @rm -rf tests/outputs/process_data_unwrap - @uv run efemel process "**/*.py" --cwd tests/inputs/process_data --out tests/outputs/process_data_unwrap --unwrap user_data - - @rm -rf tests/outputs/with_params - @uv run efemel process "**/*.py" --cwd tests/inputs/with_params --out tests/outputs/with_params --param app_name=myapp --param version=2.0.0 --param debug_mode=true --param port=8080 --param 'database_config={"host":"prod-db","port":5432}' --param memory_mb=512 - - @rm -rf tests/outputs/with_params_file - @uv run efemel process "config.py" --cwd tests/inputs/with_params_file --out tests/outputs/with_params_file --params-file tests/params/params.py + mkdir -p tests/outputs/dry_run + touch tests/outputs/dry_run/.gitkeep + @uv run efemel process "**/*.py" --cwd tests/inputs/basic --out tests/outputs/basic --clean + @uv run efemel process "*.py" --cwd tests/inputs/with_imports --out tests/outputs/with_imports --env prod --clean + @uv run efemel process "**/*.py" --cwd tests/inputs/basic --out tests/outputs/flattened --flatten --clean + @uv run efemel process "**/*.py" --cwd tests/inputs/basic --out tests/outputs/with_hooks --hooks tests/hooks/before_after/output_filename.py --clean + @uv run efemel process "**/*.py" --cwd tests/inputs/basic --out tests/outputs/with_hooks_dir --hooks tests/hooks/multiple --clean + @uv run efemel process "**/*.py" --cwd tests/inputs/process_data --out tests/outputs/process_data_pick --pick user_data --clean + @uv run efemel process "**/*.py" --cwd tests/inputs/process_data --out tests/outputs/process_data_unwrap --unwrap user_data --clean + @uv run efemel process "**/*.py" --cwd tests/inputs/with_params --out tests/outputs/with_params --param app_name=myapp --param version=2.0.0 --param debug_mode=true --param port=8080 --param 'database_config={"host":"prod-db","port":5432}' --param memory_mb=512 --clean + @uv run efemel process "config.py" --cwd tests/inputs/with_params_file --out tests/outputs/with_params_file --params-file tests/params/params.py --clean # Clean build artifacts and cache files diff --git a/README.md b/README.md index e9fac95..31717c0 100644 --- a/README.md +++ b/README.md @@ -349,6 +349,8 @@ efemel process config.py --out output/ --params-file params.py | `--workers` | `-w` | `int` | No | `CPU_COUNT` | Number of parallel workers | | `--hooks` | `-h` | `str` | No | `None` | Path to hooks file or directory | | `--flatten` | `-f` | `flag` | No | `False` | Flatten directory structure | +| `--clean` | - | `flag` | No | `False` | Clean (delete) the output directory before processing | +| `--dry-run` | - | `flag` | No | `False` | Show what would be processed without writing files | | `--pick` | `-p` | `str` | No | `None` | Pick specific keys from the extracted data (can be used multiple times) | | `--unwrap` | `-u` | `str` | No | `None` | Extract specific values from the processed data, merging them (can be used multiple times) | | `--param` | `-P` | `str` | No | `None` | Pass custom parameters to processed scripts in key=value format (can be used multiple times) | diff --git a/efemel/cli.py b/efemel/cli.py index afdfa3f..385bcbe 100644 --- a/efemel/cli.py +++ b/efemel/cli.py @@ -1,4 +1,5 @@ import os +import shutil from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path @@ -11,6 +12,7 @@ from efemel.readers.local import LocalReader from efemel.transformers.json import JSONTransformer from efemel.writers.local import LocalWriter +from efemel.writers.sink import SinkWriter DEFAULT_WORKERS = os.cpu_count() or 1 @@ -73,12 +75,27 @@ def info(): type=click.Path(exists=True, readable=True, resolve_path=True), help="Path to a Python file that will be processed to extract parameters for other files", ) -def process(file_pattern, out, flatten, cwd, env, workers, hooks, pick, unwrap, param, params_file): +@click.option( + "--clean", + is_flag=True, + default=False, + help="Clean (delete) the output directory before processing", +) +@click.option( + "--dry-run", + is_flag=True, + default=False, + help="Show what would be processed without writing files", +) +def process(file_pattern, out, flatten, cwd, env, workers, hooks, pick, unwrap, param, params_file, clean, dry_run): """Process Python files and extract serializable variables to JSON. FILE_PATTERN: Glob pattern to match Python files (e.g., "**/*.py") """ + if dry_run: + click.echo("Dry run mode enabled. No files will be written.") + # Parse custom parameters custom_params = {} @@ -124,7 +141,7 @@ def process(file_pattern, out, flatten, cwd, env, workers, hooks, pick, unwrap, reader = LocalReader(cwd) transformer = JSONTransformer() - writer = LocalWriter(out, reader.original_cwd) + writer = LocalWriter(out, reader.original_cwd) if not dry_run else SinkWriter(out, reader.original_cwd) if flatten: # Add the flatten_output_path hook to the hooks manager @@ -160,6 +177,17 @@ def process(file_pattern, out, flatten, cwd, env, workers, hooks, pick, unwrap, click.echo("No files found matching the pattern.") return + # Clean output directory if requested + output_path = Path(out) + if clean and not dry_run: + if output_path.exists(): + click.echo(f"Cleaning output directory: {output_path}") + shutil.rmtree(output_path) + click.echo("Output directory cleaned successfully.") + + # Ensure the output directory exists for writing + output_path.mkdir(parents=True, exist_ok=True) + def process_single_file(file_path: Path, cwd: Path): # Added type hint for clarity """Process a single file and return results.""" try: diff --git a/efemel/writers/__init__.py b/efemel/writers/__init__.py new file mode 100644 index 0000000..7304d69 --- /dev/null +++ b/efemel/writers/__init__.py @@ -0,0 +1,4 @@ +from .local import LocalWriter +from .sink import SinkWriter + +__all__ = ["LocalWriter", "SinkWriter"] diff --git a/efemel/writers/sink.py b/efemel/writers/sink.py new file mode 100644 index 0000000..6ec3bfa --- /dev/null +++ b/efemel/writers/sink.py @@ -0,0 +1,26 @@ +from pathlib import Path + + +class SinkWriter: + def __init__(self, output_dir: str, cwd: Path | None = None): + """ + Initialize the SinkWriter. + This class mimics LocalWriter but doesn't actually write files - used for dry runs. + """ + self.output_dir = Path(output_dir) + + # If output path is relative, make it relative to original working directory + if not self.output_dir.is_absolute(): + self.output_dir = cwd / self.output_dir + + def write(self, data: str, file_path: Path) -> Path: + """ + Simulate writing data to a file without actually writing it. + Returns the path where the file would have been written. + + :param data: The data that would be written (ignored). + :param file_path: The path to the file where the data would be written. + :return: The path where the file would have been written. + """ + output_file = self.output_dir / file_path.with_suffix(".json") + return output_file diff --git a/tests/outputs/dry_run/.gitkeep b/tests/outputs/dry_run/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/outputs/dry_run/.gitkeep @@ -0,0 +1 @@ + diff --git a/tests/test_cli.py b/tests/test_cli.py index 64ca0bc..f4e348e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -89,6 +89,12 @@ def get_test_scenarios(): ], "assets": ["params/params.py"], }, + { + "name": "dry run", + "inputs_dir": test_dir / "inputs/basic", + "outputs_dir": test_dir / "outputs/dry_run", + "process_args": ["--dry-run"], + }, ] diff --git a/wiki/Home.md b/wiki/Home.md index 54f357a..67e3636 100644 --- a/wiki/Home.md +++ b/wiki/Home.md @@ -349,6 +349,8 @@ efemel process config.py --out output/ --params-file params.py | `--workers` | `-w` | `int` | No | `CPU_COUNT` | Number of parallel workers | | `--hooks` | `-h` | `str` | No | `None` | Path to hooks file or directory | | `--flatten` | `-f` | `flag` | No | `False` | Flatten directory structure | +| `--clean` | - | `flag` | No | `False` | Clean (delete) the output directory before processing | +| `--dry-run` | - | `flag` | No | `False` | Show what would be processed without writing files | | `--pick` | `-p` | `str` | No | `None` | Pick specific keys from the extracted data (can be used multiple times) | | `--unwrap` | `-u` | `str` | No | `None` | Extract specific values from the processed data, merging them (can be used multiple times) | | `--param` | `-P` | `str` | No | `None` | Pass custom parameters to processed scripts in key=value format (can be used multiple times) |