From e2a41cf87dfbdf26eeda042235e938183c58da51 Mon Sep 17 00:00:00 2001 From: Magnus Hansson Date: Sun, 9 Nov 2025 11:17:05 +0100 Subject: [PATCH 1/3] Fix CI mypy errors and add pre-commit hooks - Fix type annotations: change Optional[click.DateTime] to Optional[datetime.datetime] - Fix import sorting in converters (datetime before typing) - Remove unused import in test_workflow.py - Add .pre-commit-config.yaml with hooks matching CI pipeline - Add pre-commit to dev dependencies - Add pre-commit hooks documentation to README - Format code with black --- .pre-commit-config.yaml | 46 ++++++++++++++ README.md | 79 +++++++++++++++++++++++-- bis_scraper/cli/main.py | 8 +-- bis_scraper/converters/controller.py | 2 +- bis_scraper/converters/pdf_converter.py | 2 +- pyproject.toml | 3 +- tests/integration/test_workflow.py | 2 +- tests/test_cli.py | 4 +- 8 files changed, 129 insertions(+), 17 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..82cf089 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,46 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + + - repo: local + hooks: + - id: pytest + name: pytest + entry: pytest + language: system + pass_filenames: false + always_run: true + + - id: mypy + name: mypy + entry: mypy bis_scraper + language: system + pass_filenames: false + always_run: true + + - id: black + name: black + entry: black --check bis_scraper tests + language: system + pass_filenames: false + always_run: true + + - id: isort + name: isort + entry: isort --check bis_scraper tests + language: system + pass_filenames: false + always_run: true + + - id: ruff + name: ruff + entry: ruff check bis_scraper tests + language: system + pass_filenames: false + always_run: true + diff --git a/README.md b/README.md index 
d4cac35..1891081 100644 --- a/README.md +++ b/README.md @@ -321,30 +321,30 @@ def analyze_speeches(data_dir, institution, keywords): # Path to text files for the institution institution_dir = Path(data_dir) / "texts" / institution.lower().replace(" ", "_") results = [] - + # Process each text file for txt_file in glob.glob(f"{institution_dir}/*.txt"): file_code = os.path.basename(txt_file).split('.')[0] - + with open(txt_file, 'r', encoding='utf-8') as f: text = f.read().lower() - + # Count keywords word_counts = {} for keyword in keywords: pattern = r'\b' + re.escape(keyword.lower()) + r'\b' word_counts[keyword] = len(re.findall(pattern, text)) - + # Get total word count total_words = len(re.findall(r'\b\w+\b', text)) - + # Add to results results.append({ 'file_code': file_code, 'total_words': total_words, **word_counts }) - + # Convert to DataFrame for analysis df = pd.DataFrame(results) return df @@ -434,6 +434,73 @@ mypy bis_scraper ruff bis_scraper tests ``` +### Pre-commit Hooks + +This project uses [pre-commit](https://pre-commit.com/) hooks to automatically run the full CI pipeline locally before each commit. This ensures that all code quality checks pass before pushing to the repository. + +#### Installation + +First, install pre-commit (if not already installed). If you've installed the dev dependencies, pre-commit is already included: + +```bash +# If you've installed dev dependencies, pre-commit is already available +pip install -e ".[dev]" + +# Or install pre-commit separately +pip install pre-commit +``` + +Then install the git hooks: + +```bash +pre-commit install +``` + +This will set up the hooks to run automatically on every commit. 
+ +#### Running Manually + +You can run all pre-commit hooks manually on all files: + +```bash +pre-commit run --all-files +``` + +To run a specific hook: + +```bash +pre-commit run <hook-id> --all-files +``` + +For example: +```bash +pre-commit run pytest --all-files +pre-commit run mypy --all-files +pre-commit run black --all-files +``` + +#### What the Hooks Do + +The pre-commit hooks run the same checks as the CI pipeline: + +1. **pytest** - Runs all tests +2. **mypy** - Type checking on `bis_scraper` package +3. **black** - Code formatting check +4. **isort** - Import sorting check +5. **ruff** - Linting + +If any hook fails, the commit will be blocked. Fix the issues and try committing again. + +#### Skipping Hooks (Not Recommended) + +If you need to skip hooks for a specific commit (not recommended), you can use: + +```bash +git commit --no-verify +``` + +However, the CI pipeline will still run these checks, so it's better to fix issues locally. + ## Contributing Contributions are welcome! Please feel free to submit a Pull Request. 
diff --git a/bis_scraper/cli/main.py b/bis_scraper/cli/main.py index 5c6eb03..042de28 100644 --- a/bis_scraper/cli/main.py +++ b/bis_scraper/cli/main.py @@ -179,8 +179,8 @@ def scrape( @click.pass_context def convert( ctx: click.Context, - start_date: Optional[click.DateTime], - end_date: Optional[click.DateTime], + start_date: Optional[datetime.datetime], + end_date: Optional[datetime.datetime], institutions: Tuple[str, ...], force: bool, limit: Optional[int], @@ -238,8 +238,8 @@ def convert( @click.pass_context def run_all( ctx: click.Context, - start_date: Optional[click.DateTime], - end_date: Optional[click.DateTime], + start_date: Optional[datetime.datetime], + end_date: Optional[datetime.datetime], institutions: tuple[str, ...], force: bool, limit: Optional[int], diff --git a/bis_scraper/converters/controller.py b/bis_scraper/converters/controller.py index d094700..9a3213f 100644 --- a/bis_scraper/converters/controller.py +++ b/bis_scraper/converters/controller.py @@ -1,10 +1,10 @@ """Controller module for PDF to text conversion operations.""" +import datetime import logging import time from pathlib import Path from typing import Optional, Tuple -import datetime from bis_scraper.converters.pdf_converter import PdfConverter from bis_scraper.models import ConversionResult diff --git a/bis_scraper/converters/pdf_converter.py b/bis_scraper/converters/pdf_converter.py index 15497fd..fe3724b 100644 --- a/bis_scraper/converters/pdf_converter.py +++ b/bis_scraper/converters/pdf_converter.py @@ -1,9 +1,9 @@ """PDF to text converter implementation.""" +import datetime import logging from pathlib import Path from typing import List, Optional -import datetime import textract # type: ignore diff --git a/pyproject.toml b/pyproject.toml index e5c1b4f..db3422b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dev = [ "responses>=0.22.0", "types-requests>=2.28.0", "types-beautifulsoup4>=4.8.0", + "pre-commit>=3.0.0", ] [project.urls] @@ -83,4 +84,4 @@ ignore 
= [] "bis_scraper/scrapers/**/*.py" = ["E501"] "bis_scraper/converters/**/*.py" = ["E501"] "bis_scraper/utils/**/*.py" = ["E501"] -"bis_scraper/cli/**/*.py" = ["E501"] \ No newline at end of file +"bis_scraper/cli/**/*.py" = ["E501"] diff --git a/tests/integration/test_workflow.py b/tests/integration/test_workflow.py index 33c57fc..49dae09 100644 --- a/tests/integration/test_workflow.py +++ b/tests/integration/test_workflow.py @@ -9,7 +9,7 @@ import responses -from bis_scraper.converters.controller import convert_pdfs, convert_pdfs_dates +from bis_scraper.converters.controller import convert_pdfs_dates from bis_scraper.scrapers.controller import scrape_bis from bis_scraper.utils.constants import HTML_EXTENSION, PDF_EXTENSION, SPEECHES_URL diff --git a/tests/test_cli.py b/tests/test_cli.py index f2e8641..3eb584f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -46,9 +46,7 @@ def test_convert_command(self, mock_convert) -> None: # Set up the mock to return a simple result from bis_scraper.models import ConversionResult - mock_convert.return_value = ConversionResult( - successful=5, skipped=2, failed=1 - ) + mock_convert.return_value = ConversionResult(successful=5, skipped=2, failed=1) # Test with verbose and institution result = self.runner.invoke( From 362c32a8143fd814c7ee88f0634ac43f7b9ede6a Mon Sep 17 00:00:00 2001 From: Magnus Hansson Date: Sun, 9 Nov 2025 11:21:23 +0100 Subject: [PATCH 2/3] Remove redundant check_code_quality.py and run_tests.py scripts These scripts are now redundant since pre-commit hooks run all the same checks. Pre-commit hooks provide a better developer experience and ensure consistency. 
--- README.md | 12 +---- check_code_quality.py | 105 ------------------------------------------ run_tests.py | 98 --------------------------------------- 3 files changed, 1 insertion(+), 214 deletions(-) delete mode 100755 check_code_quality.py delete mode 100755 run_tests.py diff --git a/README.md b/README.md index 1891081..ea7d56d 100644 --- a/README.md +++ b/README.md @@ -410,17 +410,7 @@ This project uses several tools to ensure code quality: - `mypy` for type checking - `ruff` for linting -You can run all these checks using the provided script: - -```bash -# Check code quality -./check_code_quality.py - -# Fix issues automatically where possible -./check_code_quality.py --fix -``` - -Or run each tool individually: +The recommended way to run all these checks is using pre-commit hooks (see [Pre-commit Hooks](#pre-commit-hooks) section below). You can also run each tool individually: ```bash # Format code diff --git a/check_code_quality.py b/check_code_quality.py deleted file mode 100755 index 5383c54..0000000 --- a/check_code_quality.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 -"""Script to run code quality checks for the BIS Scraper package.""" - -import argparse -import subprocess -import sys -from pathlib import Path - - -def run_command(command: list[str], name: str) -> bool: - """Run a shell command and report its success. - - Args: - command: Command to run as a list of arguments - name: Name of the tool being run - - Returns: - True if the command succeeded, False otherwise - """ - print(f"\nRunning {name}...") - try: - subprocess.run(command, check=True) - print(f"āœ… {name} passed") - return True - except subprocess.CalledProcessError: - print(f"āŒ {name} failed") - return False - - -def main() -> int: - """Run code quality checks. 
- - Returns: - Exit code (0 for success, non-zero for failure) - """ - parser = argparse.ArgumentParser(description="Run code quality checks") - parser.add_argument("--fix", action="store_true", help="Attempt to fix issues automatically") - args = parser.parse_args() - - pkg_dir = Path("bis_scraper") - test_dir = Path("tests") - - # Check if directories exist - if not pkg_dir.exists() or not test_dir.exists(): - print(f"Error: Could not find required directories: {pkg_dir} and {test_dir}") - return 1 - - # Define commands to run - commands = [] - - # Black (code formatting) - black_cmd = ["black"] - if not args.fix: - black_cmd.append("--check") - black_cmd.extend([str(pkg_dir), str(test_dir)]) - commands.append((black_cmd, "Black (code formatting)")) - - # isort (import sorting) - isort_cmd = ["isort"] - if not args.fix: - isort_cmd.append("--check") - isort_cmd.extend([str(pkg_dir), str(test_dir)]) - commands.append((isort_cmd, "isort (import sorting)")) - - # mypy (type checking) - mypy_cmd = ["mypy", str(pkg_dir)] - commands.append((mypy_cmd, "mypy (type checking)")) - - # ruff (linting) - ruff_cmd = ["ruff", "check"] - if args.fix: - ruff_cmd.append("--fix") - ruff_cmd.extend([str(pkg_dir), str(test_dir)]) - commands.append((ruff_cmd, "ruff (linting)")) - - # Run all commands - results = [] - for cmd, name in commands: - cmd_result = run_command(cmd, name) - results.append(cmd_result) - - # Print summary - print("\n" + "=" * 50) - print("SUMMARY") - print("=" * 50) - - all_passed = True - for i, (cmd, name) in enumerate(commands): - status = "PASS" if results[i] else "FAIL" - print(f"{status}: {name}") - if not results[i]: - all_passed = False - - if all_passed: - print("\nāœ… All checks passed!") - return 0 - else: - print("\nāŒ Some checks failed.") - if not args.fix: - print("Run with --fix to attempt to automatically fix issues") - return 1 - - -if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file diff --git a/run_tests.py 
b/run_tests.py deleted file mode 100755 index 2a4dce9..0000000 --- a/run_tests.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python3 -"""Script to run all tests for the BIS Scraper package.""" - -import argparse -import sys -import unittest -from pathlib import Path - - -def run_tests(test_type: str = "all", verbose: bool = False) -> int: - """Run the specified tests. - - Args: - test_type: Type of tests to run ("unit", "integration", or "all") - verbose: Whether to run tests in verbose mode - - Returns: - Exit code (0 for success, non-zero for failure) - """ - # Set verbosity level - verbosity = 2 if verbose else 1 - - # Find the test directory - test_dir = Path(__file__).parent / "tests" - - # Create test loader - loader = unittest.TestLoader() - - if test_type == "unit" or test_type == "all": - print("Running unit tests...") - unit_dir = test_dir / "unit" - - # Run each unit test file separately to avoid Responses state issues - unit_test_files = list(unit_dir.glob("test_*.py")) - for test_file in unit_test_files: - print(f"\nRunning {test_file.name}...") - file_tests = loader.discover(start_dir=str(unit_dir), pattern=test_file.name) - unit_runner = unittest.TextTestRunner(verbosity=verbosity) - unit_result = unit_runner.run(file_tests) - if not unit_result.wasSuccessful(): - return 1 - - if test_type == "integration" or test_type == "all": - print("\nRunning integration tests...") - # For integration tests, we'll manually load them - # This is more direct than using discover, which can have path issues - sys.path.insert(0, str(Path(__file__).parent)) - - try: - from tests.integration.test_workflow import TestCompleteWorkflow - - # Create test suite and run it - suite = unittest.TestLoader().loadTestsFromTestCase(TestCompleteWorkflow) - integration_runner = unittest.TextTestRunner(verbosity=verbosity) - integration_result = integration_runner.run(suite) - if not integration_result.wasSuccessful(): - return 1 - except ImportError as e: - print(f"Error 
importing integration tests: {e}") - return 1 - - if test_type == "all": - print("\nRunning CLI tests...") - # Manually import CLI tests to avoid discover path import issues - project_root = Path(__file__).parent - sys.path.insert(0, str(project_root)) - try: - from tests.test_cli import TestCli - - cli_suite = unittest.TestLoader().loadTestsFromTestCase(TestCli) - cli_runner = unittest.TextTestRunner(verbosity=verbosity) - cli_result = cli_runner.run(cli_suite) - if not cli_result.wasSuccessful(): - return 1 - except ImportError as e: - print(f"Error importing CLI tests: {e}") - return 1 - - print("\nAll tests passed!") - return 0 - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run BIS Scraper tests") - parser.add_argument( - "--type", - choices=["unit", "integration", "all"], - default="all", - help="Type of tests to run (unit, integration, or all)" - ) - parser.add_argument( - "--verbose", - action="store_true", - help="Run tests in verbose mode" - ) - - args = parser.parse_args() - sys.exit(run_tests(args.type, args.verbose)) \ No newline at end of file From 07712f11a8e793a0cc1e79e80fbfb2a2b11488ba Mon Sep 17 00:00:00 2001 From: Magnus Hansson Date: Sun, 9 Nov 2025 11:23:54 +0100 Subject: [PATCH 3/3] Apply pre-commit hook fixes (trailing whitespace and end-of-file fixes) --- docs/api.md | 2 +- docs/test_coverage.md | 2 +- install.sh | 2 +- scripts/README.md | 6 +++--- scripts/analyze_results.sh | 16 ++++++++-------- temp/cache_demo.py | 28 ++++++++++++++-------------- temp/cache_improvement_summary.md | 2 +- temp/progress_summary.md | 2 +- temp/project_plan.md | 2 +- temp/summary.md | 2 +- 10 files changed, 32 insertions(+), 32 deletions(-) diff --git a/docs/api.md b/docs/api.md index 1b7db82..962f9f0 100644 --- a/docs/api.md +++ b/docs/api.md @@ -247,4 +247,4 @@ bis-scraper --help - `--force`: Force re-download or re-conversion - `--limit INTEGER`: Maximum number of speeches to process - `--data-dir DIRECTORY`: Base directory 
for data storage -- `--log-dir DIRECTORY`: Directory for log files \ No newline at end of file +- `--log-dir DIRECTORY`: Directory for log files diff --git a/docs/test_coverage.md b/docs/test_coverage.md index 5ea9100..9beab07 100644 --- a/docs/test_coverage.md +++ b/docs/test_coverage.md @@ -71,4 +71,4 @@ When adding new features or modifying existing code, please ensure: 4. **Tests use mocks appropriately** to avoid external dependencies 5. **Integration impacts are tested** when changing core components -The project uses GitHub Actions to automatically run tests on pull requests, ensuring that all contributions maintain the expected quality standards. \ No newline at end of file +The project uses GitHub Actions to automatically run tests on pull requests, ensuring that all contributions maintain the expected quality standards. diff --git a/install.sh b/install.sh index a7cf28e..a0f2865 100755 --- a/install.sh +++ b/install.sh @@ -22,4 +22,4 @@ pip install -e . echo "=== Installation complete ===" echo "To activate the environment, run: source .venv/bin/activate" -echo "To use the package, run: bis-scraper --help" \ No newline at end of file +echo "To use the package, run: bis-scraper --help" diff --git a/scripts/README.md b/scripts/README.md index fd4a4eb..e965efa 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -45,13 +45,13 @@ If no data directory is specified, the script will use the default (`$HOME/bis_f ```bash nano scripts/run_full_scrape.sh ``` - + 2. 
**Run the scraping process**: ```bash cd scripts ./run_full_scrape.sh ``` - + For long-running jobs, consider using screen or tmux: ```bash screen -S bis_scraper @@ -65,4 +65,4 @@ If no data directory is specified, the script will use the default (`$HOME/bis_f ```bash cd scripts ./analyze_results.sh - ``` \ No newline at end of file + ``` diff --git a/scripts/analyze_results.sh b/scripts/analyze_results.sh index 83eff24..cc7d400 100755 --- a/scripts/analyze_results.sh +++ b/scripts/analyze_results.sh @@ -39,7 +39,7 @@ if check_directory "$LOG_DIR"; then echo "Log files found in $LOG_DIR" log_count=$(find "$LOG_DIR" -type f -name "*.log" | wc -l) echo "Number of log files: $log_count" - + # Find the most recent log file most_recent=$(find "$LOG_DIR" -type f -name "*.log" -printf "%T@ %p\n" | sort -n | tail -1 | cut -f2 -d' ') if [ ! -z "$most_recent" ]; then @@ -56,11 +56,11 @@ fi # Analyze PDF data if check_directory "$PDF_DIR"; then echo -e "\n======= PDF files analysis =======" - + # Total PDFs total_pdfs=$(find "$PDF_DIR" -type f -name "*.pdf" | wc -l) echo "Total PDF files: $total_pdfs" - + # Count by institution echo -e "\nPDF files by institution:" echo "---" @@ -72,7 +72,7 @@ if check_directory "$PDF_DIR"; then fi done echo "---" - + # Count by year (based on filename pattern YYMMDD[a-z].pdf) echo -e "\nPDF files by year:" echo "---" @@ -95,11 +95,11 @@ fi # Analyze TXT data if check_directory "$TXT_DIR"; then echo -e "\n======= Text files analysis =======" - + # Total TXTs total_txts=$(find "$TXT_DIR" -type f -name "*.txt" | wc -l) echo "Total TXT files: $total_txts" - + # Count by institution echo -e "\nTXT files by institution:" echo "---" @@ -111,7 +111,7 @@ if check_directory "$TXT_DIR"; then fi done echo "---" - + # Conversion success rate if [ $total_pdfs -gt 0 ]; then success_rate=$(echo "scale=2; $total_txts * 100 / $total_pdfs" | bc) @@ -123,4 +123,4 @@ fi echo -e "\n========================================================" echo "Analysis complete" 
-echo "========================================================" \ No newline at end of file +echo "========================================================" diff --git a/temp/cache_demo.py b/temp/cache_demo.py index 5901afe..6f0eae0 100644 --- a/temp/cache_demo.py +++ b/temp/cache_demo.py @@ -13,61 +13,61 @@ def main() -> None: # Create a temporary directory for the demo demo_dir = Path("temp/demo_output") demo_dir.mkdir(parents=True, exist_ok=True) - + # Initialize scraper scraper = BisScraper(demo_dir) - + # Test dates (using dates from 2020 as they likely have speeches) test_dates = [ datetime.date(2020, 1, 1), datetime.date(2020, 1, 2), datetime.date(2020, 1, 3), ] - + print("=== BIS Scraper Date Cache Demo ===\n") - + # First run - no cache print("šŸ” First run (no cache):") start_time = time.time() - + for date in test_dates: print(f" Checking {date}...", end=" ", flush=True) date_start = time.time() scraper.scrape_date(date) date_time = time.time() - date_start print(f"took {date_time:.2f}s") - + first_run_time = time.time() - start_time print(f"\n Total time: {first_run_time:.2f}s") print(f" Results: {scraper.result.downloaded} downloaded, {scraper.result.skipped} skipped") - + # Save the cache scraper._save_date_cache() - + # Second run - with cache print("\nšŸš€ Second run (with cache):") scraper2 = BisScraper(demo_dir) # New instance will load the cache start_time = time.time() - + for date in test_dates: print(f" Checking {date}...", end=" ", flush=True) date_start = time.time() scraper2.scrape_date(date) date_time = time.time() - date_start print(f"took {date_time:.3f}s") - + second_run_time = time.time() - start_time print(f"\n Total time: {second_run_time:.3f}s") print(f" Results: {scraper2.result.downloaded} downloaded, {scraper2.result.skipped} skipped") - + # Show improvement improvement = (first_run_time - second_run_time) / first_run_time * 100 speedup = first_run_time / second_run_time if second_run_time > 0 else float('inf') - + print(f"\n✨ 
Performance improvement:") print(f" Time saved: {first_run_time - second_run_time:.2f}s ({improvement:.1f}%)") print(f" Speedup: {speedup:.0f}x faster") - + # Show cache info cache_file = demo_dir / ".bis_scraper_date_cache.json" if cache_file.exists(): @@ -76,4 +76,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/temp/cache_improvement_summary.md b/temp/cache_improvement_summary.md index 7408ff0..d80006a 100644 --- a/temp/cache_improvement_summary.md +++ b/temp/cache_improvement_summary.md @@ -74,4 +74,4 @@ bis-scraper scrape --force --start-date 2020-01-01 --end-date 2020-12-31 ## Backward Compatibility - Fully backward compatible - existing scripts work without changes - Cache is created automatically on first run -- Old installations can be upgraded without issues \ No newline at end of file +- Old installations can be upgraded without issues diff --git a/temp/progress_summary.md b/temp/progress_summary.md index 0ca846c..a2b9d84 100644 --- a/temp/progress_summary.md +++ b/temp/progress_summary.md @@ -70,4 +70,4 @@ The BIS Scraper project has been successfully transformed from its original impl 5. Extensive test coverage 6. CI/CD integration -These improvements make the package more maintainable, extensible, and user-friendly, providing a solid foundation for future enhancements. \ No newline at end of file +These improvements make the package more maintainable, extensible, and user-friendly, providing a solid foundation for future enhancements. diff --git a/temp/project_plan.md b/temp/project_plan.md index a66f190..5fde55c 100644 --- a/temp/project_plan.md +++ b/temp/project_plan.md @@ -131,4 +131,4 @@ The BIS Scraper is a Python package designed to download and process speeches fr The BIS Scraper project has successfully transitioned from its original implementation to a modern, well-structured Python package. 
The current focus is on stabilizing the core functionality and ensuring all original features are preserved with improved implementation. -The next steps involve comprehensive testing, detailed documentation, and incremental feature additions to enhance the package's capabilities for central bank speech analysis. \ No newline at end of file +The next steps involve comprehensive testing, detailed documentation, and incremental feature additions to enhance the package's capabilities for central bank speech analysis. diff --git a/temp/summary.md b/temp/summary.md index 0280eff..b4e6368 100644 --- a/temp/summary.md +++ b/temp/summary.md @@ -127,4 +127,4 @@ convert_pdfs( 2. **CI/CD Pipeline**: Set up GitHub Actions for automated testing 3. **Documentation Site**: Consider generating API documentation with Sphinx 4. **More Tests**: Add more comprehensive testing, especially integration tests -5. **Performance Optimization**: Explore options for parallel processing \ No newline at end of file +5. **Performance Optimization**: Explore options for parallel processing