From e2a41cf87dfbdf26eeda042235e938183c58da51 Mon Sep 17 00:00:00 2001 From: Magnus Hansson Date: Sun, 9 Nov 2025 11:17:05 +0100 Subject: [PATCH 1/3] Fix CI mypy errors and add pre-commit hooks - Fix type annotations: change Optional[click.DateTime] to Optional[datetime.datetime] - Fix import sorting in converters (datetime before typing) - Remove unused import in test_workflow.py - Add .pre-commit-config.yaml with hooks matching CI pipeline - Add pre-commit to dev dependencies - Add pre-commit hooks documentation to README - Format code with black --- .pre-commit-config.yaml | 46 ++++++++++++++ README.md | 79 +++++++++++++++++++++++-- bis_scraper/cli/main.py | 8 +-- bis_scraper/converters/controller.py | 2 +- bis_scraper/converters/pdf_converter.py | 2 +- pyproject.toml | 3 +- tests/integration/test_workflow.py | 2 +- tests/test_cli.py | 4 +- 8 files changed, 129 insertions(+), 17 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..82cf089 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,46 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + + - repo: local + hooks: + - id: pytest + name: pytest + entry: pytest + language: system + pass_filenames: false + always_run: true + + - id: mypy + name: mypy + entry: mypy bis_scraper + language: system + pass_filenames: false + always_run: true + + - id: black + name: black + entry: black --check bis_scraper tests + language: system + pass_filenames: false + always_run: true + + - id: isort + name: isort + entry: isort --check bis_scraper tests + language: system + pass_filenames: false + always_run: true + + - id: ruff + name: ruff + entry: ruff check bis_scraper tests + language: system + pass_filenames: false + always_run: true + diff --git a/README.md b/README.md index 
d4cac35..1891081 100644 --- a/README.md +++ b/README.md @@ -321,30 +321,30 @@ def analyze_speeches(data_dir, institution, keywords): # Path to text files for the institution institution_dir = Path(data_dir) / "texts" / institution.lower().replace(" ", "_") results = [] - + # Process each text file for txt_file in glob.glob(f"{institution_dir}/*.txt"): file_code = os.path.basename(txt_file).split('.')[0] - + with open(txt_file, 'r', encoding='utf-8') as f: text = f.read().lower() - + # Count keywords word_counts = {} for keyword in keywords: pattern = r'\b' + re.escape(keyword.lower()) + r'\b' word_counts[keyword] = len(re.findall(pattern, text)) - + # Get total word count total_words = len(re.findall(r'\b\w+\b', text)) - + # Add to results results.append({ 'file_code': file_code, 'total_words': total_words, **word_counts }) - + # Convert to DataFrame for analysis df = pd.DataFrame(results) return df @@ -434,6 +434,73 @@ mypy bis_scraper ruff bis_scraper tests ``` +### Pre-commit Hooks + +This project uses [pre-commit](https://pre-commit.com/) hooks to automatically run the full CI pipeline locally before each commit. This ensures that all code quality checks pass before pushing to the repository. + +#### Installation + +First, install pre-commit (if not already installed). If you've installed the dev dependencies, pre-commit is already included: + +```bash +# If you've installed dev dependencies, pre-commit is already available +pip install -e ".[dev]" + +# Or install pre-commit separately +pip install pre-commit +``` + +Then install the git hooks: + +```bash +pre-commit install +``` + +This will set up the hooks to run automatically on every commit. 
+ +#### Running Manually + +You can run all pre-commit hooks manually on all files: + +```bash +pre-commit run --all-files +``` + +To run a specific hook: + +```bash +pre-commit run <hook-id> --all-files +``` + +For example: +```bash +pre-commit run pytest --all-files +pre-commit run mypy --all-files +pre-commit run black --all-files +``` + +#### What the Hooks Do + +The pre-commit hooks run the same checks as the CI pipeline: + +1. **pytest** - Runs all tests +2. **mypy** - Type checking on `bis_scraper` package +3. **black** - Code formatting check +4. **isort** - Import sorting check +5. **ruff** - Linting + +If any hook fails, the commit will be blocked. Fix the issues and try committing again. + +#### Skipping Hooks (Not Recommended) + +If you need to skip hooks for a specific commit (not recommended), you can use: + +```bash +git commit --no-verify +``` + +However, the CI pipeline will still run these checks, so it's better to fix issues locally. + ## Contributing Contributions are welcome! Please feel free to submit a Pull Request. 
diff --git a/bis_scraper/cli/main.py b/bis_scraper/cli/main.py index 5c6eb03..042de28 100644 --- a/bis_scraper/cli/main.py +++ b/bis_scraper/cli/main.py @@ -179,8 +179,8 @@ def scrape( @click.pass_context def convert( ctx: click.Context, - start_date: Optional[click.DateTime], - end_date: Optional[click.DateTime], + start_date: Optional[datetime.datetime], + end_date: Optional[datetime.datetime], institutions: Tuple[str, ...], force: bool, limit: Optional[int], @@ -238,8 +238,8 @@ def convert( @click.pass_context def run_all( ctx: click.Context, - start_date: Optional[click.DateTime], - end_date: Optional[click.DateTime], + start_date: Optional[datetime.datetime], + end_date: Optional[datetime.datetime], institutions: tuple[str, ...], force: bool, limit: Optional[int], diff --git a/bis_scraper/converters/controller.py b/bis_scraper/converters/controller.py index d094700..9a3213f 100644 --- a/bis_scraper/converters/controller.py +++ b/bis_scraper/converters/controller.py @@ -1,10 +1,10 @@ """Controller module for PDF to text conversion operations.""" +import datetime import logging import time from pathlib import Path from typing import Optional, Tuple -import datetime from bis_scraper.converters.pdf_converter import PdfConverter from bis_scraper.models import ConversionResult diff --git a/bis_scraper/converters/pdf_converter.py b/bis_scraper/converters/pdf_converter.py index 15497fd..fe3724b 100644 --- a/bis_scraper/converters/pdf_converter.py +++ b/bis_scraper/converters/pdf_converter.py @@ -1,9 +1,9 @@ """PDF to text converter implementation.""" +import datetime import logging from pathlib import Path from typing import List, Optional -import datetime import textract # type: ignore diff --git a/pyproject.toml b/pyproject.toml index e5c1b4f..db3422b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dev = [ "responses>=0.22.0", "types-requests>=2.28.0", "types-beautifulsoup4>=4.8.0", + "pre-commit>=3.0.0", ] [project.urls] @@ -83,4 +84,4 @@ ignore 
= [] "bis_scraper/scrapers/**/*.py" = ["E501"] "bis_scraper/converters/**/*.py" = ["E501"] "bis_scraper/utils/**/*.py" = ["E501"] -"bis_scraper/cli/**/*.py" = ["E501"] \ No newline at end of file +"bis_scraper/cli/**/*.py" = ["E501"] diff --git a/tests/integration/test_workflow.py b/tests/integration/test_workflow.py index 33c57fc..49dae09 100644 --- a/tests/integration/test_workflow.py +++ b/tests/integration/test_workflow.py @@ -9,7 +9,7 @@ import responses -from bis_scraper.converters.controller import convert_pdfs, convert_pdfs_dates +from bis_scraper.converters.controller import convert_pdfs_dates from bis_scraper.scrapers.controller import scrape_bis from bis_scraper.utils.constants import HTML_EXTENSION, PDF_EXTENSION, SPEECHES_URL diff --git a/tests/test_cli.py b/tests/test_cli.py index f2e8641..3eb584f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -46,9 +46,7 @@ def test_convert_command(self, mock_convert) -> None: # Set up the mock to return a simple result from bis_scraper.models import ConversionResult - mock_convert.return_value = ConversionResult( - successful=5, skipped=2, failed=1 - ) + mock_convert.return_value = ConversionResult(successful=5, skipped=2, failed=1) # Test with verbose and institution result = self.runner.invoke( From 362c32a8143fd814c7ee88f0634ac43f7b9ede6a Mon Sep 17 00:00:00 2001 From: Magnus Hansson Date: Sun, 9 Nov 2025 11:21:23 +0100 Subject: [PATCH 2/3] Remove redundant check_code_quality.py and run_tests.py scripts These scripts are now redundant since pre-commit hooks run all the same checks. Pre-commit hooks provide a better developer experience and ensure consistency. 
--- README.md | 12 +---- check_code_quality.py | 105 ------------------------------------------ run_tests.py | 98 --------------------------------------- 3 files changed, 1 insertion(+), 214 deletions(-) delete mode 100755 check_code_quality.py delete mode 100755 run_tests.py diff --git a/README.md b/README.md index 1891081..ea7d56d 100644 --- a/README.md +++ b/README.md @@ -410,17 +410,7 @@ This project uses several tools to ensure code quality: - `mypy` for type checking - `ruff` for linting -You can run all these checks using the provided script: - -```bash -# Check code quality -./check_code_quality.py - -# Fix issues automatically where possible -./check_code_quality.py --fix -``` - -Or run each tool individually: +The recommended way to run all these checks is using pre-commit hooks (see [Pre-commit Hooks](#pre-commit-hooks) section below). You can also run each tool individually: ```bash # Format code diff --git a/check_code_quality.py b/check_code_quality.py deleted file mode 100755 index 5383c54..0000000 --- a/check_code_quality.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 -"""Script to run code quality checks for the BIS Scraper package.""" - -import argparse -import subprocess -import sys -from pathlib import Path - - -def run_command(command: list[str], name: str) -> bool: - """Run a shell command and report its success. - - Args: - command: Command to run as a list of arguments - name: Name of the tool being run - - Returns: - True if the command succeeded, False otherwise - """ - print(f"\nRunning {name}...") - try: - subprocess.run(command, check=True) - print(f"āœ… {name} passed") - return True - except subprocess.CalledProcessError: - print(f"āŒ {name} failed") - return False - - -def main() -> int: - """Run code quality checks. 
- - Returns: - Exit code (0 for success, non-zero for failure) - """ - parser = argparse.ArgumentParser(description="Run code quality checks") - parser.add_argument("--fix", action="store_true", help="Attempt to fix issues automatically") - args = parser.parse_args() - - pkg_dir = Path("bis_scraper") - test_dir = Path("tests") - - # Check if directories exist - if not pkg_dir.exists() or not test_dir.exists(): - print(f"Error: Could not find required directories: {pkg_dir} and {test_dir}") - return 1 - - # Define commands to run - commands = [] - - # Black (code formatting) - black_cmd = ["black"] - if not args.fix: - black_cmd.append("--check") - black_cmd.extend([str(pkg_dir), str(test_dir)]) - commands.append((black_cmd, "Black (code formatting)")) - - # isort (import sorting) - isort_cmd = ["isort"] - if not args.fix: - isort_cmd.append("--check") - isort_cmd.extend([str(pkg_dir), str(test_dir)]) - commands.append((isort_cmd, "isort (import sorting)")) - - # mypy (type checking) - mypy_cmd = ["mypy", str(pkg_dir)] - commands.append((mypy_cmd, "mypy (type checking)")) - - # ruff (linting) - ruff_cmd = ["ruff", "check"] - if args.fix: - ruff_cmd.append("--fix") - ruff_cmd.extend([str(pkg_dir), str(test_dir)]) - commands.append((ruff_cmd, "ruff (linting)")) - - # Run all commands - results = [] - for cmd, name in commands: - cmd_result = run_command(cmd, name) - results.append(cmd_result) - - # Print summary - print("\n" + "=" * 50) - print("SUMMARY") - print("=" * 50) - - all_passed = True - for i, (cmd, name) in enumerate(commands): - status = "PASS" if results[i] else "FAIL" - print(f"{status}: {name}") - if not results[i]: - all_passed = False - - if all_passed: - print("\nāœ… All checks passed!") - return 0 - else: - print("\nāŒ Some checks failed.") - if not args.fix: - print("Run with --fix to attempt to automatically fix issues") - return 1 - - -if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file diff --git a/run_tests.py 
b/run_tests.py deleted file mode 100755 index 2a4dce9..0000000 --- a/run_tests.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python3 -"""Script to run all tests for the BIS Scraper package.""" - -import argparse -import sys -import unittest -from pathlib import Path - - -def run_tests(test_type: str = "all", verbose: bool = False) -> int: - """Run the specified tests. - - Args: - test_type: Type of tests to run ("unit", "integration", or "all") - verbose: Whether to run tests in verbose mode - - Returns: - Exit code (0 for success, non-zero for failure) - """ - # Set verbosity level - verbosity = 2 if verbose else 1 - - # Find the test directory - test_dir = Path(__file__).parent / "tests" - - # Create test loader - loader = unittest.TestLoader() - - if test_type == "unit" or test_type == "all": - print("Running unit tests...") - unit_dir = test_dir / "unit" - - # Run each unit test file separately to avoid Responses state issues - unit_test_files = list(unit_dir.glob("test_*.py")) - for test_file in unit_test_files: - print(f"\nRunning {test_file.name}...") - file_tests = loader.discover(start_dir=str(unit_dir), pattern=test_file.name) - unit_runner = unittest.TextTestRunner(verbosity=verbosity) - unit_result = unit_runner.run(file_tests) - if not unit_result.wasSuccessful(): - return 1 - - if test_type == "integration" or test_type == "all": - print("\nRunning integration tests...") - # For integration tests, we'll manually load them - # This is more direct than using discover, which can have path issues - sys.path.insert(0, str(Path(__file__).parent)) - - try: - from tests.integration.test_workflow import TestCompleteWorkflow - - # Create test suite and run it - suite = unittest.TestLoader().loadTestsFromTestCase(TestCompleteWorkflow) - integration_runner = unittest.TextTestRunner(verbosity=verbosity) - integration_result = integration_runner.run(suite) - if not integration_result.wasSuccessful(): - return 1 - except ImportError as e: - print(f"Error 
importing integration tests: {e}") - return 1 - - if test_type == "all": - print("\nRunning CLI tests...") - # Manually import CLI tests to avoid discover path import issues - project_root = Path(__file__).parent - sys.path.insert(0, str(project_root)) - try: - from tests.test_cli import TestCli - - cli_suite = unittest.TestLoader().loadTestsFromTestCase(TestCli) - cli_runner = unittest.TextTestRunner(verbosity=verbosity) - cli_result = cli_runner.run(cli_suite) - if not cli_result.wasSuccessful(): - return 1 - except ImportError as e: - print(f"Error importing CLI tests: {e}") - return 1 - - print("\nAll tests passed!") - return 0 - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run BIS Scraper tests") - parser.add_argument( - "--type", - choices=["unit", "integration", "all"], - default="all", - help="Type of tests to run (unit, integration, or all)" - ) - parser.add_argument( - "--verbose", - action="store_true", - help="Run tests in verbose mode" - ) - - args = parser.parse_args() - sys.exit(run_tests(args.type, args.verbose)) \ No newline at end of file From 07712f11a8e793a0cc1e79e80fbfb2a2b11488ba Mon Sep 17 00:00:00 2001 From: Magnus Hansson Date: Sun, 9 Nov 2025 11:23:54 +0100 Subject: [PATCH 3/3] Apply pre-commit hook fixes (trailing whitespace and end-of-file fixes) --- docs/api.md | 2 +- docs/test_coverage.md | 2 +- install.sh | 2 +- scripts/README.md | 6 +++--- scripts/analyze_results.sh | 16 ++++++++-------- temp/cache_demo.py | 28 ++++++++++++++-------------- temp/cache_improvement_summary.md | 2 +- temp/progress_summary.md | 2 +- temp/project_plan.md | 2 +- temp/summary.md | 2 +- 10 files changed, 32 insertions(+), 32 deletions(-) diff --git a/docs/api.md b/docs/api.md index 1b7db82..962f9f0 100644 --- a/docs/api.md +++ b/docs/api.md @@ -247,4 +247,4 @@ bis-scraper --help - `--force`: Force re-download or re-conversion - `--limit INTEGER`: Maximum number of speeches to process - `--data-dir DIRECTORY`: Base directory 
for data storage -- `--log-dir DIRECTORY`: Directory for log files \ No newline at end of file +- `--log-dir DIRECTORY`: Directory for log files diff --git a/docs/test_coverage.md b/docs/test_coverage.md index 5ea9100..9beab07 100644 --- a/docs/test_coverage.md +++ b/docs/test_coverage.md @@ -71,4 +71,4 @@ When adding new features or modifying existing code, please ensure: 4. **Tests use mocks appropriately** to avoid external dependencies 5. **Integration impacts are tested** when changing core components -The project uses GitHub Actions to automatically run tests on pull requests, ensuring that all contributions maintain the expected quality standards. \ No newline at end of file +The project uses GitHub Actions to automatically run tests on pull requests, ensuring that all contributions maintain the expected quality standards. diff --git a/install.sh b/install.sh index a7cf28e..a0f2865 100755 --- a/install.sh +++ b/install.sh @@ -22,4 +22,4 @@ pip install -e . echo "=== Installation complete ===" echo "To activate the environment, run: source .venv/bin/activate" -echo "To use the package, run: bis-scraper --help" \ No newline at end of file +echo "To use the package, run: bis-scraper --help" diff --git a/scripts/README.md b/scripts/README.md index fd4a4eb..e965efa 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -45,13 +45,13 @@ If no data directory is specified, the script will use the default (`$HOME/bis_f ```bash nano scripts/run_full_scrape.sh ``` - + 2. 
**Run the scraping process**: ```bash cd scripts ./run_full_scrape.sh ``` - + For long-running jobs, consider using screen or tmux: ```bash screen -S bis_scraper @@ -65,4 +65,4 @@ If no data directory is specified, the script will use the default (`$HOME/bis_f ```bash cd scripts ./analyze_results.sh - ``` \ No newline at end of file + ``` diff --git a/scripts/analyze_results.sh b/scripts/analyze_results.sh index 83eff24..cc7d400 100755 --- a/scripts/analyze_results.sh +++ b/scripts/analyze_results.sh @@ -39,7 +39,7 @@ if check_directory "$LOG_DIR"; then echo "Log files found in $LOG_DIR" log_count=$(find "$LOG_DIR" -type f -name "*.log" | wc -l) echo "Number of log files: $log_count" - + # Find the most recent log file most_recent=$(find "$LOG_DIR" -type f -name "*.log" -printf "%T@ %p\n" | sort -n | tail -1 | cut -f2 -d' ') if [ ! -z "$most_recent" ]; then @@ -56,11 +56,11 @@ fi # Analyze PDF data if check_directory "$PDF_DIR"; then echo -e "\n======= PDF files analysis =======" - + # Total PDFs total_pdfs=$(find "$PDF_DIR" -type f -name "*.pdf" | wc -l) echo "Total PDF files: $total_pdfs" - + # Count by institution echo -e "\nPDF files by institution:" echo "---" @@ -72,7 +72,7 @@ if check_directory "$PDF_DIR"; then fi done echo "---" - + # Count by year (based on filename pattern YYMMDD[a-z].pdf) echo -e "\nPDF files by year:" echo "---" @@ -95,11 +95,11 @@ fi # Analyze TXT data if check_directory "$TXT_DIR"; then echo -e "\n======= Text files analysis =======" - + # Total TXTs total_txts=$(find "$TXT_DIR" -type f -name "*.txt" | wc -l) echo "Total TXT files: $total_txts" - + # Count by institution echo -e "\nTXT files by institution:" echo "---" @@ -111,7 +111,7 @@ if check_directory "$TXT_DIR"; then fi done echo "---" - + # Conversion success rate if [ $total_pdfs -gt 0 ]; then success_rate=$(echo "scale=2; $total_txts * 100 / $total_pdfs" | bc) @@ -123,4 +123,4 @@ fi echo -e "\n========================================================" echo "Analysis complete" 
-echo "========================================================" \ No newline at end of file +echo "========================================================" diff --git a/temp/cache_demo.py b/temp/cache_demo.py index 5901afe..6f0eae0 100644 --- a/temp/cache_demo.py +++ b/temp/cache_demo.py @@ -13,61 +13,61 @@ def main() -> None: # Create a temporary directory for the demo demo_dir = Path("temp/demo_output") demo_dir.mkdir(parents=True, exist_ok=True) - + # Initialize scraper scraper = BisScraper(demo_dir) - + # Test dates (using dates from 2020 as they likely have speeches) test_dates = [ datetime.date(2020, 1, 1), datetime.date(2020, 1, 2), datetime.date(2020, 1, 3), ] - + print("=== BIS Scraper Date Cache Demo ===\n") - + # First run - no cache print("šŸ” First run (no cache):") start_time = time.time() - + for date in test_dates: print(f" Checking {date}...", end=" ", flush=True) date_start = time.time() scraper.scrape_date(date) date_time = time.time() - date_start print(f"took {date_time:.2f}s") - + first_run_time = time.time() - start_time print(f"\n Total time: {first_run_time:.2f}s") print(f" Results: {scraper.result.downloaded} downloaded, {scraper.result.skipped} skipped") - + # Save the cache scraper._save_date_cache() - + # Second run - with cache print("\nšŸš€ Second run (with cache):") scraper2 = BisScraper(demo_dir) # New instance will load the cache start_time = time.time() - + for date in test_dates: print(f" Checking {date}...", end=" ", flush=True) date_start = time.time() scraper2.scrape_date(date) date_time = time.time() - date_start print(f"took {date_time:.3f}s") - + second_run_time = time.time() - start_time print(f"\n Total time: {second_run_time:.3f}s") print(f" Results: {scraper2.result.downloaded} downloaded, {scraper2.result.skipped} skipped") - + # Show improvement improvement = (first_run_time - second_run_time) / first_run_time * 100 speedup = first_run_time / second_run_time if second_run_time > 0 else float('inf') - + print(f"\n✨ 
Performance improvement:") print(f" Time saved: {first_run_time - second_run_time:.2f}s ({improvement:.1f}%)") print(f" Speedup: {speedup:.0f}x faster") - + # Show cache info cache_file = demo_dir / ".bis_scraper_date_cache.json" if cache_file.exists(): @@ -76,4 +76,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/temp/cache_improvement_summary.md b/temp/cache_improvement_summary.md index 7408ff0..d80006a 100644 --- a/temp/cache_improvement_summary.md +++ b/temp/cache_improvement_summary.md @@ -74,4 +74,4 @@ bis-scraper scrape --force --start-date 2020-01-01 --end-date 2020-12-31 ## Backward Compatibility - Fully backward compatible - existing scripts work without changes - Cache is created automatically on first run -- Old installations can be upgraded without issues \ No newline at end of file +- Old installations can be upgraded without issues diff --git a/temp/progress_summary.md b/temp/progress_summary.md index 0ca846c..a2b9d84 100644 --- a/temp/progress_summary.md +++ b/temp/progress_summary.md @@ -70,4 +70,4 @@ The BIS Scraper project has been successfully transformed from its original impl 5. Extensive test coverage 6. CI/CD integration -These improvements make the package more maintainable, extensible, and user-friendly, providing a solid foundation for future enhancements. \ No newline at end of file +These improvements make the package more maintainable, extensible, and user-friendly, providing a solid foundation for future enhancements. diff --git a/temp/project_plan.md b/temp/project_plan.md index a66f190..5fde55c 100644 --- a/temp/project_plan.md +++ b/temp/project_plan.md @@ -131,4 +131,4 @@ The BIS Scraper is a Python package designed to download and process speeches fr The BIS Scraper project has successfully transitioned from its original implementation to a modern, well-structured Python package. 
The current focus is on stabilizing the core functionality and ensuring all original features are preserved with improved implementation. -The next steps involve comprehensive testing, detailed documentation, and incremental feature additions to enhance the package's capabilities for central bank speech analysis. \ No newline at end of file +The next steps involve comprehensive testing, detailed documentation, and incremental feature additions to enhance the package's capabilities for central bank speech analysis. diff --git a/temp/summary.md b/temp/summary.md index 0280eff..b4e6368 100644 --- a/temp/summary.md +++ b/temp/summary.md @@ -127,4 +127,4 @@ convert_pdfs( 2. **CI/CD Pipeline**: Set up GitHub Actions for automated testing 3. **Documentation Site**: Consider generating API documentation with Sphinx 4. **More Tests**: Add more comprehensive testing, especially integration tests -5. **Performance Optimization**: Explore options for parallel processing \ No newline at end of file +5. **Performance Optimization**: Explore options for parallel processing