HanssonMagnus · HanssonMagnus · Nov 9, 2025 · Nov 9, 2025 · Nov 9, 2025 · Nov 9, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,46 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+
+  - repo: local
+    hooks:
+      - id: pytest
+        name: pytest
+        entry: pytest
+        language: system
+        pass_filenames: false
+        always_run: true
+
+      - id: mypy
+        name: mypy
+        entry: mypy bis_scraper
+        language: system
+        pass_filenames: false
+        always_run: true
+
+      - id: black
+        name: black
+        entry: black --check bis_scraper tests
+        language: system
+        pass_filenames: false
+        always_run: true
+
+      - id: isort
+        name: isort
+        entry: isort --check bis_scraper tests
+        language: system
+        pass_filenames: false
+        always_run: true
+
+      - id: ruff
+        name: ruff
+        entry: ruff check bis_scraper tests
+        language: system
+        pass_filenames: false
+        always_run: true
+
diff --git a/README.md b/README.md
@@ -321,30 +321,30 @@ def analyze_speeches(data_dir, institution, keywords):
     # Path to text files for the institution
     institution_dir = Path(data_dir) / "texts" / institution.lower().replace(" ", "_")
     results = []
-    
+
     # Process each text file
     for txt_file in glob.glob(f"{institution_dir}/*.txt"):
         file_code = os.path.basename(txt_file).split('.')[0]
-        
+
         with open(txt_file, 'r', encoding='utf-8') as f:
             text = f.read().lower()
-            
+
             # Count keywords
             word_counts = {}
             for keyword in keywords:
                 pattern = r'\b' + re.escape(keyword.lower()) + r'\b'
                 word_counts[keyword] = len(re.findall(pattern, text))
-            
+
             # Get total word count
             total_words = len(re.findall(r'\b\w+\b', text))
-            
+
             # Add to results
             results.append({
                 'file_code': file_code,
                 'total_words': total_words,
                 **word_counts
             })
-    
+
     # Convert to DataFrame for analysis
     df = pd.DataFrame(results)
     return df
@@ -410,17 +410,7 @@ This project uses several tools to ensure code quality:
 - `mypy` for type checking
 - `ruff` for linting
 
-You can run all these checks using the provided script:
-
-```bash
-# Check code quality
-./check_code_quality.py
-
-# Fix issues automatically where possible
-./check_code_quality.py --fix
-```
-
-Or run each tool individually:
+The recommended way to run all these checks is using pre-commit hooks (see the [Pre-commit Hooks](#pre-commit-hooks) section below). You can also run each tool individually:
 
 ```bash
 # Format code
@@ -434,6 +424,73 @@ mypy bis_scraper
 ruff bis_scraper tests
 ```
 
+### Pre-commit Hooks
+
+This project uses [pre-commit](https://pre-commit.com/) hooks to automatically run the full CI pipeline locally before each commit. This ensures that all code quality checks pass before pushing to the repository.
+
+#### Installation
+
+First, install pre-commit. It is included in the dev dependencies, so installing those is enough; otherwise, install it on its own:
+
+```bash
+# Install the dev dependencies, which include pre-commit
+pip install -e ".[dev]"
+
+# Or install pre-commit separately
+pip install pre-commit
+```
+
+Then install the git hooks:
+
+```bash
+pre-commit install
+```
+
+This will set up the hooks to run automatically on every commit.
+
+#### Running Manually
+
+You can run all pre-commit hooks manually on all files:
+
+```bash
+pre-commit run --all-files
+```
+
+To run a specific hook:
+
+```bash
+pre-commit run <hook-id> --all-files
+```
+
+For example:
+```bash
+pre-commit run pytest --all-files
+pre-commit run mypy --all-files
+pre-commit run black --all-files
+```
+
+#### What the Hooks Do
+
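+After basic file hygiene hooks (trailing whitespace, end-of-file newline, YAML validity, large-file check), the pre-commit hooks run the same checks as the CI pipeline: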
+
+1. **pytest** - Runs all tests
+2. **mypy** - Type checking on `bis_scraper` package
+3. **black** - Code formatting check
+4. **isort** - Import sorting check
+5. **ruff** - Linting
+
+If any hook fails, the commit will be blocked. Fix the issues and try committing again.
+
+#### Skipping Hooks (Not Recommended)
+
+If you need to skip hooks for a specific commit (not recommended), you can use:
+
+```bash
+git commit --no-verify
+```
+
+However, the CI pipeline will still run these checks, so it's better to fix issues locally.
+
 ## Contributing
 
 Contributions are welcome! Please feel free to submit a Pull Request.

diff --git a/bis_scraper/cli/main.py b/bis_scraper/cli/main.py
@@ -179,8 +179,8 @@ def scrape(
 @click.pass_context
 def convert(
     ctx: click.Context,
-    start_date: Optional[click.DateTime],
-    end_date: Optional[click.DateTime],
+    start_date: Optional[datetime.datetime],
+    end_date: Optional[datetime.datetime],
     institutions: Tuple[str, ...],
     force: bool,
     limit: Optional[int],
@@ -238,8 +238,8 @@ def convert(
 @click.pass_context
 def run_all(
     ctx: click.Context,
-    start_date: Optional[click.DateTime],
-    end_date: Optional[click.DateTime],
+    start_date: Optional[datetime.datetime],
+    end_date: Optional[datetime.datetime],
     institutions: tuple[str, ...],
     force: bool,
     limit: Optional[int],

diff --git a/bis_scraper/converters/controller.py b/bis_scraper/converters/controller.py
@@ -1,10 +1,10 @@
 """Controller module for PDF to text conversion operations."""
 
+import datetime
 import logging
 import time
 from pathlib import Path
 from typing import Optional, Tuple
-import datetime
 
 from bis_scraper.converters.pdf_converter import PdfConverter
 from bis_scraper.models import ConversionResult

diff --git a/bis_scraper/converters/pdf_converter.py b/bis_scraper/converters/pdf_converter.py
@@ -1,9 +1,9 @@
 """PDF to text converter implementation."""
 
+import datetime
 import logging
 from pathlib import Path
 from typing import List, Optional
-import datetime
 
 import textract  # type: ignore
 

diff --git a/check_code_quality.py b/check_code_quality.py
diff --git a/docs/api.md b/docs/api.md
@@ -247,4 +247,4 @@ bis-scraper --help
 - `--force`: Force re-download or re-conversion
 - `--limit INTEGER`: Maximum number of speeches to process
 - `--data-dir DIRECTORY`: Base directory for data storage
-- `--log-dir DIRECTORY`: Directory for log files 
+- `--log-dir DIRECTORY`: Directory for log files
diff --git a/docs/test_coverage.md b/docs/test_coverage.md
@@ -71,4 +71,4 @@ When adding new features or modifying existing code, please ensure:
 4. **Tests use mocks appropriately** to avoid external dependencies
 5. **Integration impacts are tested** when changing core components
 
-The project uses GitHub Actions to automatically run tests on pull requests, ensuring that all contributions maintain the expected quality standards. 
+The project uses GitHub Actions to automatically run tests on pull requests, ensuring that all contributions maintain the expected quality standards.
diff --git a/install.sh b/install.sh
@@ -22,4 +22,4 @@ pip install -e .
 
 echo "=== Installation complete ==="
 echo "To activate the environment, run: source .venv/bin/activate"
-echo "To use the package, run: bis-scraper --help" 
+echo "To use the package, run: bis-scraper --help"
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,6 +38,7 @@ dev = [
     "responses>=0.22.0",
     "types-requests>=2.28.0",
     "types-beautifulsoup4>=4.8.0",
+    "pre-commit>=3.0.0",
 ]
 
 [project.urls]
@@ -83,4 +84,4 @@ ignore = []
 "bis_scraper/scrapers/**/*.py" = ["E501"]
 "bis_scraper/converters/**/*.py" = ["E501"]
 "bis_scraper/utils/**/*.py" = ["E501"]
-"bis_scraper/cli/**/*.py" = ["E501"] 
+"bis_scraper/cli/**/*.py" = ["E501"]