20 changes: 20 additions & 0 deletions .gitignore
@@ -109,3 +109,23 @@ venv.bak/
rationale_benchmark/data/esnli_previous
data/esnli_previous
esnli_union/

# Claude settings
.claude/*

# IDE files
.vscode/
.idea/
*.sublime-project
*.sublime-workspace

# Testing artifacts
test-results/
.benchmarks/

# OS files
.DS_Store
Thumbs.db
*.swp
*.swo
*~
2,911 changes: 2,911 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

95 changes: 95 additions & 0 deletions pyproject.toml
@@ -0,0 +1,95 @@
[tool.poetry]
name = "rationale-benchmark"
version = "0.1.0"
description = "Rationale benchmark for interpretable machine learning"
authors = ["Your Name <you@example.com>"]
readme = "README.md"
packages = [{include = "rationale_benchmark"}]

[tool.poetry.dependencies]
python = "^3.8"
ftfy = "5.5.1"
gensim = "3.7.1"
numpy = "1.16.2"
pandas = "0.24.2"
pytorch-transformers = "1.1.0"
scipy = "1.2.1"
scispacy = "0.2.3"
scikit-learn = "0.20.3"
spacy = "2.1.8"
tensorflow-gpu = "1.14"
torch = "1.3.0"
tqdm = "4.31.1"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
pytest-mock = "^3.11.1"


[tool.pytest.ini_options]
minversion = "7.0"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
"-ra",
"--strict-markers",
"--cov=rationale_benchmark",
"--cov-branch",
"--cov-report=term-missing",
"--cov-report=html:htmlcov",
"--cov-report=xml:coverage.xml",
"--cov-fail-under=80",
"-v",
"--tb=short"
]
markers = [
"unit: marks tests as unit tests (fast, isolated)",
"integration: marks tests as integration tests (may require external resources)",
"slow: marks tests as slow (deselect with '-m \"not slow\"')"
]
filterwarnings = [
"ignore::DeprecationWarning",
"ignore::PendingDeprecationWarning"
]

[tool.coverage.run]
source = ["rationale_benchmark"]
branch = true
omit = [
"*/tests/*",
"*/__pycache__/*",
"*/site-packages/*",
"*/.venv/*",
"*/venv/*",
"*/env/*"
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"def __str__",
"raise AssertionError",
"raise NotImplementedError",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"class .*\\bProtocol\\):",
"@(abc\\.)?abstractmethod"
]
precision = 2
show_missing = true
skip_covered = false
skip_empty = true

[tool.coverage.html]
directory = "htmlcov"

[tool.coverage.xml]
output = "coverage.xml"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
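
Usage note: with --strict-markers enabled above, only the declared markers (unit, integration, slow) are accepted, and a run can be filtered with, for example, pytest -m unit or pytest -m "not slow". Below is a minimal sketch of a test that opts into one of these markers; the test name and its arithmetic are illustrative only and are not part of this PR.

# Hypothetical example (not part of this PR): a test tagged with one of the
# markers registered in [tool.pytest.ini_options].
import pytest


@pytest.mark.slow
def test_expensive_path():
    # Deselected by `pytest -m "not slow"`, selected by `pytest -m slow`.
    assert sum(range(1_000_000)) == 499_999_500_000
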
Empty file added tests/__init__.py
Empty file.
194 changes: 194 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,194 @@
import json
import tempfile
from pathlib import Path
from typing import Dict, Any, Generator
from unittest.mock import MagicMock

import pytest


@pytest.fixture
def temp_dir() -> Generator[Path, None, None]:
"""Create a temporary directory for test files."""
with tempfile.TemporaryDirectory() as tmpdir:
yield Path(tmpdir)


@pytest.fixture
def temp_file(temp_dir: Path) -> Generator[Path, None, None]:
"""Create a temporary file for testing."""
temp_path = temp_dir / "test_file.txt"
temp_path.write_text("test content")
yield temp_path
if temp_path.exists():
temp_path.unlink()


@pytest.fixture
def mock_config() -> Dict[str, Any]:
"""Provide a mock configuration dictionary."""
return {
"model_type": "test_model",
"batch_size": 32,
"learning_rate": 0.001,
"epochs": 10,
"seed": 42,
"device": "cpu",
"data_dir": "/tmp/test_data",
"output_dir": "/tmp/test_output"
}


@pytest.fixture
def sample_json_config(temp_dir: Path) -> Path:
"""Create a sample JSON configuration file."""
config_path = temp_dir / "config.json"
config = {
"experiment_name": "test_experiment",
"model_params": {
"hidden_size": 128,
"num_layers": 2,
"dropout": 0.1
},
"training_params": {
"batch_size": 16,
"learning_rate": 0.0001,
"epochs": 5
}
}
config_path.write_text(json.dumps(config, indent=2))
return config_path


@pytest.fixture
def sample_data() -> Dict[str, Any]:
"""Provide sample data for testing."""
return {
"train": [
{"id": 1, "text": "Sample text 1", "label": 0},
{"id": 2, "text": "Sample text 2", "label": 1},
{"id": 3, "text": "Sample text 3", "label": 0}
],
"test": [
{"id": 4, "text": "Test text 1", "label": 1},
{"id": 5, "text": "Test text 2", "label": 0}
]
}


@pytest.fixture
def mock_model() -> MagicMock:
"""Create a mock model object."""
model = MagicMock()
model.predict = MagicMock(return_value=[0.8, 0.2])
model.train = MagicMock()
model.evaluate = MagicMock(return_value={"accuracy": 0.95, "loss": 0.15})
model.save = MagicMock()
model.load = MagicMock()
return model


@pytest.fixture
def mock_tokenizer() -> MagicMock:
"""Create a mock tokenizer object."""
tokenizer = MagicMock()
tokenizer.tokenize = MagicMock(return_value=["test", "tokens"])
tokenizer.encode = MagicMock(return_value=[101, 2023, 102])
tokenizer.decode = MagicMock(return_value="decoded text")
return tokenizer


@pytest.fixture
def environment_vars(monkeypatch) -> Dict[str, str]:
"""Set and return test environment variables."""
env_vars = {
"TEST_ENV": "testing",
"DATA_PATH": "/tmp/test_data",
"MODEL_PATH": "/tmp/test_models",
"LOG_LEVEL": "DEBUG"
}
for key, value in env_vars.items():
monkeypatch.setenv(key, value)
return env_vars


@pytest.fixture
def capture_logs(caplog):
"""Fixture to capture and return log messages."""
import logging
caplog.set_level(logging.DEBUG)
return caplog


@pytest.fixture(autouse=True)
def reset_random_seeds():
"""Automatically reset random seeds before each test for reproducibility."""
import random

random.seed(42)

try:
import numpy as np
np.random.seed(42)
except ImportError:
pass

try:
import torch
torch.manual_seed(42)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(42)
except ImportError:
pass


@pytest.fixture
def mock_file_system(tmp_path):
"""Create a mock file system structure for testing."""
structure = {
"data": ["train.csv", "test.csv", "val.csv"],
"models": ["model_1.pt", "model_2.pt"],
"configs": ["config_1.json", "config_2.json"],
"logs": ["train.log", "eval.log"]
}

for dir_name, files in structure.items():
dir_path = tmp_path / dir_name
dir_path.mkdir()
for file_name in files:
(dir_path / file_name).touch()

return tmp_path


@pytest.fixture
def sample_predictions():
"""Provide sample prediction results for testing."""
return {
"predictions": [
{"id": 1, "predicted": 0, "actual": 0, "confidence": 0.95},
{"id": 2, "predicted": 1, "actual": 1, "confidence": 0.87},
{"id": 3, "predicted": 0, "actual": 1, "confidence": 0.62},
{"id": 4, "predicted": 1, "actual": 0, "confidence": 0.73}
],
"metrics": {
"accuracy": 0.5,
"precision": 0.5,
"recall": 0.5,
"f1_score": 0.5
}
}


def pytest_configure(config):
"""Configure pytest with custom markers."""
config.addinivalue_line(
"markers", "unit: mark test as a unit test (fast, isolated)"
)
config.addinivalue_line(
"markers", "integration: mark test as an integration test (may require external resources)"
)
config.addinivalue_line(
"markers", "slow: mark test as slow running"
)
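
Usage note: pytest injects these fixtures by parameter name, so any test under tests/ can request them directly. Below is a minimal sketch of such a test module; the module it would live in (for example tests/unit/test_fixtures_example.py) and the assertions are illustrative only and are not part of this PR.

# Hypothetical example (not part of this PR): tests that consume the
# conftest.py fixtures defined above by naming them as parameters.
import json

import pytest


@pytest.mark.unit
def test_sample_json_config_is_valid_json(sample_json_config):
    # sample_json_config is the Path written by the fixture above.
    config = json.loads(sample_json_config.read_text())
    assert config["experiment_name"] == "test_experiment"


@pytest.mark.unit
def test_mock_model_records_calls(mock_model):
    # The MagicMock returns the canned prediction and records the call.
    scores = mock_model.predict(["some input"])
    assert scores == [0.8, 0.2]
    mock_model.predict.assert_called_once_with(["some input"])
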
Empty file added tests/integration/__init__.py
Empty file.