dropreg · llbbl · Jun 28, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,148 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+Pipfile.lock
+
+# PEP 582
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# IDEs
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# Claude settings
+.claude/*
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Project specific - DO NOT ignore lock files
+# poetry.lock is tracked
+# uv.lock is tracked
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,84 @@
+[tool.poetry]
+name = "r-drop"
+version = "0.1.0"
+description = "R-Drop: Regularized Dropout for Neural Networks"
+authors = ["R-Drop Authors"]
+readme = "README.md"
+packages = [
+    { include = "fairseq_src" },
+    { include = "huggingface_transformer_src" },
+    { include = "vit_src" }
+]
+
+[tool.poetry.dependencies]
+python = "^3.8"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.3"
+pytest-cov = "^4.1.0"
+pytest-mock = "^3.12.0"
+
+[tool.poetry.scripts]
+test = "pytest:main"
+tests = "pytest:main"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+minversion = "7.0"
+testpaths = ["tests"]
+python_files = ["test_*.py", "*_test.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+addopts = [
+    "-ra",
+    "--strict-markers",
+    "--cov=fairseq_src",
+    "--cov=huggingface_transformer_src", 
+    "--cov=vit_src",
+    "--cov-report=html",
+    "--cov-report=xml",
+    "--cov-report=term-missing",
+    "--cov-fail-under=0",  # Set to 0 for infrastructure setup, should be increased when actual tests are added
+    "-vv"
+]
+markers = [
+    "unit: marks tests as unit tests (fast, isolated)",
+    "integration: marks tests as integration tests (slower, may require external resources)",
+    "slow: marks tests as slow running"
+]
+
+[tool.coverage.run]
+source = ["fairseq_src", "huggingface_transformer_src", "vit_src"]
+omit = [
+    "*/tests/*",
+    "*/__pycache__/*",
+    "*/site-packages/*",
+    "*/setup.py",
+    "*/__init__.py"
+]
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "if self.debug:",
+    "if __name__ == .__main__.:",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if 0:",
+    "if False:",
+    "class .*\\(Protocol\\):",
+    "@(abc\\.)?abstractmethod"
+]
+precision = 2
+show_missing = true
+skip_covered = false
+
+[tool.coverage.html]
+directory = "htmlcov"
+
+[tool.coverage.xml]
+output = "coverage.xml"
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1 @@
+# Testing package for R-Drop
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,150 @@
+"""Shared pytest fixtures and configuration for R-Drop tests."""
+
+import os
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Generator
+
+import pytest
+
+
+@pytest.fixture
+def temp_dir() -> Generator[Path, None, None]:
+    """Yield a fresh scratch directory and remove it after the test."""
+    scratch = Path(tempfile.mkdtemp())
+    yield scratch
+    shutil.rmtree(scratch)
+
+
+@pytest.fixture
+def mock_config():
+    """Return a nested config dict with ``model``, ``training`` and ``data`` sections."""
+    model_section = {
+        "name": "test_model",
+        "hidden_size": 768,
+        "num_layers": 12,
+        "dropout": 0.1,
+    }
+    training_section = {
+        "batch_size": 32,
+        "learning_rate": 1e-4,
+        "epochs": 10,
+        "alpha": 0.5,  # R-Drop alpha parameter
+    }
+    data_section = {
+        "train_path": "data/train.txt",
+        "val_path": "data/val.txt",
+        "test_path": "data/test.txt",
+    }
+    # Sections are assembled in the order model, training, data.
+    return {"model": model_section, "training": training_section, "data": data_section}
+
+
+@pytest.fixture
+def sample_data():
+    """Return two example sentences with their labels and attention masks."""
+    sentences = ["This is a test sentence.", "Another test example."]
+    masks = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]
+    batch = {"inputs": sentences, "labels": [0, 1]}
+    batch["attention_mask"] = masks
+    return batch
+
+
+@pytest.fixture
+def mock_model_output():
+    """Return random logits (2, 10), hidden_states (2, 768), attention_weights (2, 12, 5, 5)."""
+    import torch
+
+    batch_size, num_classes = 2, 10
+    # Draw the tensors in dict-key order so the RNG is consumed in a fixed sequence.
+    logits = torch.randn(batch_size, num_classes)
+    hidden_states = torch.randn(batch_size, 768)
+    attention_weights = torch.randn(batch_size, 12, 5, 5)
+    outputs = {"logits": logits, "hidden_states": hidden_states}
+    outputs["attention_weights"] = attention_weights
+    return outputs
+
+
+@pytest.fixture(autouse=True)
+def reset_environment():
+    """Snapshot ``os.environ`` and restore it after every test (autouse)."""
+    snapshot = dict(os.environ)
+    yield
+    os.environ.clear()
+    os.environ.update(snapshot)
+
+
+@pytest.fixture
+def mock_file_system(temp_dir):
+    """Fill ``temp_dir`` with data/, models/ and logs/ plus three text files in data/."""
+    for folder in ("data", "models", "logs"):
+        (temp_dir / folder).mkdir()
+    # (file name, repeated line, repeat count) for each mock data file
+    specs = (
+        ("train.txt", "Sample training data\n", 100),
+        ("val.txt", "Sample validation data\n", 20),
+        ("test.txt", "Sample test data\n", 20),
+    )
+    for name, line, count in specs:
+        (temp_dir / "data" / name).write_text(line * count)
+    return temp_dir
+
+
+@pytest.fixture
+def capture_logs(caplog):
+    """Yield ``caplog`` with its level set to DEBUG while the test runs."""
+    with caplog.at_level("DEBUG"):
+        yield caplog
+
+
+def pytest_configure(config):
+    """Register the ``unit``, ``integration`` and ``slow`` markers with pytest.
+
+    Registration keeps ``--strict-markers`` from rejecting these marker names.
+    """
+    marker_lines = (
+        "unit: mark test as a unit test (fast, isolated)",
+        "integration: mark test as an integration test (may require external resources)",
+        "slow: mark test as slow running",
+    )
+    for marker_line in marker_lines:
+        config.addinivalue_line("markers", marker_line)
+
+
+def pytest_collection_modifyitems(config, items):
+    """Auto-mark tests that live under a directory named ``unit`` or ``integration``."""
+    for item in items:
+        parts = item.path.parts  # whole components only: "community" must not count as "unit"
+        if "unit" in parts:
+            item.add_marker(pytest.mark.unit)
+        elif "integration" in parts:
+            item.add_marker(pytest.mark.integration)
+
+
+# Helper fixtures for common test scenarios
+@pytest.fixture
+def mock_torch_model():
+    """Return a small ``nn.Module``: Linear(768, 10) followed by Dropout(0.1)."""
+    from torch import nn
+    class MockModel(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.linear = nn.Linear(768, 10)
+            self.dropout = nn.Dropout(0.1)
+
+        def forward(self, x):
+            projected = self.linear(x)
+            return self.dropout(projected)
+
+    return MockModel()
+
+
+@pytest.fixture
+def disable_gpu(monkeypatch):
+    """Hide all CUDA devices from the test by blanking ``CUDA_VISIBLE_DEVICES``.
+
+    ``monkeypatch`` puts back the previous value (or its absence) on teardown,
+    so a setting made before the test survives it.
+    """
+    monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "")
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
@@ -0,0 +1 @@
+# Integration tests for R-Drop