85 changes: 85 additions & 0 deletions tab_err/api/high_level.py
@@ -214,6 +214,7 @@ def create_errors( # noqa: PLR0913
Defaults to None.
seed (int | None, optional): Random seed. Defaults to None.


Returns:
tuple[pd.DataFrame, pd.DataFrame]:
- The first element is a copy of 'data' with errors.
@@ -268,3 +269,87 @@ def create_errors( # noqa: PLR0913
# Create Errors & Return
dirty_data, error_mask = mid_level.create_errors(data_copy, config)
return dirty_data, error_mask


def create_errors_with_config( # noqa: PLR0913
data: pd.DataFrame,
error_rate: float,
n_error_models_per_column: int = 1,
error_types_to_include: list[ErrorType] | None = None,
error_types_to_exclude: list[ErrorType] | None = None,
error_mechanisms_to_include: list[ErrorMechanism] | None = None,
error_mechanisms_to_exclude: list[ErrorMechanism] | None = None,
seed: int | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame, MidLevelConfig]:
"""Creates errors in a given DataFrame, at a rate of *approximately* max_error_rate and returns the config used to do so.

Args:
data (pd.DataFrame): The pandas DataFrame to create errors in.
error_rate (float): The maximum error rate to be introduced to each column in the DataFrame.
n_error_models_per_column (int, optional): The number of valid error models to apply to each column. Defaults to 1.
error_types_to_include (list[ErrorType] | None, optional): A list of the error types to be included when building error models. Defaults to None.
error_types_to_exclude (list[ErrorType] | None, optional): A list of the error types to be excluded when building error models. Defaults to None.
When both error_types_to_include and error_types_to_exclude are None, the maximum number of default error types will be used.
At least one of the two must be None; otherwise an error is raised.
error_mechanisms_to_include (list[ErrorMechanism] | None, optional): A list of the error mechanisms to be included when building error models.
Defaults to None.
error_mechanisms_to_exclude (list[ErrorMechanism] | None, optional): A list of the error mechanisms to be excluded when building error models.
Defaults to None.
seed (int | None, optional): Random seed. Defaults to None.


Returns:
tuple[pd.DataFrame, pd.DataFrame, MidLevelConfig]:
- The first element is a copy of 'data' with errors.
- The second element is the associated error mask.
- The third element is the MidLevelConfig mapping each column to the list of error models applied to it.
"""
random_generator = seed_randomness_and_get_generator(seed=seed)
# Input Checking
check_error_rate(error_rate)
check_data_emptiness(data)

# Set Up Data
data_copy = data.copy()
error_mask = pd.DataFrame(data=False, index=data.index, columns=data.columns)

# Build Dictionaries
col_type = _build_column_type_dictionary(
data=data, random_generator=random_generator, error_types_to_include=error_types_to_include, error_types_to_exclude=error_types_to_exclude
)
col_mechanisms = _build_column_mechanism_dictionary(
data=data,
random_generator=random_generator,
error_mechanisms_to_include=error_mechanisms_to_include,
error_mechanisms_to_exclude=error_mechanisms_to_exclude,
)
col_num_models = _build_column_number_of_models_dictionary(data=data, column_types=col_type, column_mechanisms=col_mechanisms)

if n_error_models_per_column > 0:
error_rate = error_rate / n_error_models_per_column
config_dictionary: dict[str | int, list[ErrorModel]] = {
column: [] for column in data.columns if col_num_models[column] > 0
} # Filter out those columns with no valid error models

if error_rate * len(data) < 1: # n_errors is computed in the _sample method of the error mechanism subclasses and rounds to 0 in this case
msg = f"With a per-model error rate of {error_rate} and {len(data)} rows, 0 errors will be introduced."
warnings.warn(msg, stacklevel=2)

for column, error_model_list in config_dictionary.items():
for _ in range(n_error_models_per_column):
error_model_list.append(
ErrorModel(
# NOTE: in python 3.9 mypy fails here but tests work
error_type=random_generator.choice(col_type[column]), # type: ignore[arg-type]
error_mechanism=random_generator.choice(col_mechanisms[column]), # type: ignore[arg-type]
error_rate=error_rate,
)
)
config = MidLevelConfig(config_dictionary)
else: # n_error_models_per_column is 0 or less.
msg = f"n_error_models_per_column is: {n_error_models_per_column} and should be a positive integer"
raise ValueError(msg)

# Create Errors & Return
dirty_data, error_mask = mid_level.create_errors(data_copy, config)
return dirty_data, error_mask, config
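
A minimal usage sketch of the new high-level function (the DataFrame contents, error rate, and seed below are illustrative and not part of this diff):

import pandas as pd

from tab_err.api.high_level import create_errors_with_config

df = pd.DataFrame({"age": [23, 35, 41, 58], "city": ["Berlin", "Paris", "Rome", "Oslo"]})

# Corrupt roughly 50% of each column and keep the MidLevelConfig that was generated.
# Note: if error_rate / n_error_models_per_column * len(df) < 1, a warning is raised and no errors are introduced.
dirty, mask, config = create_errors_with_config(df, error_rate=0.5, n_error_models_per_column=1, seed=42)

# `config` maps each column to the error models applied, so the identical corruption
# can be re-applied later via mid_level.create_errors(df.copy(), config).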
95 changes: 95 additions & 0 deletions tab_err/api/object_oriented.py
@@ -0,0 +1,95 @@
from __future__ import annotations

import pickle
from pathlib import Path
from typing import TYPE_CHECKING

from tab_err.api.high_level import create_errors, create_errors_with_config

if TYPE_CHECKING:
import pandas as pd
from typing_extensions import Self

from tab_err import ErrorMechanism, ErrorType
from tab_err.api import MidLevelConfig


class ErrorInjector:
"""Object-oriented wrapper around the high-level error creation API.

This class allows:
- Reproducible error injection
- Access to the configuration used
- Serialization of the configuration for later reuse
"""

def __init__( # noqa: PLR0913
self,
*,
error_rate: float,
n_error_models_per_column: int = 1,
error_types_to_include: list[ErrorType] | None = None,
error_types_to_exclude: list[ErrorType] | None = None,
error_mechanisms_to_include: list[ErrorMechanism] | None = None,
error_mechanisms_to_exclude: list[ErrorMechanism] | None = None,
seed: int | None = None,
) -> None:
"""Initialize the ErrorInjector object."""
self._error_rate = error_rate
self._n_error_models_per_column = n_error_models_per_column
self._error_types_to_include = error_types_to_include
self._error_types_to_exclude = error_types_to_exclude
self._error_mechanisms_to_include = error_mechanisms_to_include
self._error_mechanisms_to_exclude = error_mechanisms_to_exclude
self._seed = seed

self._last_config: MidLevelConfig | None = None

@property
def config(self) -> MidLevelConfig:
"""Return the configuration used in the most recent error creation."""
if self._last_config is None:
msg = "No configuration available. Call apply_with_config() first."
raise RuntimeError(msg)
return self._last_config

def apply(self, data: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
"""Apply errors to a DataFrame without retaining the configuration."""
dirty_data, error_mask = create_errors(
data=data,
error_rate=self._error_rate,
n_error_models_per_column=self._n_error_models_per_column,
error_types_to_include=self._error_types_to_include,
error_types_to_exclude=self._error_types_to_exclude,
error_mechanisms_to_include=self._error_mechanisms_to_include,
error_mechanisms_to_exclude=self._error_mechanisms_to_exclude,
seed=self._seed,
)
return dirty_data, error_mask

def apply_with_config(self, data: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
"""Apply errors to a DataFrame and store the configuration used."""
dirty_data, error_mask, config = create_errors_with_config(
data=data,
error_rate=self._error_rate,
n_error_models_per_column=self._n_error_models_per_column,
error_types_to_include=self._error_types_to_include,
error_types_to_exclude=self._error_types_to_exclude,
error_mechanisms_to_include=self._error_mechanisms_to_include,
error_mechanisms_to_exclude=self._error_mechanisms_to_exclude,
seed=self._seed,
)
self._last_config = config
return dirty_data, error_mask

def save_config(self, path: str | Path) -> None:
"""Serialize the last-used configuration to disk."""
with Path(path).open("wb") as f:
pickle.dump(self.config, f)

@classmethod
def from_config(cls, config: MidLevelConfig) -> Self:
"""Create an injector that reuses an existing configuration."""
injector = cls(error_rate=0.0)
injector._last_config = config
return injector
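
A minimal usage sketch of the ErrorInjector wrapper (the DataFrame, error rate, and file name are illustrative and not part of this diff):

import pandas as pd

from tab_err.api.object_oriented import ErrorInjector

df = pd.DataFrame({"age": [23, 35, 41, 58], "city": ["Berlin", "Paris", "Rome", "Oslo"]})

injector = ErrorInjector(error_rate=0.5, seed=42)
dirty, mask = injector.apply_with_config(df)  # stores the generated MidLevelConfig on the injector
injector.save_config("error_config.pkl")  # pickle the stored configuration for later reuse

# An injector can also be rebuilt around an existing configuration:
restored = ErrorInjector.from_config(injector.config)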
8 changes: 8 additions & 0 deletions tab_err/error_mechanism/_ear.py
@@ -79,3 +79,11 @@ def _sample(self: EAR, data: pd.DataFrame, column: str | int, error_rate: float,
se_mask.loc[selected_rows.index] = True

return error_mask

def __str__(self) -> str:
"""Return a human-readable string for the object."""
return f"EAR(condition_to_column={self.condition_to_column}, seed={getattr(self, '_random_generator', None)})"

def __repr__(self) -> str:
"""Return a detailed string for debugging."""
return f"<EAR condition_to_column={self.condition_to_column}, random_generator={getattr(self, '_random_generator', None)}>"
8 changes: 8 additions & 0 deletions tab_err/error_mechanism/_ecar.py
@@ -60,3 +60,11 @@ def _sample(
error_indices = self._random_generator.choice(se_mask_error_free.index, n_errors, replace=False)
se_mask[error_indices] = True
return error_mask

def __str__(self) -> str:
"""Return a human-readable string for the object."""
return f"ECAR(random_generator={self._random_generator})"

def __repr__(self) -> str:
"""Return a detailed string for debugging."""
return f"<ECAR random_generator={self._random_generator}>"
6 changes: 6 additions & 0 deletions tab_err/error_mechanism/_enar.py
@@ -66,3 +66,9 @@ def _sample(self: ENAR, data: pd.DataFrame, column: str | int, error_rate: float
se_mask.loc[selected_rows.index] = True

return error_mask

def __str__(self) -> str:
"""Return a human-readable string for the object."""
return f"ENAR(random_generator={self._random_generator})"

def __repr__(self) -> str:
"""Return a detailed string for debugging."""
return f"<ENAR random_generator={self._random_generator}>"