From ea06b580dc40fd7f2afc9c44f9c9e7989a10d95d Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 18 Feb 2026 16:37:02 +0100 Subject: [PATCH 01/68] feat: add async pipeline manager POC --- logprep/ng/poc/__init__.py | 0 logprep/ng/poc/async_pipeline/__init__.py | 0 .../ng/poc/async_pipeline/pipeline_manager.py | 335 ++++++++++++++++++ logprep/ng/poc/async_pipeline/types.py | 107 ++++++ .../ng/poc/async_pipeline/utils/__init__.py | 0 .../utils/worker_chain_validator.py | 140 ++++++++ .../ng/poc/async_pipeline/worker/__init__.py | 0 .../async_pipeline/worker/pipeline_worker.py | 105 ++++++ .../ng/poc/async_pipeline/worker/worker.py | 227 ++++++++++++ logprep/ng/poc/main.py | 95 +++++ logprep/ng/poc/main_standalone_worker.py | 130 +++++++ logprep/ng/poc/mocked/__init__.py | 0 logprep/ng/poc/mocked/mocking_functions.py | 27 ++ logprep/ng/poc/mocked/mocking_processor.py | 19 + logprep/ng/poc/mocked/mocking_types.py | 50 +++ logprep/ng/poc/pipeline_manager.py | 218 ++++++++++++ pyproject.toml | 2 +- 17 files changed, 1454 insertions(+), 1 deletion(-) create mode 100644 logprep/ng/poc/__init__.py create mode 100644 logprep/ng/poc/async_pipeline/__init__.py create mode 100644 logprep/ng/poc/async_pipeline/pipeline_manager.py create mode 100644 logprep/ng/poc/async_pipeline/types.py create mode 100644 logprep/ng/poc/async_pipeline/utils/__init__.py create mode 100644 logprep/ng/poc/async_pipeline/utils/worker_chain_validator.py create mode 100644 logprep/ng/poc/async_pipeline/worker/__init__.py create mode 100644 logprep/ng/poc/async_pipeline/worker/pipeline_worker.py create mode 100644 logprep/ng/poc/async_pipeline/worker/worker.py create mode 100644 logprep/ng/poc/main.py create mode 100644 logprep/ng/poc/main_standalone_worker.py create mode 100644 logprep/ng/poc/mocked/__init__.py create mode 100644 logprep/ng/poc/mocked/mocking_functions.py create mode 100644 logprep/ng/poc/mocked/mocking_processor.py create mode 100644 logprep/ng/poc/mocked/mocking_types.py create 
"""
Pipeline orchestration and lifecycle management.

This module provides the coordinator responsible for running a validated, linear
worker chain and for managing long-lived background tasks bound to the manager
lifecycle. It implements dynamic handler resolution for pipeline-bound workers
and supports controlled restarts via soft shutdown and reload semantics.

The focus is operational correctness: predictable startup/shutdown behavior,
reload safety, and fail-fast propagation of task failures via a single manager
lifetime signal.
"""

import asyncio
import inspect
from asyncio import AbstractEventLoop
from collections.abc import Callable, Coroutine
from types import FunctionType
from typing import Any, Concatenate, ParamSpec, TypeAlias, TypeVar, cast

from async_pipeline.types import Handler, HandlerResolver
from async_pipeline.utils.worker_chain_validator import (
    validate_and_sort_linear_worker_chain,
)
from async_pipeline.worker.pipeline_worker import PipelineWorker

P = ParamSpec("P")
SelfT = TypeVar("SelfT")

AsyncMarked: TypeAlias = Callable[Concatenate[SelfT, P], Coroutine[Any, Any, Any]]
BackgroundTaskFactory: TypeAlias = Callable[[], Coroutine[Any, Any, Any]]


def background_task(func: AsyncMarked[SelfT, P]) -> AsyncMarked[SelfT, P]:
    """
    Mark an async function or method as a pipeline background task.

    The decorator does not alter runtime behavior. It only attaches
    metadata used by PipelineManager during background task discovery.

    Marked callables are automatically registered and executed as
    long-lived control-plane tasks bound to the manager lifecycle.

    Notes
    -----
    - Only async callables are supported.
    - The decorator performs no wrapping or scheduling.
    - Execution semantics are defined entirely by PipelineManager.
    """
    setattr(func, "__background_task__", True)
    return func


class PipelineManager(HandlerResolver):
    """
    Orchestrates a linear chain of PipelineWorker instances and manager-scoped background tasks.

    The manager binds itself as a HandlerResolver for PipelineWorkers, starts/stops worker tasks,
    and optionally discovers and runs @background_task-marked async methods.

    Lifecycle:
    - run(): start workers + background tasks and wait until stop_event is set
    - soft_shut_down(): stop worker tasks only (manager stays alive)
    - reload(): soft shutdown + restart workers (background tasks keep running)
    - shut_down(): stop workers + background tasks and end manager lifetime
    """

    def __init__(
        self,
        workers: list[PipelineWorker[Any]],
        loop: AbstractEventLoop | None = None,
    ) -> None:
        """
        Initialize the manager with a worker chain and optional event loop.

        Workers are validated/sorted into a strict linear chain and PipelineWorkers
        are bound to this manager as their handler resolver. When *loop* is omitted,
        run() picks up the running loop lazily.
        """
        self._loop: AbstractEventLoop | None = loop
        self._workers: list[PipelineWorker[Any]] = self._validate_and_bind_workers(workers=workers)

        # public: background tasks and subclasses may rely on this as the manager lifetime signal
        self.stop_event = asyncio.Event()

        # Data-plane vs control-plane task registries: worker tasks may be
        # restarted on reload(); background tasks live for the whole manager.
        self._worker_tasks: set[asyncio.Task[Any]] = set()
        self._background_tasks: set[asyncio.Task[Any]] = set()

        self._registered_background_task_factories: set[BackgroundTaskFactory] = set()
        self._background_tasks_started = False

        # Collected failures from any tracked task; stop_event is set on first failure.
        self._exceptions: list[BaseException] = []
        self._reload_lock = asyncio.Lock()

    def _validate_and_bind_workers(
        self,
        workers: list[PipelineWorker[Any]],
    ) -> list[PipelineWorker[Any]]:
        """
        Validate worker wiring and bind this manager as their handler resolver.

        Ensures the worker chain forms a strict linear pipeline and resets
        resolver bindings to keep handler resolution consistent.
        """
        workers = validate_and_sort_linear_worker_chain(workers)

        for worker in workers:
            if isinstance(worker, PipelineWorker):
                worker.bind_resolver(self)

        return workers

    def resolve(self, name: str) -> Handler[Any]:
        """
        Resolve a handler by name on this manager instance.

        Implementations typically provide handler methods on subclasses which are
        looked up dynamically by attribute name.

        Raises
        ------
        AttributeError
            If no callable attribute *name* exists on this instance.
        """
        handler = getattr(self, name, None)
        if handler is None or not callable(handler):
            raise AttributeError(f"Missing handler {name!r} on {type(self).__name__}.")
        return cast(Handler[Any], handler)

    def register_additional_background_tasks(self, callback: BackgroundTaskFactory) -> None:
        """
        Register an additional manager-scoped background task factory.

        The callback must be an async callable producing a coroutine and will be
        scheduled when start_background_tasks() runs.
        """
        if not inspect.iscoroutinefunction(callback):
            raise TypeError(
                "register_additional_background_tasks() only accepts async callables "
                "(async def ...). Sync callables are not supported."
            )
        self._registered_background_task_factories.add(callback)

    def _setup(self) -> None:
        """Perform manager initialization steps that require a fully constructed instance."""
        self._auto_register_marked_background_tasks()

    def start_background_tasks(self) -> None:
        """
        Start manager-scoped background tasks once per manager lifetime.

        This is intended for long-lived control-plane tasks (metrics, cleanup, etc.).
        """
        if self._background_tasks_started:
            return
        if self._loop is None:
            raise RuntimeError("start_background_tasks() requires an event loop.")

        self._background_tasks_started = True
        self._enqueue_registered_background_tasks()

    def _auto_register_marked_background_tasks(self) -> None:
        """
        Discover and register @background_task-marked async methods.

        Discovery inspects the class hierarchy without invoking attribute access on
        the instance to avoid side effects from descriptors/properties.
        """
        seen: set[str] = set()

        for cls in type(self).mro():
            if cls is object:
                break

            for name, attr in cls.__dict__.items():
                # MRO order means the first definition wins, matching normal
                # attribute lookup; later (base-class) duplicates are skipped.
                if name in seen:
                    continue
                seen.add(name)

                # Unwrap descriptors to reach the raw function carrying the marker.
                func: Callable[..., Any] | None = None
                if isinstance(attr, (staticmethod, classmethod)):
                    func = cast(Callable[..., Any], attr.__func__)
                elif isinstance(attr, FunctionType):
                    func = attr

                if func is None:
                    continue

                if getattr(func, "__background_task__", False):
                    bound = getattr(self, name)

                    if not inspect.iscoroutinefunction(bound):
                        raise TypeError(
                            f"Background task {type(self).__name__}.{name} is marked with "
                            "@background_task but is not async. Only async background tasks are supported."
                        )

                    self._registered_background_task_factories.add(
                        cast(BackgroundTaskFactory, bound)
                    )

    def _enqueue_registered_background_tasks(self) -> None:
        """Schedule all registered background task factories on the configured event loop."""
        if self._loop is None:
            raise RuntimeError("_enqueue_registered_background_tasks() requires an event loop.")

        for callback in self._registered_background_task_factories:
            task = self._loop.create_task(callback())
            self._add_background_task(task)

    def run_workers(self) -> None:
        """
        Start worker tasks (data-plane).

        Worker tasks may be restarted on reload; background tasks are not.
        """
        if self._loop is None:
            raise RuntimeError("run_workers() requires an event loop.")

        for worker in self._workers:
            task = self._loop.create_task(worker.run())
            self._add_worker_task(task)

    async def run(self) -> None:
        """
        Run the manager until stop_event is set.

        Starts workers and background tasks and then blocks waiting for shutdown.
        Collected task failures are available in self._exceptions after exit.
        """
        if self._loop is None:
            self._loop = asyncio.get_running_loop()

        self._setup()
        self.run_workers()
        self.start_background_tasks()

        await self.stop_event.wait()

    async def reload(self) -> None:
        """
        Restart worker tasks while keeping the manager alive.

        Performs a soft shutdown (workers only), re-binds PipelineWorkers to this
        resolver, and starts workers again. Background tasks continue running.
        Serialized via _reload_lock so concurrent reloads cannot interleave.
        """
        async with self._reload_lock:
            print("Reloading...")
            await self.soft_shut_down()
            self._workers = self._validate_and_bind_workers(workers=self._workers)
            self.run_workers()
            print("Reload done.")

    async def soft_shut_down(self) -> None:
        """
        Stop worker tasks without ending the manager lifetime.

        Intended to allow reload/restart of the data-plane while keeping
        control-plane background tasks running.
        """
        await self._shut_down_workers()

    async def shut_down(self) -> None:
        """
        Fully shut down the manager.

        Stops workers and background tasks, clears registrations, and signals
        stop_event so run() can exit.
        """
        await self.soft_shut_down()
        await self._shut_down_background_tasks()

        self._registered_background_task_factories.clear()
        self.stop_event.set()

    async def _shut_down_workers(self) -> None:
        """Signal, cancel, and await completion of all worker tasks."""
        for worker in self._workers:
            worker.stop_event.set()

        # Never cancel the task we are currently running inside of.
        current = asyncio.current_task()
        tasks = [t for t in self._worker_tasks if t is not current]

        for t in tasks:
            t.cancel()

        await asyncio.gather(*tasks, return_exceptions=True)
        self._worker_tasks.clear()

        # Fresh events so a subsequent run_workers() starts un-signalled.
        for worker in self._workers:
            worker.stop_event = asyncio.Event()

    async def _shut_down_background_tasks(self) -> None:
        """Cancel and await completion of all manager-scoped background tasks."""
        current = asyncio.current_task()
        tasks = [t for t in self._background_tasks if t is not current]

        for t in tasks:
            t.cancel()

        await asyncio.gather(*tasks, return_exceptions=True)
        self._background_tasks.clear()

    def _track_task(self, task: asyncio.Task[Any], registry: set[asyncio.Task[Any]]) -> None:
        """
        Track *task* in *registry* with fail-fast semantics.

        On completion the task is removed from the registry; a non-cancellation
        exception is recorded and ends the manager lifetime via stop_event.
        """
        registry.add(task)

        def _done(t: asyncio.Task[Any]) -> None:
            registry.discard(t)

            if t.cancelled():
                return

            exc = t.exception()
            if exc is not None:
                self._exceptions.append(exc)
                self.stop_event.set()

        task.add_done_callback(_done)

    def _add_worker_task(self, task: asyncio.Task[Any]) -> None:
        """Track a worker task and fail-fast on exceptions."""
        self._track_task(task, self._worker_tasks)

    def _add_background_task(self, task: asyncio.Task[Any]) -> None:
        """Track a background task and fail-fast on exceptions."""
        self._track_task(task, self._background_tasks)
100644 index 000000000..5851b17b6 --- /dev/null +++ b/logprep/ng/poc/async_pipeline/types.py @@ -0,0 +1,107 @@ +""" +Fundamental contracts and abstractions for the async pipeline system. + +This module defines the structural interfaces that decouple workers, +handlers, and pipeline infrastructure. The intent is to establish a +clear separation between execution mechanics and processing logic, +allowing components to remain reusable, composable, and reload-safe. + +All definitions here describe behavior, expectations, and semantic +constraints rather than implementing runtime functionality. +""" + +import asyncio +from abc import ABC, abstractmethod +from collections.abc import Callable, Coroutine +from typing import TypeVar + +T = TypeVar("T") + +SyncHandler = Callable[[list[T]], list[T]] +AsyncHandler = Callable[[list[T]], Coroutine[object, object, list[T]]] +Handler = SyncHandler[T] | AsyncHandler[T] + + +class SizeLimitedQueue(asyncio.Queue[T]): + """ + Bounded asyncio.Queue with explicit semantic intent. + + This subclass exists purely to make queue semantics and typing intent + explicit within the pipeline architecture. + + Differences from asyncio.Queue: + + - Enforces bounded capacity at construction time. + - Signals backpressure semantics at the type level. + - Improves readability by distinguishing pipeline queues from generic queues. + + Parameters + ---------- + maxsize : int + Maximum number of items the queue can hold. + + Must be > 0. A non-positive value is rejected to prevent accidental + creation of unbounded queues, which would break memory budgeting + and backpressure guarantees. + + Raises + ------ + ValueError + If maxsize <= 0. + + Notes + ----- + - Behavior is otherwise identical to asyncio.Queue. + - This class introduces no additional synchronization or scheduling logic. + - Primarily used to encode architectural constraints (memory/backpressure) + rather than functionality. 
+ """ + + def __init__(self, maxsize: int) -> None: + if maxsize <= 0: + raise ValueError("Queue must be bounded") + super().__init__(maxsize=maxsize) + + +class HandlerResolver(ABC): + """ + Resolves handler identifiers to executable handler callables. + + A HandlerResolver provides the indirection layer between pipeline-bound + components (e.g. PipelineWorker) and the concrete handler implementation. + + Implementations are responsible for mapping a handler name/key to a + callable object that processes a batch of items. + + Contract: + + - Input: + name: str + Logical handler identifier (typically configured on workers). + + - Return: + Handler + A callable matching the Handler type contract: + + SyncHandler[T]: (list[T]) -> list[T] + AsyncHandler[T]: (list[T]) -> Awaitable[list[T]] + + - Errors: + AttributeError + Raised if the handler cannot be resolved. + + TypeError + Raised if the resolved object is not a valid Handler. + + Notes: + + - Resolution is intentionally dynamic to support late binding, reloads, + dependency injection, and runtime configuration changes. + + - Implementations may cache results but must remain consistent with + reload / rebinding semantics of the pipeline system. + """ + + @abstractmethod + def resolve(self, name: str) -> Handler: + """Return the handler associated with *name*.""" diff --git a/logprep/ng/poc/async_pipeline/utils/__init__.py b/logprep/ng/poc/async_pipeline/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/logprep/ng/poc/async_pipeline/utils/worker_chain_validator.py b/logprep/ng/poc/async_pipeline/utils/worker_chain_validator.py new file mode 100644 index 000000000..c19cb4cef --- /dev/null +++ b/logprep/ng/poc/async_pipeline/utils/worker_chain_validator.py @@ -0,0 +1,140 @@ +""" +Worker chain validation utilities. + +This module validates queue-based wiring between workers to ensure a strict, +linear pipeline topology. 
"""
Worker chain validation utilities.

This module validates queue-based wiring between workers to ensure a strict,
linear pipeline topology. It provides deterministic ordering for execution and
fails fast on ambiguous or unsafe configurations.
"""

import asyncio
from typing import Any, TypeGuard, TypeVar

from async_pipeline.worker.worker import Worker

W = TypeVar("W", bound=Worker[Any])


def _is_async_queue(obj: object) -> TypeGuard[asyncio.Queue[Any]]:
    """Return True if *obj* is an asyncio.Queue."""
    return isinstance(obj, asyncio.Queue)


def _object_identity(obj: object) -> int:
    """Return identity key used for queue wiring validation."""
    return id(obj)


def _input_queue_identity(worker: Worker[Any]) -> int | None:
    """Identity of the worker's input when it is queue-backed, else None."""
    source = worker.in_queue
    if not _is_async_queue(source):
        # AsyncIterator-fed workers have no queue identity: they are
        # potential chain heads.
        return None
    return _object_identity(source)


def _output_queue_identity(worker: Worker[Any]) -> int | None:
    """Identity of the worker's output queue when one is configured, else None."""
    sink = worker.out_queue
    if sink is None:
        return None
    return _object_identity(sink)


def validate_and_sort_linear_worker_chain(workers: list[W]) -> list[W]:
    """
    Validate and order workers as a strict linear chain.

    Enforces exactly one start worker, forbids fan-in (two consumers on one
    queue) and fan-out (two producers on one queue), detects cycles, and
    requires full connectivity — all based on queue object identity.
    """
    if not workers:
        return []

    # Fan-in check: at most one consumer per input queue.
    consumers: dict[int, W] = {}
    for candidate in workers:
        in_id = _input_queue_identity(candidate)
        if in_id is None:
            continue
        if in_id in consumers:
            raise ValueError(
                f"Invalid worker chain: multiple consumers detected for input queue id {in_id}."
            )
        consumers[in_id] = candidate

    # Fan-out check: at most one producer per output queue.
    producers: dict[int, W] = {}
    for candidate in workers:
        out_id = _output_queue_identity(candidate)
        if out_id is None:
            continue
        if out_id in producers:
            raise ValueError(
                f"Invalid worker chain: multiple producers detected for output queue id {out_id}."
            )
        producers[out_id] = candidate

    # Heads: workers whose input is no queue at all, or a queue nobody feeds.
    # Keyed by object identity so a worker listed twice counts once.
    heads_by_identity: dict[int, W] = {}
    for candidate in workers:
        in_id = _input_queue_identity(candidate)
        if in_id is None or in_id not in producers:
            heads_by_identity.setdefault(id(candidate), candidate)

    start_workers = list(heads_by_identity.values())

    if len(start_workers) != 1:
        names = ", ".join(head.name for head in start_workers) or "none"
        raise ValueError(
            f"Invalid worker chain: expected exactly one start worker, "
            f"got {len(start_workers)} ({names})."
        )

    # Walk the chain head-to-tail following output queue -> consumer links.
    ordered: list[W] = []
    seen_ids: set[int] = set()
    cursor: W | None = start_workers[0]

    while cursor is not None:
        if id(cursor) in seen_ids:
            raise ValueError(
                f"Invalid worker chain: cycle detected at worker {cursor.name}."
            )

        seen_ids.add(id(cursor))
        ordered.append(cursor)

        if cursor.out_queue is None:
            break

        cursor = consumers.get(_object_identity(cursor.out_queue))

    if len(ordered) != len(workers):
        stranded = [
            worker.name for worker in workers if id(worker) not in seen_ids
        ]
        raise ValueError(
            "Invalid worker chain: chain is not fully connected. "
            f"Unreachable workers: {', '.join(stranded)}."
        )

    return ordered
+ """ + super().__init__(*args, **kwargs) + self._handler_name = handler_name + self._handler_resolver: HandlerResolver | None = None + self._resolved_handler: AsyncHandler[T] | SyncHandler[T] | None = None + + if handler_resolver is not None: + self.bind_resolver(handler_resolver) + + def bind_resolver(self, handler_resolver: HandlerResolver) -> None: + """ + Bind a resolver used to resolve the configured handler name. + + Rebinding clears any cached resolved handler so subsequent flushes will + resolve again against the new resolver. + """ + # Fail fast: must be a real subclass of the ABC + if not isinstance(handler_resolver, HandlerResolver): + raise TypeError( + f"handler_resolver must be an instance of HandlerResolver (ABC). " + f"Got: {type(handler_resolver).__name__}" + ) + + self._handler_resolver = handler_resolver + self._resolved_handler = None + + def _ensure_resolved_handler(self) -> AsyncHandler[T] | SyncHandler[T]: + """ + Resolve and cache the handler for this worker. + + Returns a callable matching the handler contract. Resolution is performed + once per binding and cached until bind_resolver() is called again. + """ + if self._resolved_handler is not None: + return self._resolved_handler + + if self._handler_resolver is None: + raise RuntimeError( + f"PipelineWorker {self.name!r} requires a resolver to resolve {self._handler_name!r}." + ) + + handler: Handler = self._handler_resolver.resolve(self._handler_name) + + if not callable(handler): + raise TypeError(f"Resolved handler {self._handler_name!r} is not callable") + + self._resolved_handler = handler + return self._resolved_handler + + async def _flush_batch(self, batch: list[T]) -> None: + """ + Flush a batch using a lazily resolved handler. + + Ensures the handler is resolved before delegating to the base Worker + flush implementation. 
+ """ + self._handler = self._ensure_resolved_handler() + await super()._flush_batch(batch) diff --git a/logprep/ng/poc/async_pipeline/worker/worker.py b/logprep/ng/poc/async_pipeline/worker/worker.py new file mode 100644 index 000000000..cc748c009 --- /dev/null +++ b/logprep/ng/poc/async_pipeline/worker/worker.py @@ -0,0 +1,227 @@ +""" +Worker execution and batching mechanics. + +This module provides the standalone Worker abstraction responsible for +input consumption, deterministic batching, optional batch processing, +and cooperative shutdown behavior. + +The worker is intentionally decoupled from pipeline orchestration logic +and focuses solely on predictable buffering, flushing, and backpressure +interaction with the output queue. +""" + +import asyncio +import inspect +from collections import deque +from collections.abc import AsyncIterator +from typing import Generic, TypeVar + +from async_pipeline.types import AsyncHandler, SizeLimitedQueue, SyncHandler + +T = TypeVar("T") + + +class Worker(Generic[T]): + """ + Generic batching worker with cooperative shutdown semantics. + + A Worker consumes items from an input source, buffers them into batches + based on size and/or time thresholds, optionally applies a handler, and + forwards results to an output queue. + + The worker is intentionally standalone and independent from pipeline + orchestration logic. + + Responsibilities + ---------------- + - Input consumption (Queue or AsyncIterator) + - Size/time-based batching + - Optional batch processing via handler + - Output forwarding + - Graceful cancellation and final flush + + Lifecycle + --------- + run() + Start the worker loop until stop_event is set or the task is cancelled. + + stop_event + Cooperative shutdown signal used by external coordinators. + + Guarantees + ---------- + - Buffered items are flushed on cancellation or shutdown. + - Batch triggers remain deterministic (size vs timer). + - No implicit threading or scheduling side effects. 
+ + Notes + ----- + - The worker does not own the event loop. + - Backpressure behavior is delegated to the output queue. + - Handler execution may be synchronous or asynchronous. + """ + + def __init__( + self, + name: str, + batch_size: int, + batch_interval_s: float, + in_queue: asyncio.Queue[T] | AsyncIterator[T], + out_queue: SizeLimitedQueue[T] | None = None, + handler: AsyncHandler[T] | SyncHandler[T] | None = None, + ) -> None: + self.name = name + + self.in_queue = in_queue + self.out_queue = out_queue + self._handler = handler + + self.stop_event = asyncio.Event() + + self._buffer: deque[T] = deque() + self._buffer_lock = asyncio.Lock() + + self._timer_task: asyncio.Task[None] | None = None + self._batch_size = batch_size + self._batch_interval_s = batch_interval_s + + def _start_timer_locked(self) -> None: + """ + Arm or re-arm the batch timer. + + Must be called with _buffer_lock held. Ensures that at most one + timer task is active for the current batch window. + """ + if self._timer_task and not self._timer_task.done(): + self._timer_task.cancel() + self._timer_task = asyncio.create_task(self._flush_after_interval()) + + def _cancel_timer_if_needed(self) -> None: + """ + Cancel the active timer task if it is still pending. + + Avoids cancelling the currently executing timer task to prevent + self-cancellation race conditions. + """ + t = self._timer_task + if not t or t.done(): + return + if t is asyncio.current_task(): + return + t.cancel() + + async def _flush_after_interval(self) -> None: + """ + Timer coroutine responsible for time-based batch flushing. + + Sleeps for the configured interval and flushes the buffered items + if the batch has not already been drained by the size trigger. 
+ """ + try: + await asyncio.sleep(self._batch_interval_s) + except asyncio.CancelledError: + return + + batch: list[T] | None = None + async with self._buffer_lock: + if self._buffer: + batch = self._drain_locked() + if self._timer_task is asyncio.current_task(): + self._timer_task = None + + if batch: + await self._flush_batch(batch) + + def _drain_locked(self) -> list[T]: + """ + Drain the current buffer contents. + + Must be called with _buffer_lock held. Cancels any active timer + and returns a snapshot of buffered items. + """ + batch = list(self._buffer) + self._buffer.clear() + self._cancel_timer_if_needed() + self._timer_task = None + return batch + + async def add(self, item: T) -> None: + """ + Add a single item to the batch buffer. + + May trigger a flush if the size threshold is reached. Starts the + batch timer when the first item of a new batch arrives. + """ + batch_to_flush: list[T] | None = None + + async with self._buffer_lock: + self._buffer.append(item) + + if len(self._buffer) == 1: + self._start_timer_locked() + + if len(self._buffer) >= self._batch_size: + batch_to_flush = self._drain_locked() + + if batch_to_flush: + await self._flush_batch(batch_to_flush) + + async def flush(self) -> None: + """ + Force flushing of buffered items. + + Drains and processes the current buffer regardless of size or + timer state. + """ + batch_to_flush: list[T] | None = None + async with self._buffer_lock: + if self._buffer: + batch_to_flush = self._drain_locked() + if batch_to_flush: + await self._flush_batch(batch_to_flush) + + async def _flush_batch(self, batch: list[T]) -> None: + """ + Process and forward a completed batch. + + Applies the optional handler and forwards the resulting items to + the output queue if configured. 
+ """ + batch_result: list[T] = batch + + if self._handler is not None: + result = self._handler(batch_result) + if inspect.isawaitable(result): + batch_result = await result + else: + batch_result = result + + if self.out_queue is not None: + for item in batch_result: + await self.out_queue.put(item) + await asyncio.sleep(0) + + async def run(self) -> None: + """ + Execute the worker processing loop. + + Continuously consumes items until stop_event is set or the task is + cancelled. Ensures a final buffer flush during shutdown. + """ + + try: + while not self.stop_event.is_set(): + if isinstance(self.in_queue, asyncio.Queue): + item = await self.in_queue.get() + try: + await self.add(item) + finally: + self.in_queue.task_done() + else: + item = await anext(self.in_queue) + await self.add(item) + + except asyncio.CancelledError: + pass + finally: + await self.flush() diff --git a/logprep/ng/poc/main.py b/logprep/ng/poc/main.py new file mode 100644 index 000000000..85472b496 --- /dev/null +++ b/logprep/ng/poc/main.py @@ -0,0 +1,95 @@ +import asyncio + +from async_pipeline.types import SizeLimitedQueue +from async_pipeline.worker.pipeline_worker import PipelineWorker +from mocked.mocking_functions import iter_input_pull +from mocked.mocking_types import Event +from pipeline_manager import ConcretePipelineManager + +MAX_QUEUE_SIZE = 100_000 + +BATCH_SIZE = 20_000 +BATCH_INTERVAL_S = 5 + + +def get_workers() -> list[PipelineWorker[Event]]: + input_worker: PipelineWorker[Event] = PipelineWorker( + name="input_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=aiter(iter_input_pull()), + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler_name="handler_input_data", + ) + + processor_worker: PipelineWorker[Event] = PipelineWorker( + name="processor_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=input_worker.out_queue, + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + 
handler_name="handler_processor_data", + ) + + output_1_worker: PipelineWorker[Event] = PipelineWorker( + name="output_1_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=processor_worker.out_queue, + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler_name="handler_output_1_data", + ) + + output_2_worker: PipelineWorker[Event] = PipelineWorker( + name="output_2_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=output_1_worker.out_queue, + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler_name="handler_output_2_data", + ) + + acknowledge_worker: PipelineWorker[Event] = PipelineWorker( + name="acknowledge_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=output_2_worker.out_queue, + out_queue=None, + handler_name="handler_acknowledgement_data", + ) + + return [ + input_worker, + processor_worker, + output_1_worker, + output_2_worker, + acknowledge_worker, + ] + + +def main() -> None: + loop = asyncio.new_event_loop() + try: + asyncio.set_event_loop(loop) + + pipeline_manager = ConcretePipelineManager( + workers=get_workers(), + loop=loop, + ) + + loop.run_until_complete(pipeline_manager.run()) + finally: + loop.close() + + +async def async_main() -> None: + pipeline_manager = ConcretePipelineManager( + workers=get_workers(), + ) + + await pipeline_manager.run() + + +if __name__ == "__main__": + # asyncio.run(async_main()) + main() diff --git a/logprep/ng/poc/main_standalone_worker.py b/logprep/ng/poc/main_standalone_worker.py new file mode 100644 index 000000000..41e539f6b --- /dev/null +++ b/logprep/ng/poc/main_standalone_worker.py @@ -0,0 +1,130 @@ +import asyncio + +from async_pipeline.types import SizeLimitedQueue +from async_pipeline.worker.worker import Worker +from mocked.mocking_functions import iter_input_pull +from mocked.mocking_types import Event + +MAX_QUEUE_SIZE = 100_000 + +BATCH_SIZE = 2_500 +BATCH_INTERVAL_S = 5 + + +# ---- handlers 
(match: (list[Event]) -> list[Event]) ---- + +acked = 0 + + +async def handler_input_data(events: list[Event]) -> list[Event]: + print(f"[handler_input_data] batch={len(events)}") + await asyncio.sleep(1) + return events + + +async def handler_processor_data(events: list[Event]) -> list[Event]: + print(f"[handler_processor_data] batch={len(events)}") + await asyncio.sleep(1) + return events + + +async def handler_output_1_data(events: list[Event]) -> list[Event]: + print(f"[handler_output_1_data] batch={len(events)}") + await asyncio.sleep(1) + return events + + +async def handler_output_2_data(events: list[Event]) -> list[Event]: + print(f"[handler_output_2_data] batch={len(events)}") + await asyncio.sleep(1) + return events + + +async def handler_acknowledgement_data(events: list[Event]) -> list[Event]: + global acked + + print(f"[handler_acknowledgement_data] batch={len(events)}") + await asyncio.sleep(1) + + acked += len(events) + print(f">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Total {acked=}") + return events + + +def get_workers() -> list[Worker[Event]]: + input_worker: Worker[Event] = Worker( + name="input_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=aiter(iter_input_pull()), + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler=handler_input_data, + ) + + processor_worker: Worker[Event] = Worker( + name="processor_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=input_worker.out_queue, + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler=handler_processor_data, + ) + + output_1_worker: Worker[Event] = Worker( + name="output_1_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=processor_worker.out_queue, + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler=handler_output_1_data, + ) + + output_2_worker: Worker[Event] = Worker( + name="output_2_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + 
in_queue=output_1_worker.out_queue, + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler=handler_output_2_data, + ) + + acknowledge_worker: Worker[Event] = Worker( + name="acknowledge_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=output_2_worker.out_queue, + out_queue=None, + handler=handler_acknowledgement_data, + ) + + return [ + input_worker, + processor_worker, + output_1_worker, + output_2_worker, + acknowledge_worker, + ] + + +def main() -> None: + loop = asyncio.new_event_loop() + try: + asyncio.set_event_loop(loop) + + workers = get_workers() + tasks = [loop.create_task(w.run()) for w in workers] + + # Demo: keep running; Ctrl+C stops + loop.run_until_complete(asyncio.gather(*tasks)) + finally: + loop.close() + + +async def async_main() -> None: + workers = get_workers() + await asyncio.gather(*(asyncio.create_task(w.run()) for w in workers)) + + +if __name__ == "__main__": + # asyncio.run(async_main()) + main() diff --git a/logprep/ng/poc/mocked/__init__.py b/logprep/ng/poc/mocked/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/logprep/ng/poc/mocked/mocking_functions.py b/logprep/ng/poc/mocked/mocking_functions.py new file mode 100644 index 000000000..c4e2f8e66 --- /dev/null +++ b/logprep/ng/poc/mocked/mocking_functions.py @@ -0,0 +1,27 @@ +import json +import random +import time +import uuid +from collections.abc import AsyncIterator + +from logprep.ng.poc.mocked.mocking_types import Event + + +# HELPER +async def iter_input_pull() -> AsyncIterator[Event]: + while True: + event_id = uuid.uuid4() + yield Event( + event_id=event_id, + payload=json.dumps({"additional_data": f"{event_id}"}), + ) + + +async def store(events: list[Event], topic: str) -> None: + # blocking sleep + time.sleep(random.randint(1, 5) / 10) + + +async def commit(events: list[Event]) -> None: + # blocking sleep + time.sleep(random.randint(1, 5) / 10) diff --git a/logprep/ng/poc/mocked/mocking_processor.py 
b/logprep/ng/poc/mocked/mocking_processor.py new file mode 100644 index 000000000..54a74579a --- /dev/null +++ b/logprep/ng/poc/mocked/mocking_processor.py @@ -0,0 +1,19 @@ +import json +import random +import time + +from mocked.mocking_types import Event + + +class Processor: + @staticmethod + def process(events: list[Event]) -> list[Event]: + for event in events: + new_payload = json.loads(event.payload) + new_payload["processed"] = True + event.payload = json.dumps(new_payload) + + # blocking sleep + time.sleep(random.randint(1, 5) / 10) + + return events diff --git a/logprep/ng/poc/mocked/mocking_types.py b/logprep/ng/poc/mocked/mocking_types.py new file mode 100644 index 000000000..a249e5e27 --- /dev/null +++ b/logprep/ng/poc/mocked/mocking_types.py @@ -0,0 +1,50 @@ +import enum +import json +import uuid +from collections.abc import Iterator +from dataclasses import dataclass, field +from typing import Any + + +class State(str, enum.Enum): + RECEIVING = "receiving" + RECEIVED = "received" + PROCESSING = "processing" + PROCESSED = "processed" + STORING_OUTPUT_1 = "storing_output_1" + STORED_OUTPUT_1 = "stored_output_1" + STORING_OUTPUT_2 = "storing_output_2" + STORED_OUTPUT_2 = "stored_output_2" + DELIVERING = "delivering" + DELIVERED = "delivered" + ACKNOWLEDGING = "acknowledging" + ACKNOWLEDGED = "acknowledged" + + +@dataclass +class Event: + payload: str + state: State = State.RECEIVING + event_id: uuid.UUID = field(default_factory=uuid.uuid4) + errors: list[str] = field(default_factory=list) + + def update_payload(self, payload: dict[str, Any]) -> None: + """Convenience helper for the demo: replace payload JSON.""" + + self.payload = json.dumps(payload) + + def __iter__(self) -> Iterator[tuple[str, Any]]: + """Iterate over the JSON payload as key/value pairs. + + This makes `Event` usable in contexts that expect an iterable without + relying on the previously incorrect `__iter__` signature. 
+ """ + + try: + data = json.loads(self.payload) + except json.JSONDecodeError: + return iter(()) + + if isinstance(data, dict): + return iter(data.items()) + return iter(()) diff --git a/logprep/ng/poc/pipeline_manager.py b/logprep/ng/poc/pipeline_manager.py new file mode 100644 index 000000000..58779f01f --- /dev/null +++ b/logprep/ng/poc/pipeline_manager.py @@ -0,0 +1,218 @@ +""" +Concrete pipeline manager implementation. + +This module defines a PipelineManager specialization responsible for +state tracking, backlog management, and runtime metrics. It encapsulates +pipeline-specific processing behavior while preserving the generic +lifecycle and orchestration semantics of the base manager. +""" + +import asyncio +import random +import uuid +from asyncio import AbstractEventLoop +from collections import Counter +from typing import Any + +from async_pipeline.pipeline_manager import PipelineManager, background_task +from async_pipeline.worker.pipeline_worker import PipelineWorker +from mocked.mocking_functions import commit, store +from mocked.mocking_processor import Processor +from mocked.mocking_types import Event, State + + +class ConcretePipelineManager(PipelineManager): + """ + PipelineManager specialization with event state tracking and metrics. + + Maintains an internal backlog for lifecycle/state visibility and + provides concrete handler implementations for pipeline stages. 
+ """ + + def __init__( + self, + workers: list[PipelineWorker[Event]], + loop: AbstractEventLoop | None = None, + ) -> None: + """Initialize backlog storage and runtime metric tracking.""" + super().__init__(workers=workers, loop=loop) + + self._event_backlog: dict[uuid.UUID, Event] = {} + self._event_backlog_lock = asyncio.Lock() + + self._metric: dict[str, Any] = { + "start_time": None, + "last_time": None, + "last_acked": 0, + "peak_rate": 0, + "total_acknowledged": 0, + } + + async def _update_event_states( + self, + events: list[Event], + new_state: State, + *, + locked: bool = False, + ) -> None: + """Update state for events, optionally assuming external lock ownership.""" + if locked: + for event in events: + event.state = new_state + self._event_backlog[event.event_id].state = new_state + else: + async with self._event_backlog_lock: + for event in events: + event.state = new_state + self._event_backlog[event.event_id].state = new_state + + async def handler_input_data(self, events: list[Event]) -> list[Event]: + """Register incoming events and mark them as received.""" + async with self._event_backlog_lock: + for event in events: + self._event_backlog.setdefault(event.event_id, event) + + await self._update_event_states( + events=events, + new_state=State.RECEIVED, + locked=True, + ) + + return events + + async def handler_processor_data(self, events: list[Event]) -> list[Event]: + """Process events and transition through processing states.""" + await self._update_event_states(events=events, new_state=State.PROCESSING) + + processed_events = await asyncio.to_thread(Processor.process, events) + # processed_events = Processor.process(events) + + await self._update_event_states( + events=processed_events, + new_state=State.PROCESSED, + ) + + return processed_events + + async def handler_output_1_data(self, events: list[Event]) -> list[Event]: + """Simulate output stage 1 storage.""" + await self._update_event_states(events=events, 
new_state=State.STORING_OUTPUT_1) + + await asyncio.sleep(random.randint(1, 5) / 10) + + await self._update_event_states(events=events, new_state=State.STORED_OUTPUT_1) + + return events + + async def handler_output_2_data(self, events: list[Event]) -> list[Event]: + """Simulate output stage 2 storage.""" + await self._update_event_states(events=events, new_state=State.STORING_OUTPUT_2) + + await asyncio.sleep(random.randint(1, 5) / 10) + + await self._update_event_states(events=events, new_state=State.STORED_OUTPUT_2) + + return events + + async def handler_delivery_data(self, events: list[Event]) -> list[Event]: + """Deliver processed events to the external sink.""" + await self._update_event_states(events=events, new_state=State.DELIVERING) + + await store(events, "output_data") + + await self._update_event_states(events=events, new_state=State.DELIVERED) + + return events + + async def handler_acknowledgement_data(self, events: list[Event]) -> list[Event]: + """Acknowledge delivered events and update metrics.""" + await self._update_event_states(events=events, new_state=State.ACKNOWLEDGING) + + await commit(events) + + async with self._event_backlog_lock: + await self._update_event_states( + events=events, + new_state=State.ACKNOWLEDGED, + locked=True, + ) + + self._metric["total_acknowledged"] += len(events) + total_acked = self._metric["total_acknowledged"] + + self._print_metric(total_acked=total_acked, acked=len(events)) + return events + + def _print_metric(self, total_acked: int, acked: int) -> None: + """Update and display runtime throughput metrics.""" + now = asyncio.get_running_loop().time() + + if self._metric["start_time"] is None: + self._metric["start_time"] = now + + elapsed = now - self._metric["start_time"] + + h, rem = divmod(int(elapsed), 3600) + m, s = divmod(rem, 60) + + elapsed_min = elapsed / 60 + acked_delta = total_acked - self._metric["last_acked"] + + last_time = self._metric["last_time"] + time_delta = now - last_time if last_time 
else 0 + + live_rate = (acked_delta / time_delta) * 60 if time_delta else 0 + avg_rate = total_acked / elapsed_min if elapsed_min else 0 + + self._metric["peak_rate"] = max(self._metric["peak_rate"], live_rate) + + self._metric["last_time"] = now + self._metric["last_acked"] = total_acked + + print( + f"Running: {h}h {m}m {s}s | " + f"Acked: {acked:_} | " + f"Total Acked: {total_acked:_} | " + f"Avg Rate: {avg_rate:_.1f}/min | " + f"Live Rate: {live_rate:_.1f}/min | " + f"Peak Rate: {self._metric['peak_rate']:_.1f}/min" + ) + + @background_task + async def _clean_up_delivered_events(self) -> None: + """Remove acknowledged events from the backlog.""" + while not self.stop_event.is_set(): + async with self._event_backlog_lock: + acknowledged = [ + eid for eid, e in self._event_backlog.items() if e.state is State.ACKNOWLEDGED + ] + + for eid in acknowledged: + del self._event_backlog[eid] + + await asyncio.sleep(10) + + async def _show_metric(self) -> None: + """Continuously display backlog state distribution.""" + while not self.stop_event.is_set(): + async with self._event_backlog_lock: + counter = Counter(event.state for event in self._event_backlog.values()) + total = len(self._event_backlog) + + print( + f"\nEvents: {total},\n" + f"Receiving: {counter[State.RECEIVING]:_},\n" + f"Received: {counter[State.RECEIVED]:_},\n" + f"Processing: {counter[State.PROCESSING]:_},\n" + f"Processed: {counter[State.PROCESSED]:_},\n" + f"Storing_output_1: {counter[State.STORING_OUTPUT_1]:_},\n" + f"Stored_output_1: {counter[State.STORED_OUTPUT_1]:_},\n" + f"Storing_output_2: {counter[State.STORING_OUTPUT_2]:_},\n" + f"Stored_output_2: {counter[State.STORED_OUTPUT_2]:_},\n" + f"Delivering: {counter[State.DELIVERING]:_},\n" + f"Delivered: {counter[State.DELIVERED]:_},\n" + f"Acknowledging: {counter[State.ACKNOWLEDGING]:_},\n" + f"Acknowledged: {counter[State.ACKNOWLEDGED]:_}\n" + ) + + await asyncio.sleep(random.randint(1, 5) / 2) diff --git a/pyproject.toml b/pyproject.toml index 
07ba18812..c80ef420e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -162,7 +162,7 @@ before-build = "curl -sSf https://sh.rustup.rs | sh -s -- -y" environment = 'PATH=$HOME/.cargo/bin:$PATH' [tool.pylint.MAIN] -ignore = ".venv" +ignore = ".venv, poc" fail-under = 9.5 [tool.pylint.FORMAT] From b7a200b7dcfa5a754b680280fb050ed93e5e4b5a Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Tue, 24 Feb 2026 21:27:20 +0100 Subject: [PATCH 02/68] add debug launch configuration for ng --- .vscode/launch.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index 1b682f69b..3c0da7fe0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -15,6 +15,21 @@ "PROMETHEUS_MULTIPROC_DIR": "tmp/logprep" }, "justMyCode": false + }, + { + "name": "Debug ng example pipeline", + "type": "debugpy", + "request": "launch", + "program": "logprep/run_ng.py", + "console": "integratedTerminal", + "args": [ + "run", + "examples/exampledata/config/ng_pipeline.yml" + ], + "env": { + "PROMETHEUS_MULTIPROC_DIR": "tmp/logprep" + }, + "justMyCode": false } ] } From 0a3350e88ffd4021d538d4a85ab089d813bda7a8 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Tue, 24 Feb 2026 21:28:01 +0100 Subject: [PATCH 03/68] add integration draft --- examples/exampledata/config/ng_pipeline.yml | 1 + logprep/ng/abc/input.py | 8 +- logprep/ng/manager.py | 159 ++++++++++ logprep/ng/pipeline.py | 53 +--- logprep/ng/runner.py | 272 +++++------------ logprep/ng/sender.py | 52 +--- logprep/ng/util/async.py | 31 ++ logprep/ng/util/worker/types.py | 45 +++ logprep/ng/util/worker/worker.py | 320 ++++++++++++++++++++ logprep/run_ng.py | 3 +- pyproject.toml | 2 +- 11 files changed, 669 insertions(+), 277 deletions(-) create mode 100644 logprep/ng/manager.py create mode 100644 logprep/ng/util/async.py create mode 100644 logprep/ng/util/worker/types.py create mode 100644 
logprep/ng/util/worker/worker.py diff --git a/examples/exampledata/config/ng_pipeline.yml b/examples/exampledata/config/ng_pipeline.yml index 5a173a0fb..fc8c4dbda 100644 --- a/examples/exampledata/config/ng_pipeline.yml +++ b/examples/exampledata/config/ng_pipeline.yml @@ -17,6 +17,7 @@ logger: "uvicorn.access": {"level": "ERROR"} "OpenSearchOutput": {"level": "DEBUG"} "KafkaOutput": {"level": "ERROR"} + "Input": {"level": "ERROR"} metrics: enabled: true port: 8001 diff --git a/logprep/ng/abc/input.py b/logprep/ng/abc/input.py index 67e0db9b9..be632f42d 100644 --- a/logprep/ng/abc/input.py +++ b/logprep/ng/abc/input.py @@ -10,7 +10,7 @@ import typing import zlib from abc import abstractmethod -from collections.abc import Iterator +from collections.abc import AsyncIterator from copy import deepcopy from functools import cached_property from hmac import HMAC @@ -87,7 +87,7 @@ class SourceDisconnectedWarning(InputWarning): """Lost (or failed to establish) contact with the source.""" -class InputIterator(Iterator): +class InputIterator(AsyncIterator): """Base Class for an input Iterator""" def __init__(self, input_connector: "Input", timeout: float): @@ -131,6 +131,10 @@ def __next__(self) -> LogEvent | None: ) return event + async def __anext__(self): + # TODO implement properly + return self.__next__() + class Input(Connector): """Connect to a source for log data.""" diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py new file mode 100644 index 000000000..7ae36f270 --- /dev/null +++ b/logprep/ng/manager.py @@ -0,0 +1,159 @@ +""" +Runner module +""" + +import logging +import typing +from typing import cast + +from logprep.factory import Factory +from logprep.ng.abc.input import Input +from logprep.ng.abc.output import Output +from logprep.ng.abc.processor import Processor +from logprep.ng.event.event_state import EventStateType +from logprep.ng.event.log_event import LogEvent +from logprep.ng.event.set_event_backlog import SetEventBacklog +from 
logprep.ng.pipeline import Pipeline +from logprep.ng.sender import Sender +from logprep.ng.util.configuration import Configuration +from logprep.ng.util.worker.types import SizeLimitedQueue +from logprep.ng.util.worker.worker import TransferWorker, Worker, WorkerOrchestrator + +logger = logging.getLogger("PipelineManager") + + +MAX_QUEUE_SIZE = 100_000 + +BATCH_SIZE = 2_500 +BATCH_INTERVAL_S = 5 + + +class PipelineManager: + """Orchestrator class managing pipeline inputs, processors and outputs""" + + def __init__(self, configuration: Configuration) -> None: + """Initialize the component from the given `configuration`.""" + + self.configuration = configuration + + def _setup(self): + self._event_backlog = SetEventBacklog() + + self._input_connector = cast(Input, Factory.create(self.configuration.input)) + self._input_connector.event_backlog = self._event_backlog # TODO needs to be disentangled + self._input_connector.setup() + + processors = [ + typing.cast(Processor, Factory.create(processor_config)) + for processor_config in self.configuration.pipeline + ] + for processor in processors: + processor.setup() + + self._pipeline = Pipeline(processors) + + output_connectors = [ + typing.cast(Output, Factory.create({output_name: output})) + for output_name, output in self.configuration.output.items() + ] + + error_output = ( + typing.cast(Output, Factory.create(self.configuration.error_output)) + if self.configuration.error_output + else None + ) + + if error_output is None: + logger.warning("No error output configured.") + + self._sender = Sender(outputs=output_connectors, error_output=error_output) + self._sender.setup() + + self._orchestrator = self._create_orchestrator() + + def _create_orchestrator(self) -> WorkerOrchestrator: + input_worker: Worker[LogEvent, LogEvent] = TransferWorker( + name="input_worker", + batch_size=1, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=self._input_connector(timeout=self.configuration.timeout), + 
out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + ) + + async def process(batch: list[LogEvent]) -> list[LogEvent]: + return [self._pipeline.process(event) for event in batch] + + processing_worker: Worker[LogEvent, LogEvent] = Worker( + name="processing_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=input_worker.out_queue, # type: ignore + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler=process, + ) + + async def send(batch: list[LogEvent]) -> list[LogEvent]: + return self._sender.process(batch) + + output_worker: Worker[LogEvent, LogEvent] = Worker( + name="output_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=processing_worker.out_queue, # type: ignore + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler=send, + ) + + acknowledge_worker: Worker[LogEvent, LogEvent] = Worker( + name="acknowledge_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=output_worker.out_queue, # type: ignore + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler=self._process_sent_events, + ) + + return WorkerOrchestrator( + workers=[input_worker, processing_worker, output_worker, acknowledge_worker] + ) + + async def run(self) -> None: + """Run the runner and continuously process events until stopped.""" + + self._setup() + await self._orchestrator.run() + + async def shut_down(self) -> None: + """Shut down runner components, and required runner attributes.""" + + if self._orchestrator is not None: + await self._orchestrator.shut_down(1) + + if self._sender is not None: + self._sender.shut_down() + self._input_connector.acknowledge() + + len_delivered_events = len( + list(self._input_connector.event_backlog.get(EventStateType.DELIVERED)) + ) + if len_delivered_events: + logger.error( + "Input connector has %d non-acked events in event_backlog.", len_delivered_events + ) + + logger.info("Runner shut down complete.") + + async def 
_process_sent_events(self, batch: list[LogEvent]) -> list[LogEvent]: + """Process a batch of events got from sender iterator.""" + + logger.debug("Got batch of events from sender (batch_size=%d).", len(batch)) + for event in batch: + if event is None: + continue + + if event.state == EventStateType.FAILED: + logger.error("event failed: %s", event) + else: + logger.debug("event processed: %s", event.state) + + return batch diff --git a/logprep/ng/pipeline.py b/logprep/ng/pipeline.py index ace4e8a9c..5bca9ce05 100644 --- a/logprep/ng/pipeline.py +++ b/logprep/ng/pipeline.py @@ -1,9 +1,6 @@ """pipeline module for processing events through a series of processors.""" import logging -from collections.abc import Iterator -from functools import partial -from typing import Generator from logprep.ng.abc.processor import Processor from logprep.ng.event.log_event import LogEvent @@ -14,7 +11,7 @@ def _process_event(event: LogEvent | None, processors: list[Processor]) -> LogEvent: """process all processors for one event""" if event is None or not event.data: - return None + raise ValueError("no event given") event.state.next_state() for processor in processors: if not event.data: @@ -28,49 +25,29 @@ def _process_event(event: LogEvent | None, processors: list[Processor]) -> LogEv return event -class Pipeline(Iterator): - """Pipeline class to process events through a series of processors. - Examples: - >>> from logprep.ng.event.log_event import LogEvent - >>> from logprep.ng.abc.event import Event - >>> class MockProcessor: - ... def process(self, event: LogEvent) -> None: - ... event.data["processed"] = True - ... - >>> - >>> # Create test events - >>> events = [ - ... LogEvent({"message": "test1"}, original=b""), - ... LogEvent({"message": "test2"}, original=b"") - ... 
] - >>> processors = [MockProcessor()] - >>> - >>> # Create and run pipeline - >>> pipeline = Pipeline(iter(events), processors) - >>> processed_events = list(pipeline) - >>> len(processed_events) - 2 - >>> processed_events[0].data["processed"] - True - >>> processed_events[1].data["message"] - 'test2' - """ +class Pipeline: + """Pipeline class to process events through a series of processors.""" def __init__( self, - log_events_iter: Iterator[LogEvent], processors: list[Processor], ) -> None: self.processors = processors - self.log_events_iter = log_events_iter - def __iter__(self) -> Generator[LogEvent | None, None, None]: - """Iterate over processed events.""" + def process(self, event: LogEvent) -> LogEvent: + """Process the given event through the series of configured processors - yield from map(partial(_process_event, processors=self.processors), self.log_events_iter) + Parameters + ---------- + event : LogEvent + The event to be processed and modified in-place. - def __next__(self): - raise NotImplementedError("Use iteration to get processed events.") + Returns + ------- + LogEvent + The event which was presented as an input and modified in-place. 
+ """ + return _process_event(event, processors=self.processors) def shut_down(self) -> None: """Shutdown the pipeline gracefully.""" diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 7bd90cdf9..75f510cea 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -2,29 +2,26 @@ Runner module """ +import asyncio import json import logging -import logging.config import os import warnings -from typing import cast +from collections.abc import AsyncGenerator from attrs import asdict -from logprep.factory import Factory -from logprep.ng.abc.input import Input -from logprep.ng.abc.output import Output -from logprep.ng.abc.processor import Processor -from logprep.ng.event.event_state import EventStateType -from logprep.ng.event.set_event_backlog import SetEventBacklog -from logprep.ng.pipeline import Pipeline -from logprep.ng.sender import Sender +from logprep.ng.manager import PipelineManager from logprep.ng.util.configuration import Configuration from logprep.ng.util.defaults import DEFAULT_LOG_CONFIG logger = logging.getLogger("Runner") +GRACEFUL_SHUTDOWN_TIMEOUT = 10 +MAX_CONFIG_REFRESH_INTERVAL_DEVIATION_PERCENT = 0.05 + + class Runner: """Class, a singleton runner, responsible for running the log processing pipeline.""" @@ -43,195 +40,98 @@ def __init__(self, configuration: Configuration) -> None: """ self.configuration = configuration - self._running_config_version = configuration.version - self._input_connector: Input | None = None - - # Initialized in `setup()`; updated by runner logic thereafter: - self.should_exit: bool | None = None - self.sender: Sender | None = None - - self.setup() - - def _initialize_pipeline(self) -> Pipeline: - """Initialize the pipeline from the given `configuration`. 
- - This method performs the following tasks: - - - Creates components based on the configuration: - - input connector - - processors - - - Sets up the input connector: - - attaches an event backlog - - calls its `setup()` method - - initializes its iterator with the configured timeout - - - Validates that: - - an input connector is configured - - all processors are properly configured - - - Instantiates the `Pipeline` with: - - the input connector iterator - - the list of processors - - Returns - ------- - Pipeline - The instantiated pipeline instance (not yet set up). - """ - - self._input_connector = cast(Input, Factory.create(self.configuration.input)) - self._input_connector.event_backlog = SetEventBacklog() - self._input_connector.setup() - - input_iterator = self._input_connector(timeout=self.configuration.timeout) - processors = cast( - list[Processor], - [Factory.create(processor_config) for processor_config in self.configuration.pipeline], - ) - - return Pipeline( - log_events_iter=input_iterator, - processors=cast(list[Processor], processors), - ) + self._running_config_version: None | str = None + self._main_task: asyncio.Task | None = None - def _initialize_sender(self) -> Sender: - """Initialize the sender from the given `configuration`. - - This method performs the following tasks: - - - Creates components based on the configuration: - - output connectors - - error output - - - Validates that: - - all output connectors are configured - - an error output is available - - - Instantiates the `Sender` with: - - the initialized pipeline - - configured outputs - - configured error output - - process count from configuration - - Returns - ------- - Sender - The instantiated sender instance (not yet set up). 
- """ - - output_connectors = cast( - list[Output], - [ - Factory.create({output_name: output}) - for output_name, output in self.configuration.output.items() - ], - ) - - error_output: Output | None = ( - Factory.create(self.configuration.error_output) - if self.configuration.error_output - else None - ) - - if error_output is None: - logger.warning("No error output configured.") - - return Sender( - pipeline=self._initialize_pipeline(), - outputs=cast(list[Output], output_connectors), - error_output=error_output, - process_count=self.configuration.process_count, - ) - - def run(self) -> None: - """Run the runner and continuously process events until stopped. - - This method starts the main processing loop, refreshes the configuration - if needed, processes event batches, and only exits once `stop()` has been - called (setting `should_exit` to True). At the end, it shuts down all - components gracefully. - """ - - # TODO: - # * integration tests + self._pipeline_manager: PipelineManager | None = None + async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None]: self.configuration.schedule_config_refresh() - + refresh_interval = self.configuration.config_refresh_interval while True: - if self.should_exit: - logger.debug("Runner exiting.") - break - - logger.debug("Runner processing loop.") - - logger.debug("Check configuration change before processing a batch of events.") self.configuration.refresh() if self.configuration.version != self._running_config_version: - self.reload() - - logger.debug("Process next batch of events.") - self._process_events() - - self.shut_down() - logger.debug("End log processing.") - - def _process_events(self) -> None: - """Process a batch of events got from sender iterator.""" + yield self.configuration + self._running_config_version = self.configuration.version + refresh_interval = self.configuration.config_refresh_interval + + if refresh_interval is not None: + try: + await asyncio.sleep( + # realistic bad 
case: starting to sleep just a moment before scheduled time + # unlikely worst case: starting to sleep even after scheduled time + # (if yield takes some time and interval is short) + # --> compensate bad case by giving an upper boundary to the deviation + refresh_interval + * MAX_CONFIG_REFRESH_INTERVAL_DEVIATION_PERCENT + ) + except asyncio.CancelledError: + logger.debug("Config refresh cancelled. Exiting...") + raise + else: + logger.debug("Config refresh has been disabled.") + break - logger.debug("Start log processing.") + async def _run_pipeline(self, config: Configuration) -> tuple[PipelineManager, asyncio.Task]: + manager = PipelineManager(config) + manager_task = asyncio.create_task(manager.run(), name="pipeline_manager") + return manager, manager_task + + async def _shut_down_pipeline( + self, manager: PipelineManager, manager_task: asyncio.Task + ) -> None: + await manager.shut_down() + try: + await asyncio.wait_for(manager_task, GRACEFUL_SHUTDOWN_TIMEOUT) + logger.error("graceful shut down of pipeline manager succeeded") + except TimeoutError: + logger.error("could not gracefully shut down pipeline manager within timeframe") + + async def _run(self) -> None: + logger.debug("Running _run") + try: + manager, manager_task = await self._run_pipeline(self.configuration) + + async for refreshed_config in self._refresh_configuration_gen(): + logger.debug("Configuration change detected. Restarting pipeline...") + await self._shut_down_pipeline(manager, manager_task) + manager, manager_task = await self._run_pipeline(refreshed_config) + + logger.debug("Configuration refresh disabled. 
Waiting for pipeline manager task to finish.") + await manager_task + except asyncio.CancelledError: + if manager is not None and manager_task is not None: + await self._shut_down_pipeline(manager, manager_task) + + logger.debug("End of _run") + + async def run(self) -> None: + """Run the runner and continuously process events until stopped.""" + self._running_config_version = self.configuration.version - sender = cast(Sender, self.sender) - logger.debug(f"Get batch of events from sender (batch_size={sender.batch_size}).") - for event in sender: - if event is None: - continue + self._main_task = asyncio.create_task(self._run(), name="config_refresh") - if event.state == EventStateType.FAILED: - logger.error("event failed: %s", event) - else: - logger.debug("event processed: %s", event.state) + await self._main_task - logger.debug("Finished processing batch of events.") - - def setup(self) -> None: - """Set up the runner, its components, and required runner attributes.""" + self.shut_down() + logger.debug("End log processing.") - self.sender = self._initialize_sender() - self.sender.setup() - self.should_exit = False + def stop(self) -> None: + """Stop the runner and signal the underlying processing pipeline to exit.""" - logger.info("Runner set up complete.") + logger.info("Stopping runner and exiting...") + if self._main_task is not None: + logger.debug("Cancelling runner main task") + self._main_task.cancel() + else: + logger.debug("Attempting to stop inactive runner") def shut_down(self) -> None: """Shut down runner components, and required runner attributes.""" - self.should_exit = True - cast(Sender, self.sender).shut_down() - self.sender = None - - input_connector = cast(Input, self._input_connector) - input_connector.acknowledge() - - len_delivered_events = len(input_connector.event_backlog.get(EventStateType.DELIVERED)) - if len_delivered_events: - logger.error( - f"Input connector has {len_delivered_events} non-acked events in event_backlog." 
- ) - logger.info("Runner shut down complete.") - def stop(self) -> None: - """Stop the runner and signal the underlying processing pipeline to exit. - - This method sets the `should_exit` flag to True, which will cause the - runner and its components to stop gracefully. - """ - - logger.info("Stopping runner and exiting...") - self.should_exit = True - def setup_logging(self) -> None: """Setup the logging configuration. is called in the :code:`logprep.run_logprep` module. @@ -244,15 +144,3 @@ def setup_logging(self) -> None: log_config = DEFAULT_LOG_CONFIG | asdict(self.configuration.logger) os.environ["LOGPREP_LOG_CONFIG"] = json.dumps(log_config) logging.config.dictConfig(log_config) - - def reload(self) -> None: - """Reload the log processing pipeline.""" - - logger.info("Reloading log processing pipeline...") - - self.shut_down() - self.setup() - - self._running_config_version = self.configuration.version - self.configuration.schedule_config_refresh() - logger.info("Finished reloading log processing pipeline.") diff --git a/logprep/ng/sender.py b/logprep/ng/sender.py index ece8b4616..a1257df17 100644 --- a/logprep/ng/sender.py +++ b/logprep/ng/sender.py @@ -1,16 +1,13 @@ """sender module""" import logging -from collections.abc import Iterator -from itertools import islice -from typing import Generator +import typing from logprep.ng.abc.event import ExtraDataEvent from logprep.ng.abc.output import Output from logprep.ng.event.error_event import ErrorEvent from logprep.ng.event.event_state import EventStateType from logprep.ng.event.log_event import LogEvent -from logprep.ng.pipeline import Pipeline logger = logging.getLogger("Sender") @@ -23,40 +20,23 @@ def __str__(self) -> str: return f"{self.message}: {self.exceptions}" -class Sender(Iterator): +class Sender: """Sender class to handle sending events to configured outputs.""" def __init__( self, - pipeline: Pipeline, outputs: list[Output], error_output: Output | None = None, - process_count: int = 3, ) -> 
None: - self.pipeline = pipeline self._outputs = {output.name: output for output in outputs} self._default_output = [output for output in outputs if output.default][0] self._error_output = error_output - self.batch_size = process_count - self.should_exit = False - - def __next__(self) -> LogEvent | ErrorEvent: - """not implemented, use iter()""" - raise NotImplementedError("Use iter() to get events from the Sender.") - - def __iter__(self) -> Generator[LogEvent | ErrorEvent, None, None]: - """Iterate over processed events.""" - while True: - logger.debug("Sender iterating") - batch = list(islice(self.pipeline, self.batch_size)) - self._send_and_flush_processed_events(batch_events=batch) - if self._error_output: - self._send_and_flush_failed_events(batch_events=batch) - if self.should_exit: - logger.debug("Sender exiting") - self.shut_down() - return - yield from batch + + def process(self, batch: list[LogEvent]) -> list[LogEvent]: + self._send_and_flush_processed_events(batch_events=batch) + if self._error_output: + self._send_and_flush_failed_events(batch_events=batch) + return batch def _send_and_flush_failed_events(self, batch_events: list[LogEvent]) -> None: error_events = [ @@ -86,7 +66,7 @@ def _send_and_flush_processed_events(self, batch_events: list[LogEvent]) -> None output.flush() def _send_extra_data(self, event: LogEvent) -> None: - extra_data_events: list[ExtraDataEvent] = event.extra_data + extra_data_events = typing.cast(list[ExtraDataEvent], event.extra_data) for extra_data_event in extra_data_events: for output in extra_data_event.outputs: for output_name, output_target in output.items(): @@ -124,14 +104,12 @@ def _get_error_event(self, event: LogEvent) -> ErrorEvent: def shut_down(self) -> None: """Shutdown all outputs gracefully.""" - self.stop() for _, output in self._outputs.items(): output.shut_down() if self._error_output: self._error_output.shut_down() logger.info("All outputs have been shut down.") - self.pipeline.shut_down() 
logger.info("Sender has been shut down.") def setup(self) -> None: @@ -141,15 +119,3 @@ def setup(self) -> None: if self._error_output: self._error_output.setup() logger.info("All outputs have been set up.") - self.pipeline.setup() - - def stop(self) -> None: - """Request the sender to stop iteration. - - Calling stop() sets the should_exit flag. The sender will finish processing - the current batch and exit on the next iteration (i.e., the next next() call). - If you need to enforce an immediate stop, use shut_down() instead. - """ - - self.should_exit = True - logger.info("Sender stop signal received.") diff --git a/logprep/ng/util/async.py b/logprep/ng/util/async.py new file mode 100644 index 000000000..764c4f634 --- /dev/null +++ b/logprep/ng/util/async.py @@ -0,0 +1,31 @@ +import asyncio +from collections.abc import Callable, Coroutine +from typing import Any, ParamSpec, TypeVar + +T = TypeVar("T") +P = ParamSpec("P") + + +def create_task( + factory: Callable[P, Coroutine[Any, Any, T]], *args: P.args, **kwargs: P.kwargs +) -> asyncio.Task[T]: + """ + Wraps :code:`asyncio.create_task` to automatically assign a name derived from... + + Parameters + ---------- + factory : Callable[P, Coroutine[Any, Any, T]] + _description_ + + Returns + ------- + asyncio.Task[T] + _description_ + """ + factory_self = getattr(factory, "__self__", None) + name = ( + f"{factory_self.__class__.__name__}.{factory.__name__}" + if factory_self is not None + else f"{factory.__name__}" + ) + return asyncio.create_task(factory(*args, **kwargs), name=name) diff --git a/logprep/ng/util/worker/types.py b/logprep/ng/util/worker/types.py new file mode 100644 index 000000000..78593ed8c --- /dev/null +++ b/logprep/ng/util/worker/types.py @@ -0,0 +1,45 @@ +""" +Fundamental contracts and abstractions for the async pipeline system. + +This module defines the structural interfaces that decouple workers, +handlers, and pipeline infrastructure. 
The intent is to establish a +clear separation between execution mechanics and processing logic, +allowing components to remain reusable, composable, and reload-safe. + +All definitions here describe behavior, expectations, and semantic +constraints rather than implementing runtime functionality. +""" + +import asyncio +from collections.abc import Callable, Coroutine +from typing import TypeVar + +T = TypeVar("T") +Input = TypeVar("Input") +Output = TypeVar("Output") + +SyncHandler = Callable[[list[Input]], list[Output]] +AsyncHandler = Callable[[list[Input]], Coroutine[object, object, list[Output]]] +Handler = SyncHandler[Input, Output] | AsyncHandler[Input, Output] + + +class SizeLimitedQueue(asyncio.Queue[T]): + """ + Queue wrapper which ensures a maxsize configured. + + Parameters + ---------- + maxsize : int + Maximum number of items the queue can hold. + Must be > 0. + + Raises + ------ + ValueError + If maxsize <= 0. + """ + + def __init__(self, maxsize: int) -> None: + if maxsize <= 0: + raise ValueError("Queue must be bounded") + super().__init__(maxsize=maxsize) diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py new file mode 100644 index 000000000..0b0d55913 --- /dev/null +++ b/logprep/ng/util/worker/worker.py @@ -0,0 +1,320 @@ +""" +Worker execution and batching mechanics. + +This module provides the standalone Worker abstraction responsible for +input consumption, deterministic batching, optional batch processing, +and cooperative shutdown behavior. + +The worker is intentionally decoupled from pipeline orchestration logic +and focuses solely on predictable buffering, flushing, and backpressure +interaction with the output queue. 
+""" + +import asyncio +import logging +from asyncio import AbstractEventLoop +from collections import deque +from collections.abc import AsyncIterator +from typing import Any, Generic, TypeVar + +from logprep.ng.util.worker.types import AsyncHandler, SizeLimitedQueue + +logger = logging.getLogger("Worker") + +T = TypeVar("T") +Input = TypeVar("Input") +Output = TypeVar("Output") + + +class Worker(Generic[Input, Output]): + """ + Generic batching worker with cooperative shutdown semantics. + """ + + def __init__( + self, + name: str, + batch_size: int, + batch_interval_s: float, + handler: AsyncHandler[Input, Output], + in_queue: asyncio.Queue[Input] | AsyncIterator[Input], + out_queue: SizeLimitedQueue[Output] | None = None, + ) -> None: + self.name = name + + self._handler = handler + + self.in_queue = in_queue + self.out_queue = out_queue + + self._batch_interval_s = batch_interval_s + self._batch_size = batch_size + + self._batch_buffer: deque[Input] = deque() + self._buffer_lock = asyncio.Lock() # TODO is locking really required? + + self._flush_timer: asyncio.Task[None] | None = None + + def _start_timer_locked(self) -> None: + """ + Arm or re-arm the batch timer. + + Must be called with _buffer_lock held. Ensures that at most one + timer task is active for the current batch window. + """ + if self._flush_timer and not self._flush_timer.done(): + self._flush_timer.cancel() + self._flush_timer = asyncio.create_task(self._flush_after_interval()) + + def _cancel_timer_if_needed(self) -> None: + """ + Cancel the active timer task if it is still pending. + + Avoids cancelling the currently executing timer task to prevent + self-cancellation race conditions. + """ + t = self._flush_timer + if not t or t.done(): + return + if t is asyncio.current_task(): + return + t.cancel() + + async def _flush_after_interval(self) -> None: + """ + Timer coroutine responsible for time-based batch flushing. 
+ + Sleeps for the configured interval and flushes the buffered items + if the batch has not already been drained by the size trigger. + """ + try: + logger.debug("timer sleeping") + await asyncio.sleep(self._batch_interval_s) + except asyncio.CancelledError: + logger.debug("timer caught cancelled error") + return + + batch: list[Input] | None = None + async with self._buffer_lock: + if self._batch_buffer: + batch = self._drain_locked() + if self._flush_timer is asyncio.current_task(): + self._flush_timer = None + + if batch: + await self._flush_batch(batch) + + def _drain_locked(self) -> list[Input]: + """ + Drain the current buffer contents. + + Must be called with _buffer_lock held. Cancels any active timer + and returns a snapshot of buffered items. + """ + batch = list(self._batch_buffer) + self._batch_buffer.clear() + self._cancel_timer_if_needed() + self._flush_timer = None + return batch + + async def add(self, item: Input) -> None: + """ + Add a single item to the batch buffer. + + May trigger a flush if the size threshold is reached. Starts the + batch timer when the first item of a new batch arrives. + """ + batch_to_flush: list[Input] | None = None + + async with self._buffer_lock: + self._batch_buffer.append(item) + + if len(self._batch_buffer) == 1: + self._start_timer_locked() + + if len(self._batch_buffer) >= self._batch_size: + batch_to_flush = self._drain_locked() + + if batch_to_flush: + await self._flush_batch(batch_to_flush) + + async def flush(self) -> None: + """ + Force flushing of buffered items. + + Drains and processes the current buffer regardless of size or + timer state. 
+ """ + batch_to_flush: list[Input] | None = None + async with self._buffer_lock: + if self._batch_buffer: + batch_to_flush = self._drain_locked() + if batch_to_flush: + await self._flush_batch(batch_to_flush) + + async def _process_batch(self, batch: list[Input]) -> list[Output]: + return await self._handler(batch) + + async def _flush_batch(self, batch: list[Input]) -> None: + """ + Process and forward a completed batch. + + Applies the optional handler and forwards the resulting items to + the output queue if configured. + """ + batch_result: list[Output] = await self._process_batch(batch) + + if self.out_queue is not None: + for item in batch_result: + await self.out_queue.put(item) + await asyncio.sleep(0) + + async def run(self, stop_event: asyncio.Event) -> None: + """ + Execute the worker processing loop. + + Continuously consumes items until stop_event is set or the task is + cancelled. Ensures a final buffer flush during shutdown. + """ + + try: + if isinstance(self.in_queue, asyncio.Queue): + while not stop_event.is_set(): + item = await self.in_queue.get() + await self.add(item) + # TODO is this await really necessary? + await asyncio.sleep(0.0) + else: + while not stop_event.is_set(): + item = await anext(self.in_queue) + await self.add(item) + # TODO is this await really necessary? 
+ await asyncio.sleep(0.0) + + except asyncio.CancelledError: + logger.debug("Worker cancelled") + raise + finally: + await self.flush() + + +class TransferWorker(Worker[T, T]): + def __init__( + self, + name: str, + batch_size: int, + batch_interval_s: float, + in_queue: asyncio.Queue[T] | AsyncIterator[T], + out_queue: SizeLimitedQueue[T] | None = None, + ) -> None: + super().__init__( + name=name, + batch_size=batch_size, + batch_interval_s=batch_interval_s, + in_queue=in_queue, + out_queue=out_queue, + handler=self.__handle_noop, + ) + + async def __handle_noop(self, batch: list[T]) -> list[T]: + await asyncio.sleep(0) + return [e for e in batch if e is not None] + + +class WorkerOrchestrator: + """ + Orchestrates a chain of workers. + + Lifecycle: + - run(): start workers + background tasks and wait until stop_event is set + - shut_down(): stop workers + background tasks and end manager lifetime + """ + + def __init__( + self, + workers: list[Worker], + loop: AbstractEventLoop | None = None, + ) -> None: + """ + Initialize the manager with a worker chain and optional event loop. + """ + self._loop: AbstractEventLoop = loop if loop is not None else asyncio.get_event_loop() + self._workers: list[Worker] = workers + + self._stop_event = asyncio.Event() + + self._worker_tasks: set[asyncio.Task[Any]] = set() + + self._exceptions: list[BaseException] = [] + self._reload_lock = asyncio.Lock() + + def _setup(self) -> None: + """Perform manager initialization steps that require a fully constructed instance.""" + + def run_workers(self) -> None: + """ + Start worker tasks (data-plane). + + Worker tasks may be restarted on reload; background tasks are not. 
+ """ + for worker in self._workers: + t = self._loop.create_task(worker.run(self._stop_event), name=worker.name) + self._add_worker_task(t) + + def _add_worker_task(self, task: asyncio.Task[Any]) -> None: + """Track a worker task and fail-fast on exceptions.""" + self._worker_tasks.add(task) + + def _done(t: asyncio.Task[Any]) -> None: + self._worker_tasks.discard(t) + + if t.cancelled(): + return + + exc = t.exception() + if exc is not None: + self._exceptions.append(exc) + self._stop_event.set() + + task.add_done_callback(_done) + + async def run(self) -> None: + """ + Run the manager until stop_event is set. + + Starts workers and background tasks and then blocks waiting for shutdown. + """ + + self._setup() + self.run_workers() + + await self._stop_event.wait() + + async def shut_down(self, timeout_s: float) -> None: + """ + Fully shut down the manager. + + Stops workers and background tasks, clears registrations, and signals stop_event + so run() can exit. + """ + self._stop_event.set() + + current_task = asyncio.current_task() + tasks_but_current = [t for t in self._worker_tasks if t is not current_task] + + logger.debug("waiting for termination of %d tasks", len(tasks_but_current)) + + try: + await asyncio.wait_for( + asyncio.gather(*tasks_but_current, return_exceptions=True), timeout_s + ) + except TimeoutError: + unfinished_workers = [w for w in tasks_but_current if not w.done()] + if len(unfinished_workers) > 0: + logger.debug( + "[%d/%d] did not stop gracefully. 
Cancelling: [%s]", + len(unfinished_workers), + len(tasks_but_current), + ", ".join(map(asyncio.Task.get_name, unfinished_workers)), + ) + await asyncio.gather(*tasks_but_current, return_exceptions=True) diff --git a/logprep/run_ng.py b/logprep/run_ng.py index 8db477f0a..4ca695a51 100644 --- a/logprep/run_ng.py +++ b/logprep/run_ng.py @@ -8,6 +8,7 @@ from multiprocessing import set_start_method import click +import uvloop from logprep.ng.runner import Runner from logprep.ng.util.configuration import Configuration, InvalidConfigurationError @@ -80,7 +81,7 @@ def run(configs: tuple[str], version=None) -> None: signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) logger.debug("Configuration loaded") - runner.run() + uvloop.run(runner.run()) except SystemExit as error: logger.debug(f"Exit received with code {error.code}") sys.exit(error.code) diff --git a/pyproject.toml b/pyproject.toml index c80ef420e..b4ffc8b5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -182,7 +182,7 @@ max-attributes=12 [tool.pylint.CLASSES] # List of method names used to declare (i.e. assign) instance attributes. 
-defining-attr-methods="__init__,__new__,setup" +defining-attr-methods="__init__,__new__,setup,_setup" [tool.mypy] # use imported type information but don't report issues if not a file under check From ee5607c9bc00297f5d86b1263fcd3352bd3a00a2 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:13:57 +0100 Subject: [PATCH 04/68] remove isolated poc code --- logprep/ng/poc/__init__.py | 0 logprep/ng/poc/async_pipeline/__init__.py | 0 .../ng/poc/async_pipeline/pipeline_manager.py | 335 ------------------ logprep/ng/poc/async_pipeline/types.py | 107 ------ .../ng/poc/async_pipeline/utils/__init__.py | 0 .../utils/worker_chain_validator.py | 140 -------- .../ng/poc/async_pipeline/worker/__init__.py | 0 .../async_pipeline/worker/pipeline_worker.py | 105 ------ .../ng/poc/async_pipeline/worker/worker.py | 227 ------------ logprep/ng/poc/main.py | 95 ----- logprep/ng/poc/main_standalone_worker.py | 130 ------- logprep/ng/poc/mocked/__init__.py | 0 logprep/ng/poc/mocked/mocking_functions.py | 27 -- logprep/ng/poc/mocked/mocking_processor.py | 19 - logprep/ng/poc/mocked/mocking_types.py | 50 --- logprep/ng/poc/pipeline_manager.py | 218 ------------ 16 files changed, 1453 deletions(-) delete mode 100644 logprep/ng/poc/__init__.py delete mode 100644 logprep/ng/poc/async_pipeline/__init__.py delete mode 100644 logprep/ng/poc/async_pipeline/pipeline_manager.py delete mode 100644 logprep/ng/poc/async_pipeline/types.py delete mode 100644 logprep/ng/poc/async_pipeline/utils/__init__.py delete mode 100644 logprep/ng/poc/async_pipeline/utils/worker_chain_validator.py delete mode 100644 logprep/ng/poc/async_pipeline/worker/__init__.py delete mode 100644 logprep/ng/poc/async_pipeline/worker/pipeline_worker.py delete mode 100644 logprep/ng/poc/async_pipeline/worker/worker.py delete mode 100644 logprep/ng/poc/main.py delete mode 100644 logprep/ng/poc/main_standalone_worker.py delete mode 100644 logprep/ng/poc/mocked/__init__.py 
delete mode 100644 logprep/ng/poc/mocked/mocking_functions.py delete mode 100644 logprep/ng/poc/mocked/mocking_processor.py delete mode 100644 logprep/ng/poc/mocked/mocking_types.py delete mode 100644 logprep/ng/poc/pipeline_manager.py diff --git a/logprep/ng/poc/__init__.py b/logprep/ng/poc/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/logprep/ng/poc/async_pipeline/__init__.py b/logprep/ng/poc/async_pipeline/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/logprep/ng/poc/async_pipeline/pipeline_manager.py b/logprep/ng/poc/async_pipeline/pipeline_manager.py deleted file mode 100644 index 2f1d4c085..000000000 --- a/logprep/ng/poc/async_pipeline/pipeline_manager.py +++ /dev/null @@ -1,335 +0,0 @@ -""" -Pipeline orchestration and lifecycle management. - -This module provides the coordinator responsible for running a validated, linear -worker chain and for managing long-lived background tasks bound to the manager -lifecycle. It implements dynamic handler resolution for pipeline-bound workers -and supports controlled restarts via soft shutdown and reload semantics. - -The focus is operational correctness: predictable startup/shutdown behavior, -reload safety, and fail-fast propagation of task failures via a single manager -lifetime signal. 
-""" - -import asyncio -import inspect -from asyncio import AbstractEventLoop -from collections.abc import Callable, Coroutine -from traceback import print_tb -from types import FunctionType -from typing import Any, Concatenate, ParamSpec, TypeAlias, TypeVar, cast - -from async_pipeline.types import Handler, HandlerResolver -from async_pipeline.utils.worker_chain_validator import ( - validate_and_sort_linear_worker_chain, -) -from async_pipeline.worker.pipeline_worker import PipelineWorker - -P = ParamSpec("P") -SelfT = TypeVar("SelfT") - -AsyncMarked: TypeAlias = Callable[Concatenate[SelfT, P], Coroutine[Any, Any, Any]] -BackgroundTaskFactory: TypeAlias = Callable[[], Coroutine[Any, Any, Any]] - - -def background_task(func: AsyncMarked[SelfT, P]) -> AsyncMarked[SelfT, P]: - """ - Mark an async function or method as a pipeline background task. - - The decorator does not alter runtime behavior. It only attaches - metadata used by PipelineManager during background task discovery. - - Marked callables are automatically registered and executed as - long-lived control-plane tasks bound to the manager lifecycle. - - Notes - ----- - - Only async callables are supported. - - The decorator performs no wrapping or scheduling. - - Execution semantics are defined entirely by PipelineManager. - """ - - setattr(func, "__background_task__", True) - return func - - -class PipelineManager(HandlerResolver): - """ - Orchestrates a linear chain of PipelineWorker instances and manager-scoped background tasks. - - The manager binds itself as a HandlerResolver for PipelineWorkers, starts/stops worker tasks, - and optionally discovers and runs @background_task-marked async methods. 
- - Lifecycle: - - run(): start workers + background tasks and wait until stop_event is set - - soft_shut_down(): stop worker tasks only (manager stays alive) - - reload(): soft shutdown + restart workers (background tasks keep running) - - shut_down(): stop workers + background tasks and end manager lifetime - """ - - def __init__( - self, - workers: list[PipelineWorker[Any]], - loop: AbstractEventLoop | None = None, - ) -> None: - """ - Initialize the manager with a worker chain and optional event loop. - - Workers are validated/sorted into a strict linear chain and PipelineWorkers are - bound to this manager as their handler resolver. - """ - self._loop: AbstractEventLoop | None = loop - self._workers: list[PipelineWorker[Any]] = self._validate_and_bind_workers(workers=workers) - - # public: background tasks and subclasses may rely on this as the manager lifetime signal - self.stop_event = asyncio.Event() - - self._worker_tasks: set[asyncio.Task[Any]] = set() - self._background_tasks: set[asyncio.Task[Any]] = set() - - self._registered_background_task_factories: set[BackgroundTaskFactory] = set() - self._background_tasks_started = False - - self._exceptions: list[BaseException] = [] - self._reload_lock = asyncio.Lock() - - def _validate_and_bind_workers( - self, - workers: list[PipelineWorker[Any]], - ) -> list[PipelineWorker[Any]]: - """ - Validate worker wiring and bind this manager as their handler resolver. - - Ensures the worker chain forms a strict linear pipeline and resets - resolver bindings to keep handler resolution consistent. - """ - - workers = validate_and_sort_linear_worker_chain(workers) - - for worker in workers: - if isinstance(worker, PipelineWorker): - worker.bind_resolver(self) - - return workers - - def resolve(self, name: str) -> Handler[Any]: - """ - Resolve a handler by name on this manager instance. - - Implementations typically provide handler methods on subclasses which are looked up - dynamically by attribute name. 
- """ - handler = getattr(self, name, None) - if handler is None or not callable(handler): - raise AttributeError(f"Missing handler {name!r} on {type(self).__name__}.") - return cast(Handler[Any], handler) - - def register_additional_background_tasks(self, callback: BackgroundTaskFactory) -> None: - """ - Register an additional manager-scoped background task factory. - - The callback must be an async callable producing a coroutine and will be scheduled - when start_background_tasks() runs. - """ - if not inspect.iscoroutinefunction(callback): - raise TypeError( - "register_additional_background_tasks() only accepts async callables " - "(async def ...). Sync callables are not supported." - ) - self._registered_background_task_factories.add(callback) - - def _setup(self) -> None: - """Perform manager initialization steps that require a fully constructed instance.""" - self._auto_register_marked_background_tasks() - - def start_background_tasks(self) -> None: - """ - Start manager-scoped background tasks once per manager lifetime. - - This is intended for long-lived control-plane tasks (metrics, cleanup, etc.). - """ - if self._background_tasks_started: - return - if self._loop is None: - raise RuntimeError("start_background_tasks() requires an event loop.") - - self._background_tasks_started = True - self._enqueue_registered_background_tasks() - - def _auto_register_marked_background_tasks(self) -> None: - """ - Discover and register @background_task-marked async methods. - - Discovery inspects the class hierarchy without invoking attribute access on the - instance to avoid side effects from descriptors/properties. 
- """ - seen: set[str] = set() - - for cls in type(self).mro(): - if cls is object: - break - - for name, attr in cls.__dict__.items(): - if name in seen: - continue - seen.add(name) - - func: Callable[..., Any] | None = None - if isinstance(attr, staticmethod): - func = cast(Callable[..., Any], attr.__func__) - elif isinstance(attr, classmethod): - func = cast(Callable[..., Any], attr.__func__) - elif isinstance(attr, FunctionType): - func = attr - - if func is None: - continue - - if getattr(func, "__background_task__", False): - bound = getattr(self, name) - - if not inspect.iscoroutinefunction(bound): - raise TypeError( - f"Background task {type(self).__name__}.{name} is marked with " - "@background_task but is not async. Only async background tasks are supported." - ) - - self._registered_background_task_factories.add( - cast(BackgroundTaskFactory, bound) - ) - - def _enqueue_registered_background_tasks(self) -> None: - """Schedule all registered background task factories on the configured event loop.""" - if self._loop is None: - raise RuntimeError("_enqueue_registered_background_tasks() requires an event loop.") - - for callback in self._registered_background_task_factories: - task = self._loop.create_task(callback()) - self._add_background_task(task) - - def run_workers(self) -> None: - """ - Start worker tasks (data-plane). - - Worker tasks may be restarted on reload; background tasks are not. - """ - if self._loop is None: - raise RuntimeError("run_workers() requires an event loop.") - - for worker in self._workers: - t = self._loop.create_task(worker.run()) - self._add_worker_task(t) - - async def run(self) -> None: - """ - Run the manager until stop_event is set. - - Starts workers and background tasks and then blocks waiting for shutdown. 
- """ - if self._loop is None: - self._loop = asyncio.get_running_loop() - - self._setup() - self.run_workers() - self.start_background_tasks() - - await self.stop_event.wait() - - async def reload(self) -> None: - """ - Restart worker tasks while keeping the manager alive. - - Performs a soft shutdown (workers only), re-binds PipelineWorkers to this resolver, - and starts workers again. Background tasks continue running. - """ - - async with self._reload_lock: - print("Reloading...") - await self.soft_shut_down() - self._workers = self._validate_and_bind_workers(workers=self._workers) - self.run_workers() - print("Reload done.") - - async def soft_shut_down(self) -> None: - """ - Stop worker tasks without ending the manager lifetime. - - Intended to allow reload/restart of the data-plane while keeping control-plane - background tasks running. - """ - await self._shut_down_workers() - - async def shut_down(self) -> None: - """ - Fully shut down the manager. - - Stops workers and background tasks, clears registrations, and signals stop_event - so run() can exit. 
- """ - await self.soft_shut_down() - await self._shut_down_background_tasks() - - self._registered_background_task_factories.clear() - self.stop_event.set() - - async def _shut_down_workers(self) -> None: - """Signal, cancel, and await completion of all worker tasks.""" - for worker in self._workers: - worker.stop_event.set() - - current = asyncio.current_task() - tasks = [t for t in self._worker_tasks if t is not current] - - for t in tasks: - t.cancel() - - await asyncio.gather(*tasks, return_exceptions=True) - self._worker_tasks.clear() - - for worker in self._workers: - worker.stop_event = asyncio.Event() - - async def _shut_down_background_tasks(self) -> None: - """Cancel and await completion of all manager-scoped background tasks.""" - current = asyncio.current_task() - tasks = [t for t in self._background_tasks if t is not current] - - for t in tasks: - t.cancel() - - await asyncio.gather(*tasks, return_exceptions=True) - self._background_tasks.clear() - - def _add_worker_task(self, task: asyncio.Task[Any]) -> None: - """Track a worker task and fail-fast on exceptions.""" - self._worker_tasks.add(task) - - def _done(t: asyncio.Task[Any]) -> None: - self._worker_tasks.discard(t) - - if t.cancelled(): - return - - exc = t.exception() - if exc is not None: - self._exceptions.append(exc) - self.stop_event.set() - - task.add_done_callback(_done) - - def _add_background_task(self, task: asyncio.Task[Any]) -> None: - """Track a background task and fail-fast on exceptions.""" - self._background_tasks.add(task) - - def _done(t: asyncio.Task[Any]) -> None: - self._background_tasks.discard(t) - - if t.cancelled(): - return - - exc = t.exception() - if exc is not None: - self._exceptions.append(exc) - self.stop_event.set() - - task.add_done_callback(_done) diff --git a/logprep/ng/poc/async_pipeline/types.py b/logprep/ng/poc/async_pipeline/types.py deleted file mode 100644 index 5851b17b6..000000000 --- a/logprep/ng/poc/async_pipeline/types.py +++ /dev/null @@ -1,107 
+0,0 @@ -""" -Fundamental contracts and abstractions for the async pipeline system. - -This module defines the structural interfaces that decouple workers, -handlers, and pipeline infrastructure. The intent is to establish a -clear separation between execution mechanics and processing logic, -allowing components to remain reusable, composable, and reload-safe. - -All definitions here describe behavior, expectations, and semantic -constraints rather than implementing runtime functionality. -""" - -import asyncio -from abc import ABC, abstractmethod -from collections.abc import Callable, Coroutine -from typing import TypeVar - -T = TypeVar("T") - -SyncHandler = Callable[[list[T]], list[T]] -AsyncHandler = Callable[[list[T]], Coroutine[object, object, list[T]]] -Handler = SyncHandler[T] | AsyncHandler[T] - - -class SizeLimitedQueue(asyncio.Queue[T]): - """ - Bounded asyncio.Queue with explicit semantic intent. - - This subclass exists purely to make queue semantics and typing intent - explicit within the pipeline architecture. - - Differences from asyncio.Queue: - - - Enforces bounded capacity at construction time. - - Signals backpressure semantics at the type level. - - Improves readability by distinguishing pipeline queues from generic queues. - - Parameters - ---------- - maxsize : int - Maximum number of items the queue can hold. - - Must be > 0. A non-positive value is rejected to prevent accidental - creation of unbounded queues, which would break memory budgeting - and backpressure guarantees. - - Raises - ------ - ValueError - If maxsize <= 0. - - Notes - ----- - - Behavior is otherwise identical to asyncio.Queue. - - This class introduces no additional synchronization or scheduling logic. - - Primarily used to encode architectural constraints (memory/backpressure) - rather than functionality. 
- """ - - def __init__(self, maxsize: int) -> None: - if maxsize <= 0: - raise ValueError("Queue must be bounded") - super().__init__(maxsize=maxsize) - - -class HandlerResolver(ABC): - """ - Resolves handler identifiers to executable handler callables. - - A HandlerResolver provides the indirection layer between pipeline-bound - components (e.g. PipelineWorker) and the concrete handler implementation. - - Implementations are responsible for mapping a handler name/key to a - callable object that processes a batch of items. - - Contract: - - - Input: - name: str - Logical handler identifier (typically configured on workers). - - - Return: - Handler - A callable matching the Handler type contract: - - SyncHandler[T]: (list[T]) -> list[T] - AsyncHandler[T]: (list[T]) -> Awaitable[list[T]] - - - Errors: - AttributeError - Raised if the handler cannot be resolved. - - TypeError - Raised if the resolved object is not a valid Handler. - - Notes: - - - Resolution is intentionally dynamic to support late binding, reloads, - dependency injection, and runtime configuration changes. - - - Implementations may cache results but must remain consistent with - reload / rebinding semantics of the pipeline system. - """ - - @abstractmethod - def resolve(self, name: str) -> Handler: - """Return the handler associated with *name*.""" diff --git a/logprep/ng/poc/async_pipeline/utils/__init__.py b/logprep/ng/poc/async_pipeline/utils/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/logprep/ng/poc/async_pipeline/utils/worker_chain_validator.py b/logprep/ng/poc/async_pipeline/utils/worker_chain_validator.py deleted file mode 100644 index c19cb4cef..000000000 --- a/logprep/ng/poc/async_pipeline/utils/worker_chain_validator.py +++ /dev/null @@ -1,140 +0,0 @@ -""" -Worker chain validation utilities. - -This module validates queue-based wiring between workers to ensure a strict, -linear pipeline topology. 
It provides deterministic ordering for execution and -fails fast on ambiguous or unsafe configurations. -""" - -import asyncio -from typing import Any, TypeGuard, TypeVar - -from async_pipeline.worker.worker import Worker - -W = TypeVar("W", bound=Worker[Any]) - - -def _is_async_queue(obj: object) -> TypeGuard[asyncio.Queue[Any]]: - """Return True if *obj* is an asyncio.Queue.""" - return isinstance(obj, asyncio.Queue) - - -def _object_identity(obj: object) -> int: - """Return identity key used for queue wiring validation.""" - return id(obj) - - -def _input_queue_identity(worker: Worker[Any]) -> int | None: - """Return identity of the worker input queue, if queue-backed.""" - input_source = worker.in_queue - return _object_identity(input_source) if _is_async_queue(input_source) else None - - -def _output_queue_identity(worker: Worker[Any]) -> int | None: - """Return identity of the worker output queue, if configured.""" - output_queue = worker.out_queue - return _object_identity(output_queue) if output_queue is not None else None - - -def validate_and_sort_linear_worker_chain(workers: list[W]) -> list[W]: - """ - Validate and order workers as a strict linear chain. - - Ensures a single start worker, prohibits fan-in/fan-out, detects cycles, - and verifies full chain connectivity based on queue identity wiring. - """ - if not workers: - return [] - - consumer_by_input_queue_id: dict[int, W] = {} - - for worker in workers: - queue_id = _input_queue_identity(worker) - - if queue_id is None: - continue - - if queue_id in consumer_by_input_queue_id: - raise ValueError( - f"Invalid worker chain: multiple consumers detected for input queue id {queue_id}." 
- ) - - consumer_by_input_queue_id[queue_id] = worker - - producer_by_output_queue_id: dict[int, W] = {} - - for worker in workers: - queue_id = _output_queue_identity(worker) - - if queue_id is None: - continue - - if queue_id in producer_by_output_queue_id: - raise ValueError( - f"Invalid worker chain: multiple producers detected for output queue id {queue_id}." - ) - - producer_by_output_queue_id[queue_id] = worker - - produced_output_queue_ids = set(producer_by_output_queue_id.keys()) - - start_workers: list[W] = [] - - for worker in workers: - input_queue_id = _input_queue_identity(worker) - - if input_queue_id is None: - start_workers.append(worker) - elif input_queue_id not in produced_output_queue_ids: - start_workers.append(worker) - - start_workers = list({id(w): w for w in start_workers}.values()) - - if len(start_workers) != 1: - names = ", ".join(worker.name for worker in start_workers) or "none" - - raise ValueError( - f"Invalid worker chain: expected exactly one start worker, " - f"got {len(start_workers)} ({names})." - ) - - start_worker = start_workers[0] - - ordered_workers: list[W] = [] - visited_worker_ids: set[int] = set() - - current_worker: W = start_worker - - while True: - worker_identity = id(current_worker) - - if worker_identity in visited_worker_ids: - raise ValueError( - f"Invalid worker chain: cycle detected at worker {current_worker.name}." - ) - - visited_worker_ids.add(worker_identity) - ordered_workers.append(current_worker) - - if current_worker.out_queue is None: - break - - next_queue_id = _object_identity(current_worker.out_queue) - next_worker = consumer_by_input_queue_id.get(next_queue_id) - - if next_worker is None: - break - - current_worker = next_worker - - if len(ordered_workers) != len(workers): - unreachable_workers = [ - worker.name for worker in workers if id(worker) not in visited_worker_ids - ] - - raise ValueError( - "Invalid worker chain: chain is not fully connected. 
" - f"Unreachable workers: {', '.join(unreachable_workers)}." - ) - - return ordered_workers diff --git a/logprep/ng/poc/async_pipeline/worker/__init__.py b/logprep/ng/poc/async_pipeline/worker/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/logprep/ng/poc/async_pipeline/worker/pipeline_worker.py b/logprep/ng/poc/async_pipeline/worker/pipeline_worker.py deleted file mode 100644 index 741b4f4e5..000000000 --- a/logprep/ng/poc/async_pipeline/worker/pipeline_worker.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Pipeline-aware worker specialization. - -This module defines PipelineWorker, a Worker variant that resolves its batch -handler dynamically via a HandlerResolver. This enables late binding of handler -implementations, supports reload/rebind scenarios, and keeps workers reusable -outside of a concrete pipeline manager. -""" - -from typing import Any, Generic, TypeVar - -from async_pipeline.types import AsyncHandler, Handler, HandlerResolver, SyncHandler -from async_pipeline.worker.worker import Worker - -T = TypeVar("T") - - -class PipelineWorker(Worker[T], Generic[T]): - """ - Worker that resolves its handler dynamically via a HandlerResolver. - - A PipelineWorker stores a logical handler identifier instead of a direct - callable. The handler is resolved lazily at runtime and cached until the - resolver is re-bound. - - This keeps the worker decoupled from concrete handler implementations while - preserving the batching and forwarding semantics of the base Worker. - """ - - def __init__( - self, - *args: Any, - handler_name: str, - handler_resolver: HandlerResolver | None = None, - **kwargs: Any, - ) -> None: - """ - Initialize a pipeline-bound worker. - - Parameters - ---------- - handler_name: - Logical handler identifier used for resolution via the resolver. - - handler_resolver: - Optional resolver to bind immediately. If omitted, a resolver must be - provided via bind_resolver() before the first flush. 
- """ - super().__init__(*args, **kwargs) - self._handler_name = handler_name - self._handler_resolver: HandlerResolver | None = None - self._resolved_handler: AsyncHandler[T] | SyncHandler[T] | None = None - - if handler_resolver is not None: - self.bind_resolver(handler_resolver) - - def bind_resolver(self, handler_resolver: HandlerResolver) -> None: - """ - Bind a resolver used to resolve the configured handler name. - - Rebinding clears any cached resolved handler so subsequent flushes will - resolve again against the new resolver. - """ - # Fail fast: must be a real subclass of the ABC - if not isinstance(handler_resolver, HandlerResolver): - raise TypeError( - f"handler_resolver must be an instance of HandlerResolver (ABC). " - f"Got: {type(handler_resolver).__name__}" - ) - - self._handler_resolver = handler_resolver - self._resolved_handler = None - - def _ensure_resolved_handler(self) -> AsyncHandler[T] | SyncHandler[T]: - """ - Resolve and cache the handler for this worker. - - Returns a callable matching the handler contract. Resolution is performed - once per binding and cached until bind_resolver() is called again. - """ - if self._resolved_handler is not None: - return self._resolved_handler - - if self._handler_resolver is None: - raise RuntimeError( - f"PipelineWorker {self.name!r} requires a resolver to resolve {self._handler_name!r}." - ) - - handler: Handler = self._handler_resolver.resolve(self._handler_name) - - if not callable(handler): - raise TypeError(f"Resolved handler {self._handler_name!r} is not callable") - - self._resolved_handler = handler - return self._resolved_handler - - async def _flush_batch(self, batch: list[T]) -> None: - """ - Flush a batch using a lazily resolved handler. - - Ensures the handler is resolved before delegating to the base Worker - flush implementation. 
- """ - self._handler = self._ensure_resolved_handler() - await super()._flush_batch(batch) diff --git a/logprep/ng/poc/async_pipeline/worker/worker.py b/logprep/ng/poc/async_pipeline/worker/worker.py deleted file mode 100644 index cc748c009..000000000 --- a/logprep/ng/poc/async_pipeline/worker/worker.py +++ /dev/null @@ -1,227 +0,0 @@ -""" -Worker execution and batching mechanics. - -This module provides the standalone Worker abstraction responsible for -input consumption, deterministic batching, optional batch processing, -and cooperative shutdown behavior. - -The worker is intentionally decoupled from pipeline orchestration logic -and focuses solely on predictable buffering, flushing, and backpressure -interaction with the output queue. -""" - -import asyncio -import inspect -from collections import deque -from collections.abc import AsyncIterator -from typing import Generic, TypeVar - -from async_pipeline.types import AsyncHandler, SizeLimitedQueue, SyncHandler - -T = TypeVar("T") - - -class Worker(Generic[T]): - """ - Generic batching worker with cooperative shutdown semantics. - - A Worker consumes items from an input source, buffers them into batches - based on size and/or time thresholds, optionally applies a handler, and - forwards results to an output queue. - - The worker is intentionally standalone and independent from pipeline - orchestration logic. - - Responsibilities - ---------------- - - Input consumption (Queue or AsyncIterator) - - Size/time-based batching - - Optional batch processing via handler - - Output forwarding - - Graceful cancellation and final flush - - Lifecycle - --------- - run() - Start the worker loop until stop_event is set or the task is cancelled. - - stop_event - Cooperative shutdown signal used by external coordinators. - - Guarantees - ---------- - - Buffered items are flushed on cancellation or shutdown. - - Batch triggers remain deterministic (size vs timer). - - No implicit threading or scheduling side effects. 
- - Notes - ----- - - The worker does not own the event loop. - - Backpressure behavior is delegated to the output queue. - - Handler execution may be synchronous or asynchronous. - """ - - def __init__( - self, - name: str, - batch_size: int, - batch_interval_s: float, - in_queue: asyncio.Queue[T] | AsyncIterator[T], - out_queue: SizeLimitedQueue[T] | None = None, - handler: AsyncHandler[T] | SyncHandler[T] | None = None, - ) -> None: - self.name = name - - self.in_queue = in_queue - self.out_queue = out_queue - self._handler = handler - - self.stop_event = asyncio.Event() - - self._buffer: deque[T] = deque() - self._buffer_lock = asyncio.Lock() - - self._timer_task: asyncio.Task[None] | None = None - self._batch_size = batch_size - self._batch_interval_s = batch_interval_s - - def _start_timer_locked(self) -> None: - """ - Arm or re-arm the batch timer. - - Must be called with _buffer_lock held. Ensures that at most one - timer task is active for the current batch window. - """ - if self._timer_task and not self._timer_task.done(): - self._timer_task.cancel() - self._timer_task = asyncio.create_task(self._flush_after_interval()) - - def _cancel_timer_if_needed(self) -> None: - """ - Cancel the active timer task if it is still pending. - - Avoids cancelling the currently executing timer task to prevent - self-cancellation race conditions. - """ - t = self._timer_task - if not t or t.done(): - return - if t is asyncio.current_task(): - return - t.cancel() - - async def _flush_after_interval(self) -> None: - """ - Timer coroutine responsible for time-based batch flushing. - - Sleeps for the configured interval and flushes the buffered items - if the batch has not already been drained by the size trigger. 
- """ - try: - await asyncio.sleep(self._batch_interval_s) - except asyncio.CancelledError: - return - - batch: list[T] | None = None - async with self._buffer_lock: - if self._buffer: - batch = self._drain_locked() - if self._timer_task is asyncio.current_task(): - self._timer_task = None - - if batch: - await self._flush_batch(batch) - - def _drain_locked(self) -> list[T]: - """ - Drain the current buffer contents. - - Must be called with _buffer_lock held. Cancels any active timer - and returns a snapshot of buffered items. - """ - batch = list(self._buffer) - self._buffer.clear() - self._cancel_timer_if_needed() - self._timer_task = None - return batch - - async def add(self, item: T) -> None: - """ - Add a single item to the batch buffer. - - May trigger a flush if the size threshold is reached. Starts the - batch timer when the first item of a new batch arrives. - """ - batch_to_flush: list[T] | None = None - - async with self._buffer_lock: - self._buffer.append(item) - - if len(self._buffer) == 1: - self._start_timer_locked() - - if len(self._buffer) >= self._batch_size: - batch_to_flush = self._drain_locked() - - if batch_to_flush: - await self._flush_batch(batch_to_flush) - - async def flush(self) -> None: - """ - Force flushing of buffered items. - - Drains and processes the current buffer regardless of size or - timer state. - """ - batch_to_flush: list[T] | None = None - async with self._buffer_lock: - if self._buffer: - batch_to_flush = self._drain_locked() - if batch_to_flush: - await self._flush_batch(batch_to_flush) - - async def _flush_batch(self, batch: list[T]) -> None: - """ - Process and forward a completed batch. - - Applies the optional handler and forwards the resulting items to - the output queue if configured. 
- """ - batch_result: list[T] = batch - - if self._handler is not None: - result = self._handler(batch_result) - if inspect.isawaitable(result): - batch_result = await result - else: - batch_result = result - - if self.out_queue is not None: - for item in batch_result: - await self.out_queue.put(item) - await asyncio.sleep(0) - - async def run(self) -> None: - """ - Execute the worker processing loop. - - Continuously consumes items until stop_event is set or the task is - cancelled. Ensures a final buffer flush during shutdown. - """ - - try: - while not self.stop_event.is_set(): - if isinstance(self.in_queue, asyncio.Queue): - item = await self.in_queue.get() - try: - await self.add(item) - finally: - self.in_queue.task_done() - else: - item = await anext(self.in_queue) - await self.add(item) - - except asyncio.CancelledError: - pass - finally: - await self.flush() diff --git a/logprep/ng/poc/main.py b/logprep/ng/poc/main.py deleted file mode 100644 index 85472b496..000000000 --- a/logprep/ng/poc/main.py +++ /dev/null @@ -1,95 +0,0 @@ -import asyncio - -from async_pipeline.types import SizeLimitedQueue -from async_pipeline.worker.pipeline_worker import PipelineWorker -from mocked.mocking_functions import iter_input_pull -from mocked.mocking_types import Event -from pipeline_manager import ConcretePipelineManager - -MAX_QUEUE_SIZE = 100_000 - -BATCH_SIZE = 20_000 -BATCH_INTERVAL_S = 5 - - -def get_workers() -> list[PipelineWorker[Event]]: - input_worker: PipelineWorker[Event] = PipelineWorker( - name="input_worker", - batch_size=BATCH_SIZE, - batch_interval_s=BATCH_INTERVAL_S, - in_queue=aiter(iter_input_pull()), - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler_name="handler_input_data", - ) - - processor_worker: PipelineWorker[Event] = PipelineWorker( - name="processor_worker", - batch_size=BATCH_SIZE, - batch_interval_s=BATCH_INTERVAL_S, - in_queue=input_worker.out_queue, - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - 
handler_name="handler_processor_data", - ) - - output_1_worker: PipelineWorker[Event] = PipelineWorker( - name="output_1_worker", - batch_size=BATCH_SIZE, - batch_interval_s=BATCH_INTERVAL_S, - in_queue=processor_worker.out_queue, - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler_name="handler_output_1_data", - ) - - output_2_worker: PipelineWorker[Event] = PipelineWorker( - name="output_2_worker", - batch_size=BATCH_SIZE, - batch_interval_s=BATCH_INTERVAL_S, - in_queue=output_1_worker.out_queue, - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler_name="handler_output_2_data", - ) - - acknowledge_worker: PipelineWorker[Event] = PipelineWorker( - name="acknowledge_worker", - batch_size=BATCH_SIZE, - batch_interval_s=BATCH_INTERVAL_S, - in_queue=output_2_worker.out_queue, - out_queue=None, - handler_name="handler_acknowledgement_data", - ) - - return [ - input_worker, - processor_worker, - output_1_worker, - output_2_worker, - acknowledge_worker, - ] - - -def main() -> None: - loop = asyncio.new_event_loop() - try: - asyncio.set_event_loop(loop) - - pipeline_manager = ConcretePipelineManager( - workers=get_workers(), - loop=loop, - ) - - loop.run_until_complete(pipeline_manager.run()) - finally: - loop.close() - - -async def async_main() -> None: - pipeline_manager = ConcretePipelineManager( - workers=get_workers(), - ) - - await pipeline_manager.run() - - -if __name__ == "__main__": - # asyncio.run(async_main()) - main() diff --git a/logprep/ng/poc/main_standalone_worker.py b/logprep/ng/poc/main_standalone_worker.py deleted file mode 100644 index 41e539f6b..000000000 --- a/logprep/ng/poc/main_standalone_worker.py +++ /dev/null @@ -1,130 +0,0 @@ -import asyncio - -from async_pipeline.types import SizeLimitedQueue -from async_pipeline.worker.worker import Worker -from mocked.mocking_functions import iter_input_pull -from mocked.mocking_types import Event - -MAX_QUEUE_SIZE = 100_000 - -BATCH_SIZE = 2_500 -BATCH_INTERVAL_S = 5 - - -# ---- 
handlers (match: (list[Event]) -> list[Event]) ---- - -acked = 0 - - -async def handler_input_data(events: list[Event]) -> list[Event]: - print(f"[handler_input_data] batch={len(events)}") - await asyncio.sleep(1) - return events - - -async def handler_processor_data(events: list[Event]) -> list[Event]: - print(f"[handler_processor_data] batch={len(events)}") - await asyncio.sleep(1) - return events - - -async def handler_output_1_data(events: list[Event]) -> list[Event]: - print(f"[handler_output_1_data] batch={len(events)}") - await asyncio.sleep(1) - return events - - -async def handler_output_2_data(events: list[Event]) -> list[Event]: - print(f"[handler_output_2_data] batch={len(events)}") - await asyncio.sleep(1) - return events - - -async def handler_acknowledgement_data(events: list[Event]) -> list[Event]: - global acked - - print(f"[handler_acknowledgement_data] batch={len(events)}") - await asyncio.sleep(1) - - acked += len(events) - print(f">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Total {acked=}") - return events - - -def get_workers() -> list[Worker[Event]]: - input_worker: Worker[Event] = Worker( - name="input_worker", - batch_size=BATCH_SIZE, - batch_interval_s=BATCH_INTERVAL_S, - in_queue=aiter(iter_input_pull()), - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler=handler_input_data, - ) - - processor_worker: Worker[Event] = Worker( - name="processor_worker", - batch_size=BATCH_SIZE, - batch_interval_s=BATCH_INTERVAL_S, - in_queue=input_worker.out_queue, - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler=handler_processor_data, - ) - - output_1_worker: Worker[Event] = Worker( - name="output_1_worker", - batch_size=BATCH_SIZE, - batch_interval_s=BATCH_INTERVAL_S, - in_queue=processor_worker.out_queue, - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler=handler_output_1_data, - ) - - output_2_worker: Worker[Event] = Worker( - name="output_2_worker", - batch_size=BATCH_SIZE, - 
batch_interval_s=BATCH_INTERVAL_S, - in_queue=output_1_worker.out_queue, - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler=handler_output_2_data, - ) - - acknowledge_worker: Worker[Event] = Worker( - name="acknowledge_worker", - batch_size=BATCH_SIZE, - batch_interval_s=BATCH_INTERVAL_S, - in_queue=output_2_worker.out_queue, - out_queue=None, - handler=handler_acknowledgement_data, - ) - - return [ - input_worker, - processor_worker, - output_1_worker, - output_2_worker, - acknowledge_worker, - ] - - -def main() -> None: - loop = asyncio.new_event_loop() - try: - asyncio.set_event_loop(loop) - - workers = get_workers() - tasks = [loop.create_task(w.run()) for w in workers] - - # Demo: keep running; Ctrl+C stops - loop.run_until_complete(asyncio.gather(*tasks)) - finally: - loop.close() - - -async def async_main() -> None: - workers = get_workers() - await asyncio.gather(*(asyncio.create_task(w.run()) for w in workers)) - - -if __name__ == "__main__": - # asyncio.run(async_main()) - main() diff --git a/logprep/ng/poc/mocked/__init__.py b/logprep/ng/poc/mocked/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/logprep/ng/poc/mocked/mocking_functions.py b/logprep/ng/poc/mocked/mocking_functions.py deleted file mode 100644 index c4e2f8e66..000000000 --- a/logprep/ng/poc/mocked/mocking_functions.py +++ /dev/null @@ -1,27 +0,0 @@ -import json -import random -import time -import uuid -from collections.abc import AsyncIterator - -from logprep.ng.poc.mocked.mocking_types import Event - - -# HELPER -async def iter_input_pull() -> AsyncIterator[Event]: - while True: - event_id = uuid.uuid4() - yield Event( - event_id=event_id, - payload=json.dumps({"additional_data": f"{event_id}"}), - ) - - -async def store(events: list[Event], topic: str) -> None: - # blocking sleep - time.sleep(random.randint(1, 5) / 10) - - -async def commit(events: list[Event]) -> None: - # blocking sleep - time.sleep(random.randint(1, 5) / 10) diff --git 
a/logprep/ng/poc/mocked/mocking_processor.py b/logprep/ng/poc/mocked/mocking_processor.py deleted file mode 100644 index 54a74579a..000000000 --- a/logprep/ng/poc/mocked/mocking_processor.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -import random -import time - -from mocked.mocking_types import Event - - -class Processor: - @staticmethod - def process(events: list[Event]) -> list[Event]: - for event in events: - new_payload = json.loads(event.payload) - new_payload["processed"] = True - event.payload = json.dumps(new_payload) - - # blocking sleep - time.sleep(random.randint(1, 5) / 10) - - return events diff --git a/logprep/ng/poc/mocked/mocking_types.py b/logprep/ng/poc/mocked/mocking_types.py deleted file mode 100644 index a249e5e27..000000000 --- a/logprep/ng/poc/mocked/mocking_types.py +++ /dev/null @@ -1,50 +0,0 @@ -import enum -import json -import uuid -from collections.abc import Iterator -from dataclasses import dataclass, field -from typing import Any - - -class State(str, enum.Enum): - RECEIVING = "receiving" - RECEIVED = "received" - PROCESSING = "processing" - PROCESSED = "processed" - STORING_OUTPUT_1 = "storing_output_1" - STORED_OUTPUT_1 = "stored_output_1" - STORING_OUTPUT_2 = "storing_output_2" - STORED_OUTPUT_2 = "stored_output_2" - DELIVERING = "delivering" - DELIVERED = "delivered" - ACKNOWLEDGING = "acknowledging" - ACKNOWLEDGED = "acknowledged" - - -@dataclass -class Event: - payload: str - state: State = State.RECEIVING - event_id: uuid.UUID = field(default_factory=uuid.uuid4) - errors: list[str] = field(default_factory=list) - - def update_payload(self, payload: dict[str, Any]) -> None: - """Convenience helper for the demo: replace payload JSON.""" - - self.payload = json.dumps(payload) - - def __iter__(self) -> Iterator[tuple[str, Any]]: - """Iterate over the JSON payload as key/value pairs. - - This makes `Event` usable in contexts that expect an iterable without - relying on the previously incorrect `__iter__` signature. 
- """ - - try: - data = json.loads(self.payload) - except json.JSONDecodeError: - return iter(()) - - if isinstance(data, dict): - return iter(data.items()) - return iter(()) diff --git a/logprep/ng/poc/pipeline_manager.py b/logprep/ng/poc/pipeline_manager.py deleted file mode 100644 index 58779f01f..000000000 --- a/logprep/ng/poc/pipeline_manager.py +++ /dev/null @@ -1,218 +0,0 @@ -""" -Concrete pipeline manager implementation. - -This module defines a PipelineManager specialization responsible for -state tracking, backlog management, and runtime metrics. It encapsulates -pipeline-specific processing behavior while preserving the generic -lifecycle and orchestration semantics of the base manager. -""" - -import asyncio -import random -import uuid -from asyncio import AbstractEventLoop -from collections import Counter -from typing import Any - -from async_pipeline.pipeline_manager import PipelineManager, background_task -from async_pipeline.worker.pipeline_worker import PipelineWorker -from mocked.mocking_functions import commit, store -from mocked.mocking_processor import Processor -from mocked.mocking_types import Event, State - - -class ConcretePipelineManager(PipelineManager): - """ - PipelineManager specialization with event state tracking and metrics. - - Maintains an internal backlog for lifecycle/state visibility and - provides concrete handler implementations for pipeline stages. 
- """ - - def __init__( - self, - workers: list[PipelineWorker[Event]], - loop: AbstractEventLoop | None = None, - ) -> None: - """Initialize backlog storage and runtime metric tracking.""" - super().__init__(workers=workers, loop=loop) - - self._event_backlog: dict[uuid.UUID, Event] = {} - self._event_backlog_lock = asyncio.Lock() - - self._metric: dict[str, Any] = { - "start_time": None, - "last_time": None, - "last_acked": 0, - "peak_rate": 0, - "total_acknowledged": 0, - } - - async def _update_event_states( - self, - events: list[Event], - new_state: State, - *, - locked: bool = False, - ) -> None: - """Update state for events, optionally assuming external lock ownership.""" - if locked: - for event in events: - event.state = new_state - self._event_backlog[event.event_id].state = new_state - else: - async with self._event_backlog_lock: - for event in events: - event.state = new_state - self._event_backlog[event.event_id].state = new_state - - async def handler_input_data(self, events: list[Event]) -> list[Event]: - """Register incoming events and mark them as received.""" - async with self._event_backlog_lock: - for event in events: - self._event_backlog.setdefault(event.event_id, event) - - await self._update_event_states( - events=events, - new_state=State.RECEIVED, - locked=True, - ) - - return events - - async def handler_processor_data(self, events: list[Event]) -> list[Event]: - """Process events and transition through processing states.""" - await self._update_event_states(events=events, new_state=State.PROCESSING) - - processed_events = await asyncio.to_thread(Processor.process, events) - # processed_events = Processor.process(events) - - await self._update_event_states( - events=processed_events, - new_state=State.PROCESSED, - ) - - return processed_events - - async def handler_output_1_data(self, events: list[Event]) -> list[Event]: - """Simulate output stage 1 storage.""" - await self._update_event_states(events=events, 
new_state=State.STORING_OUTPUT_1) - - await asyncio.sleep(random.randint(1, 5) / 10) - - await self._update_event_states(events=events, new_state=State.STORED_OUTPUT_1) - - return events - - async def handler_output_2_data(self, events: list[Event]) -> list[Event]: - """Simulate output stage 2 storage.""" - await self._update_event_states(events=events, new_state=State.STORING_OUTPUT_2) - - await asyncio.sleep(random.randint(1, 5) / 10) - - await self._update_event_states(events=events, new_state=State.STORED_OUTPUT_2) - - return events - - async def handler_delivery_data(self, events: list[Event]) -> list[Event]: - """Deliver processed events to the external sink.""" - await self._update_event_states(events=events, new_state=State.DELIVERING) - - await store(events, "output_data") - - await self._update_event_states(events=events, new_state=State.DELIVERED) - - return events - - async def handler_acknowledgement_data(self, events: list[Event]) -> list[Event]: - """Acknowledge delivered events and update metrics.""" - await self._update_event_states(events=events, new_state=State.ACKNOWLEDGING) - - await commit(events) - - async with self._event_backlog_lock: - await self._update_event_states( - events=events, - new_state=State.ACKNOWLEDGED, - locked=True, - ) - - self._metric["total_acknowledged"] += len(events) - total_acked = self._metric["total_acknowledged"] - - self._print_metric(total_acked=total_acked, acked=len(events)) - return events - - def _print_metric(self, total_acked: int, acked: int) -> None: - """Update and display runtime throughput metrics.""" - now = asyncio.get_running_loop().time() - - if self._metric["start_time"] is None: - self._metric["start_time"] = now - - elapsed = now - self._metric["start_time"] - - h, rem = divmod(int(elapsed), 3600) - m, s = divmod(rem, 60) - - elapsed_min = elapsed / 60 - acked_delta = total_acked - self._metric["last_acked"] - - last_time = self._metric["last_time"] - time_delta = now - last_time if last_time 
else 0 - - live_rate = (acked_delta / time_delta) * 60 if time_delta else 0 - avg_rate = total_acked / elapsed_min if elapsed_min else 0 - - self._metric["peak_rate"] = max(self._metric["peak_rate"], live_rate) - - self._metric["last_time"] = now - self._metric["last_acked"] = total_acked - - print( - f"Running: {h}h {m}m {s}s | " - f"Acked: {acked:_} | " - f"Total Acked: {total_acked:_} | " - f"Avg Rate: {avg_rate:_.1f}/min | " - f"Live Rate: {live_rate:_.1f}/min | " - f"Peak Rate: {self._metric['peak_rate']:_.1f}/min" - ) - - @background_task - async def _clean_up_delivered_events(self) -> None: - """Remove acknowledged events from the backlog.""" - while not self.stop_event.is_set(): - async with self._event_backlog_lock: - acknowledged = [ - eid for eid, e in self._event_backlog.items() if e.state is State.ACKNOWLEDGED - ] - - for eid in acknowledged: - del self._event_backlog[eid] - - await asyncio.sleep(10) - - async def _show_metric(self) -> None: - """Continuously display backlog state distribution.""" - while not self.stop_event.is_set(): - async with self._event_backlog_lock: - counter = Counter(event.state for event in self._event_backlog.values()) - total = len(self._event_backlog) - - print( - f"\nEvents: {total},\n" - f"Receiving: {counter[State.RECEIVING]:_},\n" - f"Received: {counter[State.RECEIVED]:_},\n" - f"Processing: {counter[State.PROCESSING]:_},\n" - f"Processed: {counter[State.PROCESSED]:_},\n" - f"Storing_output_1: {counter[State.STORING_OUTPUT_1]:_},\n" - f"Stored_output_1: {counter[State.STORED_OUTPUT_1]:_},\n" - f"Storing_output_2: {counter[State.STORING_OUTPUT_2]:_},\n" - f"Stored_output_2: {counter[State.STORED_OUTPUT_2]:_},\n" - f"Delivering: {counter[State.DELIVERING]:_},\n" - f"Delivered: {counter[State.DELIVERED]:_},\n" - f"Acknowledging: {counter[State.ACKNOWLEDGING]:_},\n" - f"Acknowledged: {counter[State.ACKNOWLEDGED]:_}\n" - ) - - await asyncio.sleep(random.randint(1, 5) / 2) From 0ab26ac4886c892ee77917a04b6a13b360dddbed Mon 
Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:59:48 +0100 Subject: [PATCH 05/68] fix logging error --- logprep/ng/runner.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 75f510cea..b530366d8 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -84,9 +84,11 @@ async def _shut_down_pipeline( await manager.shut_down() try: await asyncio.wait_for(manager_task, GRACEFUL_SHUTDOWN_TIMEOUT) - logger.error("graceful shut down of pipeline manager succeeded") + logger.info("graceful shut down of pipeline manager succeeded") except TimeoutError: - logger.error("could not gracefully shut down pipeline manager within timeframe") + logger.error( + "could not gracefully shut down pipeline manager within timeframe", exc_info=True + ) async def _run(self) -> None: logger.debug("Running _run") From a0fc08d9e517e28aa895f96ef14ebcf8ea30f868 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Fri, 27 Feb 2026 12:44:42 +0100 Subject: [PATCH 06/68] refactor to use more composition and less events in top-level code --- examples/exampledata/config/ng_pipeline.yml | 4 +- logprep/ng/manager.py | 22 +++- logprep/ng/runner.py | 109 +++++++++---------- logprep/ng/util/async.py | 31 ------ logprep/ng/util/async_helpers.py | 110 ++++++++++++++++++++ logprep/run_ng.py | 2 + 6 files changed, 181 insertions(+), 97 deletions(-) delete mode 100644 logprep/ng/util/async.py create mode 100644 logprep/ng/util/async_helpers.py diff --git a/examples/exampledata/config/ng_pipeline.yml b/examples/exampledata/config/ng_pipeline.yml index fc8c4dbda..0ca42bef7 100644 --- a/examples/exampledata/config/ng_pipeline.yml +++ b/examples/exampledata/config/ng_pipeline.yml @@ -2,10 +2,10 @@ version: 2 process_count: 1 timeout: 5.0 restart_count: 2 -config_refresh_interval: 300 +config_refresh_interval: 5 error_backlog_size: 1500000 
logger: - level: INFO + level: DEBUG format: "%(asctime)-15s %(hostname)-5s %(name)-10s %(levelname)-8s: %(message)s" datefmt: "%Y-%m-%d %H:%M:%S" loggers: diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 7ae36f270..2a89440ca 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -2,8 +2,10 @@ Runner module """ +import asyncio import logging import typing +from asyncio import CancelledError from typing import cast from logprep.factory import Factory @@ -31,10 +33,11 @@ class PipelineManager: """Orchestrator class managing pipeline inputs, processors and outputs""" - def __init__(self, configuration: Configuration) -> None: + def __init__(self, configuration: Configuration, shutdown_timeout_s: float) -> None: """Initialize the component from the given `configuration`.""" self.configuration = configuration + self._shutdown_timeout_s = shutdown_timeout_s def _setup(self): self._event_backlog = SetEventBacklog() @@ -121,13 +124,24 @@ async def run(self) -> None: """Run the runner and continuously process events until stopped.""" self._setup() - await self._orchestrator.run() + try: + await self._orchestrator.run() + except CancelledError: + # TODO cancelling() > 0 is no safe discriminator; improve + current_task = asyncio.current_task() + if current_task and current_task.cancelling() > 0: + logger.debug("PipelineManager.run has been cancelled. Shutting down") + await self._shut_down() + else: + logger.error("Orchestrator has been cancelled. 
Shutting down") + await self._shut_down() - async def shut_down(self) -> None: + async def _shut_down(self) -> None: """Shut down runner components, and required runner attributes.""" if self._orchestrator is not None: - await self._orchestrator.shut_down(1) + # TODO only a fraction of shutdown_timeout_s should be passed to the orchestrator + await self._orchestrator.shut_down(self._shutdown_timeout_s) if self._sender is not None: self._sender.shut_down() diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index b530366d8..000fd19fe 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -12,13 +12,15 @@ from attrs import asdict from logprep.ng.manager import PipelineManager +from logprep.ng.util.async_helpers import TerminateTaskGroup, restart_task_on_iter from logprep.ng.util.configuration import Configuration from logprep.ng.util.defaults import DEFAULT_LOG_CONFIG logger = logging.getLogger("Runner") -GRACEFUL_SHUTDOWN_TIMEOUT = 10 +GRACEFUL_SHUTDOWN_TIMEOUT = 3 +HARD_SHUTDOWN_TIMEOUT = 5 MAX_CONFIG_REFRESH_INTERVAL_DEVIATION_PERCENT = 0.05 @@ -39,22 +41,23 @@ def __init__(self, configuration: Configuration) -> None: Component wiring is deferred to `setup()` to preserve the required init order. 
""" - self.configuration = configuration + self.config = configuration self._running_config_version: None | str = None - self._main_task: asyncio.Task | None = None + self._task_group = asyncio.TaskGroup() + self._stop_event = asyncio.Event() self._pipeline_manager: PipelineManager | None = None async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None]: - self.configuration.schedule_config_refresh() - refresh_interval = self.configuration.config_refresh_interval + self.config.schedule_config_refresh() + refresh_interval = self.config.config_refresh_interval while True: - self.configuration.refresh() + self.config.refresh() - if self.configuration.version != self._running_config_version: - yield self.configuration - self._running_config_version = self.configuration.version - refresh_interval = self.configuration.config_refresh_interval + if self.config.version != self._running_config_version: + yield self.config + self._running_config_version = self.config.version + refresh_interval = self.config.config_refresh_interval if refresh_interval is not None: try: @@ -73,66 +76,52 @@ async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None logger.debug("Config refresh has been disabled.") break - async def _run_pipeline(self, config: Configuration) -> tuple[PipelineManager, asyncio.Task]: - manager = PipelineManager(config) - manager_task = asyncio.create_task(manager.run(), name="pipeline_manager") - return manager, manager_task - - async def _shut_down_pipeline( - self, manager: PipelineManager, manager_task: asyncio.Task - ) -> None: - await manager.shut_down() - try: - await asyncio.wait_for(manager_task, GRACEFUL_SHUTDOWN_TIMEOUT) - logger.info("graceful shut down of pipeline manager succeeded") - except TimeoutError: - logger.error( - "could not gracefully shut down pipeline manager within timeframe", exc_info=True - ) - - async def _run(self) -> None: - logger.debug("Running _run") - try: - manager, manager_task = 
await self._run_pipeline(self.configuration) - - async for refreshed_config in self._refresh_configuration_gen(): - logger.debug("Configuration change detected. Restarting pipeline...") - await self._shut_down_pipeline(manager, manager_task) - manager, manager_task = await self._run_pipeline(refreshed_config) - - logger.debug("Configuration refresh disabled. Waiting for ") - await manager_task - except asyncio.CancelledError: - if manager is not None and manager_task is not None: - await self._shut_down_pipeline(manager, manager_task) - - logger.debug("End of _run") - async def run(self) -> None: """Run the runner and continuously process events until stopped.""" - self._running_config_version = self.configuration.version + self._running_config_version = self.config.version + + try: + async with self._task_group as tg: + tg.create_task(TerminateTaskGroup.raise_on_event(self._stop_event)) - self._main_task = asyncio.create_task(self._run(), name="config_refresh") + def start_pipeline(config: Configuration) -> asyncio.Task: + return tg.create_task( + PipelineManager(config, shutdown_timeout_s=GRACEFUL_SHUTDOWN_TIMEOUT).run(), + name="pipeline_manager", + ) - await self._main_task + try: + async for _ in restart_task_on_iter( + source=self._refresh_configuration_gen(), + task_factory=start_pipeline, + cancel_timeout_s=HARD_SHUTDOWN_TIMEOUT, + inital_task=start_pipeline(self.config), + ): + logger.debug( + "A new pipeline task has been spawned based on the latest configuration" + ) + except TimeoutError: + logger.error( + "Could not gracefully shut down pipeline manager within timeframe", + exc_info=True, + ) + raise + except ExceptionGroup as eg: + if not eg.exceptions or len(eg.exceptions) > 1: + raise + match list(eg.exceptions)[0]: + case TerminateTaskGroup(): + logger.debug("Task group terminated") + case _: + raise - self.shut_down() logger.debug("End log processing.") def stop(self) -> None: """Stop the runner and signal the underlying processing pipeline to 
exit.""" logger.info("Stopping runner and exiting...") - if self._main_task is not None: - logger.debug("Cancelling runner main task") - self._main_task.cancel() - else: - logger.debug("Attempting to stop inactive runner") - - def shut_down(self) -> None: - """Shut down runner components, and required runner attributes.""" - - logger.info("Runner shut down complete.") + self._stop_event.set() def setup_logging(self) -> None: """Setup the logging configuration. @@ -143,6 +132,6 @@ def setup_logging(self) -> None: warnings.simplefilter("always", DeprecationWarning) logging.captureWarnings(True) - log_config = DEFAULT_LOG_CONFIG | asdict(self.configuration.logger) + log_config = DEFAULT_LOG_CONFIG | asdict(self.config.logger) os.environ["LOGPREP_LOG_CONFIG"] = json.dumps(log_config) logging.config.dictConfig(log_config) diff --git a/logprep/ng/util/async.py b/logprep/ng/util/async.py deleted file mode 100644 index 764c4f634..000000000 --- a/logprep/ng/util/async.py +++ /dev/null @@ -1,31 +0,0 @@ -import asyncio -from collections.abc import Callable, Coroutine -from typing import Any, ParamSpec, TypeVar - -T = TypeVar("T") -P = ParamSpec("P") - - -def create_task( - factory: Callable[P, Coroutine[Any, Any, T]], *args: P.args, **kwargs: P.kwargs -) -> asyncio.Task[T]: - """ - Wraps :code:`asyncio.create_task` to automatically assign a name derived from... 
- - Parameters - ---------- - factory : Callable[P, Coroutine[Any, Any, T]] - _description_ - - Returns - ------- - asyncio.Task[T] - _description_ - """ - factory_self = getattr(factory, "__self__", None) - name = ( - f"{factory_self.__class__.__name__}.{factory.__name__}" - if factory_self is not None - else f"{factory.__name__}" - ) - return asyncio.create_task(factory(*args, **kwargs), name=name) diff --git a/logprep/ng/util/async_helpers.py b/logprep/ng/util/async_helpers.py new file mode 100644 index 000000000..f07ac4fd8 --- /dev/null +++ b/logprep/ng/util/async_helpers.py @@ -0,0 +1,110 @@ +"""A collection of helper utilities for async code""" + +import asyncio +from collections.abc import AsyncGenerator, AsyncIterable, AsyncIterator, Callable +from typing import TypeVar + +T = TypeVar("T") +D = TypeVar("D") + + +class TerminateTaskGroup(Exception): + """Exception raised to terminate a task group.""" + + @staticmethod + async def raise_on_timeout(timeout_s: float, msg: str | None = None): + """Raises this exception type as soon as the timeout (in seconds) expires. + + Parameters + ---------- + timeout_s : float + Number of seconds after which the exception should be raised + msg : str | None, optional + Message for the exception, by default None + + Raises + ------ + TerminateTaskGroup + The exception for terminating the task group. + """ + await asyncio.sleep(timeout_s) + raise TerminateTaskGroup(msg) + + @staticmethod + async def raise_on_event(event: asyncio.Event, msg: str | None = None): + """Raises this exception type as soon as the event is set. + + Parameters + ---------- + event : asyncio.Event + Triggering event for the exception + msg : str | None, optional + Message for the exception, by default None + + Raises + ------ + TerminateTaskGroup + The exception for terminating the task group.
+ """ + await event.wait() + raise TerminateTaskGroup(msg) + + +async def cancel_task_and_wait(task: asyncio.Task[T], timeout_s: float) -> None: + """Cancels the given task and waits for it to actually stop. + Raises a :code:`TimeoutError` if timeout expires. + A :code:`CancelledError` will only be raised if the parent task is cancelled. + + Parameters + ---------- + task : asyncio.Task[T] + The task to cancel + timeout_s : float + The timeout in seconds to wait + + Raises + ------ + TimeoutError + Raised if the timeout expires and the task is still not done. + """ + task.cancel() + done, _ = await asyncio.wait([task], timeout=timeout_s) + if not done: + raise TimeoutError(f"Task {task.get_name()} did not stop in time after cancellation") + + +async def restart_task_on_iter( + source: AsyncIterator[D] | AsyncIterable[D], + task_factory: Callable[[D], asyncio.Task[T]], + cancel_timeout_s: float, + inital_task: asyncio.Task[T] | None = None, +) -> AsyncGenerator[asyncio.Task[T], None]: + """Consumes an iterable data source and ensures that there is always one task executing on the latest data. 
+ + Parameters + ---------- + source : AsyncIterator[D] | AsyncIterable[D] + The data source producing parameters for the spawned tasks + task_factory : Callable[[D], asyncio.Task[T]] + The factory to create new tasks from new data items + cancel_timeout_s : float + The number of seconds after which task cancellation is deemed not successful + inital_task : asyncio.Task[T] | None, optional + The initial task, by default None + + Returns + ------- + AsyncGenerator[asyncio.Task[T], None] + The stream of tasks which result from spawning fresh tasks on new data + + Yields + ------ + Iterator[AsyncGenerator[asyncio.Task[T], None]] + The stream of tasks which result from spawning fresh tasks on new data + """ + task = inital_task + async for data in source: + if task is not None: + await cancel_task_and_wait(task, cancel_timeout_s) + task = task_factory(data) + yield task diff --git a/logprep/run_ng.py b/logprep/run_ng.py index 4ca695a51..d425a816a 100644 --- a/logprep/run_ng.py +++ b/logprep/run_ng.py @@ -86,6 +86,8 @@ def run(configs: tuple[str], version=None) -> None: logger.debug(f"Exit received with code {error.code}") sys.exit(error.code) # pylint: disable=broad-except + except ExceptionGroup as error_group: + logger.exception(f"Multiple errors occurred: {error_group}") except Exception as error: if os.environ.get("DEBUG", False): logger.exception(f"A critical error occurred: {error}") # pragma: no cover From b5b1e8b82d233b2176b8bd3671bd3710170c4c36 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 4 Mar 2026 10:07:30 +0100 Subject: [PATCH 07/68] feat: make confluent_kafka async; first steps towards async input handling --- logprep/abc/component.py | 9 +- logprep/ng/abc/input.py | 43 +++------ logprep/ng/connector/confluent_kafka/input.py | 89 ++++++++++++------- logprep/ng/manager.py | 5 +- logprep/ng/runner.py | 11 ++- logprep/ng/util/async_helpers.py | 9 +- logprep/ng/util/worker/worker.py | 5 +- 7 files changed, 97 insertions(+), 74 deletions(-) diff --git 
a/logprep/abc/component.py b/logprep/abc/component.py index db80d43c4..594d6b817 100644 --- a/logprep/abc/component.py +++ b/logprep/abc/component.py @@ -1,5 +1,6 @@ """abstract module for components""" +import asyncio import functools import inspect import logging @@ -77,7 +78,7 @@ def metric_labels(self) -> dict: """Labels for the metrics""" return {"component": self._config.type, "name": self.name, "description": "", "type": ""} - def __init__(self, name: str, configuration: "Config", pipeline_index: int | None = None): + def __init__(self, name: str, configuration: Config, pipeline_index: int | None = None): self._config = configuration self.name = name self.pipeline_index = pipeline_index @@ -105,6 +106,10 @@ def describe(self) -> str: """ return f"{self.__class__.__name__} ({self.name})" + async def _asetup(self): + loop = asyncio.get_running_loop() + loop.run_in_executor(None, self.setup) + def setup(self): """Set the component up.""" self._populate_cached_properties() @@ -146,7 +151,7 @@ def _shut_down(self) -> None: self._clear_scheduled_jobs() self._clear_properties() - def shut_down(self): + async def shut_down(self): """Stop processing of this component. Optional: Called when stopping the pipeline diff --git a/logprep/ng/abc/input.py b/logprep/ng/abc/input.py index be632f42d..63d5091f3 100644 --- a/logprep/ng/abc/input.py +++ b/logprep/ng/abc/input.py @@ -1,3 +1,5 @@ +# pylint: disable=line-too-long + """This module provides the abstract base class for all input endpoints. New input endpoint types are created by implementing it. 
""" @@ -14,14 +16,12 @@ from copy import deepcopy from functools import cached_property from hmac import HMAC -from typing import Self from zoneinfo import ZoneInfo from attrs import define, field, validators from logprep.abc.connector import Connector from logprep.abc.exceptions import LogprepException -from logprep.metrics.metrics import Metric from logprep.ng.abc.event import EventBacklog from logprep.ng.event.event_state import EventStateType from logprep.ng.event.log_event import LogEvent @@ -104,18 +104,7 @@ def __init__(self, input_connector: "Input", timeout: float): self.input_connector = input_connector self.timeout = timeout - def __iter__(self) -> Self: - """Return the iterator instance itself. - - Returns - ------- - Self - The iterator instance (self). - """ - - return self - - def __next__(self) -> LogEvent | None: + async def __anext__(self) -> LogEvent | None: """Return the next event in the Input Connector within the configured timeout. Returns @@ -123,7 +112,7 @@ def __next__(self) -> LogEvent | None: LogEvent | None The next event retrieved from the underlying data source. 
""" - event = self.input_connector.get_next(timeout=self.timeout) + event = await self.input_connector.get_next(timeout=self.timeout) logger.debug( "InputIterator fetching next event with timeout %s, is None: %s", self.timeout, @@ -131,10 +120,6 @@ def __next__(self) -> LogEvent | None: ) return event - async def __anext__(self): - # TODO implement properly - return self.__next__() - class Input(Connector): """Connect to a source for log data.""" @@ -274,7 +259,7 @@ def _get_raw_event(self, timeout: float) -> bytes | None: # pylint: disable=unu return None @abstractmethod - def _get_event(self, timeout: float) -> tuple: + async def _get_event(self, timeout: float) -> tuple: """Implements the details how to get the event Parameters @@ -299,15 +284,15 @@ def _register_failed_event( error_log_event = LogEvent( data=event if isinstance(event, dict) else {}, original=raw_event if raw_event is not None else b"", - metadata=metadata, # type: ignore + metadata=metadata, # type: ignore # TODO: fix mypy issue ) error_log_event.errors.append(error) error_log_event.state.current_state = EventStateType.FAILED self.event_backlog.register(events=[error_log_event]) - @Metric.measure_time() - def get_next(self, timeout: float) -> LogEvent | None: + # @Metric.measure_time() + async def get_next(self, timeout: float) -> LogEvent | None: """Return the next document Parameters @@ -326,7 +311,7 @@ def get_next(self, timeout: float) -> LogEvent | None: metadata: dict | None = None try: - event, raw_event, metadata = self._get_event(timeout) + event, raw_event, metadata = await self._get_event(timeout) if event is None: return None @@ -334,7 +319,7 @@ def get_next(self, timeout: float) -> LogEvent | None: if not isinstance(event, dict): raise CriticalInputError(self, "not a dict", event) - self.metrics.number_of_processed_events += 1 + # self.metrics.number_of_processed_events += 1 try: if self._add_full_event_to_target_field: @@ -380,8 +365,8 @@ def get_next(self, timeout: float) -> 
LogEvent | None: log_event = LogEvent( data=event, - original=raw_event, # type: ignore - metadata=metadata, # type: ignore + original=raw_event, + metadata=metadata, # type: ignore # TODO: fix mypy issue ) self.event_backlog.register(events=[log_event]) @@ -448,8 +433,8 @@ def _add_arrival_timedelta_information_to_event( log_arrival_time = get_dotted_field_value(event, log_arrival_time_target_field) if time_reference and isinstance(log_arrival_time, str) and isinstance(time_reference, str): delta_time_sec = ( - TimeParser.from_string(log_arrival_time).astimezone(UTC) - - TimeParser.from_string(time_reference).astimezone(UTC) + TimeParser.from_string(log_arrival_time).astimezone(UTC) # type: ignore # TODO: fix mypy issue + - TimeParser.from_string(time_reference).astimezone(UTC) # type: ignore # TODO: fix mypy issue ).total_seconds() add_fields_to(event, fields={target_field: delta_time_sec}) diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index 5824f148f..8ffaf5a1b 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -44,12 +44,12 @@ OFFSET_END, OFFSET_INVALID, OFFSET_STORED, - Consumer, KafkaException, Message, TopicPartition, ) from confluent_kafka.admin import AdminClient +from confluent_kafka.aio import AIOConsumer from logprep.metrics.metrics import CounterMetric, GaugeMetric from logprep.ng.abc.input import ( @@ -271,11 +271,12 @@ class Config(Input.Config): _last_valid_record: Message | None - __slots__ = ["_last_valid_record"] + __slots__ = ["_last_valid_record", "_consumer"] def __init__(self, name: str, configuration: "ConfluentKafkaInput.Config") -> None: super().__init__(name, configuration) self._last_valid_record = None + self._consumer: AIOConsumer | None = None @property def config(self) -> Config: @@ -323,16 +324,28 @@ def _admin(self) -> AdminClient: admin_config[key] = value return AdminClient(admin_config) - @cached_property - 
def _consumer(self) -> Consumer: - """configures and returns the consumer + async def get_consumer(self, max_workers: int = 4) -> AIOConsumer: + """ + Configures and returns the asynchronous Kafka consumer. + + Parameters + ---------- + max_workers : int, optional + The maximum number of concurrent worker tasks for message processing. + Should generally not exceed the number of topic partitions. + Defaults to 4. Returns ------- - Consumer - confluent_kafka consumer object + AIOConsumer + The pre-configured aiokafka consumer object. """ - return Consumer(self._kafka_config) + + if self._consumer is None: + consumer = AIOConsumer(self._kafka_config, max_workers=max_workers) + self._consumer = consumer + + return self._consumer def _error_callback(self, error: KafkaException) -> None: """Callback for generic/global error events, these errors are typically @@ -400,9 +413,9 @@ def _commit_callback( if `error` is not None """ if error is not None: - self.metrics.commit_failures += 1 + # self.metrics.commit_failures += 1 raise InputWarning(self, f"Could not commit offsets for {topic_partitions}: {error}") - self.metrics.commit_success += 1 + # self.metrics.commit_success += 1 for topic_partition in topic_partitions: offset = topic_partition.offset if offset in SPECIAL_OFFSETS: @@ -424,8 +437,7 @@ def describe(self) -> str: base_description = super().describe() return f"{base_description} - Kafka Input: {self.config.kafka_config['bootstrap.servers']}" - def _get_raw_event(self, timeout: float) -> Message | None: # type: ignore - # TODO type needs to be fixed + async def _get_raw_event(self, timeout: float) -> Message | None: # type: ignore # TODO: fix mypy issue """Get next raw Message from Kafka. Parameters @@ -444,9 +456,12 @@ def _get_raw_event(self, timeout: float) -> Message | None: # type: ignore Raises if an input is invalid or if it causes an error. 
""" try: - message = self._consumer.poll(timeout=timeout) + consumer = await self.get_consumer() + message = await consumer.poll(timeout=timeout) except RuntimeError as error: raise FatalInputError(self, str(error)) from error + except Exception as error: # remove this + pass if message is None: return None if message.value() is None or message.partition() is None or message.offset() is None: @@ -463,7 +478,7 @@ def _get_raw_event(self, timeout: float) -> Message | None: # type: ignore return message - def _get_event(self, timeout: float) -> tuple: + async def _get_event(self, timeout: float) -> tuple: """Parse the raw document from Kafka into a json. Parameters @@ -486,7 +501,7 @@ def _get_event(self, timeout: float) -> tuple: Raises if an input is invalid or if it causes an error. """ - message = self._get_raw_event(timeout) + message = await self._get_raw_event(timeout) # assert None not in (message.value(), message.partition(), message.offset()) if message is None: @@ -522,7 +537,7 @@ def _enable_auto_offset_store(self) -> bool: def _enable_auto_commit(self) -> bool: return self.config.kafka_config.get("enable.auto.commit") == "true" - def batch_finished_callback(self) -> None: + async def batch_finished_callback(self) -> None: # type: ignore # TODO: fix mypy issue """Store offsets for last message referenced by `self._last_valid_records`. Should be called after delivering the current message to the output or error queue. 
""" @@ -533,14 +548,17 @@ def batch_finished_callback(self) -> None: if not self._last_valid_record: return try: - self._consumer.store_offsets(message=self._last_valid_record) + consumer = await self.get_consumer() + await consumer.store_offsets(message=self._last_valid_record) except KafkaException as error: raise InputWarning(self, f"{error}, {self._last_valid_record}") from error - def _assign_callback(self, _: Consumer, topic_partitions: list[TopicPartition]) -> None: + async def _assign_callback( + self, _: AIOConsumer, topic_partitions: list[TopicPartition] + ) -> None: for topic_partition in topic_partitions: offset, partition = topic_partition.offset, topic_partition.partition - member_id = self._get_memberid() + member_id = await self._get_memberid() logger.info( "%s was assigned to topic: %s | partition %s", member_id, @@ -554,23 +572,24 @@ def _assign_callback(self, _: Consumer, topic_partitions: list[TopicPartition]) self.metrics.committed_offsets.add_with_labels(offset, labels) self.metrics.current_offsets.add_with_labels(offset, labels) - def _revoke_callback(self, _: Consumer, topic_partitions: list[TopicPartition]) -> None: - + async def _revoke_callback( + self, _: AIOConsumer, topic_partitions: list[TopicPartition] + ) -> None: for topic_partition in topic_partitions: self.metrics.number_of_warnings += 1 - member_id = self._get_memberid() + member_id = await self._get_memberid() logger.warning( "%s to be revoked from topic: %s | partition %s", member_id, topic_partition.topic, topic_partition.partition, ) - self.batch_finished_callback() + await self.batch_finished_callback() - def _lost_callback(self, _: Consumer, topic_partitions: list[TopicPartition]) -> None: + async def _lost_callback(self, _: AIOConsumer, topic_partitions: list[TopicPartition]) -> None: for topic_partition in topic_partitions: self.metrics.number_of_warnings += 1 - member_id = self._get_memberid() + member_id = await self._get_memberid() logger.warning( "%s has lost topic: 
%s | partition %s - try to reassign", member_id, @@ -578,18 +597,20 @@ def _lost_callback(self, _: Consumer, topic_partitions: list[TopicPartition]) -> topic_partition.partition, ) - def _get_memberid(self) -> str | None: + async def _get_memberid(self) -> str | None: member_id = None try: - member_id = self._consumer.memberid() + consumer = await self.get_consumer() + member_id = consumer._consumer.memberid() except RuntimeError as error: logger.error("Failed to retrieve member ID: %s", error) return member_id - def _shut_down(self) -> None: + async def shut_down(self) -> None: """Close consumer, which also commits kafka offsets.""" - self._consumer.close() - return super()._shut_down() + consumer = await self.get_consumer() + await consumer.close() + super()._shut_down() def health(self) -> bool: """Check the health of the component. @@ -611,15 +632,17 @@ def health(self) -> bool: return False return super().health() - def setup(self) -> None: + async def _asetup(self): """Set the component up.""" try: - self._consumer.subscribe( + consumer = await self.get_consumer() + + await consumer.subscribe( [self.config.topic], on_assign=self._assign_callback, on_revoke=self._revoke_callback, on_lost=self._lost_callback, ) - super().setup() + await super()._asetup() except KafkaException as error: raise FatalInputError(self, f"Could not setup kafka consumer: {error}") from error diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 2a89440ca..9cfbfacbe 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -39,12 +39,12 @@ def __init__(self, configuration: Configuration, shutdown_timeout_s: float) -> N self.configuration = configuration self._shutdown_timeout_s = shutdown_timeout_s - def _setup(self): + async def setup(self): self._event_backlog = SetEventBacklog() self._input_connector = cast(Input, Factory.create(self.configuration.input)) self._input_connector.event_backlog = self._event_backlog # TODO needs to be disentangled -
self._input_connector.setup() + await self._input_connector._asetup() processors = [ typing.cast(Processor, Factory.create(processor_config)) @@ -123,7 +123,6 @@ async def send(batch: list[LogEvent]) -> list[LogEvent]: async def run(self) -> None: """Run the runner and continuously process events until stopped.""" - self._setup() try: await self._orchestrator.run() except CancelledError: diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 000fd19fe..7033fb17b 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -84,9 +84,14 @@ async def run(self) -> None: async with self._task_group as tg: tg.create_task(TerminateTaskGroup.raise_on_event(self._stop_event)) - def start_pipeline(config: Configuration) -> asyncio.Task: + async def start_pipeline(config: Configuration) -> asyncio.Task: + pipeline_manager = PipelineManager( + config, shutdown_timeout_s=GRACEFUL_SHUTDOWN_TIMEOUT + ) + await pipeline_manager.setup() + return tg.create_task( - PipelineManager(config, shutdown_timeout_s=GRACEFUL_SHUTDOWN_TIMEOUT).run(), + pipeline_manager.run(), name="pipeline_manager", ) @@ -95,7 +100,7 @@ def start_pipeline(config: Configuration) -> asyncio.Task: source=self._refresh_configuration_gen(), task_factory=start_pipeline, cancel_timeout_s=HARD_SHUTDOWN_TIMEOUT, - inital_task=start_pipeline(self.config), + inital_task=await start_pipeline(self.config), ): logger.debug( "A new pipeline task has been spawned based on the latest configuration" diff --git a/logprep/ng/util/async_helpers.py b/logprep/ng/util/async_helpers.py index f07ac4fd8..875062148 100644 --- a/logprep/ng/util/async_helpers.py +++ b/logprep/ng/util/async_helpers.py @@ -2,12 +2,15 @@ import asyncio from collections.abc import AsyncGenerator, AsyncIterable, AsyncIterator, Callable -from typing import TypeVar +from typing import Awaitable, TypeVar T = TypeVar("T") D = TypeVar("D") +TaskFactory = Callable[[D], asyncio.Task[T] | Awaitable[asyncio.Task[T]]] + + class TerminateTaskGroup(Exception): 
"""Exception raised to terminate a task group.""" @@ -75,7 +78,7 @@ async def cancel_task_and_wait(task: asyncio.Task[T], timeout_s: float) -> None: async def restart_task_on_iter( source: AsyncIterator[D] | AsyncIterable[D], - task_factory: Callable[[D], asyncio.Task[T]], + task_factory: TaskFactory, cancel_timeout_s: float, inital_task: asyncio.Task[T] | None = None, ) -> AsyncGenerator[asyncio.Task[T], None]: @@ -106,5 +109,5 @@ async def restart_task_on_iter( async for data in source: if task is not None: await cancel_task_and_wait(task, cancel_timeout_s) - task = task_factory(data) + task = await task_factory(data) yield task diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 0b0d55913..445785d3a 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -187,7 +187,10 @@ async def run(self, stop_event: asyncio.Event) -> None: else: while not stop_event.is_set(): item = await anext(self.in_queue) - await self.add(item) + + if item is not None: + await self.add(item) + # TODO is this await really necessary? await asyncio.sleep(0.0) From 4ecb2229b12c7528faf27f23b9fa99aa6ca86797 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 4 Mar 2026 10:09:17 +0100 Subject: [PATCH 08/68] feat: add pipeline configs for benchmark --- benchmark.py | 4 +- .../config/_benchmark_ng_pipeline.yml | 134 ++++++++++++++++++ .../config/_benchmark_non_ng_pipeline.yml | 134 ++++++++++++++++++ 3 files changed, 270 insertions(+), 2 deletions(-) create mode 100644 examples/exampledata/config/_benchmark_ng_pipeline.yml create mode 100644 examples/exampledata/config/_benchmark_non_ng_pipeline.yml diff --git a/benchmark.py b/benchmark.py index aa49b5805..6baffd78c 100644 --- a/benchmark.py +++ b/benchmark.py @@ -308,8 +308,8 @@ def resolve_pipeline_config(ng: int) -> Path: Pipeline config path. 
""" if ng == 1: - return Path("./examples/exampledata/config/ng_pipeline.yml") - return Path("./examples/exampledata/config/pipeline.yml") + return Path("./examples/exampledata/config/_benchmark_ng_pipeline.yml") + return Path("./examples/exampledata/config/_benchmark_non_ng_pipeline.yml") def read_vm_max_map_count() -> int: diff --git a/examples/exampledata/config/_benchmark_ng_pipeline.yml b/examples/exampledata/config/_benchmark_ng_pipeline.yml new file mode 100644 index 000000000..0ca42bef7 --- /dev/null +++ b/examples/exampledata/config/_benchmark_ng_pipeline.yml @@ -0,0 +1,134 @@ +version: 2 +process_count: 1 +timeout: 5.0 +restart_count: 2 +config_refresh_interval: 5 +error_backlog_size: 1500000 +logger: + level: DEBUG + format: "%(asctime)-15s %(hostname)-5s %(name)-10s %(levelname)-8s: %(message)s" + datefmt: "%Y-%m-%d %H:%M:%S" + loggers: + "py.warnings": {"level": "ERROR"} + "Runner": {"level": "INFO"} + "Processor": {"level": "ERROR"} + "Exporter": {"level": "ERROR"} + "uvicorn": {"level": "ERROR"} + "uvicorn.access": {"level": "ERROR"} + "OpenSearchOutput": {"level": "DEBUG"} + "KafkaOutput": {"level": "ERROR"} + "Input": {"level": "ERROR"} +metrics: + enabled: true + port: 8001 +pipeline: + - labelername: + type: ng_labeler + schema: examples/exampledata/rules/labeler/schema.json + include_parent_labels: true + rules: + - examples/exampledata/rules/labeler/rules + - dissector: + type: ng_dissector + rules: + - examples/exampledata/rules/dissector/rules + - dropper: + type: ng_dropper + rules: + - examples/exampledata/rules/dropper/rules + - filter: "test_dropper" + dropper: + drop: + - drop_me + description: "..." 
+ - pre_detector: + type: ng_pre_detector + rules: + - examples/exampledata/rules/pre_detector/rules + outputs: + - opensearch: sre + tree_config: examples/exampledata/rules/pre_detector/tree_config.json + alert_ip_list_path: examples/exampledata/rules/pre_detector/alert_ips.yml + - amides: + type: ng_amides + rules: + - examples/exampledata/rules/amides/rules + models_path: examples/exampledata/models/model.zip + num_rule_attributions: 10 + max_cache_entries: 1000000 + decision_threshold: 0.32 + - pseudonymizer: + type: ng_pseudonymizer + pubkey_analyst: examples/exampledata/rules/pseudonymizer/example_analyst_pub.pem + pubkey_depseudo: examples/exampledata/rules/pseudonymizer/example_depseudo_pub.pem + regex_mapping: examples/exampledata/rules/pseudonymizer/regex_mapping.yml + hash_salt: a_secret_tasty_ingredient + outputs: + - opensearch: pseudonyms + rules: + - examples/exampledata/rules/pseudonymizer/rules/ + max_cached_pseudonyms: 1000000 + - calculator: + type: ng_calculator + rules: + - filter: "test_label: execute" + calculator: + target_field: "calculation" + calc: "1 + 1" +input: + kafka: + type: ng_confluentkafka_input + topic: consumer + kafka_config: + bootstrap.servers: 127.0.0.1:9092 + group.id: cgroup3 + enable.auto.commit: "true" + auto.commit.interval.ms: "10000" + enable.auto.offset.store: "false" + queued.min.messages: "100000" + queued.max.messages.kbytes: "65536" + statistics.interval.ms: "60000" + preprocessing: + version_info_target_field: Logprep_version_info + log_arrival_time_target_field: event.ingested + hmac: + target: + key: "thisisasecureandrandomkey" + output_field: Full_event +output: + opensearch: + type: ng_opensearch_output + hosts: + - 127.0.0.1:9200 + default_index: processed + default_op_type: create + message_backlog_size: 7000 + timeout: 10000 + flush_timeout: 60 + user: admin + secret: admin + desired_cluster_status: ["green", "yellow"] + chunk_size: 25 + kafka: + type: ng_confluentkafka_output + default: false + topic: 
producer + flush_timeout: 300 + kafka_config: + bootstrap.servers: 127.0.0.1:9092 + statistics.interval.ms: "60000" +error_output: + kafka_error_output: + type: ng_confluentkafka_output + topic: errors + flush_timeout: 300 + send_timeout: 0 + kafka_config: + bootstrap.servers: 127.0.0.1:9092 + compression.type: none + statistics.interval.ms: "60000" + queue.buffering.max.messages: "10" + queue.buffering.max.kbytes: "1024" + queue.buffering.max.ms: "1000" + batch.size: "100" + request.required.acks: "-1" diff --git a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml new file mode 100644 index 000000000..97b55e417 --- /dev/null +++ b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml @@ -0,0 +1,134 @@ +version: 2 +process_count: 1 +timeout: 5.0 +restart_count: 2 +config_refresh_interval: 5 +error_backlog_size: 1500000 +logger: + level: DEBUG + format: "%(asctime)-15s %(hostname)-5s %(name)-10s %(levelname)-8s: %(message)s" + datefmt: "%Y-%m-%d %H:%M:%S" + loggers: + "py.warnings": {"level": "ERROR"} + "Runner": {"level": "INFO"} + "Processor": {"level": "ERROR"} + "Exporter": {"level": "ERROR"} + "uvicorn": {"level": "ERROR"} + "uvicorn.access": {"level": "ERROR"} + "OpenSearchOutput": {"level": "DEBUG"} + "KafkaOutput": {"level": "ERROR"} + "Input": {"level": "ERROR"} +metrics: + enabled: true + port: 8001 +pipeline: + - labelername: + type: labeler + schema: examples/exampledata/rules/labeler/schema.json + include_parent_labels: true + rules: + - examples/exampledata/rules/labeler/rules + - dissector: + type: dissector + rules: + - examples/exampledata/rules/dissector/rules + - dropper: + type: dropper + rules: + - examples/exampledata/rules/dropper/rules + - filter: "test_dropper" + dropper: + drop: + - drop_me + description: "..." 
+ - pre_detector: + type: pre_detector + rules: + - examples/exampledata/rules/pre_detector/rules + outputs: + - opensearch: sre + tree_config: examples/exampledata/rules/pre_detector/tree_config.json + alert_ip_list_path: examples/exampledata/rules/pre_detector/alert_ips.yml + - amides: + type: amides + rules: + - examples/exampledata/rules/amides/rules + models_path: examples/exampledata/models/model.zip + num_rule_attributions: 10 + max_cache_entries: 1000000 + decision_threshold: 0.32 + - pseudonymizer: + type: pseudonymizer + pubkey_analyst: examples/exampledata/rules/pseudonymizer/example_analyst_pub.pem + pubkey_depseudo: examples/exampledata/rules/pseudonymizer/example_depseudo_pub.pem + regex_mapping: examples/exampledata/rules/pseudonymizer/regex_mapping.yml + hash_salt: a_secret_tasty_ingredient + outputs: + - opensearch: pseudonyms + rules: + - examples/exampledata/rules/pseudonymizer/rules/ + max_cached_pseudonyms: 1000000 + - calculator: + type: calculator + rules: + - filter: "test_label: execute" + calculator: + target_field: "calculation" + calc: "1 + 1" +input: + kafka: + type: confluentkafka_input + topic: consumer + kafka_config: + bootstrap.servers: 127.0.0.1:9092 + group.id: cgroup3 + enable.auto.commit: "true" + auto.commit.interval.ms: "10000" + enable.auto.offset.store: "false" + queued.min.messages: "100000" + queued.max.messages.kbytes: "65536" + statistics.interval.ms: "60000" + preprocessing: + version_info_target_field: Logprep_version_info + log_arrival_time_target_field: event.ingested + hmac: + target: + key: "thisisasecureandrandomkey" + output_field: Full_event +output: + opensearch: + type: opensearch_output + hosts: + - 127.0.0.1:9200 + default_index: processed + default_op_type: create + message_backlog_size: 7000 + timeout: 10000 + flush_timeout: 60 + user: admin + secret: admin + desired_cluster_status: ["green", "yellow"] + chunk_size: 25 + kafka: + type: confluentkafka_output + default: false + topic: producer + 
flush_timeout: 300 + kafka_config: + bootstrap.servers: 127.0.0.1:9092 + statistics.interval.ms: "60000" +error_output: + kafka_error_output: + type: confluentkafka_output + topic: errors + flush_timeout: 300 + send_timeout: 0 + kafka_config: + bootstrap.servers: 127.0.0.1:9092 + compression.type: none + statistics.interval.ms: "60000" + queue.buffering.max.messages: "10" + queue.buffering.max.kbytes: "1024" + queue.buffering.max.ms: "1000" + batch.size: "100" + request.required.acks: "-1" From e0ab0f118632ca04bcee5e16eecb1474c780dc40 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 4 Mar 2026 11:44:09 +0100 Subject: [PATCH 09/68] WIP: initial async steps for sender and opensearch output --- benchmark.py | 20 ++ .../config/_benchmark_ng_pipeline.yml | 2 +- .../config/_benchmark_non_ng_pipeline.yml | 2 +- logprep/ng/abc/input.py | 2 +- logprep/ng/abc/output.py | 6 +- logprep/ng/connector/opensearch/output.py | 223 +++++++++++++----- logprep/ng/manager.py | 4 +- logprep/ng/sender.py | 68 +++--- pyproject.toml | 2 +- uv.lock | 9 +- 10 files changed, 241 insertions(+), 97 deletions(-) diff --git a/benchmark.py b/benchmark.py index 6baffd78c..20c202458 100644 --- a/benchmark.py +++ b/benchmark.py @@ -611,6 +611,26 @@ def benchmark_run( opensearch_refresh(opensearch_url, processed_index) after = opensearch_count_processed(opensearch_url, processed_index) + + def opensearch_debug_snapshot(opensearch_url: str) -> None: + # welche Indizes existieren überhaupt? + r = requests.get(f"{opensearch_url}/_cat/indices?v", timeout=10) + print("\n--- _cat/indices ---") + print(r.text) + + # wie viele docs pro index? 
(sehr schnell, super aufschlussreich) + r = requests.get(f"{opensearch_url}/_cat/count?v", timeout=10) + print("\n--- _cat/count ---") + print(r.text) + + # optional: aliases / data streams + r = requests.get(f"{opensearch_url}/_cat/aliases?v", timeout=10) + print("\n--- _cat/aliases ---") + print(r.text) + + # im benchmark_run nach dem kill + refresh: + opensearch_debug_snapshot(opensearch_url) + processed = max(0, after - baseline) return RunResult( diff --git a/examples/exampledata/config/_benchmark_ng_pipeline.yml b/examples/exampledata/config/_benchmark_ng_pipeline.yml index 0ca42bef7..d2c6d7abc 100644 --- a/examples/exampledata/config/_benchmark_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_ng_pipeline.yml @@ -102,7 +102,7 @@ output: - 127.0.0.1:9200 default_index: processed default_op_type: create - message_backlog_size: 7000 + message_backlog_size: 1 timeout: 10000 flush_timeout: 60 user: admin diff --git a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml index 97b55e417..569af191a 100644 --- a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml @@ -102,7 +102,7 @@ output: - 127.0.0.1:9200 default_index: processed default_op_type: create - message_backlog_size: 7000 + message_backlog_size: 1 timeout: 10000 flush_timeout: 60 user: admin diff --git a/logprep/ng/abc/input.py b/logprep/ng/abc/input.py index 63d5091f3..ea70ec0ba 100644 --- a/logprep/ng/abc/input.py +++ b/logprep/ng/abc/input.py @@ -319,7 +319,7 @@ async def get_next(self, timeout: float) -> LogEvent | None: if not isinstance(event, dict): raise CriticalInputError(self, "not a dict", event) - # self.metrics.number_of_processed_events += 1 + self.metrics.number_of_processed_events += 1 try: if self._add_full_event_to_target_field: diff --git a/logprep/ng/abc/output.py b/logprep/ng/abc/output.py index 8961e32ef..e5e24d246 100644 --- 
a/logprep/ng/abc/output.py +++ b/logprep/ng/abc/output.py @@ -81,7 +81,7 @@ def __init__(self, name: str, configuration: "Connector.Config"): self.input_connector = None @abstractmethod - def store(self, event: Event) -> None: + async def store(self, event: Event) -> None: """Store the event in the output destination. Parameters @@ -91,7 +91,7 @@ def store(self, event: Event) -> None: """ @abstractmethod - def store_custom(self, event: Event, target: str) -> None: + async def store_custom(self, event: Event, target: str) -> None: """Store the event in the output destination. Parameters @@ -103,7 +103,7 @@ def store_custom(self, event: Event, target: str) -> None: """ @abstractmethod - def flush(self): + async def flush(self): """Write the backlog to the output destination. Needs to be implemented in child classes to ensure that the backlog is written to the output destination. diff --git a/logprep/ng/connector/opensearch/output.py b/logprep/ng/connector/opensearch/output.py index 315192eea..ffcd067b9 100644 --- a/logprep/ng/connector/opensearch/output.py +++ b/logprep/ng/connector/opensearch/output.py @@ -30,15 +30,21 @@ ca_cert: /path/to/cert.crt """ +import asyncio +import json import logging import ssl import typing from functools import cached_property from typing import List, Optional -import opensearchpy as search from attrs import define, field, validators -from opensearchpy import OpenSearchException, helpers +from opensearchpy import ( + AsyncOpenSearch, + OpenSearchException, + SerializationError, + helpers, +) from opensearchpy.serializer import JSONSerializer from logprep.abc.exceptions import LogprepException @@ -82,7 +88,7 @@ def dumps(self, data): try: return self._encoder.encode(data).decode("utf-8") except (ValueError, TypeError) as e: - raise search.exceptions.SerializationError(data, e) + raise SerializationError(data, e) def loads(self, s): return self._decoder.decode(s) @@ -160,7 +166,7 @@ class Config(Output.Config): """Default op_type for 
indexing documents. Default is 'index', Consider using 'create' for data streams or to prevent overwriting existing documents.""" - __slots__ = ("_message_backlog",) + __slots__ = ("_message_backlog", "_flush_task") _message_backlog: list[Event] """List of messages to be sent to Opensearch.""" @@ -212,7 +218,7 @@ def http_auth(self) -> tuple | None: @cached_property def _search_context(self): """Returns the opensearch client.""" - return search.OpenSearch( + return AsyncOpenSearch( self.config.hosts, scheme=self.schema, http_auth=self.http_auth, @@ -229,11 +235,22 @@ def _search_context(self): def __init__(self, name: str, configuration: "OpensearchOutput.Config"): super().__init__(name, configuration) self._message_backlog = [] + self._flush_task: asyncio.Task | None = None - def setup(self): - super().setup() + async def _asetup(self): + await super()._asetup() flush_timeout = self.config.flush_timeout - self._schedule_task(task=self.flush, seconds=flush_timeout) + + # TODO: improve flush task handling + async def flush_task() -> None: + try: + while True: + await asyncio.sleep(flush_timeout) + await self.flush() + except asyncio.CancelledError: + pass + + self._flush_task = asyncio.create_task(flush_task()) def describe(self) -> str: """Get name of Opensearch endpoint with the host. 
@@ -247,13 +264,13 @@ def describe(self) -> str: base_description = Output.describe(self) return f"{base_description} - Opensearch Output: {self.config.hosts}" - @Output._handle_errors - def store(self, event: Event) -> None: + # @Output._handle_errors + async def store(self, event: Event) -> None: """Store a document in the index defined in the document or to the default index.""" - self.store_custom(event, event.data.get("_index", self.config.default_index)) + await self.store_custom(event, event.data.get("_index", self.config.default_index)) - @Output._handle_errors - def store_custom(self, event: Event, target: str) -> None: + # @Output._handle_errors + async def store_custom(self, event: Event, target: str) -> None: """Store document into backlog to be written into Opensearch with the target index. The target index is determined per document by parameter :code:`target`. @@ -270,66 +287,160 @@ def store_custom(self, event: Event, target: str) -> None: document["_op_type"] = document.get("_op_type", self.config.default_op_type) self.metrics.number_of_processed_events += 1 self._message_backlog.append(event) - self._write_to_search_context() + await self._write_to_search_context() - def _write_to_search_context(self): + async def _write_to_search_context(self): """Writes documents from a buffer into Opensearch indices. Writes documents in a bulk if the document buffer limit has been reached. This reduces connections to Opensearch and improves performance. 
""" if len(self._message_backlog) >= self.config.message_backlog_size: - self.flush() + await self.flush() - @Metric.measure_time() - def flush(self): + # @Metric.measure_time() + async def flush(self): if not self._message_backlog: return logger.debug("Flushing %d documents to Opensearch", len(self._message_backlog)) - self._bulk(self._search_context, self._message_backlog) + await self._bulk(self._search_context, self._message_backlog) self._message_backlog.clear() - def _bulk(self, client: search.OpenSearch, events: list[Event]) -> None: - """Bulk index documents into Opensearch. - Uses the parallel_bulk function from the opensearchpy library. - - the error information is stored in a document with the following structure: - - ```json - { - "op_type": { - "error": "error message", - "status": "status_code", - "exception": "exception message" - } - } - } + def _chunk_events_by_size( + self, + events: list["Event"], + *, + chunk_size: int, + max_chunk_bytes: int, + ) -> typing.Iterable[list["Event"]]: + """ + Chunk events into batches respecting chunk_size and (best-effort) max_chunk_bytes. + + Note: max_chunk_bytes is approximate because we estimate bytes via json.dumps. + """ + batch: list["Event"] = [] + approx_bytes = 0 + + for ev in events: + # best-effort byte estimation + try: + approx_bytes += len(json.dumps(ev.data, ensure_ascii=False).encode("utf-8")) + 200 + except Exception: + approx_bytes += 1000 # fallback guess + + batch.append(ev) + + if len(batch) >= chunk_size or approx_bytes >= max_chunk_bytes: + yield batch + batch = [] + approx_bytes = 0 + + if batch: + yield batch + + def _build_bulk_body(self, events: list["Event"], *, default_op_type: str) -> list[dict]: + """ + Build bulk request body as a list of dicts (action/meta + source lines). + opensearch-py will serialize this into NDJSON. 
+ """ + body: list[dict] = [] + + for ev in events: + doc = ev.data + op_type = doc.get("_op_type", default_op_type) + index = doc.get("_index") + + if not index: + # safety: fall back to whatever your pipeline expects + # (ideally _index is always set before bulk) + index = doc.get("_index") + + if op_type not in ("index", "create"): + # keep it strict: your Config only allows create/index + op_type = default_op_type + + # bulk action line + action_meta = {op_type: {"_index": index}} + # optionally pass _id if present + if "_id" in doc: + action_meta[op_type]["_id"] = doc["_id"] + + body.append(action_meta) + + # source line: must NOT include bulk meta keys + source = {k: v for k, v in doc.items() if k not in ("_index", "_op_type")} + body.append(source) + + return body + + async def _bulk(self, client: AsyncOpenSearch, events: list["Event"]) -> None: """ - kwargs = { - "max_chunk_bytes": self.config.max_chunk_bytes, - "chunk_size": self.config.chunk_size, - "queue_size": self.config.queue_size, - "thread_count": self.config.thread_count, - "raise_on_error": False, - "raise_on_exception": False, - } - actions = (event.data for event in events) - for index, result in enumerate(helpers.parallel_bulk(client, actions, **kwargs)): # type: ignore - success, item = result - if success: - events[index].state.next_state(success=True) + Async bulk indexing. + Uses AsyncOpenSearch.bulk directly, and processes per-item results. 
+ + Behavior is intentionally close to your sync version: + - marks event.state success/failure + - appends BulkError for failures + """ + default_op_type = self.config.default_op_type + + for batch in self._chunk_events_by_size( + events, + chunk_size=self.config.chunk_size, + max_chunk_bytes=self.config.max_chunk_bytes, + ): + body = self._build_bulk_body(batch, default_op_type=default_op_type) + + try: + resp = await client.bulk(body=body) + except OpenSearchException as e: + # whole bulk request failed → mark all events failed + for ev in batch: + ev.state.next_state(success=False) + ev.errors.append(BulkError("Bulk request failed", exception=str(e))) continue - op_type = item.get("_op_type", self.config.default_op_type) - error_info = item.get(op_type, {}) - error = BulkError(error_info.get("error", "Failed to index document"), **error_info) - event = events[index] - event.state.next_state(success=False) - event.errors.append(error) - - def health(self) -> bool: + + items = resp.get("items", []) + # One item per document (not per line). Our batch has N events, body has 2N lines. + # items length should match len(batch) if we're only doing index/create. 
+ for i, item in enumerate(items): + if i >= len(batch): + break + + ev = batch[i] + # item shape: {"index": {...}} or {"create": {...}} + op_type = next(iter(item.keys()), default_op_type) + info = item.get(op_type, {}) if isinstance(item.get(op_type), dict) else {} + + status = info.get("status") + error_obj = info.get("error") + + ok = isinstance(status, int) and 200 <= status < 300 and not error_obj + if ok: + ev.state.next_state(success=True) + continue + + # normalize error into your BulkError shape + # error_obj can be dict; keep it as "error" payload if present + if isinstance(error_obj, dict): + message = error_obj.get("reason") or str(error_obj) + else: + message = str(error_obj) if error_obj else "Failed to index document" + + ev.state.next_state(success=False) + ev.errors.append( + BulkError( + message, + status=str(status) if status is not None else None, + exception=None, + error=error_obj, # keep original payload for debugging + ) + ) + + async def health(self) -> bool: # type: ignore # TODO: fix mypy issue """Check the health of the component.""" try: - resp = self._search_context.cluster.health( + resp = await self._search_context.cluster.health( params={"timeout": self.config.health_timeout} ) except (OpenSearchException, ConnectionError) as error: diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 9cfbfacbe..439bc79d2 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -70,7 +70,7 @@ async def setup(self): logger.warning("No error output configured.") self._sender = Sender(outputs=output_connectors, error_output=error_output) - self._sender.setup() + await self._sender.setup() self._orchestrator = self._create_orchestrator() @@ -96,7 +96,7 @@ async def process(batch: list[LogEvent]) -> list[LogEvent]: ) async def send(batch: list[LogEvent]) -> list[LogEvent]: - return self._sender.process(batch) + return await self._sender.process(batch) output_worker: Worker[LogEvent, LogEvent] = Worker( name="output_worker", diff 
--git a/logprep/ng/sender.py b/logprep/ng/sender.py index a1257df17..ebae1869d 100644 --- a/logprep/ng/sender.py +++ b/logprep/ng/sender.py @@ -1,5 +1,6 @@ """sender module""" +import asyncio import logging import typing @@ -32,62 +33,71 @@ def __init__( self._default_output = [output for output in outputs if output.default][0] self._error_output = error_output - def process(self, batch: list[LogEvent]) -> list[LogEvent]: - self._send_and_flush_processed_events(batch_events=batch) + async def process(self, batch: list[LogEvent]) -> list[LogEvent]: + await self._send_and_flush_processed_events(batch_events=batch) if self._error_output: - self._send_and_flush_failed_events(batch_events=batch) + await self._send_and_flush_failed_events(batch_events=batch) return batch - def _send_and_flush_failed_events(self, batch_events: list[LogEvent]) -> None: - error_events = [ - self._send_failed(event) + async def _send_and_flush_failed_events(self, batch_events: list[LogEvent]) -> None: + failed = [ + event for event in batch_events if event is not None and event.state == EventStateType.FAILED ] - if not error_events: + if not failed: return - self._error_output.flush() # type: ignore[union-attr] + # send in parallel (minimal change vs. 
serial list comprehension) + error_events = await asyncio.gather(*(self._send_failed(event) for event in failed)) + + await self._error_output.flush() # type: ignore[union-attr] + failed_error_events = [ event for event in error_events if event.state == EventStateType.FAILED ] for error_event in failed_error_events: logger.error("Error during sending to error output: %s", error_event) - def _send_and_flush_processed_events(self, batch_events: list[LogEvent]) -> None: - processed_events = [ - self._send_processed(event) + async def _send_and_flush_processed_events(self, batch_events: list[LogEvent]) -> None: + processed = [ + event for event in batch_events if event is not None and event.state == EventStateType.PROCESSED ] - if not processed_events: + if not processed: return - for output in self._outputs.values(): - output.flush() - def _send_extra_data(self, event: LogEvent) -> None: + # send in parallel (minimal change vs. serial list comprehension) + await asyncio.gather(*(self._send_processed(event) for event in processed)) + + # flush once per output after sending + await asyncio.gather(*(output.flush() for output in self._outputs.values())) + + async def _send_extra_data(self, event: LogEvent) -> None: extra_data_events = typing.cast(list[ExtraDataEvent], event.extra_data) for extra_data_event in extra_data_events: for output in extra_data_event.outputs: for output_name, output_target in output.items(): if output_name in self._outputs: - self._outputs[output_name].store_custom(extra_data_event, output_target) + await self._outputs[output_name].store_custom( + extra_data_event, output_target + ) else: raise ValueError(f"Output {output_name} not configured.") - def _send_processed(self, event: LogEvent) -> LogEvent: + async def _send_processed(self, event: LogEvent) -> LogEvent: if event.extra_data: - self._send_extra_data(event) - self._default_output.store(event) + await self._send_extra_data(event) + await self._default_output.store(event) return event - def 
_send_failed(self, event: LogEvent) -> ErrorEvent: + async def _send_failed(self, event: LogEvent) -> ErrorEvent: """Send the event to the error output. If event can't be sent, it will be logged as an error. """ - error_event = self._get_error_event(event) - self._error_output.store(error_event) # type: ignore[union-attr] + await self._error_output.store(error_event) # type: ignore[union-attr] return error_event def _get_error_event(self, event: LogEvent) -> ErrorEvent: @@ -101,21 +111,19 @@ def _get_error_event(self, event: LogEvent) -> ErrorEvent: ) return ErrorEvent(log_event=event, reason=reason, state=EventStateType.PROCESSED) - def shut_down(self) -> None: + async def shut_down(self) -> None: """Shutdown all outputs gracefully.""" - for _, output in self._outputs.items(): - output.shut_down() + await output.shut_down() if self._error_output: - self._error_output.shut_down() + await self._error_output.shut_down() logger.info("All outputs have been shut down.") - logger.info("Sender has been shut down.") - def setup(self) -> None: + async def setup(self) -> None: """Setup all outputs.""" for _, output in self._outputs.items(): - output.setup() + await output._asetup() if self._error_output: - self._error_output.setup() + await self._error_output._asetup() logger.info("All outputs have been set up.") diff --git a/pyproject.toml b/pyproject.toml index b4ffc8b5c..e353dff0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ dependencies = [ "luqum<2", "more-itertools==8.10.0", "numpy>=1.26.0,<3", - "opensearch-py<4", + "opensearch-py[async]<4", "prometheus_client<1", "protobuf>=3.20.2,<7", "pycryptodome<4", diff --git a/uv.lock b/uv.lock index e0d2b2af7..fd9adfdd8 100644 --- a/uv.lock +++ b/uv.lock @@ -1392,7 +1392,7 @@ dependencies = [ { name = "more-itertools" }, { name = "msgspec" }, { name = "numpy" }, - { name = "opensearch-py" }, + { name = "opensearch-py", extra = ["async"] }, { name = "pandas" }, { name = "prometheus-client" }, { name = 
"protobuf" }, @@ -1474,7 +1474,7 @@ requires-dist = [ { name = "nbsphinx", marker = "extra == 'doc'", specifier = ">=0.9" }, { name = "numpy", specifier = ">=1.26.0,<3" }, { name = "openpyxl", marker = "extra == 'doc'" }, - { name = "opensearch-py", specifier = "<4" }, + { name = "opensearch-py", extras = ["async"], specifier = "<4" }, { name = "pandas", specifier = "<3" }, { name = "pre-commit", marker = "extra == 'dev'" }, { name = "prometheus-client", specifier = "<1" }, @@ -2140,6 +2140,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/08/a1/293c8ad81768ad625283d960685bde07c6302abf20a685e693b48ab6eb91/opensearch_py-3.1.0-py3-none-any.whl", hash = "sha256:e5af83d0454323e6ea9ddee8c0dcc185c0181054592d23cb701da46271a3b65b", size = 385729, upload-time = "2025-11-20T16:37:34.941Z" }, ] +[package.optional-dependencies] +async = [ + { name = "aiohttp" }, +] + [[package]] name = "orderly-set" version = "5.5.0" From 89e8c13e3185f3481ac8703cfbad4b420263e27d Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 4 Mar 2026 12:37:21 +0100 Subject: [PATCH 10/68] WIP --- benchmark.py | 52 ++++++++++++------- logprep/ng/abc/input.py | 4 +- .../ng/connector/confluent_kafka/output.py | 10 ++-- logprep/ng/connector/opensearch/output.py | 9 ++-- logprep/ng/event/event_state.py | 10 ++++ logprep/ng/pipeline.py | 7 +-- logprep/ng/sender.py | 29 +++++++++-- 7 files changed, 85 insertions(+), 36 deletions(-) diff --git a/benchmark.py b/benchmark.py index 20c202458..d0406156d 100644 --- a/benchmark.py +++ b/benchmark.py @@ -1,5 +1,4 @@ # pylint: disable=C0103 - """ Benchmark runner for logprep (logprep-ng and non-ng). @@ -286,6 +285,33 @@ def opensearch_count_processed(opensearch_url: str, processed_index: str) -> int return int(resp.json()["count"]) +def opensearch_debug_snapshot(opensearch_url: str) -> None: + """ + Print a small OpenSearch state snapshot for debugging. + Never raises (best-effort). 
+ """ + try: + r = requests.get(f"{opensearch_url}/_cat/indices?v", timeout=10) + print("\n--- _cat/indices ---") + print(r.text) + except Exception as e: + print(f"\n--- _cat/indices (failed) ---\n{e}") + + try: + r = requests.get(f"{opensearch_url}/_cat/count?v", timeout=10) + print("\n--- _cat/count ---") + print(r.text) + except Exception as e: + print(f"\n--- _cat/count (failed) ---\n{e}") + + try: + r = requests.get(f"{opensearch_url}/_cat/aliases?v", timeout=10) + print("\n--- _cat/aliases ---") + print(r.text) + except Exception as e: + print(f"\n--- _cat/aliases (failed) ---\n{e}") + + def reset_prometheus_dir(path: str) -> None: """ Recreate PROMETHEUS_MULTIPROC_DIR. @@ -596,6 +622,9 @@ def benchmark_run( time.sleep(sleep_after_logprep_start_s) + print("\n=== OpenSearch snapshot (before measurement) ===") + opensearch_debug_snapshot(opensearch_url) + baseline = opensearch_count_processed(opensearch_url, processed_index) startup_s = time.time() - t_startup @@ -610,27 +639,10 @@ def benchmark_run( # ensure near-real-time writes are visible to _count before measuring opensearch_refresh(opensearch_url, processed_index) - after = opensearch_count_processed(opensearch_url, processed_index) - - def opensearch_debug_snapshot(opensearch_url: str) -> None: - # welche Indizes existieren überhaupt? - r = requests.get(f"{opensearch_url}/_cat/indices?v", timeout=10) - print("\n--- _cat/indices ---") - print(r.text) - - # wie viele docs pro index? 
(sehr schnell, super aufschlussreich) - r = requests.get(f"{opensearch_url}/_cat/count?v", timeout=10) - print("\n--- _cat/count ---") - print(r.text) - - # optional: aliases / data streams - r = requests.get(f"{opensearch_url}/_cat/aliases?v", timeout=10) - print("\n--- _cat/aliases ---") - print(r.text) - - # im benchmark_run nach dem kill + refresh: + print("\n=== OpenSearch snapshot (after run / after refresh) ===") opensearch_debug_snapshot(opensearch_url) + after = opensearch_count_processed(opensearch_url, processed_index) processed = max(0, after - baseline) return RunResult( diff --git a/logprep/ng/abc/input.py b/logprep/ng/abc/input.py index ea70ec0ba..043698daa 100644 --- a/logprep/ng/abc/input.py +++ b/logprep/ng/abc/input.py @@ -192,7 +192,7 @@ def acknowledge(self) -> None: self.event_backlog.unregister(state_type=EventStateType.ACKED) for event in self.event_backlog.get(state_type=EventStateType.DELIVERED): - event.state.next_state() + event.state.current_state = EventStateType.ACKED @property def _add_hmac(self) -> bool: @@ -370,7 +370,7 @@ async def get_next(self, timeout: float) -> LogEvent | None: ) self.event_backlog.register(events=[log_event]) - log_event.state.next_state() + log_event.state.current_state = EventStateType.RECEIVED return log_event diff --git a/logprep/ng/connector/confluent_kafka/output.py b/logprep/ng/connector/confluent_kafka/output.py index 5ff83e786..80a982f82 100644 --- a/logprep/ng/connector/confluent_kafka/output.py +++ b/logprep/ng/connector/confluent_kafka/output.py @@ -36,6 +36,7 @@ from logprep.metrics.metrics import GaugeMetric, Metric from logprep.ng.abc.event import Event from logprep.ng.abc.output import FatalOutputError, Output +from logprep.ng.event.event_state import EventStateType from logprep.util.validators import keys_in_validator DEFAULTS = { @@ -281,7 +282,7 @@ def describe(self) -> str: f"{self.config.kafka_config.get('bootstrap.servers')}" ) - def store(self, event: Event) -> None: + def store(self, 
event: Event) -> None: # type: ignore # TODO: fix mypy issue """Store a document in the producer topic. Parameters @@ -303,7 +304,8 @@ def store_custom(self, event: Event, target: str) -> None: target : str Topic to store event data in. """ - event.state.next_state() + event.state.current_state = EventStateType.STORING_IN_OUTPUT + document = event.data self.metrics.number_of_processed_events += 1 try: @@ -358,12 +360,12 @@ def setup(self) -> None: def on_delivery(self, event: Event, err: KafkaException, msg: Message) -> None: """Callback for delivery reports.""" if err is not None: - event.state.next_state(success=False) + event.state.current_state = EventStateType.FAILED event.errors.append(err) logger.error("Message delivery failed: %s", err) self.metrics.number_of_errors += 1 return - event.state.next_state(success=True) + event.state.current_state = EventStateType.DELIVERED logger.debug( "Message delivered to '%s' partition %s, offset %s", msg.topic(), diff --git a/logprep/ng/connector/opensearch/output.py b/logprep/ng/connector/opensearch/output.py index ffcd067b9..69ae4f550 100644 --- a/logprep/ng/connector/opensearch/output.py +++ b/logprep/ng/connector/opensearch/output.py @@ -51,6 +51,7 @@ from logprep.metrics.metrics import Metric from logprep.ng.abc.event import Event from logprep.ng.abc.output import Output +from logprep.ng.event.event_state import EventStateType logger = logging.getLogger("OpenSearchOutput") @@ -281,7 +282,7 @@ async def store_custom(self, event: Event, target: str) -> None: target : str Index to store the document in. 
""" - event.state.next_state() + event.state.current_state = EventStateType.STORING_IN_OUTPUT document = event.data document["_index"] = target document["_op_type"] = document.get("_op_type", self.config.default_op_type) @@ -396,7 +397,7 @@ async def _bulk(self, client: AsyncOpenSearch, events: list["Event"]) -> None: except OpenSearchException as e: # whole bulk request failed → mark all events failed for ev in batch: - ev.state.next_state(success=False) + ev.state.current_state = EventStateType.FAILED ev.errors.append(BulkError("Bulk request failed", exception=str(e))) continue @@ -417,7 +418,7 @@ async def _bulk(self, client: AsyncOpenSearch, events: list["Event"]) -> None: ok = isinstance(status, int) and 200 <= status < 300 and not error_obj if ok: - ev.state.next_state(success=True) + ev.state.current_state = EventStateType.STORED_IN_OUTPUT continue # normalize error into your BulkError shape @@ -427,7 +428,7 @@ async def _bulk(self, client: AsyncOpenSearch, events: list["Event"]) -> None: else: message = str(error_obj) if error_obj else "Failed to index document" - ev.state.next_state(success=False) + ev.state.current_state = EventStateType.FAILED ev.errors.append( BulkError( message, diff --git a/logprep/ng/event/event_state.py b/logprep/ng/event/event_state.py index 266d7ea43..1dd9b45da 100644 --- a/logprep/ng/event/event_state.py +++ b/logprep/ng/event/event_state.py @@ -19,16 +19,26 @@ class EventStateType(StrEnum): PROCESSED = "processed" """The event has been processed by all pipeline processors.""" + STORING_IN_OUTPUT = "storing_in_output" + """The event is storing in the output connector.""" + STORED_IN_OUTPUT = "stored_in_output" """The event was successfully stored in the output connector.""" FAILED = "failed" """The event failed during processing or output storage.""" + STORING_IN_ERROR = "storing_in_error" + """The event is storing in the error output (e.g. 
error queue or + fallback output).""" + STORED_IN_ERROR = "stored_in_error" """The event was stored in the error output (e.g. error queue or fallback output).""" + DELIVERING = "delivering" + """The event is delivering to the target system or final destination.""" + DELIVERED = "delivered" """The event was delivered to the target system or final destination.""" diff --git a/logprep/ng/pipeline.py b/logprep/ng/pipeline.py index 5bca9ce05..29726493d 100644 --- a/logprep/ng/pipeline.py +++ b/logprep/ng/pipeline.py @@ -3,6 +3,7 @@ import logging from logprep.ng.abc.processor import Processor +from logprep.ng.event.event_state import EventStateType from logprep.ng.event.log_event import LogEvent logger = logging.getLogger("Pipeline") @@ -12,15 +13,15 @@ def _process_event(event: LogEvent | None, processors: list[Processor]) -> LogEv """process all processors for one event""" if event is None or not event.data: raise ValueError("no event given") - event.state.next_state() + event.state.current_state = EventStateType.PROCESSING for processor in processors: if not event.data: break processor.process(event) if not event.errors: - event.state.next_state(success=True) + event.state.current_state = EventStateType.PROCESSED else: - event.state.next_state(success=False) + event.state.current_state = EventStateType.FAILED logger.error("event failed: %s with errors: %s", event, event.errors) return event diff --git a/logprep/ng/sender.py b/logprep/ng/sender.py index ebae1869d..25df94db8 100644 --- a/logprep/ng/sender.py +++ b/logprep/ng/sender.py @@ -68,11 +68,34 @@ async def _send_and_flush_processed_events(self, batch_events: list[LogEvent]) - if not processed: return - # send in parallel (minimal change vs. 
serial list comprehension) - await asyncio.gather(*(self._send_processed(event) for event in processed)) + # send in parallel + try: + results = await asyncio.gather( + *(self._send_processed(event) for event in processed), + return_exceptions=True, + ) + for r in results: + if isinstance(r, Exception): + logger.exception("Error while sending processed event", exc_info=r) + + finally: + for output in self._outputs.values(): + try: + await output.flush() + except Exception as e: + logger.exception("Error while flushing output %s", output.name, exc_info=e) # flush once per output after sending - await asyncio.gather(*(output.flush() for output in self._outputs.values())) + try: + results = await asyncio.gather( + *(output.flush() for output in self._outputs.values()), + return_exceptions=True, + ) + for r in results: + if isinstance(r, Exception): + logger.exception("Error during final output flush", exc_info=r) + except Exception as e: + logger.exception("Unexpected error during final output flush", exc_info=e) async def _send_extra_data(self, event: LogEvent) -> None: extra_data_events = typing.cast(list[ExtraDataEvent], event.extra_data) From eabae13443a7c6c52caa38d4b7a654bcd4c84b59 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Thu, 5 Mar 2026 11:09:22 +0100 Subject: [PATCH 11/68] WIP --- .../config/_benchmark_ng_pipeline.yml | 35 ++-- .../config/_benchmark_non_ng_pipeline.yml | 35 ++-- logprep/ng/abc/input.py | 4 +- logprep/ng/connector/confluent_kafka/input.py | 3 +- logprep/ng/connector/opensearch/output.py | 168 +++++------------- 5 files changed, 80 insertions(+), 165 deletions(-) diff --git a/examples/exampledata/config/_benchmark_ng_pipeline.yml b/examples/exampledata/config/_benchmark_ng_pipeline.yml index d2c6d7abc..194f3604d 100644 --- a/examples/exampledata/config/_benchmark_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_ng_pipeline.yml @@ -109,26 +109,17 @@ output: secret: admin desired_cluster_status: ["green", "yellow"] chunk_size: 25 
- kafka: - type: ng_confluentkafka_output - default: false - topic: producer - flush_timeout: 300 - kafka_config: - bootstrap.servers: 127.0.0.1:9092 - statistics.interval.ms: "60000" error_output: - kafka_error_output: - type: ng_confluentkafka_output - topic: errors - flush_timeout: 300 - send_timeout: 0 - kafka_config: - bootstrap.servers: 127.0.0.1:9092 - compression.type: none - statistics.interval.ms: "60000" - queue.buffering.max.messages: "10" - queue.buffering.max.kbytes: "1024" - queue.buffering.max.ms: "1000" - batch.size: "100" - request.required.acks: "-1" + opensearch: + type: opensearch_output + hosts: + - 127.0.0.1:9200 + default_index: errors + default_op_type: create + message_backlog_size: 1 + timeout: 10000 + flush_timeout: 60 + user: admin + secret: admin + desired_cluster_status: ["green", "yellow"] + chunk_size: 25 diff --git a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml index 569af191a..2c6cbf337 100644 --- a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml @@ -109,26 +109,17 @@ output: secret: admin desired_cluster_status: ["green", "yellow"] chunk_size: 25 - kafka: - type: confluentkafka_output - default: false - topic: producer - flush_timeout: 300 - kafka_config: - bootstrap.servers: 127.0.0.1:9092 - statistics.interval.ms: "60000" error_output: - kafka_error_output: - type: confluentkafka_output - topic: errors - flush_timeout: 300 - send_timeout: 0 - kafka_config: - bootstrap.servers: 127.0.0.1:9092 - compression.type: none - statistics.interval.ms: "60000" - queue.buffering.max.messages: "10" - queue.buffering.max.kbytes: "1024" - queue.buffering.max.ms: "1000" - batch.size: "100" - request.required.acks: "-1" + opensearch: + type: opensearch_output + hosts: + - 127.0.0.1:9200 + default_index: errors + default_op_type: create + message_backlog_size: 1 + timeout: 10000 + flush_timeout: 
60 + user: admin + secret: admin + desired_cluster_status: ["green", "yellow"] + chunk_size: 25 diff --git a/logprep/ng/abc/input.py b/logprep/ng/abc/input.py index 043698daa..37a60bb53 100644 --- a/logprep/ng/abc/input.py +++ b/logprep/ng/abc/input.py @@ -243,7 +243,9 @@ def _add_full_event_to_target_field(self) -> bool: """Check and return if the event should be written into one singular field.""" return bool(self.config.preprocessing.add_full_event_to_target_field) - def _get_raw_event(self, timeout: float) -> bytes | None: # pylint: disable=unused-argument + async def _get_raw_event( + self, timeout: float + ) -> bytes | None: # pylint: disable=unused-argument """Implements the details how to get the raw event Parameters diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index 8ffaf5a1b..95ca7ece6 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -634,6 +634,8 @@ def health(self) -> bool: async def _asetup(self): """Set the component up.""" + await super()._asetup() + try: consumer = await self.get_consumer() @@ -643,6 +645,5 @@ async def _asetup(self): on_revoke=self._revoke_callback, on_lost=self._lost_callback, ) - await super()._asetup() except KafkaException as error: raise FatalInputError(self, f"Could not setup kafka consumer: {error}") from error diff --git a/logprep/ng/connector/opensearch/output.py b/logprep/ng/connector/opensearch/output.py index 69ae4f550..78eed73cf 100644 --- a/logprep/ng/connector/opensearch/output.py +++ b/logprep/ng/connector/opensearch/output.py @@ -307,136 +307,66 @@ async def flush(self): await self._bulk(self._search_context, self._message_backlog) self._message_backlog.clear() - def _chunk_events_by_size( - self, - events: list["Event"], - *, - chunk_size: int, - max_chunk_bytes: int, - ) -> typing.Iterable[list["Event"]]: - """ - Chunk events into batches respecting chunk_size and (best-effort) 
max_chunk_bytes. - - Note: max_chunk_bytes is approximate because we estimate bytes via json.dumps. - """ - batch: list["Event"] = [] - approx_bytes = 0 - - for ev in events: - # best-effort byte estimation - try: - approx_bytes += len(json.dumps(ev.data, ensure_ascii=False).encode("utf-8")) + 200 - except Exception: - approx_bytes += 1000 # fallback guess - - batch.append(ev) - - if len(batch) >= chunk_size or approx_bytes >= max_chunk_bytes: - yield batch - batch = [] - approx_bytes = 0 - - if batch: - yield batch - - def _build_bulk_body(self, events: list["Event"], *, default_op_type: str) -> list[dict]: - """ - Build bulk request body as a list of dicts (action/meta + source lines). - opensearch-py will serialize this into NDJSON. + async def _bulk(self, client: AsyncOpenSearch, events: list[Event]) -> None: + """Bulk index documents into Opensearch. Uses the parallel_bulk function from the opensearchpy library. + The error information is stored in a document with the following structure: + json + { + "op_type": { + "error": "error message", + "status": "status_code", + "exception": "exception message" + } + } + } """ - body: list[dict] = [] - for ev in events: - doc = ev.data - op_type = doc.get("_op_type", default_op_type) - index = doc.get("_index") + kwargs = { + "max_chunk_bytes": self.config.max_chunk_bytes, + "chunk_size": self.config.chunk_size, + # "queue_size": self.config.queue_size, + # "thread_count": self.config.thread_count, + "raise_on_error": False, + "raise_on_exception": False, + } - if not index: - # safety: fall back to whatever your pipeline expects - # (ideally _index is always set before bulk) - index = doc.get("_index") + actions = (event.data for event in events) - if op_type not in ("index", "create"): - # keep it strict: your Config only allows create/index - op_type = default_op_type + index = 0 + async for success, item in helpers.async_streaming_bulk(client, actions, **kwargs): # type: ignore + if index >= len(events): + break - 
# bulk action line - action_meta = {op_type: {"_index": index}} - # optionally pass _id if present - if "_id" in doc: - action_meta[op_type]["_id"] = doc["_id"] + event = events[index] + index += 1 - body.append(action_meta) - - # source line: must NOT include bulk meta keys - source = {k: v for k, v in doc.items() if k not in ("_index", "_op_type")} - body.append(source) - - return body - - async def _bulk(self, client: AsyncOpenSearch, events: list["Event"]) -> None: - """ - Async bulk indexing. - Uses AsyncOpenSearch.bulk directly, and processes per-item results. - - Behavior is intentionally close to your sync version: - - marks event.state success/failure - - appends BulkError for failures - """ - default_op_type = self.config.default_op_type + if success: + event.state.current_state = EventStateType.STORING_IN_OUTPUT + continue - for batch in self._chunk_events_by_size( - events, - chunk_size=self.config.chunk_size, - max_chunk_bytes=self.config.max_chunk_bytes, - ): - body = self._build_bulk_body(batch, default_op_type=default_op_type) + # parallel_bulk often returned item that allowed item.get("_op_type") + # streaming_bulk usually returns {"index": {...}} / {"create": {...}} + op_type = item.get("_op_type") if isinstance(item, dict) else None + if not op_type and isinstance(item, dict) and item: + op_type = next(iter(item.keys())) - try: - resp = await client.bulk(body=body) - except OpenSearchException as e: - # whole bulk request failed → mark all events failed - for ev in batch: - ev.state.current_state = EventStateType.FAILED - ev.errors.append(BulkError("Bulk request failed", exception=str(e))) - continue + op_type = op_type or self.config.default_op_type + error_info = {} - items = resp.get("items", []) - # One item per document (not per line). Our batch has N events, body has 2N lines. - # items length should match len(batch) if we're only doing index/create. 
- for i, item in enumerate(items): - if i >= len(batch): - break - - ev = batch[i] - # item shape: {"index": {...}} or {"create": {...}} - op_type = next(iter(item.keys()), default_op_type) - info = item.get(op_type, {}) if isinstance(item.get(op_type), dict) else {} - - status = info.get("status") - error_obj = info.get("error") - - ok = isinstance(status, int) and 200 <= status < 300 and not error_obj - if ok: - ev.state.current_state = EventStateType.STORED_IN_OUTPUT - continue - - # normalize error into your BulkError shape - # error_obj can be dict; keep it as "error" payload if present - if isinstance(error_obj, dict): - message = error_obj.get("reason") or str(error_obj) + if isinstance(item, dict): + # streaming_bulk shape + if op_type in item and isinstance(item[op_type], dict): + error_info = item[op_type] + # fallback: old shape else: - message = str(error_obj) if error_obj else "Failed to index document" - - ev.state.current_state = EventStateType.FAILED - ev.errors.append( - BulkError( - message, - status=str(status) if status is not None else None, - exception=None, - error=error_obj, # keep original payload for debugging + error_info = ( + item.get(op_type, {}) if isinstance(item.get(op_type), dict) else {} ) - ) + + error = BulkError(error_info.get("error", "Failed to index document"), **error_info) + + event.state.current_state = EventStateType.FAILED + event.errors.append(error) async def health(self) -> bool: # type: ignore # TODO: fix mypy issue """Check the health of the component.""" From ca4b1fc3e64b7987d4a69956cc3219d8343d2eca Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Sun, 8 Mar 2026 13:45:15 +0100 Subject: [PATCH 12/68] use async setup in ng-world; tests TBD --- logprep/abc/component.py | 5 - logprep/abc/connector.py | 8 +- logprep/abc/processor.py | 2 +- logprep/factory.py | 24 +++-- logprep/ng/abc/component.py | 24 +++++ logprep/ng/abc/processor.py | 10 +- 
logprep/ng/connector/confluent_kafka/input.py | 4 +- .../ng/connector/confluent_kafka/output.py | 4 +- logprep/ng/connector/file/input.py | 6 +- logprep/ng/connector/http/input.py | 6 +- logprep/ng/connector/jsonl/output.py | 22 +++-- logprep/ng/connector/opensearch/output.py | 6 +- logprep/ng/manager.py | 4 +- logprep/ng/pipeline.py | 12 ++- logprep/ng/processor/amides/processor.py | 9 +- .../processor/generic_resolver/processor.py | 4 +- .../ng/processor/geoip_enricher/processor.py | 4 +- logprep/ng/processor/grokker/processor.py | 4 +- logprep/ng/processor/labeler/processor.py | 37 +++++--- .../ng/processor/list_comparison/processor.py | 19 +++- .../ng/processor/pseudonymizer/processor.py | 94 +++++++++++-------- .../processor/template_replacer/processor.py | 4 +- logprep/ng/sender.py | 4 +- logprep/ng/util/configuration.py | 30 +++--- logprep/processor/labeler/processor.py | 31 ++++-- logprep/processor/pseudonymizer/processor.py | 89 ++++++++++-------- logprep/run_ng.py | 68 +++++++------- logprep/util/helper.py | 6 +- 28 files changed, 318 insertions(+), 222 deletions(-) create mode 100644 logprep/ng/abc/component.py diff --git a/logprep/abc/component.py b/logprep/abc/component.py index 594d6b817..6fa96c84a 100644 --- a/logprep/abc/component.py +++ b/logprep/abc/component.py @@ -1,6 +1,5 @@ """abstract module for components""" -import asyncio import functools import inspect import logging @@ -106,10 +105,6 @@ def describe(self) -> str: """ return f"{self.__class__.__name__} ({self.name})" - async def _asetup(self): - loop = asyncio.get_running_loop() - loop.run_in_executor(None, self.setup) - def setup(self): """Set the component up.""" self._populate_cached_properties() diff --git a/logprep/abc/connector.py b/logprep/abc/connector.py index 342943a76..7749687a3 100644 --- a/logprep/abc/connector.py +++ b/logprep/abc/connector.py @@ -2,19 +2,19 @@ from attrs import define, field -from logprep.abc.component import Component from logprep.metrics.metrics import 
CounterMetric, HistogramMetric +from logprep.ng.abc.component import NgComponent -class Connector(Component): +class Connector(NgComponent): """Abstract Connector Class to define the Interface""" @define(kw_only=True) - class Config(Component.Config): + class Config(NgComponent.Config): """Configuration for the connector""" @define(kw_only=True) - class Metrics(Component.Metrics): + class Metrics(NgComponent.Metrics): """Tracks statistics about this connector""" number_of_processed_events: CounterMetric = field( diff --git a/logprep/abc/processor.py b/logprep/abc/processor.py index 45f3c4118..bd11c3bbf 100644 --- a/logprep/abc/processor.py +++ b/logprep/abc/processor.py @@ -151,7 +151,7 @@ def result(self, value: ProcessorResult): self._result = value @property - def rules(self) -> list["Rule"]: + def rules(self) -> Sequence["Rule"]: """Returns all rules Returns diff --git a/logprep/factory.py b/logprep/factory.py index 3c6b18d5b..85be2f92f 100644 --- a/logprep/factory.py +++ b/logprep/factory.py @@ -12,7 +12,7 @@ class Factory: """Create components for logprep.""" @classmethod - def create(cls, configuration: dict) -> Component | None: + def create(cls, configuration: dict) -> Component: """Create component.""" if configuration == {} or configuration is None: raise InvalidConfigurationError("The component definition is empty.") @@ -23,16 +23,14 @@ def create(cls, configuration: dict) -> Component | None: f"Found multiple component definitions ({', '.join(configuration.keys())})," + " but there must be exactly one." ) - for component_name, component_configuration_dict in configuration.items(): - if configuration == {} or component_configuration_dict is None: - raise InvalidConfigurationError( - f'The definition of component "{component_name}" is empty.' 
- ) - if not isinstance(component_configuration_dict, dict): - raise InvalidConfigSpecificationError(component_name) - component = Configuration.get_class(component_name, component_configuration_dict) - component_configuration = Configuration.create( - component_name, component_configuration_dict + # we know configuration has exactly one entry + [(component_name, component_configuration_dict)] = configuration.items() + if configuration == {} or component_configuration_dict is None: + raise InvalidConfigurationError( + f'The definition of component "{component_name}" is empty.' ) - return component(component_name, component_configuration) - return None + if not isinstance(component_configuration_dict, dict): + raise InvalidConfigSpecificationError(component_name) + component = Configuration.get_class(component_name, component_configuration_dict) + component_configuration = Configuration.create(component_name, component_configuration_dict) + return component(component_name, component_configuration) diff --git a/logprep/ng/abc/component.py b/logprep/ng/abc/component.py new file mode 100644 index 000000000..551a7a391 --- /dev/null +++ b/logprep/ng/abc/component.py @@ -0,0 +1,24 @@ +"""abstract module for components""" + +import logging + +from logprep.abc.component import Component + +logger = logging.getLogger("Component") + + +class NgComponent(Component): + """Abstract Component Class to define the Interface""" + + # pylint: disable=invalid-overridden-method, useless-parent-delegation + # TODO fork ng-based Component properly + # We override the setup to be async in the ng component tree. + # This is unclean from an interface perspective, but works if the worlds doen't mix. 
+ + async def setup(self) -> None: + return super().setup() + + async def shut_down(self) -> None: + return super().shut_down() + + # pylint: enable=invalid-overridden-method,useless-parent-delegation diff --git a/logprep/ng/abc/processor.py b/logprep/ng/abc/processor.py index 6b074f3f4..cb730960b 100644 --- a/logprep/ng/abc/processor.py +++ b/logprep/ng/abc/processor.py @@ -9,9 +9,9 @@ from attrs import define, field, validators -from logprep.abc.component import Component from logprep.framework.rule_tree.rule_tree import RuleTree from logprep.metrics.metrics import Metric +from logprep.ng.abc.component import NgComponent from logprep.ng.event.log_event import LogEvent from logprep.processor.base.exceptions import ProcessingCriticalError, ProcessingWarning from logprep.util.helper import ( @@ -28,11 +28,11 @@ logger = logging.getLogger("Processor") -class Processor(Component): +class Processor(NgComponent): """Abstract Processor Class to define the Interface""" @define(kw_only=True, slots=False) - class Config(Component.Config): + class Config(NgComponent.Config): """Common Configurations""" rules: list[str] = field( @@ -255,7 +255,7 @@ def _write_target_field(self, event: dict, rule: "Rule", result: Any) -> None: overwrite_target=getattr(rule, "overwrite_target", False), ) - def setup(self) -> None: - super().setup() + async def setup(self) -> None: + await super().setup() for rule in self.rules: _ = rule.metrics # initialize metrics to show them on startup diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index 95ca7ece6..808db2aca 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -632,9 +632,9 @@ def health(self) -> bool: return False return super().health() - async def _asetup(self): + async def setup(self): """Set the component up.""" - await super()._asetup() + await super().setup() try: consumer = await self.get_consumer() diff --git 
a/logprep/ng/connector/confluent_kafka/output.py b/logprep/ng/connector/confluent_kafka/output.py index 80a982f82..ba8039fac 100644 --- a/logprep/ng/connector/confluent_kafka/output.py +++ b/logprep/ng/connector/confluent_kafka/output.py @@ -350,10 +350,10 @@ def health(self) -> bool: return False return super().health() - def setup(self) -> None: + async def setup(self) -> None: """Set the component up.""" try: - super().setup() + await super().setup() except KafkaException as error: raise FatalOutputError(self, f"Could not setup kafka producer: {error}") from error diff --git a/logprep/ng/connector/file/input.py b/logprep/ng/connector/file/input.py index 72978330d..e13269c5b 100644 --- a/logprep/ng/connector/file/input.py +++ b/logprep/ng/connector/file/input.py @@ -161,7 +161,7 @@ def _line_to_dict(self, input_line: str) -> dict: return {"message": input_line} return {} - def _get_event(self, timeout: float) -> tuple: + async def _get_event(self, timeout: float) -> tuple: """Returns the first message from the threadsafe queue""" try: message: dict = self._messages.get(timeout=timeout) @@ -170,12 +170,12 @@ def _get_event(self, timeout: float) -> tuple: except queue.Empty: return None, None, None - def setup(self) -> None: + async def setup(self) -> None: """Creates and starts the Thread that continuously monitors the given logfile. Right now this input connector is only started in the first process. It needs the class attribute pipeline_index before running setup in Pipeline Initiation""" - super().setup() + await super().setup() if not hasattr(self, "pipeline_index"): raise FatalInputError( self, "Necessary instance attribute `pipeline_index` could not be found." 
# type: ignore diff --git a/logprep/ng/connector/http/input.py b/logprep/ng/connector/http/input.py index 358ee810b..f206dd684 100644 --- a/logprep/ng/connector/http/input.py +++ b/logprep/ng/connector/http/input.py @@ -297,10 +297,10 @@ def config(self) -> Config: """Provides the properly typed rule configuration object""" return typing.cast(HttpInput.Config, self._config) - def setup(self) -> None: + async def setup(self) -> None: """setup starts the actual functionality of this connector.""" - super().setup() + await super().setup() if self.messages is None: raise ValueError("message queue `messages` has not been set") @@ -344,7 +344,7 @@ def _get_asgi_app(endpoints_config: dict) -> falcon.asgi.App: app.add_sink(endpoint, prefix=route_compile_helper(endpoint_path)) return app - def _get_event(self, timeout: float) -> tuple: + async def _get_event(self, timeout: float) -> tuple: """Returns the first message from the queue""" messages = typing.cast(Queue, self.messages) diff --git a/logprep/ng/connector/jsonl/output.py b/logprep/ng/connector/jsonl/output.py index b93b0ed40..249f658e3 100644 --- a/logprep/ng/connector/jsonl/output.py +++ b/logprep/ng/connector/jsonl/output.py @@ -19,6 +19,7 @@ """ import json +import typing from attrs import define, field, validators @@ -61,11 +62,16 @@ def __init__(self, name: str, configuration: "Output.Config"): self.events = [] self.failed_events = [] - def setup(self): - super().setup() - open(self._config.output_file, "a+", encoding="utf8").close() - if self._config.output_file_custom: - open(self._config.output_file_custom, "a+", encoding="utf8").close() + @property + def config(self) -> Config: + """Provides the properly typed configuration object""" + return typing.cast(JsonlOutput.Config, self._config) + + async def setup(self): + await super().setup() + open(self.config.output_file, "a+", encoding="utf8").close() + if self.config.output_file_custom: + open(self.config.output_file_custom, "a+", encoding="utf8").close() 
@staticmethod def _write_json(filepath: str, line: dict): @@ -78,7 +84,7 @@ def store(self, event: Event) -> None: """Store the event in the output destination.""" event.state.next_state() self.events.append(event.data) - JsonlOutput._write_json(self._config.output_file, event.data) + JsonlOutput._write_json(self.config.output_file, event.data) self.metrics.number_of_processed_events += 1 event.state.next_state(success=True) @@ -89,8 +95,8 @@ def store_custom(self, event: Event, target: str) -> None: document = {target: event.data} self.events.append(document) - if self._config.output_file_custom: - JsonlOutput._write_json(self._config.output_file_custom, document) + if self.config.output_file_custom: + JsonlOutput._write_json(self.config.output_file_custom, document) self.metrics.number_of_processed_events += 1 event.state.next_state(success=True) diff --git a/logprep/ng/connector/opensearch/output.py b/logprep/ng/connector/opensearch/output.py index 78eed73cf..b773673e1 100644 --- a/logprep/ng/connector/opensearch/output.py +++ b/logprep/ng/connector/opensearch/output.py @@ -31,7 +31,6 @@ """ import asyncio -import json import logging import ssl import typing @@ -48,7 +47,6 @@ from opensearchpy.serializer import JSONSerializer from logprep.abc.exceptions import LogprepException -from logprep.metrics.metrics import Metric from logprep.ng.abc.event import Event from logprep.ng.abc.output import Output from logprep.ng.event.event_state import EventStateType @@ -238,8 +236,8 @@ def __init__(self, name: str, configuration: "OpensearchOutput.Config"): self._message_backlog = [] self._flush_task: asyncio.Task | None = None - async def _asetup(self): - await super()._asetup() + async def setup(self): + await super().setup() flush_timeout = self.config.flush_timeout # TODO: improve flush task handling diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 439bc79d2..510100cbc 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -44,14 +44,14 @@ 
async def setup(self): self._input_connector = cast(Input, Factory.create(self.configuration.input)) self._input_connector.event_backlog = self._event_backlog # TODO needs to be disentagled - await self._input_connector._asetup() + await self._input_connector.setup() processors = [ typing.cast(Processor, Factory.create(processor_config)) for processor_config in self.configuration.pipeline ] for processor in processors: - processor.setup() + await processor.setup() self._pipeline = Pipeline(processors) diff --git a/logprep/ng/pipeline.py b/logprep/ng/pipeline.py index 29726493d..eac8f22ef 100644 --- a/logprep/ng/pipeline.py +++ b/logprep/ng/pipeline.py @@ -1,5 +1,6 @@ """pipeline module for processing events through a series of processors.""" +import asyncio import logging from logprep.ng.abc.processor import Processor @@ -50,19 +51,20 @@ def process(self, event: LogEvent) -> LogEvent: """ return _process_event(event, processors=self.processors) - def shut_down(self) -> None: + async def shut_down(self) -> None: """Shutdown the pipeline gracefully.""" for processor in self.processors: - processor.shut_down() + await processor.shut_down() logger.info("All processors has been shut down.") logger.info("Pipeline has been shut down.") - def setup(self) -> None: + async def setup(self) -> None: """Setup the pipeline components.""" - for processor in self.processors: - processor.setup() + await asyncio.gather( + *(processor.setup() for processor in self.processors), return_exceptions=True + ) logger.info("Pipeline has been set up.") diff --git a/logprep/ng/processor/amides/processor.py b/logprep/ng/processor/amides/processor.py index 0d124c472..8903a3e53 100644 --- a/logprep/ng/processor/amides/processor.py +++ b/logprep/ng/processor/amides/processor.py @@ -83,6 +83,7 @@ """ import logging +import typing from functools import cached_property, lru_cache from multiprocessing import current_process from pathlib import Path @@ -97,6 +98,7 @@ from 
logprep.processor.amides.detection import MisuseDetector, RuleAttributor from logprep.processor.amides.normalize import CommandLineNormalizer from logprep.processor.amides.rule import AmidesRule +from logprep.processor.base.rule import Rule from logprep.util.getter import GetterFactory from logprep.util.helper import get_dotted_field_value @@ -201,8 +203,8 @@ def _normalizer(self): def _evaluate_cmdline_cached(self): return lru_cache(maxsize=self._config.max_cache_entries)(self._evaluate_cmdline) - def setup(self): - super().setup() + async def setup(self): + await super().setup() models = self._load_and_unpack_models() self._misuse_detector = MisuseDetector(models["single"], self._config.decision_threshold) @@ -227,7 +229,8 @@ def _load_and_unpack_models(self): return models - def _apply_rules(self, event: dict, rule: AmidesRule): + def _apply_rules(self, event: dict, rule: Rule): + rule = typing.cast(AmidesRule, rule) cmdline = get_dotted_field_value(event, rule.source_fields[0]) if self._handle_missing_fields(event, rule, rule.source_fields, [cmdline]): return diff --git a/logprep/ng/processor/generic_resolver/processor.py b/logprep/ng/processor/generic_resolver/processor.py index 163392852..ab6dbba23 100644 --- a/logprep/ng/processor/generic_resolver/processor.py +++ b/logprep/ng/processor/generic_resolver/processor.py @@ -213,6 +213,6 @@ def _update_cache_metrics(self) -> None: self.metrics.num_cache_entries += cache_info.currsize self.metrics.cache_load += cache_info.currsize / self.max_cache_entries - def setup(self) -> None: - super().setup() + async def setup(self) -> None: + await super().setup() self._cache_metrics_skip_count = 0 diff --git a/logprep/ng/processor/geoip_enricher/processor.py b/logprep/ng/processor/geoip_enricher/processor.py index 70e4894a8..7694f0f87 100644 --- a/logprep/ng/processor/geoip_enricher/processor.py +++ b/logprep/ng/processor/geoip_enricher/processor.py @@ -95,8 +95,8 @@ def _city_db(self) -> database.Reader: 
logger.exception("failed to load GeoIP database") raise - def setup(self) -> None: - super().setup() + async def setup(self) -> None: + await super().setup() _ = self._city_db # trigger download def _try_getting_geoip_data(self, ip_string: str) -> dict: diff --git a/logprep/ng/processor/grokker/processor.py b/logprep/ng/processor/grokker/processor.py index c77191626..b72c7c1b6 100644 --- a/logprep/ng/processor/grokker/processor.py +++ b/logprep/ng/processor/grokker/processor.py @@ -110,9 +110,9 @@ def _apply_rules(self, event: dict, rule: Rule) -> None: if not matches: raise ProcessingWarning("no grok pattern matched", rule, event) - def setup(self) -> None: + async def setup(self) -> None: """Loads the action mapping. Has to be called before processing""" - super().setup() + await super().setup() custom_patterns_dir = self.config.custom_patterns_dir if re.search(r"http(s)?:\/\/.*?\.zip", custom_patterns_dir): with tempfile.TemporaryDirectory("grok") as patterns_tmp_path: diff --git a/logprep/ng/processor/labeler/processor.py b/logprep/ng/processor/labeler/processor.py index 370069955..991dfc47a 100644 --- a/logprep/ng/processor/labeler/processor.py +++ b/logprep/ng/processor/labeler/processor.py @@ -24,11 +24,13 @@ .. automodule:: logprep.processor.labeler.rule """ -from typing import Optional +import typing +from collections.abc import Iterable, Sequence from attrs import define, field, validators from logprep.ng.abc.processor import Processor +from logprep.processor.base.rule import Rule from logprep.processor.labeler.labeling_schema import LabelingSchema from logprep.processor.labeler.rule import LabelerRule from logprep.util.helper import add_fields_to, get_dotted_field_value @@ -43,9 +45,7 @@ class Config(Processor.Config): schema: str = field(validator=validators.instance_of(str)) """Path to a labeling schema file. 
For string format see :ref:`getters`.""" - include_parent_labels: Optional[bool] = field( - default=False, validator=validators.optional(validator=validators.instance_of(bool)) - ) + include_parent_labels: bool = field(default=False, validator=validators.instance_of(bool)) """If the option is deactivated only labels defined in a rule will be activated. Otherwise, also allowed labels in the path to the *root* of the corresponding category of a label will be added. @@ -58,24 +58,35 @@ class Config(Processor.Config): rule_class = LabelerRule - def __init__(self, name: str, configuration: Processor.Config) -> None: + def __init__(self, name: str, configuration: "Labeler.Config") -> None: self._schema = LabelingSchema.create_from_file(configuration.schema) super().__init__(name, configuration=configuration) - def setup(self) -> None: - super().setup() + @property + def config(self) -> Config: + """Provides the properly typed configuration object""" + return typing.cast(Labeler.Config, self._config) + + @property + def rules(self) -> Sequence[LabelerRule]: + """Returns all rules""" + return typing.cast(Sequence[LabelerRule], super().rules) + + async def setup(self) -> None: + await super().setup() for rule in self.rules: - if self._config.include_parent_labels: + if self.config.include_parent_labels: rule.add_parent_labels_from_schema(self._schema) rule.conforms_to_schema(self._schema) - def _apply_rules(self, event: dict, rule: LabelerRule) -> None: + def _apply_rules(self, event: dict, rule: Rule) -> None: """Applies the rule to the current event""" - fields = {key: value for key, value in rule.prefixed_label.items()} - add_fields_to(event, fields, rule=rule, merge_with_target=True) - # convert sets into sorted lists + rule = typing.cast(LabelerRule, rule) + add_fields_to(event, rule.prefixed_label, rule=rule, merge_with_target=True) + # we have already added (merged) the prefixed_labels with list values + # now we extract them to make them unique and sorted fields = { 
- key: sorted(set(get_dotted_field_value(event, key))) + key: sorted(set(typing.cast(Iterable, get_dotted_field_value(event, key)))) for key, _ in rule.prefixed_label.items() } add_fields_to(event, fields, rule=rule, overwrite_target=True) diff --git a/logprep/ng/processor/list_comparison/processor.py b/logprep/ng/processor/list_comparison/processor.py index bae57be27..bb3d16537 100644 --- a/logprep/ng/processor/list_comparison/processor.py +++ b/logprep/ng/processor/list_comparison/processor.py @@ -26,6 +26,9 @@ .. automodule:: logprep.processor.list_comparison.rule """ +import typing +from collections.abc import Sequence + from attrs import define, field, validators from logprep.ng.abc.processor import Processor @@ -49,10 +52,20 @@ class Config(Processor.Config): rule_class = ListComparisonRule - def setup(self) -> None: - super().setup() + @property + def config(self) -> Config: + """Provides the properly typed configuration object""" + return typing.cast(ListComparison.Config, self._config) + + @property + def rules(self) -> Sequence[ListComparisonRule]: + """Returns all rules""" + return typing.cast(Sequence[ListComparisonRule], super().rules) + + async def setup(self) -> None: + await super().setup() for rule in self.rules: - rule.init_list_comparison(self._config.list_search_base_path) + rule.init_list_comparison(self.config.list_search_base_path) def _apply_rules(self, event, rule): """Apply matching rule to given log event. 
diff --git a/logprep/ng/processor/pseudonymizer/processor.py b/logprep/ng/processor/pseudonymizer/processor.py index ff68ff6a9..a55bde538 100644 --- a/logprep/ng/processor/pseudonymizer/processor.py +++ b/logprep/ng/processor/pseudonymizer/processor.py @@ -44,6 +44,8 @@ """ import re +import typing +from collections.abc import Sequence from functools import cached_property, lru_cache from itertools import chain from typing import Callable, Pattern @@ -56,15 +58,17 @@ from logprep.metrics.metrics import CounterMetric, GaugeMetric from logprep.ng.event.pseudonym_event import PseudonymEvent from logprep.ng.processor.field_manager.processor import FieldManager +from logprep.processor.base.rule import Rule from logprep.processor.pseudonymizer.rule import PseudonymizerRule from logprep.util.getter import GetterFactory from logprep.util.hasher import SHA256Hasher -from logprep.util.helper import add_fields_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_values from logprep.util.pseudo.encrypter import ( DualPKCS1HybridCTREncrypter, DualPKCS1HybridGCMEncrypter, Encrypter, ) +from logprep.util.typing import is_lru_cached from logprep.util.url.url import extract_urls @@ -76,20 +80,16 @@ class Config(FieldManager.Config): """Pseudonymizer config""" outputs: tuple[dict[str, str]] = field( - validator=[ - validators.deep_iterable( - member_validator=[ - validators.instance_of(dict), - validators.deep_mapping( - key_validator=validators.instance_of(str), - value_validator=validators.instance_of(str), - mapping_validator=validators.max_len(1), - ), - ], - iterable_validator=validators.instance_of(tuple), - ), - validators.min_len(1), - ], + validator=validators.deep_iterable( + member_validator=[ + validators.deep_mapping( + key_validator=validators.instance_of(str), + value_validator=validators.instance_of(str), + mapping_validator=(validators.instance_of(dict), validators.max_len(1)), + ), + ], + 
iterable_validator=(validators.instance_of(tuple), validators.min_len(1)), + ), converter=tuple, ) """list of output mappings in form of :code:`output_name:topic`. @@ -193,27 +193,38 @@ def _hasher(self) -> SHA256Hasher: @cached_property def _encrypter(self) -> Encrypter: - if self._config.mode == "CTR": - encrypter = DualPKCS1HybridCTREncrypter() - else: - encrypter = DualPKCS1HybridGCMEncrypter() - encrypter.load_public_keys(self._config.pubkey_analyst, self._config.pubkey_depseudo) + encrypter = ( + DualPKCS1HybridCTREncrypter() + if self.config.mode == "CTR" + else DualPKCS1HybridGCMEncrypter() + ) + encrypter.load_public_keys(self.config.pubkey_analyst, self.config.pubkey_depseudo) return encrypter @cached_property def _regex_mapping(self) -> dict: - return GetterFactory.from_string(self._config.regex_mapping).get_yaml() + return GetterFactory.from_string(self.config.regex_mapping).get_dict() @cached_property def _get_pseudonym_dict_cached(self) -> Callable: - return lru_cache(maxsize=self._config.max_cached_pseudonyms)(self._pseudonymize) + return lru_cache(maxsize=self.config.max_cached_pseudonyms)(self._pseudonymize) @cached_property def _pseudonymize_url_cached(self) -> Callable: - return lru_cache(maxsize=self._config.max_cached_pseudonymized_urls)(self._pseudonymize_url) + return lru_cache(maxsize=self.config.max_cached_pseudonymized_urls)(self._pseudonymize_url) + + @property + def config(self) -> Config: + """Provides the properly typed configuration object""" + return typing.cast(Pseudonymizer.Config, self._config) + + @property + def rules(self) -> Sequence[PseudonymizerRule]: + """Returns all rules""" + return typing.cast(Sequence[PseudonymizerRule], super().rules) - def setup(self) -> None: - super().setup() + async def setup(self) -> None: + await super().setup() self._replace_regex_keywords_by_regex_expression() def _replace_regex_keywords_by_regex_expression(self) -> None: @@ -223,13 +234,12 @@ def 
_replace_regex_keywords_by_regex_expression(self) -> None: rule.pseudonyms[dotted_field] = re.compile(self._regex_mapping[regex_keyword]) elif isinstance(regex_keyword, str): # after the first run, the regex is compiled raise InvalidConfigurationError( - f"Regex keyword '{regex_keyword}' not found in regex_mapping '{self._config.regex_mapping}'" + f"Regex keyword '{regex_keyword}' not found in regex_mapping '{self.config.regex_mapping}'" ) - def _apply_rules(self, event: dict, rule: PseudonymizerRule) -> None: - source_dict = {} - for source_field in rule.pseudonyms: - source_dict[source_field] = get_dotted_field_value(event, source_field) + def _apply_rules(self, event: dict, rule: Rule) -> None: + rule = typing.cast(PseudonymizerRule, rule) + source_dict = get_dotted_field_values(event, rule.pseudonyms) self._handle_missing_fields(event, rule, source_dict.keys(), source_dict.values()) for dotted_field, field_value in source_dict.items(): @@ -242,7 +252,7 @@ def _apply_rules(self, event: dict, rule: PseudonymizerRule) -> None: for value in field_value ] else: - field_value = self._pseudonymize_field(rule, dotted_field, regex, field_value) + field_value = self._pseudonymize_field(rule, dotted_field, regex, str(field_value)) add_fields_to( event, fields={dotted_field: field_value}, rule=rule, overwrite_target=True ) @@ -277,13 +287,13 @@ def _pseudonymize_string(self, value: str) -> str: if self.pseudonymized_pattern.match(value): return value pseudonym_dict = self._get_pseudonym_dict_cached(value) - pseudonym_event = PseudonymEvent(pseudonym_dict, outputs=self._config.outputs) + pseudonym_event = PseudonymEvent(pseudonym_dict, outputs=self.config.outputs) if pseudonym_event not in self._event.extra_data: self._event.extra_data.append(pseudonym_event) return self._wrap_hash(pseudonym_dict["pseudonym"]) def _pseudonymize(self, value: str) -> dict[str, str]: - hash_string = self._hasher.hash_str(value, salt=self._config.hash_salt) + hash_string = 
self._hasher.hash_str(value, salt=self.config.hash_salt) encrypted_origin = self._encrypter.encrypt(value) return {"pseudonym": hash_string, "origin": encrypted_origin} @@ -325,11 +335,15 @@ def _wrap_hash(self, hash_string: str) -> str: return self.HASH_PREFIX + hash_string + self.HASH_SUFFIX def _update_cache_metrics(self) -> None: - cache_info_pseudonyms = self._get_pseudonym_dict_cached.cache_info() - cache_info_urls = self._pseudonymize_url_cached.cache_info() - self.metrics.new_results += cache_info_pseudonyms.misses + cache_info_urls.misses - self.metrics.cached_results += cache_info_pseudonyms.hits + cache_info_urls.hits - self.metrics.num_cache_entries += cache_info_pseudonyms.currsize + cache_info_urls.currsize - self.metrics.cache_load += (cache_info_pseudonyms.currsize + cache_info_urls.currsize) / ( - cache_info_pseudonyms.maxsize + cache_info_urls.maxsize + caches = [ + f.cache_info() + for f in [self._get_pseudonym_dict_cached, self._pseudonymize_url_cached] + if is_lru_cached(f) + ] + + self.metrics.new_results += sum(c.misses for c in caches) + self.metrics.cached_results += sum(c.hits for c in caches) + self.metrics.num_cache_entries += sum(c.currsize for c in caches) + self.metrics.cache_load += (sum(c.currsize for c in caches)) / ( + sum(typing.cast(int, c.maxsize) for c in caches) ) diff --git a/logprep/ng/processor/template_replacer/processor.py b/logprep/ng/processor/template_replacer/processor.py index 3a08b4161..d53dabaf9 100644 --- a/logprep/ng/processor/template_replacer/processor.py +++ b/logprep/ng/processor/template_replacer/processor.py @@ -175,8 +175,8 @@ def _perform_replacement( overwrite_target=overwrite, ) - def setup(self) -> None: - super().setup() + async def setup(self) -> None: + await super().setup() self._target_field = self.config.pattern["target_field"] self._fields = self.config.pattern["fields"] self._initialize_replacement_mapping() diff --git a/logprep/ng/sender.py b/logprep/ng/sender.py index 25df94db8..4d961e5a0 
100644 --- a/logprep/ng/sender.py +++ b/logprep/ng/sender.py @@ -146,7 +146,7 @@ async def shut_down(self) -> None: async def setup(self) -> None: """Setup all outputs.""" for _, output in self._outputs.items(): - await output._asetup() + await output.setup() if self._error_output: - await self._error_output._asetup() + await self._error_output.setup() logger.info("All outputs have been set up.") diff --git a/logprep/ng/util/configuration.py b/logprep/ng/util/configuration.py index 37b2aedb4..434147514 100644 --- a/logprep/ng/util/configuration.py +++ b/logprep/ng/util/configuration.py @@ -193,6 +193,7 @@ import json import logging import os +import typing from copy import deepcopy from importlib.metadata import version from itertools import chain @@ -207,12 +208,12 @@ from ruamel.yaml.scanner import ScannerError from schedule import Scheduler -from logprep.abc.component import Component from logprep.abc.getter import Getter -from logprep.abc.processor import Processor from logprep.factory import Factory from logprep.factory_error import FactoryError, InvalidConfigurationError from logprep.metrics.metrics import CounterMetric, GaugeMetric +from logprep.ng.abc.component import NgComponent +from logprep.ng.abc.processor import Processor from logprep.ng.util.defaults import ( DEFAULT_CONFIG_LOCATION, DEFAULT_LOG_CONFIG, @@ -253,9 +254,9 @@ def dump(self, data: Any, stream: Any | None = None, **kw: Any) -> Any: class InvalidConfigurationErrors(InvalidConfigurationError): """Raise for multiple Configuration related exceptions.""" - errors: List[InvalidConfigurationError] + errors: Sequence[InvalidConfigurationError] - def __init__(self, errors: List[Exception]) -> None: + def __init__(self, errors: Sequence[Exception]) -> None: unique_errors = [] for error in errors: if not isinstance(error, InvalidConfigurationError): @@ -653,7 +654,6 @@ class Configuration: _metrics: "Configuration.Metrics" = field(init=False, repr=False, eq=False) _getter: Getter = field( - 
validator=validators.instance_of(Getter), default=GetterFactory.from_string(DEFAULT_CONFIG_LOCATION), repr=False, eq=False, @@ -683,7 +683,7 @@ class Configuration: ) @define(kw_only=True) - class Metrics(Component.Metrics): + class Metrics(NgComponent.Metrics): """Metrics for the Logprep Runner.""" version_info: GaugeMetric = field( @@ -780,7 +780,7 @@ def from_source(cls, config_path: str) -> "Configuration": return config @classmethod - def from_sources(cls, config_paths: Iterable[str] | None = None) -> "Configuration": + async def from_sources(cls, config_paths: Iterable[str] | None = None) -> "Configuration": """Creates configuration from a list of configuration sources. Parameters @@ -822,7 +822,7 @@ def from_sources(cls, config_paths: Iterable[str] | None = None) -> "Configurati except InvalidConfigurationErrors as error: errors = [*errors, *error.errors] try: - configuration._verify() + await configuration._verify() except InvalidConfigurationErrors as error: errors = [*errors, *error.errors] if errors: @@ -845,7 +845,7 @@ def as_yaml(self) -> str: """Return the configuration as yaml string.""" return yaml.dump(self.as_dict()) - def reload(self) -> None: + async def reload(self) -> None: """Reload the application's configuration from the configured sources. 
This method attempts to rebuild the configuration from all paths listed in @@ -875,7 +875,7 @@ def reload(self) -> None: errors: List[Exception] = [] try: - new_config = Configuration.from_sources(self.config_paths) + new_config = await Configuration.from_sources(self.config_paths) if new_config.config_refresh_interval is None: new_config.config_refresh_interval = self.config_refresh_interval self._configs = new_config._configs # pylint: disable=protected-access @@ -1009,7 +1009,7 @@ def _get_last_non_default_value(configs: Sequence["Configuration"], attribute: s return values[-1] return getattr(Configuration(), attribute) - def _verify(self) -> None: + async def _verify(self) -> None: """Verify the configuration.""" errors: list[Exception] = [] try: @@ -1038,8 +1038,8 @@ def _verify(self) -> None: errors.append(error) for processor_config in self.pipeline: try: - processor = Factory.create(deepcopy(processor_config)) - processor.setup() + processor = typing.cast(Processor, Factory.create(deepcopy(processor_config))) + await processor.setup() self._verify_rules(processor) except ( FactoryError, @@ -1055,9 +1055,9 @@ def _verify(self) -> None: self._verify_processor_outputs(processor_config) except Exception as error: # pylint: disable=broad-except errors.append(error) - if ENV_NAME_LOGPREP_CREDENTIALS_FILE in os.environ: + credentials_file_path = os.environ.get(ENV_NAME_LOGPREP_CREDENTIALS_FILE) + if credentials_file_path is not None: try: - credentials_file_path = os.environ.get(ENV_NAME_LOGPREP_CREDENTIALS_FILE) _ = CredentialsFactory.get_content(Path(credentials_file_path)) except Exception as error: # pylint: disable=broad-except errors.append(error) diff --git a/logprep/processor/labeler/processor.py b/logprep/processor/labeler/processor.py index 52cdfac21..57c8d8be5 100644 --- a/logprep/processor/labeler/processor.py +++ b/logprep/processor/labeler/processor.py @@ -24,11 +24,13 @@ .. 
automodule:: logprep.processor.labeler.rule """ -from typing import Optional +import typing +from collections.abc import Iterable, Sequence from attrs import define, field, validators from logprep.abc.processor import Processor +from logprep.processor.base.rule import Rule from logprep.processor.labeler.labeling_schema import LabelingSchema from logprep.processor.labeler.rule import LabelerRule from logprep.util.helper import add_fields_to, get_dotted_field_value @@ -58,9 +60,7 @@ class Config(Processor.Config): authenticity and integrity of the loaded values. """ - include_parent_labels: Optional[bool] = field( - default=False, validator=validators.optional(validator=validators.instance_of(bool)) - ) + include_parent_labels: bool = field(default=False, validator=validators.instance_of(bool)) """If the option is deactivated only labels defined in a rule will be activated. Otherwise, also allowed labels in the path to the *root* of the corresponding category of a label will be added. @@ -73,10 +73,20 @@ class Config(Processor.Config): rule_class = LabelerRule - def __init__(self, name: str, configuration: Processor.Config): + def __init__(self, name: str, configuration: "Labeler.Config"): self._schema = LabelingSchema.create_from_file(configuration.schema) super().__init__(name, configuration=configuration) + @property + def config(self) -> Config: + """Provides the properly typed configuration object""" + return typing.cast(Labeler.Config, self._config) + + @property + def rules(self) -> Sequence[LabelerRule]: + """Returns all rules""" + return typing.cast(Sequence[LabelerRule], super().rules) + def setup(self): super().setup() for rule in self.rules: @@ -84,13 +94,14 @@ def setup(self): rule.add_parent_labels_from_schema(self._schema) rule.conforms_to_schema(self._schema) - def _apply_rules(self, event, rule): + def _apply_rules(self, event: dict, rule: Rule) -> None: """Applies the rule to the current event""" - fields = {key: value for key, value in 
rule.prefixed_label.items()} - add_fields_to(event, fields, rule=rule, merge_with_target=True) - # convert sets into sorted lists + rule = typing.cast(LabelerRule, rule) + add_fields_to(event, rule.prefixed_label, rule=rule, merge_with_target=True) + # we have already added (merged) the prefixed_labels with list values + # now we extract them to make them unique and sorted fields = { - key: sorted(set(get_dotted_field_value(event, key))) + key: sorted(set(typing.cast(Iterable, get_dotted_field_value(event, key)))) for key, _ in rule.prefixed_label.items() } add_fields_to(event, fields, rule=rule, overwrite_target=True) diff --git a/logprep/processor/pseudonymizer/processor.py b/logprep/processor/pseudonymizer/processor.py index 020968cba..899b4732c 100644 --- a/logprep/processor/pseudonymizer/processor.py +++ b/logprep/processor/pseudonymizer/processor.py @@ -33,6 +33,8 @@ """ import re +import typing +from collections.abc import Sequence from functools import cached_property, lru_cache from itertools import chain from typing import Pattern @@ -47,12 +49,13 @@ from logprep.processor.pseudonymizer.rule import PseudonymizerRule from logprep.util.getter import GetterFactory from logprep.util.hasher import SHA256Hasher -from logprep.util.helper import add_fields_to, get_dotted_field_value +from logprep.util.helper import add_fields_to, get_dotted_field_values from logprep.util.pseudo.encrypter import ( DualPKCS1HybridCTREncrypter, DualPKCS1HybridGCMEncrypter, Encrypter, ) +from logprep.util.typing import is_lru_cached from logprep.util.url.url import extract_urls @@ -64,20 +67,16 @@ class Config(FieldManager.Config): """Pseudonymizer config""" outputs: tuple[dict[str, str]] = field( - validator=[ - validators.deep_iterable( - member_validator=[ - validators.instance_of(dict), - validators.deep_mapping( - key_validator=validators.instance_of(str), - value_validator=validators.instance_of(str), - mapping_validator=validators.max_len(1), - ), - ], - 
iterable_validator=validators.instance_of(tuple), - ), - validators.min_len(1), - ], + validator=validators.deep_iterable( + member_validator=[ + validators.deep_mapping( + key_validator=validators.instance_of(str), + value_validator=validators.instance_of(str), + mapping_validator=(validators.instance_of(dict), validators.max_len(1)), + ), + ], + iterable_validator=(validators.instance_of(tuple), validators.min_len(1)), + ), converter=tuple, ) """list of output mappings in form of :code:`output_name:topic`. @@ -227,24 +226,35 @@ def _hasher(self): @cached_property def _encrypter(self) -> Encrypter: - if self._config.mode == "CTR": - encrypter = DualPKCS1HybridCTREncrypter() - else: - encrypter = DualPKCS1HybridGCMEncrypter() - encrypter.load_public_keys(self._config.pubkey_analyst, self._config.pubkey_depseudo) + encrypter = ( + DualPKCS1HybridCTREncrypter() + if self.config.mode == "CTR" + else DualPKCS1HybridGCMEncrypter() + ) + encrypter.load_public_keys(self.config.pubkey_analyst, self.config.pubkey_depseudo) return encrypter @cached_property def _regex_mapping(self) -> dict: - return GetterFactory.from_string(self._config.regex_mapping).get_yaml() + return GetterFactory.from_string(self.config.regex_mapping).get_dict() @cached_property def _get_pseudonym_dict_cached(self): - return lru_cache(maxsize=self._config.max_cached_pseudonyms)(self._pseudonymize) + return lru_cache(maxsize=self.config.max_cached_pseudonyms)(self._pseudonymize) @cached_property def _pseudonymize_url_cached(self): - return lru_cache(maxsize=self._config.max_cached_pseudonymized_urls)(self._pseudonymize_url) + return lru_cache(maxsize=self.config.max_cached_pseudonymized_urls)(self._pseudonymize_url) + + @property + def config(self) -> Config: + """Provides the properly typed configuration object""" + return typing.cast(Pseudonymizer.Config, self._config) + + @property + def rules(self) -> Sequence[PseudonymizerRule]: + """Returns all rules""" + return 
typing.cast(Sequence[PseudonymizerRule], super().rules) def setup(self): super().setup() @@ -257,13 +267,12 @@ def _replace_regex_keywords_by_regex_expression(self): rule.pseudonyms[dotted_field] = re.compile(self._regex_mapping[regex_keyword]) elif isinstance(regex_keyword, str): # after the first run, the regex is compiled raise InvalidConfigurationError( - f"Regex keyword '{regex_keyword}' not found in regex_mapping '{self._config.regex_mapping}'" + f"Regex keyword '{regex_keyword}' not found in regex_mapping '{self.config.regex_mapping}'" ) def _apply_rules(self, event: dict, rule: PseudonymizerRule): - source_dict = {} - for source_field in rule.pseudonyms: - source_dict[source_field] = get_dotted_field_value(event, source_field) + rule = typing.cast(PseudonymizerRule, rule) + source_dict = get_dotted_field_values(event, rule.pseudonyms) self._handle_missing_fields(event, rule, source_dict.keys(), source_dict.values()) for dotted_field, field_value in source_dict.items(): @@ -276,7 +285,7 @@ def _apply_rules(self, event: dict, rule: PseudonymizerRule): for value in field_value ] else: - field_value = self._pseudonymize_field(rule, dotted_field, regex, field_value) + field_value = self._pseudonymize_field(rule, dotted_field, regex, str(field_value)) add_fields_to( event, fields={dotted_field: field_value}, rule=rule, overwrite_target=True ) @@ -311,13 +320,13 @@ def _pseudonymize_string(self, value: str) -> str: if self.pseudonymized_pattern.match(value): return value pseudonym_dict = self._get_pseudonym_dict_cached(value) - extra = (pseudonym_dict, self._config.outputs) + extra = (pseudonym_dict, self.config.outputs) if extra not in self.result.data: self.result.data.append(extra) return self._wrap_hash(pseudonym_dict["pseudonym"]) def _pseudonymize(self, value): - hash_string = self._hasher.hash_str(value, salt=self._config.hash_salt) + hash_string = self._hasher.hash_str(value, salt=self.config.hash_salt) encrypted_origin = self._encrypter.encrypt(value) 
return {"pseudonym": hash_string, "origin": encrypted_origin} @@ -358,12 +367,16 @@ def _pseudonymize_url(self, url_string: str) -> str: def _wrap_hash(self, hash_string: str) -> str: return self.HASH_PREFIX + hash_string + self.HASH_SUFFIX - def _update_cache_metrics(self): - cache_info_pseudonyms = self._get_pseudonym_dict_cached.cache_info() - cache_info_urls = self._pseudonymize_url_cached.cache_info() - self.metrics.new_results += cache_info_pseudonyms.misses + cache_info_urls.misses - self.metrics.cached_results += cache_info_pseudonyms.hits + cache_info_urls.hits - self.metrics.num_cache_entries += cache_info_pseudonyms.currsize + cache_info_urls.currsize - self.metrics.cache_load += (cache_info_pseudonyms.currsize + cache_info_urls.currsize) / ( - cache_info_pseudonyms.maxsize + cache_info_urls.maxsize + def _update_cache_metrics(self) -> None: + caches = [ + f.cache_info() + for f in [self._get_pseudonym_dict_cached, self._pseudonymize_url_cached] + if is_lru_cached(f) + ] + + self.metrics.new_results += sum(c.misses for c in caches) + self.metrics.cached_results += sum(c.hits for c in caches) + self.metrics.num_cache_entries += sum(c.currsize for c in caches) + self.metrics.cache_load += (sum(c.currsize for c in caches)) / ( + sum(typing.cast(int, c.maxsize) for c in caches) ) diff --git a/logprep/run_ng.py b/logprep/run_ng.py index d425a816a..3014710a1 100644 --- a/logprep/run_ng.py +++ b/logprep/run_ng.py @@ -28,9 +28,9 @@ def _print_version(config: "Configuration") -> None: sys.exit(EXITCODES.SUCCESS) -def _get_configuration(config_paths: tuple[str]) -> Configuration: +async def _get_configuration(config_paths: tuple[str]) -> Configuration: try: - config = Configuration.from_sources(config_paths) + config = await Configuration.from_sources(config_paths) logger.info("Log level set to '%s'", config.logger.level) return config except InvalidConfigurationError as error: @@ -67,36 +67,40 @@ def run(configs: tuple[str], version=None) -> None: CONFIG is a 
path to configuration file (filepath or URL). """ - configuration = _get_configuration(configs) - runner = Runner(configuration) - runner.setup_logging() - if version: - _print_version(configuration) - for version in get_versions_string(configuration).split("\n"): - logger.info(version) - logger.debug(f"Metric export enabled: {configuration.metrics.enabled}") - logger.debug(f"Config path: {configs}") - try: - if "pytest" not in sys.modules: # needed for not blocking tests - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) - logger.debug("Configuration loaded") - uvloop.run(runner.run()) - except SystemExit as error: - logger.debug(f"Exit received with code {error.code}") - sys.exit(error.code) - # pylint: disable=broad-except - except ExceptionGroup as error_group: - logger.exception(f"Multiple errors occurred: {error_group}") - except Exception as error: - if os.environ.get("DEBUG", False): - logger.exception(f"A critical error occurred: {error}") # pragma: no cover - else: - logger.critical(f"A critical error occurred: {error}") - if runner: - runner.stop() - sys.exit(EXITCODES.ERROR) - # pylint: enable=broad-except + + async def _run(configs: tuple[str], version=None): + configuration = await _get_configuration(configs) + runner = Runner(configuration) + runner.setup_logging() + if version: + _print_version(configuration) + for v in get_versions_string(configuration).split("\n"): + logger.info(v) + logger.debug(f"Metric export enabled: {configuration.metrics.enabled}") + logger.debug(f"Config path: {configs}") + try: + if "pytest" not in sys.modules: # needed for not blocking tests + signal.signal(signal.SIGTERM, signal_handler) + signal.signal(signal.SIGINT, signal_handler) + logger.debug("Configuration loaded") + await runner.run() + except SystemExit as error: + logger.debug(f"Exit received with code {error.code}") + sys.exit(error.code) + # pylint: disable=broad-except + except ExceptionGroup as error_group: + 
logger.exception(f"Multiple errors occurred: {error_group}") + except Exception as error: + if os.environ.get("DEBUG", False): + logger.exception(f"A critical error occurred: {error}") # pragma: no cover + else: + logger.critical(f"A critical error occurred: {error}") + if runner: + runner.stop() + sys.exit(EXITCODES.ERROR) + # pylint: enable=broad-except + + uvloop.run(_run(configs, version)) def signal_handler(__: int, _) -> None: diff --git a/logprep/util/helper.py b/logprep/util/helper.py index 131ff3033..da10d5fc7 100644 --- a/logprep/util/helper.py +++ b/logprep/util/helper.py @@ -25,6 +25,7 @@ from logprep.util.defaults import DEFAULT_CONFIG_LOCATION if TYPE_CHECKING: # pragma: no cover + from logprep.ng.util.configuration import Configuration as NgConfiguration from logprep.processor.base.rule import Rule from logprep.util.configuration import Configuration @@ -144,6 +145,7 @@ def _add_field_to( elif isinstance(existing_value, (int, float, str, bool)) and isinstance(content, list): target_parent[target_key] = [existing_value, *content] else: + # FIXME combining ll 117 & 135, overwrite_target can never be True here if not overwrite_target: raise FieldExistsWarning(rule, event, [target_field]) target_parent[target_key] = [existing_value, content] @@ -738,7 +740,9 @@ def get_source_fields_dict(event, rule): return source_field_dict -def get_versions_string(config: Optional["Configuration"] = None) -> str: +def get_versions_string( + config: Optional["Configuration"] | Optional["NgConfiguration"] = None, +) -> str: """ Prints the version and exists. 
If a configuration was found then it's version is printed as well From 9f57cdb0c8a93946a84738eb0d82adebd79f7b2e Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Mon, 9 Mar 2026 08:53:29 +0100 Subject: [PATCH 13/68] rename debug configurations and use internalConsole --- .vscode/launch.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 3c0da7fe0..c1c36a3d8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,11 +2,11 @@ "version": "0.2.0", "configurations": [ { - "name": "Debug non-ng example pipeline", + "name": "non-ng example pipeline", "type": "debugpy", "request": "launch", "program": "logprep/run_logprep.py", - "console": "integratedTerminal", + "console": "internalConsole", "args": [ "run", "examples/exampledata/config/pipeline.yml" @@ -17,11 +17,11 @@ "justMyCode": false }, { - "name": "Debug ng example pipeline", + "name": "ng example pipeline", "type": "debugpy", "request": "launch", "program": "logprep/run_ng.py", - "console": "integratedTerminal", + "console": "internalConsole", "args": [ "run", "examples/exampledata/config/ng_pipeline.yml" From 06272decefaae58901be81d2078a2cd0eedefb48 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Mon, 9 Mar 2026 08:54:13 +0100 Subject: [PATCH 14/68] show taskName in log messages --- logprep/ng/util/defaults.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/logprep/ng/util/defaults.py b/logprep/ng/util/defaults.py index a897f4862..d29ffb8db 100644 --- a/logprep/ng/util/defaults.py +++ b/logprep/ng/util/defaults.py @@ -21,7 +21,9 @@ class EXITCODES(IntEnum): DEFAULT_MESSAGE_BACKLOG_SIZE = 15000 DEFAULT_RESTART_COUNT = 5 DEFAULT_CONFIG_LOCATION = "file:///etc/logprep/pipeline.yml" -DEFAULT_LOG_FORMAT = "%(asctime)-15s %(process)-6s %(name)-10s %(levelname)-8s: %(message)s" +DEFAULT_LOG_FORMAT = ( + "%(asctime)-15s %(process)-6s 
%(taskName)s %(name)-10s %(levelname)-8s: %(message)s" +) DEFAULT_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S" DEFAULT_AES_KEY_LENGTH = 32 DEFAULT_BATCH_SIZE = 1 From 909893292b5a2918228f8a8f9a791f5304102bf7 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Mon, 9 Mar 2026 08:55:00 +0100 Subject: [PATCH 15/68] fix benchmark pipeline configs --- examples/exampledata/config/_benchmark_ng_pipeline.yml | 6 +++--- examples/exampledata/config/_benchmark_non_ng_pipeline.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/exampledata/config/_benchmark_ng_pipeline.yml b/examples/exampledata/config/_benchmark_ng_pipeline.yml index 194f3604d..0ba916535 100644 --- a/examples/exampledata/config/_benchmark_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_ng_pipeline.yml @@ -102,7 +102,7 @@ output: - 127.0.0.1:9200 default_index: processed default_op_type: create - message_backlog_size: 1 + message_backlog_size: 2500 timeout: 10000 flush_timeout: 60 user: admin @@ -111,12 +111,12 @@ output: chunk_size: 25 error_output: opensearch: - type: opensearch_output + type: ng_opensearch_output hosts: - 127.0.0.1:9200 default_index: errors default_op_type: create - message_backlog_size: 1 + message_backlog_size: 2500 timeout: 10000 flush_timeout: 60 user: admin diff --git a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml index 2c6cbf337..73abbf88f 100644 --- a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml @@ -102,7 +102,7 @@ output: - 127.0.0.1:9200 default_index: processed default_op_type: create - message_backlog_size: 1 + message_backlog_size: 2500 timeout: 10000 flush_timeout: 60 user: admin @@ -116,7 +116,7 @@ error_output: - 127.0.0.1:9200 default_index: errors default_op_type: create - message_backlog_size: 1 + message_backlog_size: 2500 timeout: 10000 
flush_timeout: 60 user: admin From 29ed218d871d630ada665371101e6e3f8bf35509 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Mon, 9 Mar 2026 09:03:59 +0100 Subject: [PATCH 16/68] update debug logs --- logprep/ng/sender.py | 2 ++ logprep/ng/util/worker/worker.py | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/logprep/ng/sender.py b/logprep/ng/sender.py index 4d961e5a0..69077a20f 100644 --- a/logprep/ng/sender.py +++ b/logprep/ng/sender.py @@ -34,6 +34,7 @@ def __init__( self._error_output = error_output async def process(self, batch: list[LogEvent]) -> list[LogEvent]: + logger.debug("Receiving event from worker: %d", len(batch)) await self._send_and_flush_processed_events(batch_events=batch) if self._error_output: await self._send_and_flush_failed_events(batch_events=batch) @@ -87,6 +88,7 @@ async def _send_and_flush_processed_events(self, batch_events: list[LogEvent]) - # flush once per output after sending try: + logger.debug("Flushing all outputs after sending %d events", len(batch_events)) results = await asyncio.gather( *(output.flush() for output in self._outputs.values()), return_exceptions=True, diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 445785d3a..3bcfc3f0b 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -88,10 +88,8 @@ async def _flush_after_interval(self) -> None: if the batch has not already been drained by the size trigger. 
""" try: - logger.debug("timer sleeping") await asyncio.sleep(self._batch_interval_s) except asyncio.CancelledError: - logger.debug("timer caught cancelled error") return batch: list[Input] | None = None @@ -102,6 +100,7 @@ async def _flush_after_interval(self) -> None: self._flush_timer = None if batch: + logger.debug("Flushing messages based on timer") await self._flush_batch(batch) def _drain_locked(self) -> list[Input]: @@ -136,6 +135,9 @@ async def add(self, item: Input) -> None: batch_to_flush = self._drain_locked() if batch_to_flush: + logger.debug("Flushing messages based on backlog size") + logger.debug("Remaining items in _batch_buffer: %d", len(self._batch_buffer)) + logger.debug("Batch size to flush after: %d", self._batch_size) await self._flush_batch(batch_to_flush) async def flush(self) -> None: @@ -150,6 +152,7 @@ async def flush(self) -> None: if self._batch_buffer: batch_to_flush = self._drain_locked() if batch_to_flush: + logger.debug("Flushing messages based on manual trigger") await self._flush_batch(batch_to_flush) async def _process_batch(self, batch: list[Input]) -> list[Output]: From 333ed60fe40c665fa8630c4605af411f5e21f98e Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Mon, 9 Mar 2026 14:36:15 +0100 Subject: [PATCH 17/68] fix benchmark configs for MacOS --- examples/exampledata/config/_benchmark_ng_pipeline.yml | 2 +- examples/exampledata/config/_benchmark_non_ng_pipeline.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/exampledata/config/_benchmark_ng_pipeline.yml b/examples/exampledata/config/_benchmark_ng_pipeline.yml index 0ba916535..a8ed1b630 100644 --- a/examples/exampledata/config/_benchmark_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_ng_pipeline.yml @@ -3,7 +3,7 @@ process_count: 1 timeout: 5.0 restart_count: 2 config_refresh_interval: 5 -error_backlog_size: 1500000 +error_backlog_size: 15000 logger: level: DEBUG format: "%(asctime)-15s 
%(hostname)-5s %(name)-10s %(levelname)-8s: %(message)s" diff --git a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml index 73abbf88f..ac956e549 100644 --- a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml @@ -3,7 +3,7 @@ process_count: 1 timeout: 5.0 restart_count: 2 config_refresh_interval: 5 -error_backlog_size: 1500000 +error_backlog_size: 15000 logger: level: DEBUG format: "%(asctime)-15s %(hostname)-5s %(name)-10s %(levelname)-8s: %(message)s" From dc36fa3dfeebe573ce4b4a974e48bc9f642ae5d1 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Mon, 9 Mar 2026 14:37:14 +0100 Subject: [PATCH 18/68] ensure taskName property is set for older python versions --- logprep/ng/util/defaults.py | 2 +- logprep/ng/util/logging.py | 45 +++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 logprep/ng/util/logging.py diff --git a/logprep/ng/util/defaults.py b/logprep/ng/util/defaults.py index d29ffb8db..b3060ecf3 100644 --- a/logprep/ng/util/defaults.py +++ b/logprep/ng/util/defaults.py @@ -36,7 +36,7 @@ class EXITCODES(IntEnum): "version": 1, "formatters": { "logprep": { - "class": "logprep.util.logging.LogprepFormatter", + "class": "logprep.ng.util.logging.LogprepFormatter", "format": DEFAULT_LOG_FORMAT, "datefmt": DEFAULT_LOG_DATE_FORMAT, } diff --git a/logprep/ng/util/logging.py b/logprep/ng/util/logging.py new file mode 100644 index 000000000..d5dac9411 --- /dev/null +++ b/logprep/ng/util/logging.py @@ -0,0 +1,45 @@ +"""helper classes for logprep logging""" + +import asyncio +import threading + +from logprep.util.logging import LogprepFormatter as NonNgLogprepFormatter +from logprep.util.logging import LogprepMPQueueListener as NonNgLogprepMPQueueListener + + +class LogprepFormatter(NonNgLogprepFormatter): + """ + A custom formatter for 
logprep logging with additional attributes. + + The Formatter can be initialized with a format string which makes use of + knowledge of the LogRecord attributes - e.g. the default value mentioned + above makes use of the fact that the user's message and arguments are pre- + formatted into a LogRecord's message attribute. The available attributes + are listed in the + `python documentation `_ . + Additionally, the formatter provides the following logprep specific attributes: + + .. table:: + + +-----------------------+--------------------------------------------------+ + | attribute | description | + +=======================+==================================================+ + | %(hostname) | (Logprep specific) The hostname of the machine | + | | where the log was emitted | + +-----------------------+--------------------------------------------------+ + | %(taskName) | The name of the executing asyncio task. | + +-----------------------+--------------------------------------------------+ + + """ + + def format(self, record): + # patch taskName for older python version (at least 3.11) + try: + record.taskName = asyncio.current_task().get_name() + except Exception: # pylint: disable=broad-exception-caught + record.taskName = threading.current_thread().name + return super().format(record) + + +class LogprepMPQueueListener(NonNgLogprepMPQueueListener): + """Logprep specific QueueListener that uses a multiprocessing instead of threading""" From 92df15b8c85d683706160615b15091d06be816ff Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Mon, 9 Mar 2026 16:45:47 +0100 Subject: [PATCH 19/68] avoid sending events multiple times in sender --- logprep/ng/sender.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/logprep/ng/sender.py b/logprep/ng/sender.py index 69077a20f..85ca62a47 100644 --- a/logprep/ng/sender.py +++ b/logprep/ng/sender.py @@ -69,15 +69,10 @@ async def _send_and_flush_processed_events(self, 
batch_events: list[LogEvent]) - if not processed: return - # send in parallel + # TODO send bulk of events try: - results = await asyncio.gather( - *(self._send_processed(event) for event in processed), - return_exceptions=True, - ) - for r in results: - if isinstance(r, Exception): - logger.exception("Error while sending processed event", exc_info=r) + for event in processed: + await self._send_processed(event) finally: for output in self._outputs.values(): From 9442f00c8c8d7a3aa02dca773e8b9b461b9935a6 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Mon, 9 Mar 2026 17:00:27 +0100 Subject: [PATCH 20/68] add draft for store_batch --- logprep/ng/abc/output.py | 20 ++++ .../ng/connector/confluent_kafka/output.py | 27 ++++-- logprep/ng/connector/opensearch/output.py | 65 +++++-------- logprep/ng/manager.py | 28 +++++- logprep/ng/sender.py | 94 +++++++------------ 5 files changed, 125 insertions(+), 109 deletions(-) diff --git a/logprep/ng/abc/output.py b/logprep/ng/abc/output.py index e5e24d246..41c44a60f 100644 --- a/logprep/ng/abc/output.py +++ b/logprep/ng/abc/output.py @@ -3,6 +3,7 @@ """ from abc import abstractmethod +from collections.abc import Sequence from copy import deepcopy from typing import Any, Callable @@ -102,6 +103,25 @@ async def store_custom(self, event: Event, target: str) -> None: Custom target for the event. """ + @abstractmethod + async def store_batch( + self, events: Sequence[Event], target: str | None = None + ) -> tuple[Sequence[Event], Sequence[Event]]: + """Stores the events in the output destination. + + Parameters + ---------- + events : Sequence[Event] + Events to be stored. + target : str | None + Custom target for the events, defaults to None + + Returns + ------- + tuple[Sequence[Event], Sequence[Event]] + Successful and failed events after sending. + """ + @abstractmethod async def flush(self): """Write the backlog to the output destination. 
diff --git a/logprep/ng/connector/confluent_kafka/output.py b/logprep/ng/connector/confluent_kafka/output.py index ba8039fac..e4356f08f 100644 --- a/logprep/ng/connector/confluent_kafka/output.py +++ b/logprep/ng/connector/confluent_kafka/output.py @@ -25,6 +25,7 @@ import logging import typing +from collections.abc import Sequence from functools import cached_property, partial from socket import getfqdn from types import MappingProxyType @@ -33,7 +34,7 @@ from confluent_kafka import KafkaException, Message, Producer # type: ignore from confluent_kafka.admin import AdminClient -from logprep.metrics.metrics import GaugeMetric, Metric +from logprep.metrics.metrics import GaugeMetric from logprep.ng.abc.event import Event from logprep.ng.abc.output import FatalOutputError, Output from logprep.ng.event.event_state import EventStateType @@ -282,7 +283,18 @@ def describe(self) -> str: f"{self.config.kafka_config.get('bootstrap.servers')}" ) - def store(self, event: Event) -> None: # type: ignore # TODO: fix mypy issue + async def store_batch( + self, events: Sequence[Event], target: str | None = None + ) -> tuple[Sequence[Event], Sequence[Event]]: + store_target = target if target is not None else self.config.topic + for event in events: + await self.store_custom(event, store_target) + return ( + [e for e in events if e.state == EventStateType.DELIVERED], + [e for e in events if e.state == EventStateType.FAILED], + ) + + async def store(self, event: Event) -> None: """Store a document in the producer topic. Parameters @@ -290,11 +302,11 @@ def store(self, event: Event) -> None: # type: ignore # TODO: fix mypy issue event : Event The event to store. 
""" - self.store_custom(event, self.config.topic) + await self.store_custom(event, self.config.topic) - @Output._handle_errors - @Metric.measure_time() - def store_custom(self, event: Event, target: str) -> None: + # @Output._handle_errors + # @Metric.measure_time() + async def store_custom(self, event: Event, target: str) -> None: """Write document to Kafka into target topic. Parameters @@ -316,12 +328,13 @@ def store_custom(self, event: Event, target: str) -> None: ) logger.debug("Produced message %s to topic %s", str(document), target) self._producer.poll(self.config.send_timeout) + self._producer.flush() except BufferError: # block program until buffer is empty or timeout is reached self._producer.flush(timeout=self.config.flush_timeout) logger.debug("Buffer full, flushing") - def flush(self) -> None: + async def flush(self) -> None: """ensures that all messages are flushed. According to https://confluent-kafka-python.readthedocs.io/en/latest/#confluent_kafka.Producer.flush flush without the timeout parameter will block until all messages are delivered. 
diff --git a/logprep/ng/connector/opensearch/output.py b/logprep/ng/connector/opensearch/output.py index b773673e1..57bfe7b33 100644 --- a/logprep/ng/connector/opensearch/output.py +++ b/logprep/ng/connector/opensearch/output.py @@ -30,10 +30,10 @@ ca_cert: /path/to/cert.crt """ -import asyncio import logging import ssl import typing +from collections.abc import Sequence from functools import cached_property from typing import List, Optional @@ -233,23 +233,9 @@ def _search_context(self): def __init__(self, name: str, configuration: "OpensearchOutput.Config"): super().__init__(name, configuration) - self._message_backlog = [] - self._flush_task: asyncio.Task | None = None async def setup(self): await super().setup() - flush_timeout = self.config.flush_timeout - - # TODO: improve flush task handling - async def flush_task() -> None: - try: - while True: - await asyncio.sleep(flush_timeout) - await self.flush() - except asyncio.CancelledError: - pass - - self._flush_task = asyncio.create_task(flush_task()) def describe(self) -> str: """Get name of Opensearch endpoint with the host. @@ -263,10 +249,9 @@ def describe(self) -> str: base_description = Output.describe(self) return f"{base_description} - Opensearch Output: {self.config.hosts}" - # @Output._handle_errors async def store(self, event: Event) -> None: """Store a document in the index defined in the document or to the default index.""" - await self.store_custom(event, event.data.get("_index", self.config.default_index)) + await self.store_batch([event], event.data.get("_index", self.config.default_index)) # @Output._handle_errors async def store_custom(self, event: Event, target: str) -> None: @@ -280,32 +265,34 @@ async def store_custom(self, event: Event, target: str) -> None: target : str Index to store the document in. 
""" - event.state.current_state = EventStateType.STORING_IN_OUTPUT - document = event.data - document["_index"] = target - document["_op_type"] = document.get("_op_type", self.config.default_op_type) - self.metrics.number_of_processed_events += 1 - self._message_backlog.append(event) - await self._write_to_search_context() - - async def _write_to_search_context(self): - """Writes documents from a buffer into Opensearch indices. - - Writes documents in a bulk if the document buffer limit has been reached. - This reduces connections to Opensearch and improves performance. - """ - if len(self._message_backlog) >= self.config.message_backlog_size: - await self.flush() + await self.store_batch([event], target) + + # @Output._handle_errors + async def store_batch( + self, events: Sequence[Event], target: str | None = None + ) -> tuple[Sequence[Event], Sequence[Event]]: + logger.debug("store_batch called with %d events, target=%s", len(events), target) + for event in events: + event.state.current_state = EventStateType.STORING_IN_OUTPUT + document = event.data + if target is None: + document["_index"] = document.get("_index", self.config.default_index) + else: + document["_index"] = document.get("_index", target) + document["_op_type"] = document.get("_op_type", self.config.default_op_type) + self.metrics.number_of_processed_events += len(events) + logger.debug("Flushing %d documents to Opensearch", len(events)) + await self._bulk(self._search_context, events) + return ( + [e for e in events if e.state == EventStateType.DELIVERED], + [e for e in events if e.state == EventStateType.FAILED], + ) # @Metric.measure_time() async def flush(self): - if not self._message_backlog: - return - logger.debug("Flushing %d documents to Opensearch", len(self._message_backlog)) - await self._bulk(self._search_context, self._message_backlog) - self._message_backlog.clear() + logger.debug("flush is not required") - async def _bulk(self, client: AsyncOpenSearch, events: list[Event]) -> None: 
+ async def _bulk(self, client: AsyncOpenSearch, events: Sequence[Event]) -> None: """Bulk index documents into Opensearch. Uses the parallel_bulk function from the opensearchpy library. The error information is stored in a document with the following structure: json diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 510100cbc..555dae0c9 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -95,16 +95,28 @@ async def process(batch: list[LogEvent]) -> list[LogEvent]: handler=process, ) - async def send(batch: list[LogEvent]) -> list[LogEvent]: - return await self._sender.process(batch) + async def send_extras(batch: list[LogEvent]) -> list[LogEvent]: + return await self._sender.send_extras(batch) + + extra_output_worker: Worker[LogEvent, LogEvent] = Worker( + name="extra_output_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=processing_worker.out_queue, # type: ignore + out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler=send_extras, + ) + + async def send_default_output(batch: list[LogEvent]) -> list[LogEvent]: + return await self._sender.send_default_output(batch) output_worker: Worker[LogEvent, LogEvent] = Worker( name="output_worker", batch_size=BATCH_SIZE, batch_interval_s=BATCH_INTERVAL_S, - in_queue=processing_worker.out_queue, # type: ignore + in_queue=extra_output_worker.out_queue, # type: ignore out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler=send, + handler=send_default_output, ) acknowledge_worker: Worker[LogEvent, LogEvent] = Worker( @@ -117,7 +129,13 @@ async def send(batch: list[LogEvent]) -> list[LogEvent]: ) return WorkerOrchestrator( - workers=[input_worker, processing_worker, output_worker, acknowledge_worker] + workers=[ + input_worker, + processing_worker, + extra_output_worker, + output_worker, + acknowledge_worker, + ] ) async def run(self) -> None: diff --git a/logprep/ng/sender.py b/logprep/ng/sender.py index 85ca62a47..c63af0617 100644 --- 
a/logprep/ng/sender.py +++ b/logprep/ng/sender.py @@ -3,6 +3,8 @@ import asyncio import logging import typing +from collections import defaultdict +from collections.abc import Sequence from logprep.ng.abc.event import ExtraDataEvent from logprep.ng.abc.output import Output @@ -33,12 +35,40 @@ def __init__( self._default_output = [output for output in outputs if output.default][0] self._error_output = error_output - async def process(self, batch: list[LogEvent]) -> list[LogEvent]: - logger.debug("Receiving event from worker: %d", len(batch)) - await self._send_and_flush_processed_events(batch_events=batch) - if self._error_output: - await self._send_and_flush_failed_events(batch_events=batch) - return batch + async def send_extras(self, batch_events: Sequence[LogEvent]) -> Sequence[LogEvent]: + output_buffers: dict[str, dict[str, list[ExtraDataEvent]]] = { + output_name: defaultdict(list) for output_name in self._outputs.keys() + } + + for event in batch_events: + for extra in typing.cast(Sequence[ExtraDataEvent], event.extra_data): + for output in extra.outputs: + for name, target in output.items(): + try: + output_buffers[name][target].append(extra) + except KeyError as error: + raise ValueError(f"Output {name} not configured.") from error + + results = await asyncio.gather( + *( + self._outputs[name].store_batch(events, target) + for name, target_events in output_buffers.items() + for target, events in target_events.items() + ), + return_exceptions=True, + ) + for r in results: + if isinstance(r, Exception): + logger.exception("Error while sending processed event", exc_info=r) + # TODO handle successful, failed + + logger.debug("return send_extras %d", len(batch_events)) + return batch_events + + async def send_default_output(self, batch_events: Sequence[LogEvent]) -> Sequence[LogEvent]: + logger.debug("send_default_output %d", len(batch_events)) + await self._default_output.store_batch(batch_events) + return batch_events async def 
_send_and_flush_failed_events(self, batch_events: list[LogEvent]) -> None: failed = [ @@ -60,58 +90,6 @@ async def _send_and_flush_failed_events(self, batch_events: list[LogEvent]) -> N for error_event in failed_error_events: logger.error("Error during sending to error output: %s", error_event) - async def _send_and_flush_processed_events(self, batch_events: list[LogEvent]) -> None: - processed = [ - event - for event in batch_events - if event is not None and event.state == EventStateType.PROCESSED - ] - if not processed: - return - - # TODO send bulk of events - try: - for event in processed: - await self._send_processed(event) - - finally: - for output in self._outputs.values(): - try: - await output.flush() - except Exception as e: - logger.exception("Error while flushing output %s", output.name, exc_info=e) - - # flush once per output after sending - try: - logger.debug("Flushing all outputs after sending %d events", len(batch_events)) - results = await asyncio.gather( - *(output.flush() for output in self._outputs.values()), - return_exceptions=True, - ) - for r in results: - if isinstance(r, Exception): - logger.exception("Error during final output flush", exc_info=r) - except Exception as e: - logger.exception("Unexpected error during final output flush", exc_info=e) - - async def _send_extra_data(self, event: LogEvent) -> None: - extra_data_events = typing.cast(list[ExtraDataEvent], event.extra_data) - for extra_data_event in extra_data_events: - for output in extra_data_event.outputs: - for output_name, output_target in output.items(): - if output_name in self._outputs: - await self._outputs[output_name].store_custom( - extra_data_event, output_target - ) - else: - raise ValueError(f"Output {output_name} not configured.") - - async def _send_processed(self, event: LogEvent) -> LogEvent: - if event.extra_data: - await self._send_extra_data(event) - await self._default_output.store(event) - return event - async def _send_failed(self, event: LogEvent) -> 
ErrorEvent: """Send the event to the error output. If event can't be sent, it will be logged as an error. From b5cd79346ed7257ef88505880085ecc7ad2f7d50 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Mon, 9 Mar 2026 23:26:41 +0100 Subject: [PATCH 21/68] WIP: small adaptions + benchmark results with adapted pipeline configs -> opensearch message_backlog_size: 1 --- .../20260309_231610/asyncNG_python3.11.txt | 83 +++++++++++++++++++ .../20260309_231610/nonNG_python3.11.txt | 83 +++++++++++++++++++ .../config/_benchmark_ng_pipeline.yml | 4 +- .../config/_benchmark_non_ng_pipeline.yml | 4 +- logprep/ng/abc/output.py | 6 +- logprep/ng/connector/opensearch/output.py | 16 ++-- logprep/ng/manager.py | 21 ++++- logprep/ng/sender.py | 20 +++-- logprep/ng/util/worker/worker.py | 3 +- run_benchmarks.py | 60 ++++++++++++++ 10 files changed, 270 insertions(+), 30 deletions(-) create mode 100644 benchmark_results/20260309_231610/asyncNG_python3.11.txt create mode 100644 benchmark_results/20260309_231610/nonNG_python3.11.txt create mode 100644 run_benchmarks.py diff --git a/benchmark_results/20260309_231610/asyncNG_python3.11.txt b/benchmark_results/20260309_231610/asyncNG_python3.11.txt new file mode 100644 index 000000000..b1c881ae9 --- /dev/null +++ b/benchmark_results/20260309_231610/asyncNG_python3.11.txt @@ -0,0 +1,83 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-09T23:17:50.692991 +timestamp (UTC) : 2026-03-09T22:17:50.692995+00:00 +python version : 3.11.14 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 1 + ↳ mode : logprep-ng + ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260309_231610/asyncNG_python3.11.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30] +services : 
['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config NFlpeREcQX6uhmpZXTdNnA 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability pjUG0ykhQ9yITaSLZuvSOA 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773094728 22:18:48 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed HgKcRCSeT2-KxTAjoHGLGA 1 1 5832 0 1007.9kb 1007.9kb +green open .opensearch-observability pjUG0ykhQ9yITaSLZuvSOA 1 0 0 0 208b 208b +green open .plugins-ml-config NFlpeREcQX6uhmpZXTdNnA 1 0 1 0 3.9kb 3.9kb +yellow open sre KKmiQGjPTEyb2dlSW3Xr5A 1 1 754 0 221kb 221kb +yellow open pseudonyms FEnG8tOxQJuXNj0YPUrfRA 1 1 756 0 76kb 76kb + + +--- _cat/count --- +epoch timestamp count +1773094759 22:19:19 7343 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.051 s +measurement window: 30.000 s +processed (OpenSearch): 5_832 +throughput: 194.40 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 1 +total runtime: 30.000 s +total processed: 5_832 + +throughput (weighted): 194.40 docs/s +throughput (median): 194.40 docs/s +throughput (average): 194.40 docs/s +throughput (min/max): 194.40 / 194.40 docs/s +throughput (std dev): 0.00 docs/s +================================ diff --git a/benchmark_results/20260309_231610/nonNG_python3.11.txt b/benchmark_results/20260309_231610/nonNG_python3.11.txt new file 
mode 100644 index 000000000..151abe767 --- /dev/null +++ b/benchmark_results/20260309_231610/nonNG_python3.11.txt @@ -0,0 +1,83 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-09T23:16:11.042971 +timestamp (UTC) : 2026-03-09T22:16:11.042974+00:00 +python version : 3.11.14 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 0 + ↳ mode : logprep + ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260309_231610/nonNG_python3.11.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability 4LufVowAQ6iWBbEmSsisNg 1 0 0 0 208b 208b +green open .plugins-ml-config LIwyBs_0T0K7QQJ4gw0UwQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773094627 22:17:07 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed AumBEANnRreI6jEWzhph1Q 1 1 1283 0 447.5kb 447.5kb +green open .opensearch-observability 4LufVowAQ6iWBbEmSsisNg 1 0 0 0 208b 208b +green open .plugins-ml-config LIwyBs_0T0K7QQJ4gw0UwQ 1 0 1 0 3.9kb 3.9kb +yellow open sre ZxkLYFZRTDqppWkN6eKRZg 1 1 162 0 153.1kb 153.1kb +yellow open pseudonyms kiVDDQK_T0C8dW31aTibRw 1 1 165 0 110.2kb 110.2kb + + +--- 
_cat/count --- +epoch timestamp count +1773094659 22:17:39 1611 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.041 s +measurement window: 30.000 s +processed (OpenSearch): 1_283 +throughput: 42.77 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 1 +total runtime: 30.000 s +total processed: 1_283 + +throughput (weighted): 42.77 docs/s +throughput (median): 42.77 docs/s +throughput (average): 42.77 docs/s +throughput (min/max): 42.77 / 42.77 docs/s +throughput (std dev): 0.00 docs/s +================================ diff --git a/examples/exampledata/config/_benchmark_ng_pipeline.yml b/examples/exampledata/config/_benchmark_ng_pipeline.yml index a8ed1b630..efdbf8900 100644 --- a/examples/exampledata/config/_benchmark_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_ng_pipeline.yml @@ -102,7 +102,7 @@ output: - 127.0.0.1:9200 default_index: processed default_op_type: create - message_backlog_size: 2500 + message_backlog_size: 1 timeout: 10000 flush_timeout: 60 user: admin @@ -116,7 +116,7 @@ error_output: - 127.0.0.1:9200 default_index: errors default_op_type: create - message_backlog_size: 2500 + message_backlog_size: 1 timeout: 10000 flush_timeout: 60 user: admin diff --git a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml index ac956e549..39ee41b08 100644 --- a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml @@ -102,7 +102,7 @@ output: - 127.0.0.1:9200 default_index: processed default_op_type: create - message_backlog_size: 2500 + message_backlog_size: 1 timeout: 10000 flush_timeout: 60 user: admin @@ -116,7 +116,7 @@ error_output: - 127.0.0.1:9200 default_index: errors default_op_type: create - message_backlog_size: 2500 + message_backlog_size: 1 timeout: 10000 
flush_timeout: 60 user: admin diff --git a/logprep/ng/abc/output.py b/logprep/ng/abc/output.py index 41c44a60f..979945560 100644 --- a/logprep/ng/abc/output.py +++ b/logprep/ng/abc/output.py @@ -106,7 +106,7 @@ async def store_custom(self, event: Event, target: str) -> None: @abstractmethod async def store_batch( self, events: Sequence[Event], target: str | None = None - ) -> tuple[Sequence[Event], Sequence[Event]]: + ) -> Sequence[Event]: """Stores the events in the output destination. Parameters @@ -118,8 +118,8 @@ async def store_batch( Returns ------- - tuple[Sequence[Event], Sequence[Event]] - Successful and failed events after sending. + Sequence[Event] + Events after sending. """ @abstractmethod diff --git a/logprep/ng/connector/opensearch/output.py b/logprep/ng/connector/opensearch/output.py index 57bfe7b33..87656f9bb 100644 --- a/logprep/ng/connector/opensearch/output.py +++ b/logprep/ng/connector/opensearch/output.py @@ -270,10 +270,9 @@ async def store_custom(self, event: Event, target: str) -> None: # @Output._handle_errors async def store_batch( self, events: Sequence[Event], target: str | None = None - ) -> tuple[Sequence[Event], Sequence[Event]]: + ) -> Sequence[Event]: logger.debug("store_batch called with %d events, target=%s", len(events), target) for event in events: - event.state.current_state = EventStateType.STORING_IN_OUTPUT document = event.data if target is None: document["_index"] = document.get("_index", self.config.default_index) @@ -283,10 +282,7 @@ async def store_batch( self.metrics.number_of_processed_events += len(events) logger.debug("Flushing %d documents to Opensearch", len(events)) await self._bulk(self._search_context, events) - return ( - [e for e in events if e.state == EventStateType.DELIVERED], - [e for e in events if e.state == EventStateType.FAILED], - ) + return events # @Metric.measure_time() async def flush(self): @@ -323,12 +319,16 @@ async def _bulk(self, client: AsyncOpenSearch, events: Sequence[Event]) -> None: 
break event = events[index] + event.state.current_state = EventStateType.STORING_IN_OUTPUT + index += 1 if success: - event.state.current_state = EventStateType.STORING_IN_OUTPUT + event.state.current_state = EventStateType.DELIVERED continue + event.state.current_state = EventStateType.FAILED + # parallel_bulk often returned item that allowed item.get("_op_type") # streaming_bulk usually returns {"index": {...}} / {"create": {...}} op_type = item.get("_op_type") if isinstance(item, dict) else None @@ -349,8 +349,6 @@ async def _bulk(self, client: AsyncOpenSearch, events: Sequence[Event]) -> None: ) error = BulkError(error_info.get("error", "Failed to index document"), **error_info) - - event.state.current_state = EventStateType.FAILED event.errors.append(error) async def health(self) -> bool: # type: ignore # TODO: fix mypy issue diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 555dae0c9..2218f0408 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -8,6 +8,8 @@ from asyncio import CancelledError from typing import cast +from numpy.random.mtrand import Sequence + from logprep.factory import Factory from logprep.ng.abc.input import Input from logprep.ng.abc.output import Output @@ -75,12 +77,19 @@ async def setup(self): self._orchestrator = self._create_orchestrator() def _create_orchestrator(self) -> WorkerOrchestrator: - input_worker: Worker[LogEvent, LogEvent] = TransferWorker( + async def transfer_batch(batch: list[LogEvent]) -> list[LogEvent]: + for event in batch: + event.state.current_state = EventStateType.RECEIVED + + return batch + + input_worker: Worker[LogEvent, LogEvent] = Worker( name="input_worker", batch_size=1, batch_interval_s=BATCH_INTERVAL_S, in_queue=self._input_connector(timeout=self.configuration.timeout), out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + handler=transfer_batch, ) async def process(batch: list[LogEvent]) -> list[LogEvent]: @@ -95,7 +104,7 @@ async def process(batch: list[LogEvent]) -> 
list[LogEvent]: handler=process, ) - async def send_extras(batch: list[LogEvent]) -> list[LogEvent]: + async def send_extras(batch: list[LogEvent]) -> Sequence: return await self._sender.send_extras(batch) extra_output_worker: Worker[LogEvent, LogEvent] = Worker( @@ -119,13 +128,17 @@ async def send_default_output(batch: list[LogEvent]) -> list[LogEvent]: handler=send_default_output, ) + async def _handle_sent_events(batch: list[LogEvent]) -> list[LogEvent]: + # TODO: call await self._input_connector.acknowledge() ??? + return await self._process_sent_events(batch) + acknowledge_worker: Worker[LogEvent, LogEvent] = Worker( name="acknowledge_worker", batch_size=BATCH_SIZE, batch_interval_s=BATCH_INTERVAL_S, in_queue=output_worker.out_queue, # type: ignore out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler=self._process_sent_events, + handler=_handle_sent_events, ) return WorkerOrchestrator( @@ -161,7 +174,7 @@ async def _shut_down(self) -> None: await self._orchestrator.shut_down(self._shutdown_timeout_s) if self._sender is not None: - self._sender.shut_down() + await self._sender.shut_down() self._input_connector.acknowledge() len_delivered_events = len( diff --git a/logprep/ng/sender.py b/logprep/ng/sender.py index c63af0617..e316fc6db 100644 --- a/logprep/ng/sender.py +++ b/logprep/ng/sender.py @@ -60,22 +60,24 @@ async def send_extras(self, batch_events: Sequence[LogEvent]) -> Sequence[LogEve for r in results: if isinstance(r, Exception): logger.exception("Error while sending processed event", exc_info=r) - # TODO handle successful, failed + + # TODO: filter and handle successful + failed + # succeed_events, failed_events = ( + # [e for e in batch_events if e.state == EventStateType.DELIVERED], + # [e for e in batch_events if e.state == EventStateType.FAILED], + # ) + # assert len(succeed_events) + len(failed_events) == len(batch_events), "Lost events in batch" logger.debug("return send_extras %d", len(batch_events)) + return batch_events async def 
send_default_output(self, batch_events: Sequence[LogEvent]) -> Sequence[LogEvent]: logger.debug("send_default_output %d", len(batch_events)) - await self._default_output.store_batch(batch_events) - return batch_events + return await self._default_output.store_batch(batch_events) # type: ignore async def _send_and_flush_failed_events(self, batch_events: list[LogEvent]) -> None: - failed = [ - event - for event in batch_events - if event is not None and event.state == EventStateType.FAILED - ] + failed = [event for event in batch_events if event.state is EventStateType.FAILED] if not failed: return @@ -85,7 +87,7 @@ async def _send_and_flush_failed_events(self, batch_events: list[LogEvent]) -> N await self._error_output.flush() # type: ignore[union-attr] failed_error_events = [ - event for event in error_events if event.state == EventStateType.FAILED + event for event in error_events if event.state is EventStateType.FAILED ] for error_event in failed_error_events: logger.error("Error during sending to error output: %s", error_event) diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 3bcfc3f0b..eaad052c4 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -170,7 +170,8 @@ async def _flush_batch(self, batch: list[Input]) -> None: if self.out_queue is not None: for item in batch_result: await self.out_queue.put(item) - await asyncio.sleep(0) + + await asyncio.sleep(0) async def run(self, stop_event: asyncio.Event) -> None: """ diff --git a/run_benchmarks.py b/run_benchmarks.py new file mode 100644 index 000000000..7a57df5cb --- /dev/null +++ b/run_benchmarks.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 + +import subprocess +import sys +from datetime import datetime +from pathlib import Path + +PYTHON_VERSIONS = ["3.11"] # , "3.12", "3.13", "3.14"] +MODES = [ + ("nonNG", "0"), + ("asyncNG", "1"), +] + + +def run_benchmarks() -> None: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + out_dir = 
Path("benchmark_results") / timestamp + out_dir.mkdir(parents=True, exist_ok=True) + + print(f"Results directory: {out_dir}\n") + + commands = [] + + for mode_name, ng_flag in MODES: + for py in PYTHON_VERSIONS: + outfile = out_dir / f"{mode_name}_python{py}.txt" + + cmd = [ + "uv", + "run", + "--python", + py, + "benchmark.py", + "--event-num", + "120000", + "--runs", + "30", + "--ng", + ng_flag, + "--out", + str(outfile), + ] + + commands.append(cmd) + + for i, cmd in enumerate(commands, start=1): + print(f"=== Run {i}/{len(commands)} ===") + print(" ".join(cmd)) + + try: + subprocess.run(cmd, check=True) + except subprocess.CalledProcessError as e: + print(f"Run failed with exit code {e.returncode}") + sys.exit(e.returncode) + + print("\nAll benchmark runs finished.") + + +if __name__ == "__main__": + run_benchmarks() From 55ebd8025817215b2a6db72a22d5b8fee0ebe2a3 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Mon, 9 Mar 2026 23:31:39 +0100 Subject: [PATCH 22/68] WIP: benchmark results with adapted pipeline configs -> opensearch message_backlog_size: 2500 --- .../20260309_232743/asyncNG_python3.11.txt | 85 +++++++++++++++++++ .../20260309_232743/nonNG_python3.11.txt | 83 ++++++++++++++++++ .../config/_benchmark_ng_pipeline.yml | 4 +- .../config/_benchmark_non_ng_pipeline.yml | 4 +- 4 files changed, 172 insertions(+), 4 deletions(-) create mode 100644 benchmark_results/20260309_232743/asyncNG_python3.11.txt create mode 100644 benchmark_results/20260309_232743/nonNG_python3.11.txt diff --git a/benchmark_results/20260309_232743/asyncNG_python3.11.txt b/benchmark_results/20260309_232743/asyncNG_python3.11.txt new file mode 100644 index 000000000..4a9e69ba7 --- /dev/null +++ b/benchmark_results/20260309_232743/asyncNG_python3.11.txt @@ -0,0 +1,85 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-09T23:29:22.520526 +timestamp (UTC) : 2026-03-09T22:29:22.520530+00:00 +python version : 3.11.14 +---------------------------------------- +bootstrap_servers 
: 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 1 + ↳ mode : logprep-ng + ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260309_232743/asyncNG_python3.11.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config kD16pAJGRZ2MhuLKsXihAg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability u4eFMth_RJ-cuygSTg7evg 1 0 0 0 208b 208b +green open top_queries-2026.03.09-25320 5kyK9IkeQweUUuHDq10k3A 1 0 8 16 79.8kb 79.8kb + + +--- _cat/count --- +epoch timestamp count +1773095419 22:30:19 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed JHWMU0uiQFWhmimuzOfo0A 1 1 5820 0 771.4kb 771.4kb +green open .opensearch-observability u4eFMth_RJ-cuygSTg7evg 1 0 0 0 208b 208b +green open .plugins-ml-config kD16pAJGRZ2MhuLKsXihAg 1 0 1 0 3.9kb 3.9kb +yellow open sre PgeiHyQTR3y2A0Mf4byXaw 1 1 750 0 231.5kb 231.5kb +yellow open pseudonyms 5cAB-TyRRh-Mp2QT8PV9_Q 1 1 749 0 73.9kb 73.9kb +green open top_queries-2026.03.09-25320 5kyK9IkeQweUUuHDq10k3A 1 0 8 16 79.8kb 79.8kb + + +--- _cat/count --- +epoch timestamp count +1773095450 22:30:50 7328 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- 
+run_seconds: 30 +events generated: 120_000 +startup time: 5.041 s +measurement window: 30.000 s +processed (OpenSearch): 5_820 +throughput: 194.00 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 1 +total runtime: 30.000 s +total processed: 5_820 + +throughput (weighted): 194.00 docs/s +throughput (median): 194.00 docs/s +throughput (average): 194.00 docs/s +throughput (min/max): 194.00 / 194.00 docs/s +throughput (std dev): 0.00 docs/s +================================ diff --git a/benchmark_results/20260309_232743/nonNG_python3.11.txt b/benchmark_results/20260309_232743/nonNG_python3.11.txt new file mode 100644 index 000000000..bbbb08e30 --- /dev/null +++ b/benchmark_results/20260309_232743/nonNG_python3.11.txt @@ -0,0 +1,83 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-09T23:27:44.332323 +timestamp (UTC) : 2026-03-09T22:27:44.332326+00:00 +python version : 3.11.14 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 0 + ↳ mode : logprep + ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260309_232743/nonNG_python3.11.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability PPo2XTsbS3i5-Md9d2YEYw 1 0 0 0 208b 208b +green open .plugins-ml-config soKZV8HHQDesGw2I-NZ15A 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773095319 22:28:39 1 + + 
+--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 6IWSH9bcSbiUitvFF4CQPQ 1 1 30007 0 4mb 4mb +green open .opensearch-observability PPo2XTsbS3i5-Md9d2YEYw 1 0 0 0 208b 208b +green open .plugins-ml-config soKZV8HHQDesGw2I-NZ15A 1 0 1 0 3.9kb 3.9kb +yellow open sre 7RFX8KAGToCq_JmWUNTXXA 1 1 3747 0 839.8kb 839.8kb +yellow open pseudonyms NBQlOkAcQ56O3a-lORDaYQ 1 1 3746 0 251.3kb 251.3kb + + +--- _cat/count --- +epoch timestamp count +1773095351 22:29:11 37501 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.037 s +measurement window: 30.000 s +processed (OpenSearch): 30_007 +throughput: 1,000.23 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 1 +total runtime: 30.000 s +total processed: 30_007 + +throughput (weighted): 1,000.23 docs/s +throughput (median): 1,000.23 docs/s +throughput (average): 1,000.23 docs/s +throughput (min/max): 1,000.23 / 1,000.23 docs/s +throughput (std dev): 0.00 docs/s +================================ diff --git a/examples/exampledata/config/_benchmark_ng_pipeline.yml b/examples/exampledata/config/_benchmark_ng_pipeline.yml index efdbf8900..a8ed1b630 100644 --- a/examples/exampledata/config/_benchmark_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_ng_pipeline.yml @@ -102,7 +102,7 @@ output: - 127.0.0.1:9200 default_index: processed default_op_type: create - message_backlog_size: 1 + message_backlog_size: 2500 timeout: 10000 flush_timeout: 60 user: admin @@ -116,7 +116,7 @@ error_output: - 127.0.0.1:9200 default_index: errors default_op_type: create - message_backlog_size: 1 + message_backlog_size: 2500 timeout: 10000 flush_timeout: 60 user: admin diff --git 
a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml index 39ee41b08..ac956e549 100644 --- a/examples/exampledata/config/_benchmark_non_ng_pipeline.yml +++ b/examples/exampledata/config/_benchmark_non_ng_pipeline.yml @@ -102,7 +102,7 @@ output: - 127.0.0.1:9200 default_index: processed default_op_type: create - message_backlog_size: 1 + message_backlog_size: 2500 timeout: 10000 flush_timeout: 60 user: admin @@ -116,7 +116,7 @@ error_output: - 127.0.0.1:9200 default_index: errors default_op_type: create - message_backlog_size: 1 + message_backlog_size: 2500 timeout: 10000 flush_timeout: 60 user: admin From ae79764c82bd7a35742be9c65d51da250b745e08 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:32:54 +0100 Subject: [PATCH 23/68] fix wrong import --- logprep/ng/manager.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 2218f0408..9eb74ae3a 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -8,8 +8,6 @@ from asyncio import CancelledError from typing import cast -from numpy.random.mtrand import Sequence - from logprep.factory import Factory from logprep.ng.abc.input import Input from logprep.ng.abc.output import Output @@ -104,7 +102,7 @@ async def process(batch: list[LogEvent]) -> list[LogEvent]: handler=process, ) - async def send_extras(batch: list[LogEvent]) -> Sequence: + async def send_extras(batch: list[LogEvent]) -> list[LogEvent]: return await self._sender.send_extras(batch) extra_output_worker: Worker[LogEvent, LogEvent] = Worker( From 79290e3068ad44434116b0d5a84cc002c4e6c633 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:33:39 +0100 Subject: [PATCH 24/68] set MAX_QUEUE_SIZE to BATCH_SIZE and increase input_worker batch_size --- logprep/ng/manager.py | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 9eb74ae3a..c4803e6ff 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -19,16 +19,16 @@ from logprep.ng.sender import Sender from logprep.ng.util.configuration import Configuration from logprep.ng.util.worker.types import SizeLimitedQueue -from logprep.ng.util.worker.worker import TransferWorker, Worker, WorkerOrchestrator +from logprep.ng.util.worker.worker import Worker, WorkerOrchestrator logger = logging.getLogger("PipelineManager") -MAX_QUEUE_SIZE = 100_000 - BATCH_SIZE = 2_500 BATCH_INTERVAL_S = 5 +MAX_QUEUE_SIZE = BATCH_SIZE + class PipelineManager: """Orchestrator class managing pipeline inputs, processors and outputs""" @@ -83,7 +83,7 @@ async def transfer_batch(batch: list[LogEvent]) -> list[LogEvent]: input_worker: Worker[LogEvent, LogEvent] = Worker( name="input_worker", - batch_size=1, + batch_size=250, batch_interval_s=BATCH_INTERVAL_S, in_queue=self._input_connector(timeout=self.configuration.timeout), out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), From 49e198feaac6026fbc5d75a261dbc7d9d22c6e25 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:59:01 +0100 Subject: [PATCH 25/68] disentangle EventBacklog and input --- logprep/ng/abc/input.py | 31 ++++++------------- logprep/ng/connector/confluent_kafka/input.py | 4 +++ logprep/ng/manager.py | 8 ++--- logprep/ng/util/worker/worker.py | 2 -- 4 files changed, 16 insertions(+), 29 deletions(-) diff --git a/logprep/ng/abc/input.py b/logprep/ng/abc/input.py index 37a60bb53..547759467 100644 --- a/logprep/ng/abc/input.py +++ b/logprep/ng/abc/input.py @@ -22,10 +22,8 @@ from logprep.abc.connector import Connector from logprep.abc.exceptions import LogprepException -from logprep.ng.abc.event import EventBacklog from logprep.ng.event.event_state import EventStateType from logprep.ng.event.log_event import LogEvent -from 
logprep.ng.event.set_event_backlog import SetEventBacklog from logprep.processor.base.exceptions import FieldExistsWarning from logprep.util.converters import convert_from_dict from logprep.util.helper import ( @@ -149,7 +147,6 @@ class Config(Connector.Config): ) def __init__(self, name: str, configuration: "Input.Config") -> None: - self.event_backlog: EventBacklog = SetEventBacklog() super().__init__(name, configuration) @property @@ -182,17 +179,9 @@ def __call__(self, *, timeout: float) -> InputIterator: return InputIterator(self, timeout) - def acknowledge(self) -> None: - """Acknowledge all delivered events, so Input Connector can return final ACK state. - - As side effect, all older events with state ACKED has to be removed from `event_backlog` - before acknowledging new ones. - """ - - self.event_backlog.unregister(state_type=EventStateType.ACKED) - - for event in self.event_backlog.get(state_type=EventStateType.DELIVERED): - event.state.current_state = EventStateType.ACKED + @abstractmethod + async def acknowledge(self, events: list[LogEvent]) -> None: + """Acknowledge all delivered events, so Input Connector can return final ACK state.""" @property def _add_hmac(self) -> bool: @@ -274,13 +263,13 @@ async def _get_event(self, timeout: float) -> tuple: (event, raw_event, metadata) """ - def _register_failed_event( + def _produce_failed_event( self, event: dict | None, raw_event: bytes | None, metadata: dict | None, error: Exception, - ) -> None: + ) -> LogEvent: """Helper method to register the failed event to event backlog.""" error_log_event = LogEvent( @@ -291,7 +280,7 @@ def _register_failed_event( error_log_event.errors.append(error) error_log_event.state.current_state = EventStateType.FAILED - self.event_backlog.register(events=[error_log_event]) + return error_log_event # @Metric.measure_time() async def get_next(self, timeout: float) -> LogEvent | None: @@ -307,7 +296,7 @@ async def get_next(self, timeout: float) -> LogEvent | None: input : 
LogEvent, None Input log data. """ - self.acknowledge() + # self.acknowledge() event: dict | None = None raw_event: bytes | None = None metadata: dict | None = None @@ -357,7 +346,8 @@ async def get_next(self, timeout: float) -> LogEvent | None: except (FieldExistsWarning, TimeParserException) as error: raise CriticalInputError(self, error.args[0], event) from error except CriticalInputError as error: - self._register_failed_event( + # TODO handle failed events + self._produce_failed_event( event=event, raw_event=raw_event, metadata=metadata, # type: ignore @@ -371,12 +361,11 @@ async def get_next(self, timeout: float) -> LogEvent | None: metadata=metadata, # type: ignore # TODO: fix mypy issue ) - self.event_backlog.register(events=[log_event]) log_event.state.current_state = EventStateType.RECEIVED return log_event - def batch_finished_callback(self) -> None: + async def batch_finished_callback(self) -> None: """Can be called by output connectors after processing a batch of one or more records.""" def _add_env_enrichment_to_event(self, event: dict, enrichments: dict) -> None: diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index 808db2aca..5211072b5 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -60,6 +60,7 @@ InputWarning, ) from logprep.ng.connector.confluent_kafka.metadata import ConfluentKafkaMetadata +from logprep.ng.event.log_event import LogEvent from logprep.util.validators import keys_in_validator DEFAULTS = { @@ -632,6 +633,9 @@ def health(self) -> bool: return False return super().health() + async def acknowledge(self, events: list[LogEvent]): + logger.debug("acknowledge called") + async def setup(self): """Set the component up.""" await super().setup() diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index c4803e6ff..73a70f1c5 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -26,7 +26,6 @@ BATCH_SIZE 
= 2_500 BATCH_INTERVAL_S = 5 - MAX_QUEUE_SIZE = BATCH_SIZE @@ -43,7 +42,6 @@ async def setup(self): self._event_backlog = SetEventBacklog() self._input_connector = cast(Input, Factory.create(self.configuration.input)) - self._input_connector.event_backlog = self._event_backlog # TODO needs to be disentagled await self._input_connector.setup() processors = [ @@ -173,11 +171,9 @@ async def _shut_down(self) -> None: if self._sender is not None: await self._sender.shut_down() - self._input_connector.acknowledge() + # self._input_connector.acknowledge() - len_delivered_events = len( - list(self._input_connector.event_backlog.get(EventStateType.DELIVERED)) - ) + len_delivered_events = len(list(self._event_backlog.get(EventStateType.DELIVERED))) if len_delivered_events: logger.error( "Input connector has %d non-acked events in event_backlog.", len_delivered_events diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index eaad052c4..9bb954ac2 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -136,8 +136,6 @@ async def add(self, item: Input) -> None: if batch_to_flush: logger.debug("Flushing messages based on backlog size") - logger.debug("Remaining items in _batch_buffer: %d", len(self._batch_buffer)) - logger.debug("Batch size to flush after: %d", self._batch_size) await self._flush_batch(batch_to_flush) async def flush(self) -> None: From e8dad3a5d22c4f8f85d22c6c24b12cdda7d85db9 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 10 Mar 2026 14:30:09 +0100 Subject: [PATCH 26/68] docs: benchmark logs --- .../20260310_114644/asyncNG_python3.11.txt | 462 +++++++++++++++++ .../20260310_114644/asyncNG_python3.12.txt | 463 +++++++++++++++++ .../20260310_114644/asyncNG_python3.13.txt | 465 ++++++++++++++++++ .../20260310_114644/asyncNG_python3.14.txt | 464 +++++++++++++++++ .../20260310_114644/nonNG_python3.11.txt | 464 +++++++++++++++++ .../20260310_114644/nonNG_python3.12.txt | 464 +++++++++++++++++ 
.../20260310_114644/nonNG_python3.13.txt | 462 +++++++++++++++++ .../20260310_114644/nonNG_python3.14.txt | 465 ++++++++++++++++++ 8 files changed, 3709 insertions(+) create mode 100644 benchmark_results/20260310_114644/asyncNG_python3.11.txt create mode 100644 benchmark_results/20260310_114644/asyncNG_python3.12.txt create mode 100644 benchmark_results/20260310_114644/asyncNG_python3.13.txt create mode 100644 benchmark_results/20260310_114644/asyncNG_python3.14.txt create mode 100644 benchmark_results/20260310_114644/nonNG_python3.11.txt create mode 100644 benchmark_results/20260310_114644/nonNG_python3.12.txt create mode 100644 benchmark_results/20260310_114644/nonNG_python3.13.txt create mode 100644 benchmark_results/20260310_114644/nonNG_python3.14.txt diff --git a/benchmark_results/20260310_114644/asyncNG_python3.11.txt b/benchmark_results/20260310_114644/asyncNG_python3.11.txt new file mode 100644 index 000000000..d16ed0c72 --- /dev/null +++ b/benchmark_results/20260310_114644/asyncNG_python3.11.txt @@ -0,0 +1,462 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-10T11:46:44.880438 +timestamp (UTC) : 2026-03-10T10:46:44.880442+00:00 +python version : 3.11.14 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 1 + ↳ mode : logprep-ng + ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260310_114644/asyncNG_python3.11.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices 
--- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability ljQZHXlgS763a1yot74u5Q 1 0 0 0 208b 208b +green open .plugins-ml-config 4O2BXmbdSlCu2AnWk24AmQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773139659 10:47:39 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed xkYvH-rXTRuodc9RFoN9pQ 1 1 40000 0 4.2mb 4.2mb +green open .opensearch-observability ljQZHXlgS763a1yot74u5Q 1 0 0 0 208b 208b +green open .plugins-ml-config 4O2BXmbdSlCu2AnWk24AmQ 1 0 1 0 3.9kb 3.9kb +yellow open sre vFjaX9g8R0GBtAhD2NgPrA 1 1 5620 0 1.1mb 1.1mb +yellow open pseudonyms LPV1P7acTiOcksbF5kVFZQ 1 1 5623 0 282.7kb 282.7kb + + +--- _cat/count --- +epoch timestamp count +1773139693 10:48:13 51244 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.050 s +measurement window: 30.000 s +processed (OpenSearch): 40_000 +throughput: 1,333.33 docs/s +-------------- + +----- Run Round 2: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config IuK839AzSBqJ79-EzPvPRw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability L1BBTE1rRj-eA3JCHeeYJg 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773139759 10:49:19 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 
9n6JkGUdROSFGDSfYQAdEw 1 1 40000 0 4.3mb 4.3mb +green open .plugins-ml-config IuK839AzSBqJ79-EzPvPRw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability L1BBTE1rRj-eA3JCHeeYJg 1 0 0 0 208b 208b +yellow open sre 5aygQ7VvSY6s3c61XbycNg 1 1 5618 0 1.1mb 1.1mb +yellow open pseudonyms chsT5gAmRaCPHwZgpu74rw 1 1 5626 0 286.9kb 286.9kb + + +--- _cat/count --- +epoch timestamp count +1773139792 10:49:52 51245 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.037 s +measurement window: 30.000 s +processed (OpenSearch): 40_000 +throughput: 1,333.33 docs/s +-------------- + +----- Run Round 3: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability tWuePL_yQNmjYvXyfIw3AA 1 0 0 0 208b 208b +green open .plugins-ml-config zOANW9xsSWK24agvbJjUOQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773139858 10:50:58 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed O5Pu0apVTQ2prxfw_f4Bkw 1 1 40000 0 4.4mb 4.4mb +green open .opensearch-observability tWuePL_yQNmjYvXyfIw3AA 1 0 0 0 208b 208b +green open .plugins-ml-config zOANW9xsSWK24agvbJjUOQ 1 0 1 0 3.9kb 3.9kb +yellow open sre 9xFBenolREOrIpSL65fgUA 1 1 5628 0 1.1mb 1.1mb +yellow open pseudonyms etDOzg1yRp23BdPIqUa86Q 1 1 5633 0 286.2kb 286.2kb + + +--- _cat/count --- +epoch timestamp count +1773139892 10:51:32 51262 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.034 s +measurement 
window: 30.000 s +processed (OpenSearch): 40_000 +throughput: 1,333.33 docs/s +-------------- + +----- Run Round 4: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability EvjiI8pXRF6HvJ6sqbpSRw 1 0 0 0 208b 208b +green open .plugins-ml-config WuQMFz3RSp2NB-oCNGOMZA 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773139956 10:52:36 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 0QTcrtu7RyiSyRos7efdFQ 1 1 65000 0 5.7mb 5.7mb +green open .opensearch-observability EvjiI8pXRF6HvJ6sqbpSRw 1 0 0 0 208b 208b +green open .plugins-ml-config WuQMFz3RSp2NB-oCNGOMZA 1 0 1 0 3.9kb 3.9kb +yellow open sre 1vnFL02cRqGo0qItm5oEPQ 1 1 6253 0 1.4mb 1.4mb +yellow open pseudonyms AecG8NCBQtC3_R1lb93a3A 1 1 6240 0 352.2kb 352.2kb + + +--- _cat/count --- +epoch timestamp count +1773140006 10:53:26 82496 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.032 s +measurement window: 45.000 s +processed (OpenSearch): 65_000 +throughput: 1,444.44 docs/s +-------------- + +----- Run Round 5: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config 7JTeJqsgSYmQmMmjmqQ8jw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability pP0c35tzQLqY-QdoRV0Gaw 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773140072 10:54:32 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search 
is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 MTcPp51gQFaUFonTPxALTg 1 0 9 18 80.6kb 80.6kb +yellow open processed 2xwfYuzxS1iubgIdWK8zdA 1 1 67500 0 5.8mb 5.8mb +green open .plugins-ml-config 7JTeJqsgSYmQmMmjmqQ8jw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability pP0c35tzQLqY-QdoRV0Gaw 1 0 0 0 208b 208b +yellow open sre _A__3FeCSpakdijO5Nfilw 1 1 6246 0 1.4mb 1.4mb +yellow open pseudonyms n_JildKtQ9aJOBnAZN7jhw 1 1 6252 0 359.9kb 359.9kb + + +--- _cat/count --- +epoch timestamp count +1773140122 10:55:22 85391 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.036 s +measurement window: 45.000 s +processed (OpenSearch): 67_500 +throughput: 1,500.00 docs/s +-------------- + +----- Run Round 6: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config HFqnEIUfTuG_eHhwtJI0lA 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability lcEdLdTJTvq74deB88jCBw 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773140187 10:56:27 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed D-9ZuAtkQPu2iamUHpBbIQ 1 1 65000 0 5.6mb 5.6mb +green open .plugins-ml-config HFqnEIUfTuG_eHhwtJI0lA 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability lcEdLdTJTvq74deB88jCBw 1 0 0 0 208b 208b +yellow open sre eSHkGeyJRMONZ5Rmheq-9A 1 1 6092 0 1.4mb 1.4mb +yellow open pseudonyms 
Rq_DQIzhTGSCgiB9sCZCOQ 1 1 6038 0 359.7kb 359.7kb + + +--- _cat/count --- +epoch timestamp count +1773140236 10:57:16 82504 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.035 s +measurement window: 45.000 s +processed (OpenSearch): 65_000 +throughput: 1,444.44 docs/s +-------------- + +----- Run Round 7: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config ITAcInvAROytzKYUuPp0Rw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 8B6CWE3gRLSU-KyMWE6b8A 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773140301 10:58:21 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed I6WJzfGTQsG_-uoVeA-irA 1 1 87500 0 7.9mb 7.9mb +green open .plugins-ml-config ITAcInvAROytzKYUuPp0Rw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 8B6CWE3gRLSU-KyMWE6b8A 1 0 0 0 208b 208b +yellow open sre kmCZCRafQRmnQ7qOA7BVQw 1 1 6245 0 1.6mb 1.6mb +yellow open pseudonyms WRr1GJmGTP-uAzRSjcrXAQ 1 1 6252 0 389.5kb 389.5kb + + +--- _cat/count --- +epoch timestamp count +1773140362 10:59:22 110624 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.033 s +measurement window: 60.000 s +processed (OpenSearch): 87_500 +throughput: 1,458.33 docs/s +-------------- + +----- Run Round 8: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size 
pri.store.size +green open top_queries-2026.03.10-25342 dv7JdD5ZQbK6l2SUFJqW1g 1 0 8 16 81kb 81kb +green open .opensearch-observability 8nEuAtFZRhiIa-oQc4LrWQ 1 0 0 0 208b 208b +green open .plugins-ml-config 688s5erDTFSoLki9p4HaFg 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773140427 11:00:27 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 dv7JdD5ZQbK6l2SUFJqW1g 1 0 8 16 81kb 81kb +yellow open processed sIxo3cZ1Rka_C-sO8MKvlQ 1 1 82500 0 7.6mb 7.6mb +green open .opensearch-observability 8nEuAtFZRhiIa-oQc4LrWQ 1 0 0 0 208b 208b +green open .plugins-ml-config 688s5erDTFSoLki9p4HaFg 1 0 1 0 3.9kb 3.9kb +yellow open sre OiNFT3mdRdGdH73MyYmcyg 1 1 6447 0 1.5mb 1.5mb +yellow open pseudonyms YTAlCVV8T2KWYWOcX9U2JA 1 1 6506 0 427.6kb 427.6kb + + +--- _cat/count --- +epoch timestamp count +1773140488 11:01:28 104386 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.034 s +measurement window: 60.000 s +processed (OpenSearch): 82_500 +throughput: 1,374.99 docs/s +-------------- + +----- Run Round 9: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability QME35PLlSmmi3TC6rZqv_Q 1 0 0 0 208b 208b +green open .plugins-ml-config zGGQHOo0T2iUot0aQ4Op5w 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773140556 11:02:36 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index 
uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed yF17H8PVTA6EdWXOMZjaug 1 1 65000 0 6.3mb 6.3mb +green open .opensearch-observability QME35PLlSmmi3TC6rZqv_Q 1 0 0 0 208b 208b +green open .plugins-ml-config zGGQHOo0T2iUot0aQ4Op5w 1 0 1 0 3.9kb 3.9kb +yellow open sre JTHGdLguQEyaV68ixpBLew 1 1 4731 0 1.1mb 1.1mb +yellow open pseudonyms Ss2ViQ40SmeFG7xGYgE7TA 1 1 4697 0 293.8kb 293.8kb + + +--- _cat/count --- +epoch timestamp count +1773140617 11:03:37 82496 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.044 s +measurement window: 60.000 s +processed (OpenSearch): 65_000 +throughput: 1,083.33 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 9 +total runtime: 405.001 s +total processed: 552_500 + +throughput (weighted): 1,364.19 docs/s +throughput (median): 1,374.99 docs/s +throughput (average): 1,367.28 docs/s +throughput (min/max): 1,083.33 / 1,500.00 docs/s +throughput (std dev): 123.47 docs/s +================================ diff --git a/benchmark_results/20260310_114644/asyncNG_python3.12.txt b/benchmark_results/20260310_114644/asyncNG_python3.12.txt new file mode 100644 index 000000000..89c35c472 --- /dev/null +++ b/benchmark_results/20260310_114644/asyncNG_python3.12.txt @@ -0,0 +1,463 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-10T12:20:52.101701 +timestamp (UTC) : 2026-03-10T11:20:52.101704+00:00 +python version : 3.12.12 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 1 + ↳ mode : logprep-ng + ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260310_114644/asyncNG_python3.12.txt +processed_index : processed 
+prometheus_multiproc_dir : /tmp/logprep +runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config qURp8X-hQAaKc_KQBzuc4Q 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability RZk55MljQm6XIUiPSH4Dag 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773141706 11:21:46 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed pLH1ngXsRsi4foNDpcr14w 1 1 30000 0 3.5mb 3.5mb +green open .plugins-ml-config qURp8X-hQAaKc_KQBzuc4Q 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability RZk55MljQm6XIUiPSH4Dag 1 0 0 0 208b 208b +yellow open sre y36g6YgjQuO4WohhzjE8iw 1 1 4362 0 941.9kb 941.9kb +yellow open pseudonyms nG2lWUmlRyehxnCPdhGt9g 1 1 4374 0 204.2kb 204.2kb + + +--- _cat/count --- +epoch timestamp count +1773141739 11:22:19 38737 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.036 s +measurement window: 30.000 s +processed (OpenSearch): 30_000 +throughput: 1,000.00 docs/s +-------------- + +----- Run Round 2: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config 527LnEIXRXGZabP3jFs55w 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability yxUHZZLZQFKfGVNXSLKgsg 1 0 0 0 208b 
208b + + +--- _cat/count --- +epoch timestamp count +1773141804 11:23:24 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed yt14um7oQVa6jQYnOjuCzw 1 1 30000 0 3.4mb 3.4mb +green open .opensearch-observability yxUHZZLZQFKfGVNXSLKgsg 1 0 0 0 208b 208b +green open .plugins-ml-config 527LnEIXRXGZabP3jFs55w 1 0 1 0 3.9kb 3.9kb +yellow open sre KddhBiwlQoyELuLXisuaAA 1 1 4386 0 974.4kb 974.4kb +yellow open pseudonyms JRBGgITdQm-8SMEitpXNEA 1 1 4368 0 234.1kb 234.1kb + + +--- _cat/count --- +epoch timestamp count +1773141837 11:23:57 38755 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.034 s +measurement window: 30.000 s +processed (OpenSearch): 30_000 +throughput: 999.99 docs/s +-------------- + +----- Run Round 3: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability pjthS6PeS_GkZNFf2ZcFpA 1 0 0 0 208b 208b +green open .plugins-ml-config JX4Dlb_uSCq7T1W8vVUdOw 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773141902 11:25:02 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 _XAzXa-VRZWAMD0dc0z0Fg 1 0 8 16 69.2kb 69.2kb +yellow open processed KVIx75vxQI2AdSPMEwDF5Q 1 1 30000 0 3.3mb 3.3mb +green open .opensearch-observability pjthS6PeS_GkZNFf2ZcFpA 1 0 0 0 208b 208b +green open .plugins-ml-config 
JX4Dlb_uSCq7T1W8vVUdOw 1 0 1 0 3.9kb 3.9kb +yellow open sre UloRgdo2TlKRN7lHNhpNsQ 1 1 4367 0 924.1kb 924.1kb +yellow open pseudonyms j6pld3rXQtuP5zkxRFYnPg 1 1 4374 0 242.7kb 242.7kb + + +--- _cat/count --- +epoch timestamp count +1773141935 11:25:35 38750 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.032 s +measurement window: 30.000 s +processed (OpenSearch): 30_000 +throughput: 1,000.00 docs/s +-------------- + +----- Run Round 4: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config 2DK8U6OmSyW2d2e1I_E6xQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability nWDddVeDSDeXHRZm9lmCpQ 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773141999 11:26:39 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed Dy4zsNKMQQGiBXWP4P_fIw 1 1 47500 0 3.9mb 3.9mb +green open .opensearch-observability nWDddVeDSDeXHRZm9lmCpQ 1 0 0 0 208b 208b +green open .plugins-ml-config 2DK8U6OmSyW2d2e1I_E6xQ 1 0 1 0 3.9kb 3.9kb +yellow open sre Nn4SuE-fQSipF5dfmoJ4ew 1 1 4689 0 1mb 1mb +yellow open pseudonyms KHU2D5tDSKGxZbsGCL4q6A 1 1 4682 0 270.5kb 270.5kb + + +--- _cat/count --- +epoch timestamp count +1773142048 11:27:28 60621 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.033 s +measurement window: 45.000 s +processed (OpenSearch): 47_500 +throughput: 1,055.55 docs/s +-------------- + +----- Run Round 5: 45 seconds ----- + +=== OpenSearch snapshot 
(before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability m873XzwBQr67H-si4terAg 1 0 0 0 208b 208b +green open .plugins-ml-config 8HAVu81gSDiVeHkD_No_jA 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773142112 11:28:32 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed F3qPm27QQLmzNV4zqAg_NA 1 1 47500 0 3.9mb 3.9mb +green open .opensearch-observability m873XzwBQr67H-si4terAg 1 0 0 0 208b 208b +green open .plugins-ml-config 8HAVu81gSDiVeHkD_No_jA 1 0 1 0 3.9kb 3.9kb +yellow open sre 0-559WijQ1KCSH1eBrfzuA 1 1 4679 0 1mb 1mb +yellow open pseudonyms lmgqw11gTcWucl8320DrLw 1 1 4698 0 269.8kb 269.8kb + + +--- _cat/count --- +epoch timestamp count +1773142161 11:29:21 60630 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.032 s +measurement window: 45.000 s +processed (OpenSearch): 47_500 +throughput: 1,055.55 docs/s +-------------- + +----- Run Round 6: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 6pV90QVTRLeuhHGmVv7Eqw 1 0 8 16 74.3kb 74.3kb +green open .opensearch-observability _nWuczwbTS2EtwLYc1EnIw 1 0 0 0 208b 208b +green open .plugins-ml-config xXQtIHTaQeqIh5J1op25IQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773142226 11:30:26 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- 
_cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 6pV90QVTRLeuhHGmVv7Eqw 1 0 8 16 74.3kb 74.3kb +yellow open processed RhnMmdt_TdyPihuDw0S5Uw 1 1 50000 0 4mb 4mb +green open .plugins-ml-config xXQtIHTaQeqIh5J1op25IQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability _nWuczwbTS2EtwLYc1EnIw 1 0 0 0 208b 208b +yellow open sre ORtxJK5qQHWqZTPx2C8XxA 1 1 4693 0 1mb 1mb +yellow open pseudonyms OTqIRQUnT1eeS9MbukaDqQ 1 1 4739 0 285kb 285kb + + +--- _cat/count --- +epoch timestamp count +1773142275 11:31:15 63768 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.035 s +measurement window: 45.000 s +processed (OpenSearch): 50_000 +throughput: 1,111.11 docs/s +-------------- + +----- Run Round 7: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability p_AUWtLpRpyMypqizqTOig 1 0 0 0 208b 208b +green open .plugins-ml-config Zpbjtp0RRZ-9lYKtLf0wcA 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773142340 11:32:20 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed BX_pUvliQvCf3PbMVS8vUA 1 1 65000 0 6.2mb 6.2mb +green open .opensearch-observability p_AUWtLpRpyMypqizqTOig 1 0 0 0 208b 208b +green open .plugins-ml-config Zpbjtp0RRZ-9lYKtLf0wcA 1 0 1 0 3.9kb 3.9kb +yellow open sre aExQe77SQR-NGMlm_CFwOQ 1 1 4689 0 1.2mb 1.2mb +yellow open pseudonyms gNJ4yw5QTbqjiviBEDycpQ 1 1 4689 0 328.2kb 328.2kb + + +--- _cat/count --- +epoch timestamp count +1773142401 
11:33:21 82494 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.035 s +measurement window: 60.000 s +processed (OpenSearch): 65_000 +throughput: 1,083.33 docs/s +-------------- + +----- Run Round 8: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config siziilcbQRu_h5HXR8Rxjg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability w_ggeYyIToyP2BD0xCpVig 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773142465 11:34:25 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 sPK_PKFQREOSmS5LVF8pZA 1 0 9 18 101.7kb 101.7kb +yellow open processed kI4K46jhTy-hT-7S-pMZ7w 1 1 62500 0 6mb 6mb +green open .plugins-ml-config siziilcbQRu_h5HXR8Rxjg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability w_ggeYyIToyP2BD0xCpVig 1 0 0 0 208b 208b +yellow open sre TqULOHejStmMB_5l8E_v4A 1 1 4682 0 1.1mb 1.1mb +yellow open pseudonyms QWkkVgh7T3Wmnm3n2R12Bg 1 1 4694 0 291.3kb 291.3kb + + +--- _cat/count --- +epoch timestamp count +1773142527 11:35:27 79388 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.035 s +measurement window: 60.000 s +processed (OpenSearch): 62_500 +throughput: 1,041.67 docs/s +-------------- + +----- Run Round 9: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open 
.opensearch-observability fqTKW3dSRaSROCbvNIes3A 1 0 0 0 208b 208b +green open .plugins-ml-config rYv2BXNWTxa2RbBoWvIg2A 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773142591 11:36:31 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed Czlip08tTW-PO0_WrNLBlA 1 1 62500 0 6mb 6mb +green open .opensearch-observability fqTKW3dSRaSROCbvNIes3A 1 0 0 0 208b 208b +green open .plugins-ml-config rYv2BXNWTxa2RbBoWvIg2A 1 0 1 0 3.9kb 3.9kb +yellow open sre gZoTmPBNS--bIYMqzSgvZQ 1 1 4696 0 1.1mb 1.1mb +yellow open pseudonyms 4et-xUB9QxCPWQWEoHn20Q 1 1 4690 0 304.1kb 304.1kb + + +--- _cat/count --- +epoch timestamp count +1773142652 11:37:32 79383 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.032 s +measurement window: 60.000 s +processed (OpenSearch): 62_500 +throughput: 1,041.66 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 9 +total runtime: 405.001 s +total processed: 425_000 + +throughput (weighted): 1,049.38 docs/s +throughput (median): 1,041.67 docs/s +throughput (average): 1,043.21 docs/s +throughput (min/max): 999.99 / 1,111.11 docs/s +throughput (std dev): 38.94 docs/s +================================ diff --git a/benchmark_results/20260310_114644/asyncNG_python3.13.txt b/benchmark_results/20260310_114644/asyncNG_python3.13.txt new file mode 100644 index 000000000..dfac3c584 --- /dev/null +++ b/benchmark_results/20260310_114644/asyncNG_python3.13.txt @@ -0,0 +1,465 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-10T12:54:33.437232 +timestamp (UTC) : 2026-03-10T11:54:33.437236+00:00 +python version : 3.13.9 +---------------------------------------- 
+bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 1 + ↳ mode : logprep-ng + ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260310_114644/asyncNG_python3.13.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 I_xv31UiQriOJfobYjhmOg 1 0 8 16 41.8kb 41.8kb +green open .opensearch-observability ztMiR5HIRvqpmGZsU_BnBQ 1 0 0 0 208b 208b +green open .plugins-ml-config ji8Y9AnSTAaOfSVm9AMhXg 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773143728 11:55:28 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 I_xv31UiQriOJfobYjhmOg 1 0 8 16 41.8kb 41.8kb +yellow open processed MnfF_3PCTaelpIquQ-D2VA 1 1 30000 0 3.4mb 3.4mb +green open .opensearch-observability ztMiR5HIRvqpmGZsU_BnBQ 1 0 0 0 208b 208b +green open .plugins-ml-config ji8Y9AnSTAaOfSVm9AMhXg 1 0 1 0 3.9kb 3.9kb +yellow open sre wgZ-y1VASkGvBKTnZixmcQ 1 1 4372 0 915.8kb 915.8kb +yellow open pseudonyms o_SYaU7bQg-BqXpo7X1BRA 1 1 4370 0 257.6kb 257.6kb + + +--- _cat/count --- +epoch timestamp count +1773143761 11:56:01 38751 + + +--- _cat/aliases --- +alias index filter routing.index 
routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.032 s +measurement window: 30.000 s +processed (OpenSearch): 30_000 +throughput: 1,000.00 docs/s +-------------- + +----- Run Round 2: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability X2ZS_rvBRjuY3T8XGagaWw 1 0 0 0 208b 208b +green open .plugins-ml-config 7XhPz67bTluTSndxZ5AzXg 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773143825 11:57:05 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 6643q4wQRmWk7ZjvgsZ0rQ 1 1 30000 0 3.4mb 3.4mb +green open .opensearch-observability X2ZS_rvBRjuY3T8XGagaWw 1 0 0 0 208b 208b +green open .plugins-ml-config 7XhPz67bTluTSndxZ5AzXg 1 0 1 0 3.9kb 3.9kb +yellow open sre dJYe8r6ARte2dXTaUr4bkw 1 1 4365 0 936.5kb 936.5kb +yellow open pseudonyms jWUGsg__RXKXE4Wmi2a68A 1 1 4368 0 259.6kb 259.6kb + + +--- _cat/count --- +epoch timestamp count +1773143858 11:57:38 38734 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.035 s +measurement window: 30.000 s +processed (OpenSearch): 30_000 +throughput: 1,000.00 docs/s +-------------- + +----- Run Round 3: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability YBGrVG7rSSuAa6JjVVXfXw 1 0 0 0 208b 208b +green open .plugins-ml-config eidC_2AuSgmzQ3mcQSfg7Q 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- 
+epoch timestamp count +1773143921 11:58:41 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed DPq2_avaS9C3r1_EJ5nmVg 1 1 30000 0 3.3mb 3.3mb +green open .opensearch-observability YBGrVG7rSSuAa6JjVVXfXw 1 0 0 0 208b 208b +green open .plugins-ml-config eidC_2AuSgmzQ3mcQSfg7Q 1 0 1 0 3.9kb 3.9kb +yellow open sre E2it4BdxRmW9uqS-CRHl2A 1 1 4368 0 914.7kb 914.7kb +yellow open pseudonyms zex0d1kjQvKySA_NX7zqog 1 1 4376 0 239.5kb 239.5kb + + +--- _cat/count --- +epoch timestamp count +1773143954 11:59:14 38745 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.033 s +measurement window: 30.000 s +processed (OpenSearch): 30_000 +throughput: 1,000.00 docs/s +-------------- + +----- Run Round 4: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 hz-t4A3wQUOqXFynvq94Tg 1 0 8 16 79.9kb 79.9kb +green open .opensearch-observability vJraBAgiTz-TRcZA-qkxow 1 0 0 0 208b 208b +green open .plugins-ml-config CpvyXPTGTtuSxtNv_4Owmg 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773144017 12:00:17 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 hz-t4A3wQUOqXFynvq94Tg 1 0 8 16 79.9kb 79.9kb +yellow open processed rkucaoTuT-GlrmWwPcTQ0w 1 1 50000 0 4.8mb 4.8mb +green open .plugins-ml-config 
CpvyXPTGTtuSxtNv_4Owmg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability vJraBAgiTz-TRcZA-qkxow 1 0 0 0 208b 208b +yellow open sre MGimxtLFSIeCTkYBrL3WvQ 1 1 4807 0 1.1mb 1.1mb +yellow open pseudonyms m4ugOB34RJqstihgsvM30g 1 1 4707 0 293kb 293kb + + +--- _cat/count --- +epoch timestamp count +1773144067 12:01:07 63751 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.038 s +measurement window: 45.000 s +processed (OpenSearch): 50_000 +throughput: 1,111.11 docs/s +-------------- + +----- Run Round 5: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability F8SYagy1TDWVgHRJJUN77w 1 0 0 0 208b 208b +green open .plugins-ml-config z0vOy7RxTym2hFi7y0Buew 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773144130 12:02:10 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed Ynngz-V5Ts29CRZU79LfIg 1 1 47500 0 4.7mb 4.7mb +green open .opensearch-observability F8SYagy1TDWVgHRJJUN77w 1 0 0 0 208b 208b +green open .plugins-ml-config z0vOy7RxTym2hFi7y0Buew 1 0 1 0 3.9kb 3.9kb +yellow open sre qsdSGzxgR6io2o69J1uULQ 1 1 4712 0 1mb 1mb +yellow open pseudonyms ZkY7ZyJ6T5e7arGy0Cdd5A 1 1 4698 0 231.5kb 231.5kb + + +--- _cat/count --- +epoch timestamp count +1773144179 12:02:59 60629 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.032 s +measurement window: 45.000 s +processed (OpenSearch): 47_500 +throughput: 1,055.55 docs/s 
+-------------- + +----- Run Round 6: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config 4mVF2oh0R5eo8X7okMFMRg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability kB9ZgpZuTKe7Jfuigv70-A 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773144243 12:04:03 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed u8sS3SX8TrqtMJjqU1BqOg 1 1 47500 0 4.6mb 4.6mb +green open .plugins-ml-config 4mVF2oh0R5eo8X7okMFMRg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability kB9ZgpZuTKe7Jfuigv70-A 1 0 0 0 208b 208b +yellow open sre 27TIfJxPQAmJdYBGa_V2gw 1 1 4691 0 1.1mb 1.1mb +yellow open pseudonyms eM8L7T3zRa6s3FEC_FMAKw 1 1 4687 0 283.3kb 283.3kb + + +--- _cat/count --- +epoch timestamp count +1773144292 12:04:52 60626 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.033 s +measurement window: 45.000 s +processed (OpenSearch): 47_500 +throughput: 1,055.55 docs/s +-------------- + +----- Run Round 7: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability WlSHsARmRvuKZ08uNrjUGg 1 0 0 0 208b 208b +green open .plugins-ml-config 3Yj4LkPFRImpm7VbEHhwTQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773144356 12:05:56 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- 
_cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed lQ6WSC1dQOOmD1Oimv0BXg 1 1 62500 0 6.1mb 6.1mb +green open .plugins-ml-config 3Yj4LkPFRImpm7VbEHhwTQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability WlSHsARmRvuKZ08uNrjUGg 1 0 0 0 208b 208b +yellow open sre iqMuWQZjQnGjB98-uiS-cA 1 1 4695 0 1.1mb 1.1mb +yellow open pseudonyms t4iNpAx-TU6e6IsTQ1gjTQ 1 1 4552 0 277.1kb 277.1kb + + +--- _cat/count --- +epoch timestamp count +1773144417 12:06:57 79377 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.034 s +measurement window: 60.000 s +processed (OpenSearch): 62_500 +throughput: 1,041.66 docs/s +-------------- + +----- Run Round 8: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability dBLmUMg7RoaDKtbBLRtSbw 1 0 0 0 208b 208b +green open .plugins-ml-config 7onXSIPETwCIS0WuVjMYBQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773144481 12:08:01 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 8vYXllK0QhGDJYr9DKurTg 1 1 65000 0 6.2mb 6.2mb +green open .opensearch-observability dBLmUMg7RoaDKtbBLRtSbw 1 0 0 0 208b 208b +green open .plugins-ml-config 7onXSIPETwCIS0WuVjMYBQ 1 0 1 0 3.9kb 3.9kb +yellow open sre MVfehTxTROeCPxHlWNbEHw 1 1 4691 0 1.1mb 1.1mb +yellow open pseudonyms uxw3LtF5RNyNzp9D6mOZGw 1 1 4683 0 278.1kb 278.1kb + + +--- _cat/count --- +epoch timestamp count +1773144542 12:09:02 82501 + + +--- _cat/aliases --- +alias index filter routing.index 
routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.032 s +measurement window: 60.000 s +processed (OpenSearch): 65_000 +throughput: 1,083.33 docs/s +-------------- + +----- Run Round 9: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 SxW3D0SRQ52N_EF5tMaRwg 1 0 8 16 80.2kb 80.2kb +green open .plugins-ml-config 7dTZoopnS9aS1aFDO_zG-A 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 1hyqTPRzQWKkYsV3swCWsQ 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773144605 12:10:05 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 SxW3D0SRQ52N_EF5tMaRwg 1 0 8 16 80.2kb 80.2kb +yellow open processed A3FCiy-2QnKDOY2wrqW8iw 1 1 65000 0 6.2mb 6.2mb +green open .plugins-ml-config 7dTZoopnS9aS1aFDO_zG-A 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 1hyqTPRzQWKkYsV3swCWsQ 1 0 0 0 208b 208b +yellow open sre N0HqW2xRR3GCrWXaPIO6PQ 1 1 4679 0 1.2mb 1.2mb +yellow open pseudonyms neSFnajFRxGv1mrognFv2g 1 1 4691 0 270.7kb 270.7kb + + +--- _cat/count --- +epoch timestamp count +1773144667 12:11:07 82509 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.033 s +measurement window: 60.000 s +processed (OpenSearch): 65_000 +throughput: 1,083.33 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 9 +total runtime: 405.001 s +total processed: 427_500 + +throughput (weighted): 1,055.55 docs/s +throughput (median): 1,055.55 docs/s +throughput (average): 
1,047.84 docs/s +throughput (min/max): 1,000.00 / 1,111.11 docs/s +throughput (std dev): 41.15 docs/s +================================ diff --git a/benchmark_results/20260310_114644/asyncNG_python3.14.txt b/benchmark_results/20260310_114644/asyncNG_python3.14.txt new file mode 100644 index 000000000..7e350dc55 --- /dev/null +++ b/benchmark_results/20260310_114644/asyncNG_python3.14.txt @@ -0,0 +1,464 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-10T13:27:59.682681 +timestamp (UTC) : 2026-03-10T12:27:59.682684+00:00 +python version : 3.14.0 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 1 + ↳ mode : logprep-ng + ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260310_114644/asyncNG_python3.14.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability Y-yoEoY2SBadAyzjcl610w 1 0 0 0 208b 208b +green open .plugins-ml-config 7em18HdfRESYMOc2Eu4UHg 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773145733 12:28:53 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 8HTmbKfTQPWK1AfbmkjzHQ 1 1 30000 0 3.3mb 
3.3mb +green open .plugins-ml-config 7em18HdfRESYMOc2Eu4UHg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability Y-yoEoY2SBadAyzjcl610w 1 0 0 0 208b 208b +yellow open sre UAcyxVSLSUCY360brDSGYg 1 1 4374 0 993.7kb 993.7kb +yellow open pseudonyms x_-RtrZhSDmDUH00IX2dXw 1 1 4380 0 229kb 229kb + + +--- _cat/count --- +epoch timestamp count +1773145767 12:29:27 38755 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.035 s +measurement window: 30.000 s +processed (OpenSearch): 30_000 +throughput: 999.99 docs/s +-------------- + +----- Run Round 2: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 eScOqdCsTja6b4SVZtL-Fg 1 0 8 16 73.3kb 73.3kb +green open .opensearch-observability LOf3NiaFSam63_AVT2NayA 1 0 0 0 208b 208b +green open .plugins-ml-config GtFKSirSQEK7i18i8VNFKg 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773145830 12:30:30 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 eScOqdCsTja6b4SVZtL-Fg 1 0 8 16 73.3kb 73.3kb +yellow open processed mo2nLn40RRazLcF2jP4wng 1 1 30000 0 3.4mb 3.4mb +green open .opensearch-observability LOf3NiaFSam63_AVT2NayA 1 0 0 0 208b 208b +green open .plugins-ml-config GtFKSirSQEK7i18i8VNFKg 1 0 1 0 3.9kb 3.9kb +yellow open sre AwodoNPnSi2c1hXSbnls6g 1 1 4379 0 977.4kb 977.4kb +yellow open pseudonyms MXoFwE2tS_6NTE19Y99mnw 1 1 4369 0 219.1kb 219.1kb + + +--- _cat/count --- +epoch timestamp count +1773145863 12:31:03 38757 + + +--- _cat/aliases --- +alias index filter 
routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.032 s +measurement window: 30.000 s +processed (OpenSearch): 30_000 +throughput: 1,000.00 docs/s +-------------- + +----- Run Round 3: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config rhuNRvHsQ_q64N5pVnlgxQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability oXS9zgOQSsi4RDuQxvxktg 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773145926 12:32:06 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed KqklODL0SwWgsfhsXiLO0w 1 1 30000 0 3.4mb 3.4mb +green open .plugins-ml-config rhuNRvHsQ_q64N5pVnlgxQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability oXS9zgOQSsi4RDuQxvxktg 1 0 0 0 208b 208b +yellow open sre fEzl_d_tTsy-WEDuNDwmzw 1 1 4364 0 951.2kb 951.2kb +yellow open pseudonyms yB_9FDq4Tji4trPpRoUe5A 1 1 4373 0 242.7kb 242.7kb + + +--- _cat/count --- +epoch timestamp count +1773145960 12:32:40 38738 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.036 s +measurement window: 30.000 s +processed (OpenSearch): 30_000 +throughput: 1,000.00 docs/s +-------------- + +----- Run Round 4: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability 4QyYkV6TQtuNzly8V-gehQ 1 0 0 0 208b 208b +green open .plugins-ml-config 1LuTJ8s8SWGPdi94zzuHhw 1 0 1 0 3.9kb 3.9kb + + +--- 
_cat/count --- +epoch timestamp count +1773146023 12:33:43 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed Te_n8RWsQ0KJvKK1khIrlg 1 1 47500 0 4.7mb 4.7mb +green open .opensearch-observability 4QyYkV6TQtuNzly8V-gehQ 1 0 0 0 208b 208b +green open .plugins-ml-config 1LuTJ8s8SWGPdi94zzuHhw 1 0 1 0 3.9kb 3.9kb +yellow open sre 50KmSfKVRHKT4PpGftZ9bA 1 1 4684 0 1mb 1mb +yellow open pseudonyms qHkqu10XQJiPVNs7bQQIGg 1 1 4686 0 271.3kb 271.3kb + + +--- _cat/count --- +epoch timestamp count +1773146073 12:34:33 60628 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.035 s +measurement window: 45.000 s +processed (OpenSearch): 47_500 +throughput: 1,055.55 docs/s +-------------- + +----- Run Round 5: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 IxdUyj8JRyCm26QNggMVHQ 1 0 8 16 61.6kb 61.6kb +green open .opensearch-observability XeqnedYCTDmdhJ0jmIP8-g 1 0 0 0 208b 208b +green open .plugins-ml-config XhLawENBSmaN-FvcZEfifg 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773146136 12:35:36 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 IxdUyj8JRyCm26QNggMVHQ 1 0 8 16 61.6kb 61.6kb +yellow open processed 40Zw8znvQ2m2jAhzAsxJOg 1 1 50000 0 4.7mb 4.7mb +green open .opensearch-observability 
XeqnedYCTDmdhJ0jmIP8-g 1 0 0 0 208b 208b +green open .plugins-ml-config XhLawENBSmaN-FvcZEfifg 1 0 1 0 3.9kb 3.9kb +yellow open sre 17hOtqdvRDm3zgwC9Dwhng 1 1 4684 0 1.1mb 1.1mb +yellow open pseudonyms MywuZYAkTWONDN3h3yczsw 1 1 4680 0 273.2kb 273.2kb + + +--- _cat/count --- +epoch timestamp count +1773146186 12:36:26 63759 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.034 s +measurement window: 45.000 s +processed (OpenSearch): 50_000 +throughput: 1,111.11 docs/s +-------------- + +----- Run Round 6: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config qZaV-M09Qzi7D5n0Gq6OMA 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability hcHkIfVWQr2m7kaXgBz9Xg 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773146249 12:37:29 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed MnI7YvuGSTmKj-74TR-2_A 1 1 50000 0 4.9mb 4.9mb +green open .plugins-ml-config qZaV-M09Qzi7D5n0Gq6OMA 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability hcHkIfVWQr2m7kaXgBz9Xg 1 0 0 0 208b 208b +yellow open sre PqoCdebnRquRlyy_Phrk7Q 1 1 4679 0 1mb 1mb +yellow open pseudonyms iy2YRNUyQGmIfLscS_xsHQ 1 1 4685 0 255.5kb 255.5kb + + +--- _cat/count --- +epoch timestamp count +1773146298 12:38:18 63741 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.031 s +measurement window: 45.000 s +processed (OpenSearch): 50_000 +throughput: 1,111.11 docs/s 
+-------------- + +----- Run Round 7: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability tUZM9KHbRNy9ikTUUhVSsg 1 0 0 0 208b 208b +green open .plugins-ml-config HuAU0aysShKw1VqY2oqRFw 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773146361 12:39:21 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 QxrQenXFTEib_JDqlwQfMw 1 0 9 18 101.3kb 101.3kb +yellow open processed zp9xCs97QiGaPjxKpOXgfA 1 1 62500 0 6mb 6mb +green open .opensearch-observability tUZM9KHbRNy9ikTUUhVSsg 1 0 0 0 208b 208b +green open .plugins-ml-config HuAU0aysShKw1VqY2oqRFw 1 0 1 0 3.9kb 3.9kb +yellow open sre NY0dnxYyQGOoM_fFp4YdjQ 1 1 4693 0 1.2mb 1.2mb +yellow open pseudonyms okETaS-0RJCzCnlBML2uWQ 1 1 4681 0 275.5kb 275.5kb + + +--- _cat/count --- +epoch timestamp count +1773146422 12:40:22 79379 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.036 s +measurement window: 60.000 s +processed (OpenSearch): 62_500 +throughput: 1,041.66 docs/s +-------------- + +----- Run Round 8: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability vmKR2wz2QeyC9U2QcWM1fA 1 0 0 0 208b 208b +green open .plugins-ml-config eKEyvdORQNWUT73s9j-w7g 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773146485 12:41:25 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search 
is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed djqBfC4tQ1-EZVn3f7_8lw 1 1 65000 0 6.2mb 6.2mb +green open .plugins-ml-config eKEyvdORQNWUT73s9j-w7g 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability vmKR2wz2QeyC9U2QcWM1fA 1 0 0 0 208b 208b +yellow open sre UoGzmiCYSneaBq_XYlbwJg 1 1 4817 0 1.2mb 1.2mb +yellow open pseudonyms QMwDKVLNS4SpuR6EJj8ylg 1 1 4737 0 300.2kb 300.2kb + + +--- _cat/count --- +epoch timestamp count +1773146547 12:42:27 82506 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.034 s +measurement window: 60.000 s +processed (OpenSearch): 65_000 +throughput: 1,083.33 docs/s +-------------- + +----- Run Round 9: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability ienQTDobRt-Atfck40B57A 1 0 0 0 208b 208b +green open .plugins-ml-config 3CKvJaitQBeWvNg3yhGnxw 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773146610 12:43:30 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed _9ePdVa-TH-pq5fUffGZ3g 1 1 65000 0 6.2mb 6.2mb +green open .opensearch-observability ienQTDobRt-Atfck40B57A 1 0 0 0 208b 208b +green open .plugins-ml-config 3CKvJaitQBeWvNg3yhGnxw 1 0 1 0 3.9kb 3.9kb +yellow open sre Pbnhi4uXRBiWgDljH_cHcQ 1 1 4681 0 1.2mb 1.2mb +yellow open pseudonyms eh06XRVlQOiKWrk9I3NFyg 1 1 4682 0 312.3kb 312.3kb + + +--- _cat/count --- +epoch timestamp count +1773146671 
12:44:31 82497 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.037 s +measurement window: 60.000 s +processed (OpenSearch): 65_000 +throughput: 1,083.33 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 9 +total runtime: 405.001 s +total processed: 430_000 + +throughput (weighted): 1,061.73 docs/s +throughput (median): 1,055.55 docs/s +throughput (average): 1,054.01 docs/s +throughput (min/max): 999.99 / 1,111.11 docs/s +throughput (std dev): 46.30 docs/s +================================ diff --git a/benchmark_results/20260310_114644/nonNG_python3.11.txt b/benchmark_results/20260310_114644/nonNG_python3.11.txt new file mode 100644 index 000000000..a0477efc9 --- /dev/null +++ b/benchmark_results/20260310_114644/nonNG_python3.11.txt @@ -0,0 +1,464 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-10T12:03:48.919675 +timestamp (UTC) : 2026-03-10T11:03:48.919679+00:00 +python version : 3.11.14 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 0 + ↳ mode : logprep + ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260310_114644/nonNG_python3.11.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config e0dNQ2vlSWy-u9bIhSyqzQ 1 
0 1 0 3.9kb 3.9kb +green open .opensearch-observability 0ixjhV-YRFWedi8wwZ6DpA 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773140682 11:04:42 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 xys1N9SHQWGwzunQXyTM8g 1 0 9 18 80.9kb 80.9kb +yellow open processed sGRLNDwTSTqIVIGUXvEkZA 1 1 38823 0 5.1mb 5.1mb +green open .plugins-ml-config e0dNQ2vlSWy-u9bIhSyqzQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 0ixjhV-YRFWedi8wwZ6DpA 1 0 0 0 208b 208b +yellow open sre KRCsqvtETYCgaMk5mpBqVA 1 1 4753 0 1mb 1mb +yellow open pseudonyms OLOPiy0PS06gZ-M0Hj4Lkw 1 1 4746 0 269.4kb 269.4kb + + +--- _cat/count --- +epoch timestamp count +1773140717 11:05:17 48332 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.041 s +measurement window: 30.000 s +processed (OpenSearch): 38_823 +throughput: 1,294.10 docs/s +-------------- + +----- Run Round 2: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config hcYEqSHOTcighhKWCvQ6Cg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 1E3wgaLTRXiqDKcDzy3_lw 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773140781 11:06:21 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 9TP8NrSVQl6C5ft7Mbc-gA 1 1 33989 0 4.5mb 4.5mb +green open 
.plugins-ml-config hcYEqSHOTcighhKWCvQ6Cg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 1E3wgaLTRXiqDKcDzy3_lw 1 0 0 0 208b 208b +yellow open sre jYsPzo3lR_GAKfzQkk71HA 1 1 4249 0 936.5kb 936.5kb +yellow open pseudonyms 8N7__rR9T_qHXab5iPVYnQ 1 1 4262 0 240.2kb 240.2kb + + +--- _cat/count --- +epoch timestamp count +1773140813 11:06:53 42501 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.039 s +measurement window: 30.000 s +processed (OpenSearch): 33_989 +throughput: 1,132.96 docs/s +-------------- + +----- Run Round 3: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config ALbsk9XRQdyrzoOvXr6_Xw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability C7IMFIveRWaWed0RkOuAmA 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773140879 11:07:59 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed HOiBSU97SNq9nOvQ_lGmNg 1 1 25988 0 3.4mb 3.4mb +green open .plugins-ml-config ALbsk9XRQdyrzoOvXr6_Xw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability C7IMFIveRWaWed0RkOuAmA 1 0 0 0 208b 208b +yellow open sre eQuHAqoESRurtnLhbJgSPg 1 1 3251 0 729.6kb 729.6kb +yellow open pseudonyms D1VRuZnkSwaSwXgz7fEuEA 1 1 3261 0 232kb 232kb + + +--- _cat/count --- +epoch timestamp count +1773140911 11:08:31 32501 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.046 s +measurement window: 30.000 s +processed (OpenSearch): 25_988 
+throughput: 866.26 docs/s +-------------- + +----- Run Round 4: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability Y8BA1ot5Tuqnte2qDkwVkw 1 0 0 0 208b 208b +green open .plugins-ml-config it8qid6uSEyb1O2qBRTtMA 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773140980 11:09:40 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 3jpkmJf8S72GNFkT7UjGnw 1 0 9 18 81.6kb 81.6kb +yellow open processed PcQ5ce3CQPWBgM2ryKql9A 1 1 46002 0 4.6mb 4.6mb +green open .plugins-ml-config it8qid6uSEyb1O2qBRTtMA 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability Y8BA1ot5Tuqnte2qDkwVkw 1 0 0 0 208b 208b +yellow open sre 2R49DDPURBirxOPxVVJ4jg 1 1 4007 0 971.3kb 971.3kb +yellow open pseudonyms Vue0yAx2T7GF49ZJapeYLQ 1 1 3994 0 268.8kb 268.8kb + + +--- _cat/count --- +epoch timestamp count +1773141028 11:10:28 57510 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.038 s +measurement window: 45.000 s +processed (OpenSearch): 46_002 +throughput: 1,022.27 docs/s +-------------- + +----- Run Round 5: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config ZWljULjpRVivsPA8KhsEAg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability GI_Z4tpERa2TxIRRSvNoIA 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773141092 11:11:32 1 + + +--- _cat/aliases --- +alias index filter 
routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed h6VzerBpQfGfwdGxbIs6sw 1 1 55995 0 7.5mb 7.5mb +green open .plugins-ml-config ZWljULjpRVivsPA8KhsEAg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability GI_Z4tpERa2TxIRRSvNoIA 1 0 0 0 208b 208b +yellow open sre Nu15yppQRmCrNP_oELyAVA 1 1 4752 0 1.1mb 1.1mb +yellow open pseudonyms LkmhVj4sRmCR03WunwEKNA 1 1 4751 0 309.2kb 309.2kb + + +--- _cat/count --- +epoch timestamp count +1773141140 11:12:20 70001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.038 s +measurement window: 45.000 s +processed (OpenSearch): 55_995 +throughput: 1,244.33 docs/s +-------------- + +----- Run Round 6: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability rs3PBvC3ToSxl2LsnMBMMQ 1 0 0 0 208b 208b +green open .plugins-ml-config LPB7fh76RCyVMuiINKYR5w 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773141206 11:13:26 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 2CRTiebITJ2YbaJMs98mQw 1 1 56003 0 5.8mb 5.8mb +green open .opensearch-observability rs3PBvC3ToSxl2LsnMBMMQ 1 0 0 0 208b 208b +green open .plugins-ml-config LPB7fh76RCyVMuiINKYR5w 1 0 1 0 3.9kb 3.9kb +yellow open sre eDRTOdepSl67xuSGcroDvw 1 1 5000 0 1.1mb 1.1mb +yellow open pseudonyms CdUFe96eQVOD3qkht369tg 1 1 5004 0 304kb 304kb + + +--- _cat/count --- +epoch 
timestamp count +1773141253 11:14:13 70001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.037 s +measurement window: 45.000 s +processed (OpenSearch): 56_003 +throughput: 1,244.51 docs/s +-------------- + +----- Run Round 7: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 U2NwNDP8SdmXYeMJONkXSw 1 0 8 16 74.6kb 74.6kb +green open .opensearch-observability uUFb8qADQ_u9jxwlQZHfOQ 1 0 0 0 208b 208b +green open .plugins-ml-config wpAPHI8WSlS-VyQJw7eUPA 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773141318 11:15:18 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 U2NwNDP8SdmXYeMJONkXSw 1 0 8 16 74.6kb 74.6kb +yellow open processed -TuXzK4LTyW8Nt1Mzdb9Ig 1 1 64649 0 7.3mb 7.3mb +green open .plugins-ml-config wpAPHI8WSlS-VyQJw7eUPA 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability uUFb8qADQ_u9jxwlQZHfOQ 1 0 0 0 208b 208b +yellow open sre vOEtOV_YRLiwt54paX9QfA 1 1 3826 0 1mb 1mb +yellow open pseudonyms b6doPBSMQf2Yc_oCHpthbw 1 1 3863 0 347.4kb 347.4kb + + +--- _cat/count --- +epoch timestamp count +1773141382 11:16:22 80809 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.039 s +measurement window: 60.000 s +processed (OpenSearch): 64_649 +throughput: 1,077.48 docs/s +-------------- + +----- Run Round 8: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- 
_cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability Wt9akI1uQsSDDN3MSs1CtA 1 0 0 0 208b 208b +green open .plugins-ml-config uCwxE8PhST64L1u3P0QG2w 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773141447 11:17:27 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 3LNxvFmZRUW_BlmGMvACNw 1 1 64011 0 8mb 8mb +green open .opensearch-observability Wt9akI1uQsSDDN3MSs1CtA 1 0 0 0 208b 208b +green open .plugins-ml-config uCwxE8PhST64L1u3P0QG2w 1 0 1 0 3.9kb 3.9kb +yellow open sre dp-kx6hDQWWW2uo_PKbHEA 1 1 4051 0 1mb 1mb +yellow open pseudonyms k0Jw3iwQQMyvflX3FkbpUg 1 1 4073 0 385.1kb 385.1kb + + +--- _cat/count --- +epoch timestamp count +1773141512 11:18:32 80015 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.039 s +measurement window: 60.000 s +processed (OpenSearch): 64_011 +throughput: 1,066.85 docs/s +-------------- + +----- Run Round 9: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config ZymTSsC2TbOtt1UYIq-rGQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability rjJiI8OxQrWJma4zAR9gcQ 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773141577 11:19:37 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open 
top_queries-2026.03.10-25342 IPjYfgdlSUGzd2W0tClrKg 1 0 9 18 79.2kb 79.2kb +yellow open processed LNRy0sR1RCO_PD-l70bAYA 1 1 66002 0 8.3mb 8.3mb +green open .opensearch-observability rjJiI8OxQrWJma4zAR9gcQ 1 0 0 0 208b 208b +green open .plugins-ml-config ZymTSsC2TbOtt1UYIq-rGQ 1 0 1 0 3.9kb 3.9kb +yellow open sre DDOwsXA3TSWNq0rgacnY8A 1 1 3995 0 1.1mb 1.1mb +yellow open pseudonyms bVyv95HkSXOCfRBDlODBWA 1 1 3995 0 343.8kb 343.8kb + + +--- _cat/count --- +epoch timestamp count +1773141640 11:20:40 82510 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.042 s +measurement window: 60.000 s +processed (OpenSearch): 66_002 +throughput: 1,100.03 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 9 +total runtime: 405.001 s +total processed: 451_462 + +throughput (weighted): 1,114.72 docs/s +throughput (median): 1,100.03 docs/s +throughput (average): 1,116.53 docs/s +throughput (min/max): 866.26 / 1,294.10 docs/s +throughput (std dev): 132.43 docs/s +================================ diff --git a/benchmark_results/20260310_114644/nonNG_python3.12.txt b/benchmark_results/20260310_114644/nonNG_python3.12.txt new file mode 100644 index 000000000..9a706b0b7 --- /dev/null +++ b/benchmark_results/20260310_114644/nonNG_python3.12.txt @@ -0,0 +1,464 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-10T12:37:43.976924 +timestamp (UTC) : 2026-03-10T11:37:43.976927+00:00 +python version : 3.12.12 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 0 + ↳ mode : logprep + ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260310_114644/nonNG_python3.12.txt +processed_index : processed 
+prometheus_multiproc_dir : /tmp/logprep +runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config xKQmQAbuRzuM4Xz8cy3khg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 2qNHCcNWTB6ika5VkxFjYA 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773142717 11:38:37 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed nc4nyA52QwiO2UJWYONB-A 1 1 28004 0 3.8mb 3.8mb +green open .plugins-ml-config xKQmQAbuRzuM4Xz8cy3khg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 2qNHCcNWTB6ika5VkxFjYA 1 0 0 0 208b 208b +yellow open sre hxVQzqVWSvqEopVDKte1Xw 1 1 3500 0 786.1kb 786.1kb +yellow open pseudonyms u5UrgkSbR_Oj8q0rCmcgxA 1 1 3496 0 224.1kb 224.1kb + + +--- _cat/count --- +epoch timestamp count +1773142749 11:39:09 35001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.043 s +measurement window: 30.000 s +processed (OpenSearch): 28_004 +throughput: 933.46 docs/s +-------------- + +----- Run Round 2: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 l4jRWRXNSKCIJC7Pau3C3w 1 0 8 16 76.8kb 76.8kb +green open .plugins-ml-config hcdPw3aZQQKu_NWmJzRInw 1 0 1 0 
3.9kb 3.9kb +green open .opensearch-observability OWRDrvyaSOCu2Agsn654lw 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773142814 11:40:14 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 l4jRWRXNSKCIJC7Pau3C3w 1 0 8 16 76.8kb 76.8kb +yellow open processed AQkKpCIbSzaNihOePbaTfw 1 1 30006 0 3.6mb 3.6mb +green open .opensearch-observability OWRDrvyaSOCu2Agsn654lw 1 0 0 0 208b 208b +green open .plugins-ml-config hcdPw3aZQQKu_NWmJzRInw 1 0 1 0 3.9kb 3.9kb +yellow open sre dgohKDktSSW3nk8Qrgc-CQ 1 1 3737 0 842kb 842kb +yellow open pseudonyms 5OdFQ75JQcSR5bjScPeeKA 1 1 3757 0 263.3kb 263.3kb + + +--- _cat/count --- +epoch timestamp count +1773142846 11:40:46 37509 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.043 s +measurement window: 30.000 s +processed (OpenSearch): 30_006 +throughput: 1,000.19 docs/s +-------------- + +----- Run Round 3: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config vaF7jivvQ3q-LE7leg4ngg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 4Wncf1pvTd-IBDDuSP2D5Q 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773142911 11:41:51 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed NavRA1jdQj2kumJYdHx_PQ 1 1 28002 0 3.3mb 3.3mb +green open 
.plugins-ml-config vaF7jivvQ3q-LE7leg4ngg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 4Wncf1pvTd-IBDDuSP2D5Q 1 0 0 0 208b 208b +yellow open sre P7KbJtIRRDuGCbDQF-1NZg 1 1 3498 0 805.9kb 805.9kb +yellow open pseudonyms 1XDdXNx0Ql-4QV56TuGmgA 1 1 3500 0 233.6kb 233.6kb + + +--- _cat/count --- +epoch timestamp count +1773142942 11:42:22 35001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.038 s +measurement window: 30.000 s +processed (OpenSearch): 28_002 +throughput: 933.40 docs/s +-------------- + +----- Run Round 4: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability gOy8kOq2QgqTYGSWsVlD1Q 1 0 0 0 208b 208b +green open .plugins-ml-config hT3OFEI2Tv-aUFfd1zoiWQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773143007 11:43:27 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed rBEjkYYuRia1f1SLwJkGcw 1 1 47995 0 6.1mb 6.1mb +green open .opensearch-observability gOy8kOq2QgqTYGSWsVlD1Q 1 0 0 0 208b 208b +green open .plugins-ml-config hT3OFEI2Tv-aUFfd1zoiWQ 1 0 1 0 3.9kb 3.9kb +yellow open sre kPftvXArSziJCu-o6-CN_g 1 1 4233 0 1010.1kb 1010.1kb +yellow open pseudonyms DOzjs81cQ8uvlPMLDE5YfA 1 1 4198 0 293.4kb 293.4kb + + +--- _cat/count --- +epoch timestamp count +1773143054 11:44:14 60001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.040 s +measurement window: 45.000 s +processed (OpenSearch): 47_995 
+throughput: 1,066.55 docs/s +-------------- + +----- Run Round 5: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 DXTP-mHWTiezn0-lXx2_Qg 1 0 8 16 78.2kb 78.2kb +green open .opensearch-observability MkraE3jSTSaY79x-oVi5jA 1 0 0 0 208b 208b +green open .plugins-ml-config wUS0_VB9RjSXGh6udkMLeQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773143119 11:45:19 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 DXTP-mHWTiezn0-lXx2_Qg 1 0 8 16 78.2kb 78.2kb +yellow open processed BpBbCmAwRg6f11vkHJdawQ 1 1 48004 0 6.2mb 6.2mb +green open .opensearch-observability MkraE3jSTSaY79x-oVi5jA 1 0 0 0 208b 208b +green open .plugins-ml-config wUS0_VB9RjSXGh6udkMLeQ 1 0 1 0 3.9kb 3.9kb +yellow open sre y0IMG4UVTQe-1niPwTGm0A 1 1 4251 0 999.4kb 999.4kb +yellow open pseudonyms 15u0c1EvTU2oEbZzHRhhLw 1 1 4246 0 289kb 289kb + + +--- _cat/count --- +epoch timestamp count +1773143166 11:46:06 60009 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.037 s +measurement window: 45.000 s +processed (OpenSearch): 48_004 +throughput: 1,066.75 docs/s +-------------- + +----- Run Round 6: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability TxW6AO_NSmy2MfpcFj7C_g 1 0 0 0 208b 208b +green open .plugins-ml-config OZNiJUm1TWyUq2nZIczXZw 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch 
timestamp count +1773143231 11:47:11 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed XdaP6tdcQ0WWO0ltTDKPsg 1 1 47998 0 6mb 6mb +green open .opensearch-observability TxW6AO_NSmy2MfpcFj7C_g 1 0 0 0 208b 208b +green open .plugins-ml-config OZNiJUm1TWyUq2nZIczXZw 1 0 1 0 3.9kb 3.9kb +yellow open sre ahfuIkBtSPO0BzFbsNjwYQ 1 1 4258 0 966.3kb 966.3kb +yellow open pseudonyms Gk8KZ1W1QvKLnmJd4aD-ug 1 1 4236 0 273kb 273kb + + +--- _cat/count --- +epoch timestamp count +1773143279 11:47:59 60001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.036 s +measurement window: 45.000 s +processed (OpenSearch): 47_998 +throughput: 1,066.62 docs/s +-------------- + +----- Run Round 7: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability 2xBpCxrMR3qqSyYEB9e5BQ 1 0 0 0 208b 208b +green open .plugins-ml-config ppET6MUASJWlV8WbPIFSTQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773143343 11:49:03 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 NMsHrRSBQPuRTSRiRg09gw 1 0 9 18 81.5kb 81.5kb +yellow open processed Bd9aTJXFS-SWsfGBobCIdA 1 1 65996 0 8mb 8mb +green open .plugins-ml-config ppET6MUASJWlV8WbPIFSTQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 2xBpCxrMR3qqSyYEB9e5BQ 1 0 0 0 208b 208b 
+yellow open sre 2hv5ZQovRqq7-UQgHBK9kg 1 1 4107 0 1mb 1mb +yellow open pseudonyms dXvGHJ3hQXumrglDYLwAmA 1 1 4137 0 333.8kb 333.8kb + + +--- _cat/count --- +epoch timestamp count +1773143407 11:50:07 82510 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.037 s +measurement window: 60.000 s +processed (OpenSearch): 65_996 +throughput: 1,099.93 docs/s +-------------- + +----- Run Round 8: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability E9N4pVo9TeS3XZIfgRvnGQ 1 0 0 0 208b 208b +green open .plugins-ml-config ael2q5MuTm6sdHlo-Fu1wQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773143470 11:51:10 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed -hgNVet2TGmiFgQ7Xv8LpA 1 1 68003 0 8.4mb 8.4mb +green open .plugins-ml-config ael2q5MuTm6sdHlo-Fu1wQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability E9N4pVo9TeS3XZIfgRvnGQ 1 0 0 0 208b 208b +yellow open sre Xe66T3d9TVusbE2RdfMMbQ 1 1 4252 0 1.1mb 1.1mb +yellow open pseudonyms AdkJmwWdQlCPZZm_wc-3UQ 1 1 4247 0 321.4kb 321.4kb + + +--- _cat/count --- +epoch timestamp count +1773143534 11:52:14 85002 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.037 s +measurement window: 60.000 s +processed (OpenSearch): 68_003 +throughput: 1,133.38 docs/s +-------------- + +----- Run Round 9: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices 
--- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability u2XpQWx_Qcet2YoOIZ3Xvw 1 0 0 0 208b 208b +green open .plugins-ml-config M0bJfEV1R_6G3naYoMeLFA 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773143598 11:53:18 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 16L2NGeqQma5afA9wIiDEQ 1 1 68003 0 8.6mb 8.6mb +green open .opensearch-observability u2XpQWx_Qcet2YoOIZ3Xvw 1 0 0 0 208b 208b +green open .plugins-ml-config M0bJfEV1R_6G3naYoMeLFA 1 0 1 0 3.9kb 3.9kb +yellow open sre odTnQ7X1SSSikOWHskCJ1g 1 1 4228 0 1.1mb 1.1mb +yellow open pseudonyms qDhqwhCBT2K1EM7noUDp_g 1 1 4216 0 312kb 312kb + + +--- _cat/count --- +epoch timestamp count +1773143661 11:54:21 85001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.039 s +measurement window: 60.000 s +processed (OpenSearch): 68_003 +throughput: 1,133.38 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 9 +total runtime: 405.001 s +total processed: 432_011 + +throughput (weighted): 1,066.69 docs/s +throughput (median): 1,066.62 docs/s +throughput (average): 1,048.19 docs/s +throughput (min/max): 933.40 / 1,133.38 docs/s +throughput (std dev): 76.53 docs/s +================================ diff --git a/benchmark_results/20260310_114644/nonNG_python3.13.txt b/benchmark_results/20260310_114644/nonNG_python3.13.txt new file mode 100644 index 000000000..d0bf7dccb --- /dev/null +++ b/benchmark_results/20260310_114644/nonNG_python3.13.txt @@ -0,0 +1,462 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-10T13:11:18.131638 +timestamp (UTC) : 
2026-03-10T12:11:18.131642+00:00 +python version : 3.13.9 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 0 + ↳ mode : logprep + ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260310_114644/nonNG_python3.13.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability GhZ_Q10kTueOTYUFCPDQpQ 1 0 0 0 208b 208b +green open .plugins-ml-config -pTQ-NYXQ--Bu20qR4d-lw 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773144730 12:12:10 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed Rq3pEFoDQ1iVJzubmn4cHw 1 1 27999 0 3.8mb 3.8mb +green open .plugins-ml-config -pTQ-NYXQ--Bu20qR4d-lw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability GhZ_Q10kTueOTYUFCPDQpQ 1 0 0 0 208b 208b +yellow open sre aLe6vgxEQ2qXdnCSykfiyQ 1 1 3502 0 821.4kb 821.4kb +yellow open pseudonyms tdTSBC5sS_S18kLbvUkp4w 1 1 3499 0 182.4kb 182.4kb + + +--- _cat/count --- +epoch timestamp count +1773144762 12:12:42 35001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events 
generated: 120_000 +startup time: 5.046 s +measurement window: 30.000 s +processed (OpenSearch): 27_999 +throughput: 933.30 docs/s +-------------- + +----- Run Round 2: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability LODCq9KgSAmWAASmavB2Mw 1 0 0 0 208b 208b +green open .plugins-ml-config RPF-s9RnSN-0Q_baaYMiLg 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773144825 12:13:45 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed dVuoA_CITae3FD_Dw1whtw 1 1 29998 0 4.2mb 4.2mb +green open .opensearch-observability LODCq9KgSAmWAASmavB2Mw 1 0 0 0 208b 208b +green open .plugins-ml-config RPF-s9RnSN-0Q_baaYMiLg 1 0 1 0 3.9kb 3.9kb +yellow open sre eKiFJj5-SpKSKGu0hV4X6w 1 1 3750 0 846.1kb 846.1kb +yellow open pseudonyms 7HWhIA1oQsG6Ij-Th1K1Cw 1 1 3752 0 249.3kb 249.3kb + + +--- _cat/count --- +epoch timestamp count +1773144857 12:14:17 37501 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.037 s +measurement window: 30.000 s +processed (OpenSearch): 29_998 +throughput: 999.93 docs/s +-------------- + +----- Run Round 3: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 wOqBWSjnQ0eRgg-CS0QoVA 1 0 8 16 75.7kb 75.7kb +green open .plugins-ml-config rGnW4MmYR9iFQo3YHKydJw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability -OBFtrlPSo2Nt7e4dlRCDA 1 0 0 0 208b 208b + + +--- 
_cat/count --- +epoch timestamp count +1773144921 12:15:21 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 wOqBWSjnQ0eRgg-CS0QoVA 1 0 8 16 75.7kb 75.7kb +yellow open processed 1Yi1lKaEQ8yVNrjrklU-sw 1 1 28001 0 3.5mb 3.5mb +green open .plugins-ml-config rGnW4MmYR9iFQo3YHKydJw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability -OBFtrlPSo2Nt7e4dlRCDA 1 0 0 0 208b 208b +yellow open sre I19K4-K4RJibM7ytC7bUHw 1 1 3510 0 800.4kb 800.4kb +yellow open pseudonyms LSrz2MgORMCpzJ2Tvpyq-w 1 1 3489 0 225.6kb 225.6kb + + +--- _cat/count --- +epoch timestamp count +1773144953 12:15:53 35009 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.039 s +measurement window: 30.000 s +processed (OpenSearch): 28_001 +throughput: 933.36 docs/s +-------------- + +----- Run Round 4: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config jGahqoFIR9yyg3qC6Nx6Eg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability OimVB9IFSdSccQbAVEQqZA 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773145016 12:16:56 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed iLscfogJRf22H0MVBbmgzw 1 1 48003 0 6mb 6mb +green open .plugins-ml-config jGahqoFIR9yyg3qC6Nx6Eg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 
OimVB9IFSdSccQbAVEQqZA 1 0 0 0 208b 208b +yellow open sre A52BkQxvSz6B4taMOz6PdQ 1 1 4147 0 996.6kb 996.6kb +yellow open pseudonyms 6uUArIg1Rl6gQE1ORgVVJg 1 1 4167 0 241.7kb 241.7kb + + +--- _cat/count --- +epoch timestamp count +1773145065 12:17:45 60001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.037 s +measurement window: 45.000 s +processed (OpenSearch): 48_003 +throughput: 1,066.73 docs/s +-------------- + +----- Run Round 5: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability _KgaawBWQMe0A76liAne3g 1 0 0 0 208b 208b +green open .plugins-ml-config rI8ghtu8SBaS8s9Msbp88g 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773145128 12:18:48 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed D7p7tkUYTgiqLHQf5aqEQQ 1 1 48008 0 6.4mb 6.4mb +green open .plugins-ml-config rI8ghtu8SBaS8s9Msbp88g 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability _KgaawBWQMe0A76liAne3g 1 0 0 0 208b 208b +yellow open sre NpOm125NQeu4PAsgZhZJ6Q 1 1 4134 0 1mb 1mb +yellow open pseudonyms PZejCcGMRPeHnaKFaHdcGw 1 1 4176 0 265.8kb 265.8kb + + +--- _cat/count --- +epoch timestamp count +1773145176 12:19:36 60001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.039 s +measurement window: 45.000 s +processed (OpenSearch): 48_008 +throughput: 1,066.84 docs/s +-------------- + +----- Run Round 6: 45 seconds ----- + +=== OpenSearch snapshot 
(before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config doLiPPuZT9-w19PCQeYBOg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability fWgjmJdbTJ-LcPwcC-uBPw 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773145239 12:20:39 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed U5SB1ZSKQumVi-WfIAKB3g 1 1 48006 0 6.1mb 6.1mb +green open .opensearch-observability fWgjmJdbTJ-LcPwcC-uBPw 1 0 0 0 208b 208b +green open .plugins-ml-config doLiPPuZT9-w19PCQeYBOg 1 0 1 0 3.9kb 3.9kb +yellow open sre vcS1BkVzQXmDWWnuY6HUAw 1 1 4064 0 1mb 1mb +yellow open pseudonyms ETFH5v29QKuW1qJMtjsgdQ 1 1 4082 0 297.3kb 297.3kb + + +--- _cat/count --- +epoch timestamp count +1773145287 12:21:27 60001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.038 s +measurement window: 45.000 s +processed (OpenSearch): 48_006 +throughput: 1,066.80 docs/s +-------------- + +----- Run Round 7: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability 48HoQbc_QnSHHSC-NtV55g 1 0 0 0 208b 208b +green open .plugins-ml-config eV2Ar8vaQC2Vdu9h8F0RRw 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773145350 12:22:30 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size 
pri.store.size +yellow open processed Nbuxpz_hQlWnbP3uyZ1uig 1 1 64006 0 8.1mb 8.1mb +green open .plugins-ml-config eV2Ar8vaQC2Vdu9h8F0RRw 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability 48HoQbc_QnSHHSC-NtV55g 1 0 0 0 208b 208b +yellow open sre vKALGUEmRpSesKGg1uDiXg 1 1 4001 0 1mb 1mb +yellow open pseudonyms xAg7PHPQSwCT8FeRQIWKaQ 1 1 3996 0 264.7kb 264.7kb + + +--- _cat/count --- +epoch timestamp count +1773145413 12:23:33 80001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.037 s +measurement window: 60.000 s +processed (OpenSearch): 64_006 +throughput: 1,066.77 docs/s +-------------- + +----- Run Round 8: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability zPdZzI8mQSqvzmd2BY07xw 1 0 0 0 208b 208b +green open .plugins-ml-config n47LCeyTTv2XSqyxH_KBLQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773145476 12:24:36 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 DWKAiZxHQ6ysaZX8YWaaXA 1 0 9 18 131.8kb 131.8kb +yellow open processed 6O884SuQSwyE83onNCeiYA 1 1 66003 0 7.6mb 7.6mb +green open .opensearch-observability zPdZzI8mQSqvzmd2BY07xw 1 0 0 0 208b 208b +green open .plugins-ml-config n47LCeyTTv2XSqyxH_KBLQ 1 0 1 0 3.9kb 3.9kb +yellow open sre wtmyUfvjT_a2gXxtppbZhQ 1 1 4164 0 1.1mb 1.1mb +yellow open pseudonyms 6tY0e0gsTt-l8p44ubzcrA 1 1 4170 0 320.4kb 320.4kb + + +--- _cat/count --- +epoch timestamp count +1773145539 12:25:39 82510 + + +--- _cat/aliases --- +alias index filter routing.index 
routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.036 s +measurement window: 60.000 s +processed (OpenSearch): 66_003 +throughput: 1,100.05 docs/s +-------------- + +----- Run Round 9: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .plugins-ml-config oZaWa2laRT-9EP6d1C_n0w 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability OuIcREjMQOSPr-RN8iMSMQ 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count +1773145604 12:26:44 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed jHEGAcydRquETyE5Soh4vA 1 1 65205 0 8.4mb 8.4mb +green open .opensearch-observability OuIcREjMQOSPr-RN8iMSMQ 1 0 0 0 208b 208b +green open .plugins-ml-config oZaWa2laRT-9EP6d1C_n0w 1 0 1 0 3.9kb 3.9kb +yellow open sre vKPm6zMoTKmA8XoUup_d0A 1 1 4163 0 1mb 1mb +yellow open pseudonyms Y2Qukw_wR8qPqV8RHdL3SQ 1 1 4144 0 355.2kb 355.2kb + + +--- _cat/count --- +epoch timestamp count +1773145667 12:27:47 81513 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.041 s +measurement window: 60.000 s +processed (OpenSearch): 65_205 +throughput: 1,086.75 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 9 +total runtime: 405.001 s +total processed: 425_229 + +throughput (weighted): 1,049.95 docs/s +throughput (median): 1,066.77 docs/s +throughput (average): 1,035.61 docs/s +throughput (min/max): 933.30 / 1,100.05 docs/s +throughput (std dev): 64.08 docs/s +================================ diff --git 
a/benchmark_results/20260310_114644/nonNG_python3.14.txt b/benchmark_results/20260310_114644/nonNG_python3.14.txt new file mode 100644 index 000000000..b315844da --- /dev/null +++ b/benchmark_results/20260310_114644/nonNG_python3.14.txt @@ -0,0 +1,465 @@ + +=== BENCHMARK CONFIGURATION === +timestamp (local) : 2026-03-10T13:44:42.685154 +timestamp (UTC) : 2026-03-10T12:44:42.685156+00:00 +python version : 3.14.0 +---------------------------------------- +bootstrap_servers : 127.0.0.1:9092 +compose_dir : examples/compose +event_num : 120_000 +gen_input_dir : examples/exampledata/kafka_generate_input_logdata +ng : 0 + ↳ mode : logprep + ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml +opensearch_url : http://localhost:9200 +out : benchmark_results/20260310_114644/nonNG_python3.14.txt +processed_index : processed +prometheus_multiproc_dir : /tmp/logprep +runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] +services : ['kafka', 'opensearch'] +sleep_after_compose_up_s : 30 +sleep_after_generate_s : 2 +sleep_after_logprep_start_s : 5 +================================ + +----- Run Round 1: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 wTfhsr4jReihcr52EZm73A 1 0 8 16 79.5kb 79.5kb +green open .opensearch-observability ghFN5G_9RxmxUVeSCMeKpQ 1 0 0 0 208b 208b +green open .plugins-ml-config 0m8zc-2BSHW5TtbZTMK03A 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773146735 12:45:35 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 wTfhsr4jReihcr52EZm73A 1 0 8 16 79.5kb 79.5kb +yellow open processed 
3y8pq-1CSiuf8VsU7qJHGg 1 1 28001 0 3.6mb 3.6mb +green open .opensearch-observability ghFN5G_9RxmxUVeSCMeKpQ 1 0 0 0 208b 208b +green open .plugins-ml-config 0m8zc-2BSHW5TtbZTMK03A 1 0 1 0 3.9kb 3.9kb +yellow open sre MUEKAO8ISD-nIEZlXX7QSQ 1 1 3497 0 818.5kb 818.5kb +yellow open pseudonyms szt3S6hGSwW-IlozBLTmKQ 1 1 3502 0 221.7kb 221.7kb + + +--- _cat/count --- +epoch timestamp count +1773146767 12:46:07 35009 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.041 s +measurement window: 30.000 s +processed (OpenSearch): 28_001 +throughput: 933.36 docs/s +-------------- + +----- Run Round 2: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability RLxaHRkLSNGtbCrsno1X_Q 1 0 0 0 208b 208b +green open .plugins-ml-config kcwdGG7_S8SWowp-r41x_w 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773146831 12:47:11 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed WkmzndxHRvaivd1sul1k1w 1 1 29991 0 4.2mb 4.2mb +green open .opensearch-observability RLxaHRkLSNGtbCrsno1X_Q 1 0 0 0 208b 208b +green open .plugins-ml-config kcwdGG7_S8SWowp-r41x_w 1 0 1 0 3.9kb 3.9kb +yellow open sre 0JAF4G7OQKy9vNlkC8c5lA 1 1 3746 0 890.4kb 890.4kb +yellow open pseudonyms ZAjDzx4mQX-NtUTQ4XTI0A 1 1 3763 0 272.3kb 272.3kb + + +--- _cat/count --- +epoch timestamp count +1773146863 12:47:43 37501 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.038 s 
+measurement window: 30.000 s +processed (OpenSearch): 29_991 +throughput: 999.70 docs/s +-------------- + +----- Run Round 3: 30 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability sAZMJUSCSNixxIPS52aRRQ 1 0 0 0 208b 208b +green open .plugins-ml-config uE8bPiYhSUKAPhuoGdG-rA 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773146932 12:48:52 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed GSQ0Ss9jQem31eIXA9uDYw 1 1 29996 0 4.3mb 4.3mb +green open .opensearch-observability sAZMJUSCSNixxIPS52aRRQ 1 0 0 0 208b 208b +green open .plugins-ml-config uE8bPiYhSUKAPhuoGdG-rA 1 0 1 0 3.9kb 3.9kb +yellow open sre r9CaK135QZ-WtRi1ZCKPRQ 1 1 3756 0 906.4kb 906.4kb +yellow open pseudonyms XNgS-N5OSQKLkrgBKjD58w 1 1 3748 0 271.1kb 271.1kb + + +--- _cat/count --- +epoch timestamp count +1773146964 12:49:24 37501 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 30 +events generated: 120_000 +startup time: 5.039 s +measurement window: 30.000 s +processed (OpenSearch): 29_996 +throughput: 999.86 docs/s +-------------- + +----- Run Round 4: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 aqSXnnT7Qgi0yno_WHXWZg 1 0 8 16 41.6kb 41.6kb +green open .plugins-ml-config gxIqBIxGTOmQ-goDqFdRgQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability pfkfRezlQx-Y21t1RdzsqQ 1 0 0 0 208b 208b + + +--- _cat/count --- +epoch timestamp count 
+1773147028 12:50:28 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 aqSXnnT7Qgi0yno_WHXWZg 1 0 8 16 41.6kb 41.6kb +yellow open processed 3ArVWlRGQhKxBDwMdpa0Fg 1 1 48002 0 6.1mb 6.1mb +green open .plugins-ml-config gxIqBIxGTOmQ-goDqFdRgQ 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability pfkfRezlQx-Y21t1RdzsqQ 1 0 0 0 208b 208b +yellow open sre d9sYOegfTF-aVOIn9_9HKg 1 1 4239 0 1006.1kb 1006.1kb +yellow open pseudonyms 3Rf2yN8NSeCQKodiaZDwcg 1 1 4253 0 315.4kb 315.4kb + + +--- _cat/count --- +epoch timestamp count +1773147076 12:51:16 60009 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.038 s +measurement window: 45.000 s +processed (OpenSearch): 48_002 +throughput: 1,066.71 docs/s +-------------- + +----- Run Round 5: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability gMjtlp--SYKvyfi479Hisw 1 0 0 0 208b 208b +green open .plugins-ml-config mOpTFlItTzSXaHc11j-9yA 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773147140 12:52:20 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed C-ytG9gMSbiLxEnnpqOHvQ 1 1 43994 0 5.6mb 5.6mb +green open .plugins-ml-config mOpTFlItTzSXaHc11j-9yA 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability gMjtlp--SYKvyfi479Hisw 1 0 0 0 208b 208b 
+yellow open sre aGsNXkXGR8K9xMUZ-Hjoyw 1 1 3952 0 984.6kb 984.6kb +yellow open pseudonyms l17ZBkFuTfGNQFHx-ICMEA 1 1 3972 0 279.5kb 279.5kb + + +--- _cat/count --- +epoch timestamp count +1773147188 12:53:08 55001 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.036 s +measurement window: 45.000 s +processed (OpenSearch): 43_994 +throughput: 977.64 docs/s +-------------- + +----- Run Round 6: 45 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability mW0DeAqfSBCq8my-61mIsQ 1 0 0 0 208b 208b +green open .plugins-ml-config ZftEcawOTlqAP4oBF26sCA 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773147251 12:54:11 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed 2j57uaIRRAy5JIU-HozJbA 1 1 51515 0 6.4mb 6.4mb +green open .opensearch-observability mW0DeAqfSBCq8my-61mIsQ 1 0 0 0 208b 208b +green open .plugins-ml-config ZftEcawOTlqAP4oBF26sCA 1 0 1 0 3.9kb 3.9kb +yellow open sre djf3PpZ5SMKT4303ndClBw 1 1 4001 0 971.9kb 971.9kb +yellow open pseudonyms EuaoYjKiQf-sMtnIbmTfOw 1 1 4006 0 238.9kb 238.9kb + + +--- _cat/count --- +epoch timestamp count +1773147302 12:55:02 64401 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 45 +events generated: 120_000 +startup time: 5.040 s +measurement window: 45.000 s +processed (OpenSearch): 51_515 +throughput: 1,144.78 docs/s +-------------- + +----- Run Round 7: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- 
_cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability s_znwlXOSbm4pJE48LvO3A 1 0 0 0 208b 208b +green open .plugins-ml-config AlgnH6BPTo-Rs5xqpZbSHQ 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773147365 12:56:05 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open processed pA0rbFT4T16FqmKJSdNuvw 1 1 72309 0 8.6mb 8.6mb +green open .opensearch-observability s_znwlXOSbm4pJE48LvO3A 1 0 0 0 208b 208b +green open .plugins-ml-config AlgnH6BPTo-Rs5xqpZbSHQ 1 0 1 0 3.9kb 3.9kb +yellow open sre LXXTW843RqyJ7upcicf1IA 1 1 4132 0 1.1mb 1.1mb +yellow open pseudonyms lHAcPQynS3aSVQmshYDLEg 1 1 4198 0 337.2kb 337.2kb + + +--- _cat/count --- +epoch timestamp count +1773147431 12:57:11 90391 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.043 s +measurement window: 60.000 s +processed (OpenSearch): 72_309 +throughput: 1,205.15 docs/s +-------------- + +----- Run Round 8: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .opensearch-observability KhsGGaqWT3202jGfKuwJnA 1 0 0 0 208b 208b +green open .plugins-ml-config jn7FRHaoSxiN6sTsBFDfVg 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773147494 12:58:14 1 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +yellow open 
processed dUD2jrHfSJuUWN5AeV1INw 1 1 71444 0 8.9mb 8.9mb +green open .plugins-ml-config jn7FRHaoSxiN6sTsBFDfVg 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability KhsGGaqWT3202jGfKuwJnA 1 0 0 0 208b 208b +yellow open sre PvzVblZnTJm9JnGX_wVu2w 1 1 4005 0 1mb 1mb +yellow open pseudonyms D9fQm1IzSIuFeidc53FJdQ 1 1 3991 0 319.9kb 319.9kb + + +--- _cat/count --- +epoch timestamp count +1773147560 12:59:20 89301 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.041 s +measurement window: 60.000 s +processed (OpenSearch): 71_444 +throughput: 1,190.73 docs/s +-------------- + +----- Run Round 9: 60 seconds ----- + +=== OpenSearch snapshot (before measurement) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 _wnKuX6KRrCWdoLFY9h4KQ 1 0 8 16 77.7kb 77.7kb +green open .opensearch-observability Gn8XMh3fSlOw8jkT8mrRBg 1 0 0 0 208b 208b +green open .plugins-ml-config rGLw-23MRcCjk4PxFrai8w 1 0 1 0 3.9kb 3.9kb + + +--- _cat/count --- +epoch timestamp count +1773147623 13:00:23 9 + + +--- _cat/aliases --- +alias index filter routing.index routing.search is_write_index + + +=== OpenSearch snapshot (after run / after refresh) === + +--- _cat/indices --- +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open top_queries-2026.03.10-25342 _wnKuX6KRrCWdoLFY9h4KQ 1 0 8 16 77.7kb 77.7kb +yellow open processed 9KGLSbXKRxuS59WWvtmYGA 1 1 62002 0 7.5mb 7.5mb +green open .plugins-ml-config rGLw-23MRcCjk4PxFrai8w 1 0 1 0 3.9kb 3.9kb +green open .opensearch-observability Gn8XMh3fSlOw8jkT8mrRBg 1 0 0 0 208b 208b +yellow open sre EvnF1wdZRu-XSeXlJk-9FQ 1 1 3750 0 1mb 1mb +yellow open pseudonyms SEAC2lkmToauaS59SCts7w 1 1 3763 0 300.6kb 300.6kb + + +--- _cat/count --- +epoch timestamp count +1773147686 13:01:26 77509 + + +--- 
_cat/aliases --- +alias index filter routing.index routing.search is_write_index + +--- RESULT --- +run_seconds: 60 +events generated: 120_000 +startup time: 5.041 s +measurement window: 60.000 s +processed (OpenSearch): 62_002 +throughput: 1,033.37 docs/s +-------------- + + +=== FINAL BENCHMARK SUMMARY === +runs: 9 +total runtime: 405.001 s +total processed: 437_254 + +throughput (weighted): 1,079.64 docs/s +throughput (median): 1,033.37 docs/s +throughput (average): 1,061.26 docs/s +throughput (min/max): 933.36 / 1,205.15 docs/s +throughput (std dev): 97.57 docs/s +================================ From 7a946ec06c2cfec470c86051ccac68856e20ea62 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:35:57 +0100 Subject: [PATCH 27/68] remove terminal out_queue --- logprep/ng/manager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 73a70f1c5..dd757a7ba 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -133,7 +133,6 @@ async def _handle_sent_events(batch: list[LogEvent]) -> list[LogEvent]: batch_size=BATCH_SIZE, batch_interval_s=BATCH_INTERVAL_S, in_queue=output_worker.out_queue, # type: ignore - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), handler=_handle_sent_events, ) From 6fd235d1c3518e36485d2eed3cb6baf2244fb908 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:36:25 +0100 Subject: [PATCH 28/68] add error logs for worker flush timers --- logprep/ng/util/worker/worker.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 9bb954ac2..1584257a2 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -62,8 +62,13 @@ def _start_timer_locked(self) -> None: Must be called with _buffer_lock held. 
Ensures that at most one timer task is active for the current batch window. """ - if self._flush_timer and not self._flush_timer.done(): - self._flush_timer.cancel() + if self._flush_timer: + if self._flush_timer.done(): + exc = self._flush_timer.exception() + if exc is not None: + logger.error("flush timer task has failed", exc_info=exc) + else: + self._flush_timer.cancel() self._flush_timer = asyncio.create_task(self._flush_after_interval()) def _cancel_timer_if_needed(self) -> None: @@ -74,7 +79,12 @@ def _cancel_timer_if_needed(self) -> None: self-cancellation race conditions. """ t = self._flush_timer - if not t or t.done(): + if not t: + return + if t.done(): + exc = t.exception() + if exc is not None: + logger.error("flush timer task has failed", exc_info=exc) return if t is asyncio.current_task(): return From b1b23dab12cb545b26e5a89cc8bcc17a65437a5b Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Wed, 11 Mar 2026 14:38:15 +0100 Subject: [PATCH 29/68] include shutdown time in performance measurement --- benchmark.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmark.py b/benchmark.py index d0406156d..92b3118bd 100644 --- a/benchmark.py +++ b/benchmark.py @@ -630,9 +630,10 @@ def benchmark_run( t_run = time.time() time.sleep(run_seconds) - window_s = time.time() - t_run kill_hard(logprep_proc) + + window_s = time.time() - t_run logprep_proc = None _current_logprep_proc = None From c8774ef39f314ef783ee03eb30a9e5e6f7ba34e1 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Wed, 11 Mar 2026 14:41:00 +0100 Subject: [PATCH 30/68] distribute events to different queues after processing; add debug logs about queue sizes; add state reporting for terminal events --- logprep/ng/manager.py | 105 +++++++++++++++++++++++++------------- logprep/ng/util/events.py | 17 ++++++ 2 files changed, 87 insertions(+), 35 deletions(-) create mode 100644 
logprep/ng/util/events.py diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index dd757a7ba..58ca56c6f 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -18,6 +18,7 @@ from logprep.ng.pipeline import Pipeline from logprep.ng.sender import Sender from logprep.ng.util.configuration import Configuration +from logprep.ng.util.events import partition_by_state from logprep.ng.util.worker.types import SizeLimitedQueue from logprep.ng.util.worker.worker import Worker, WorkerOrchestrator @@ -70,9 +71,23 @@ async def setup(self): self._sender = Sender(outputs=output_connectors, error_output=error_output) await self._sender.setup() + self._queues = [] self._orchestrator = self._create_orchestrator() def _create_orchestrator(self) -> WorkerOrchestrator: + process_queue = SizeLimitedQueue[LogEvent](maxsize=MAX_QUEUE_SIZE) + send_to_default_queue = SizeLimitedQueue[LogEvent](maxsize=MAX_QUEUE_SIZE) + send_to_extras_queue = SizeLimitedQueue[LogEvent](maxsize=MAX_QUEUE_SIZE) + send_to_error_queue = SizeLimitedQueue[LogEvent](maxsize=MAX_QUEUE_SIZE) + acknowledge_queue = SizeLimitedQueue[LogEvent](maxsize=MAX_QUEUE_SIZE) + self._queues = [ + process_queue, + send_to_default_queue, + send_to_extras_queue, + send_to_error_queue, + acknowledge_queue, + ] + async def transfer_batch(batch: list[LogEvent]) -> list[LogEvent]: for event in batch: event.state.current_state = EventStateType.RECEIVED @@ -81,59 +96,85 @@ async def transfer_batch(batch: list[LogEvent]) -> list[LogEvent]: input_worker: Worker[LogEvent, LogEvent] = Worker( name="input_worker", - batch_size=250, + batch_size=500, batch_interval_s=BATCH_INTERVAL_S, in_queue=self._input_connector(timeout=self.configuration.timeout), - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), + out_queue=process_queue, handler=transfer_batch, ) - async def process(batch: list[LogEvent]) -> list[LogEvent]: - return [self._pipeline.process(event) for event in batch] + async def _processor_handler(batch: 
list[LogEvent]) -> list[LogEvent]: + async def _handle(event: LogEvent): + # TODO make processing async + self._pipeline.process(event) + # TODO handle all possible states + if event.state != EventStateType.FAILED: + if event.extra_data: + await send_to_extras_queue.put(event) + else: + await send_to_default_queue.put(event) + else: + await send_to_error_queue.put(event) + + await asyncio.gather(*map(_handle, batch)) + return batch processing_worker: Worker[LogEvent, LogEvent] = Worker( name="processing_worker", batch_size=BATCH_SIZE, batch_interval_s=BATCH_INTERVAL_S, - in_queue=input_worker.out_queue, # type: ignore - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler=process, + in_queue=process_queue, + handler=_processor_handler, ) - async def send_extras(batch: list[LogEvent]) -> list[LogEvent]: + async def _send_extras_handler(batch: list[LogEvent]) -> list[LogEvent]: return await self._sender.send_extras(batch) extra_output_worker: Worker[LogEvent, LogEvent] = Worker( name="extra_output_worker", batch_size=BATCH_SIZE, batch_interval_s=BATCH_INTERVAL_S, - in_queue=processing_worker.out_queue, # type: ignore - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler=send_extras, + in_queue=send_to_extras_queue, + out_queue=send_to_default_queue, + handler=_send_extras_handler, ) - async def send_default_output(batch: list[LogEvent]) -> list[LogEvent]: + async def _send_default_output_handler(batch: list[LogEvent]) -> list[LogEvent]: return await self._sender.send_default_output(batch) output_worker: Worker[LogEvent, LogEvent] = Worker( name="output_worker", batch_size=BATCH_SIZE, batch_interval_s=BATCH_INTERVAL_S, - in_queue=extra_output_worker.out_queue, # type: ignore - out_queue=SizeLimitedQueue(maxsize=MAX_QUEUE_SIZE), - handler=send_default_output, + in_queue=send_to_default_queue, + out_queue=acknowledge_queue, + handler=_send_default_output_handler, ) - async def _handle_sent_events(batch: list[LogEvent]) -> list[LogEvent]: - # TODO: 
call await self._input_connector.acknowledge() ??? - return await self._process_sent_events(batch) + async def _report_event_state(batch: list[LogEvent]) -> list[LogEvent]: + events_by_state = partition_by_state(batch) + logger.info( + "Finished processing %d events: %s", + len(batch), + ", ".join(f"#{state}={len(events)}" for state, events in events_by_state.items()), + ) + return batch + + error_worker: Worker[LogEvent, LogEvent] = Worker( + name="error_worker", + batch_size=BATCH_SIZE, + batch_interval_s=BATCH_INTERVAL_S, + in_queue=send_to_error_queue, + # TODO implement handling and sending failed events + handler=_report_event_state, + ) acknowledge_worker: Worker[LogEvent, LogEvent] = Worker( name="acknowledge_worker", batch_size=BATCH_SIZE, batch_interval_s=BATCH_INTERVAL_S, - in_queue=output_worker.out_queue, # type: ignore - handler=_handle_sent_events, + in_queue=acknowledge_queue, + handler=_report_event_state, ) return WorkerOrchestrator( @@ -142,6 +183,7 @@ async def _handle_sent_events(batch: list[LogEvent]) -> list[LogEvent]: processing_worker, extra_output_worker, output_worker, + error_worker, acknowledge_worker, ] ) @@ -164,10 +206,18 @@ async def run(self) -> None: async def _shut_down(self) -> None: """Shut down runner components, and required runner attributes.""" + logger.debug( + "Remaining items in queues: [%s]", ", ".join(f"{q.qsize()}" for q in self._queues) + ) + if self._orchestrator is not None: # TODO only a fraction of shutdown_timeout_s should be passed to the orchestrator await self._orchestrator.shut_down(self._shutdown_timeout_s) + logger.debug( + "Remaining items in queues: [%s]", ", ".join(f"{q.qsize()}" for q in self._queues) + ) + if self._sender is not None: await self._sender.shut_down() # self._input_connector.acknowledge() @@ -179,18 +229,3 @@ async def _shut_down(self) -> None: ) logger.info("Runner shut down complete.") - - async def _process_sent_events(self, batch: list[LogEvent]) -> list[LogEvent]: - """Process a 
batch of events got from sender iterator.""" - - logger.debug("Got batch of events from sender (batch_size=%d).", len(batch)) - for event in batch: - if event is None: - continue - - if event.state == EventStateType.FAILED: - logger.error("event failed: %s", event) - else: - logger.debug("event processed: %s", event.state) - - return batch diff --git a/logprep/ng/util/events.py b/logprep/ng/util/events.py new file mode 100644 index 000000000..32f3b9c24 --- /dev/null +++ b/logprep/ng/util/events.py @@ -0,0 +1,17 @@ +from collections import defaultdict +from collections.abc import Sequence +from typing import TypeVar + +from logprep.ng.abc.event import Event +from logprep.ng.event.event_state import EventStateType + +E_co = TypeVar("E_co", bound=Event, covariant=True) + + +def partition_by_state(events: Sequence[E_co]) -> dict[EventStateType, list[E_co]]: + result = defaultdict(list) + + for event in events: + result[event.state.current_state].append(event) + + return result From 2265739a79c0163e9718baa865adbe754794a0be Mon Sep 17 00:00:00 2001 From: Pablu23 <43807157+Pablu23@users.noreply.github.com> Date: Thu, 2 Apr 2026 11:50:44 +0200 Subject: [PATCH 31/68] Cleanup opensearch output (#947) * feat: add async pipeline manager POC * add debug launch configuration for ng * add integration draft * remove isolated poc code * fix logging error * refactor to use more composition and less events in top-level code * feat: make confluent_kafka async; first steps towards async input handling * feat: add pipeline configs for benchmark * WIP: initial async steps for sender and opensearch output * WIP * WIP * use async setup in ng-world; tests TBD * rename debug configurations and use internalConsole * show taskName in log messages * fix benchmark pipeline configs * update debug logs * fix benchmark configs for MacOS * ensure taskName property is set for older python versions * avoid sending events multiple times in sender * add draft for store_batch * WIP: small adaptions + 
benchmark results with adapted pipeline configs -> opensearch message_backlog_size: 1 * WIP: benchmark results with adapted pipeline configs -> opensearch message_backlog_size: 2500 * fix wrong import * set MAX_QUEUE_SIZE to BATCH_SIZE and increase input_worker batch_size * disentangle EventBacklog and input * docs: benchmark logs * remove terminal out_queue * add error logs for worker flush timers * include shutdown time in performance measurement * distribute events to different queues after processing; add debug logs about queue sizes; add state reporting for terminal events * Cleanup opensearch output bulk function, and add shut_down * revise some old code * add easier to read and faster keys_list and check if atleast enough keys are in list to not panic on indexing * Add deprecation Notice * Apply suggestion from @mhoff Co-authored-by: Michael Hoff <9436725+mhoff@users.noreply.github.com> * Remove assert --------- Co-authored-by: David Kaya Co-authored-by: Michael Hoff <9436725+mhoff@users.noreply.github.com> --- logprep/ng/connector/opensearch/output.py | 69 +++++++++++------------ 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/logprep/ng/connector/opensearch/output.py b/logprep/ng/connector/opensearch/output.py index 87656f9bb..9aa5efbe2 100644 --- a/logprep/ng/connector/opensearch/output.py +++ b/logprep/ng/connector/opensearch/output.py @@ -45,6 +45,7 @@ helpers, ) from opensearchpy.serializer import JSONSerializer +from typing_extensions import override from logprep.abc.exceptions import LogprepException from logprep.ng.abc.event import Event @@ -137,17 +138,22 @@ class Config(Output.Config): thread_count: int = field( default=4, validator=(validators.instance_of(int), validators.gt(1)) ) - """Number of threads to use for bulk requests.""" + """Number of threads to use for bulk requests. 
+ DEPCRECATED: This Argument is deprecated and doesnt do anything anymore, + it will be removed in the future""" queue_size: int = field( default=4, validator=(validators.instance_of(int), validators.gt(1)) ) - """Number of queue size to use for bulk requests.""" + """Number of queue size to use for bulk requests. + DEPCRECATED: This Argument is deprecated and doesnt do anything anymore, + it will be removed in the future""" chunk_size: int = field( default=500, validator=(validators.instance_of(int), validators.gt(1)) ) """Chunk size to use for bulk requests.""" max_chunk_bytes: int = field( - default=100 * 1024 * 1024, validator=(validators.instance_of(int), validators.gt(1)) + default=100 * 1024 * 1024, + validator=(validators.instance_of(int), validators.gt(1)), ) """Max chunk size to use for bulk requests. The default is 100MB.""" max_retries: int = field( @@ -160,7 +166,10 @@ class Config(Output.Config): """Desired cluster status for health check as list of strings. Default is ["green"]""" default_op_type: str = field( default="index", - validator=(validators.instance_of(str), validators.in_(["create", "index"])), + validator=( + validators.instance_of(str), + validators.in_(["create", "index"]), + ), ) """Default op_type for indexing documents. 
Default is 'index', Consider using 'create' for data streams or to prevent overwriting existing documents.""" @@ -170,6 +179,11 @@ class Config(Output.Config): _message_backlog: list[Event] """List of messages to be sent to Opensearch.""" + @property + def _metrics(self) -> Output.Metrics: + """Provides the properly typed metrics object""" + return typing.cast(Output.Metrics, self.metrics) + @property def config(self) -> Config: """Provides the properly typed rule configuration object""" @@ -272,14 +286,16 @@ async def store_batch( self, events: Sequence[Event], target: str | None = None ) -> Sequence[Event]: logger.debug("store_batch called with %d events, target=%s", len(events), target) + target = target if target else self.config.default_index + for event in events: document = event.data - if target is None: - document["_index"] = document.get("_index", self.config.default_index) - else: - document["_index"] = document.get("_index", target) + document["_index"] = document.get("_index", target) document["_op_type"] = document.get("_op_type", self.config.default_op_type) - self.metrics.number_of_processed_events += len(events) + + event.state.current_state = EventStateType.STORING_IN_OUTPUT + + self._metrics.number_of_processed_events += len(events) logger.debug("Flushing %d documents to Opensearch", len(events)) await self._bulk(self._search_context, events) return events @@ -305,8 +321,6 @@ async def _bulk(self, client: AsyncOpenSearch, events: Sequence[Event]) -> None: kwargs = { "max_chunk_bytes": self.config.max_chunk_bytes, "chunk_size": self.config.chunk_size, - # "queue_size": self.config.queue_size, - # "thread_count": self.config.thread_count, "raise_on_error": False, "raise_on_exception": False, } @@ -314,13 +328,8 @@ async def _bulk(self, client: AsyncOpenSearch, events: Sequence[Event]) -> None: actions = (event.data for event in events) index = 0 - async for success, item in helpers.async_streaming_bulk(client, actions, **kwargs): # type: ignore - 
if index >= len(events): - break - + async for success, item in helpers.async_streaming_bulk(client, actions, **kwargs): event = events[index] - event.state.current_state = EventStateType.STORING_IN_OUTPUT - index += 1 if success: @@ -329,24 +338,8 @@ async def _bulk(self, client: AsyncOpenSearch, events: Sequence[Event]) -> None: event.state.current_state = EventStateType.FAILED - # parallel_bulk often returned item that allowed item.get("_op_type") - # streaming_bulk usually returns {"index": {...}} / {"create": {...}} - op_type = item.get("_op_type") if isinstance(item, dict) else None - if not op_type and isinstance(item, dict) and item: - op_type = next(iter(item.keys())) - - op_type = op_type or self.config.default_op_type - error_info = {} - - if isinstance(item, dict): - # streaming_bulk shape - if op_type in item and isinstance(item[op_type], dict): - error_info = item[op_type] - # fallback: old shape - else: - error_info = ( - item.get(op_type, {}) if isinstance(item.get(op_type), dict) else {} - ) + op_infos = item.values() + error_info = op_infos[0] if len(op_infos) > 0 else {} error = BulkError(error_info.get("error", "Failed to index document"), **error_info) event.errors.append(error) @@ -359,6 +352,10 @@ async def health(self) -> bool: # type: ignore # TODO: fix mypy issue ) except (OpenSearchException, ConnectionError) as error: logger.error("Health check failed: %s", error) - self.metrics.number_of_errors += 1 + self._metrics.number_of_errors += 1 return False return super().health() and resp.get("status") in self.config.desired_cluster_status + + @override + async def shut_down(self): + await self._search_context.close() From 6463412406de5e1e40b105da531f5bfed16b9415 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Thu, 19 Mar 2026 13:46:10 +0100 Subject: [PATCH 32/68] refactor: remove unnecessary types --- logprep/ng/util/worker/types.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/logprep/ng/util/worker/types.py 
b/logprep/ng/util/worker/types.py index 78593ed8c..61897cbe4 100644 --- a/logprep/ng/util/worker/types.py +++ b/logprep/ng/util/worker/types.py @@ -18,9 +18,7 @@ Input = TypeVar("Input") Output = TypeVar("Output") -SyncHandler = Callable[[list[Input]], list[Output]] AsyncHandler = Callable[[list[Input]], Coroutine[object, object, list[Output]]] -Handler = SyncHandler[Input, Output] | AsyncHandler[Input, Output] class SizeLimitedQueue(asyncio.Queue[T]): From 2c06beff21e70c6febcea2683687842a660ebbe5 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Thu, 19 Mar 2026 15:31:27 +0100 Subject: [PATCH 33/68] refactor: replace uvloop.run with asyncio.Runner and configurable loop_factory --- logprep/run_ng.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/logprep/run_ng.py b/logprep/run_ng.py index 3014710a1..e4c6fae5c 100644 --- a/logprep/run_ng.py +++ b/logprep/run_ng.py @@ -1,6 +1,7 @@ # pylint: disable=logging-fstring-interpolation """This module can be used to start the logprep.""" +import asyncio import logging import os import signal @@ -8,7 +9,6 @@ from multiprocessing import set_start_method import click -import uvloop from logprep.ng.runner import Runner from logprep.ng.util.configuration import Configuration, InvalidConfigurationError @@ -70,8 +70,8 @@ def run(configs: tuple[str], version=None) -> None: async def _run(configs: tuple[str], version=None): configuration = await _get_configuration(configs) - runner = Runner(configuration) - runner.setup_logging() + _runner = Runner(configuration) + _runner.setup_logging() if version: _print_version(configuration) for v in get_versions_string(configuration).split("\n"): @@ -83,7 +83,7 @@ async def _run(configs: tuple[str], version=None): signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) logger.debug("Configuration loaded") - await runner.run() + await _runner.run() except SystemExit as error: logger.debug(f"Exit received with code 
{error.code}") sys.exit(error.code) @@ -95,12 +95,27 @@ async def _run(configs: tuple[str], version=None): logger.exception(f"A critical error occurred: {error}") # pragma: no cover else: logger.critical(f"A critical error occurred: {error}") - if runner: - runner.stop() + if _runner: + _runner.stop() sys.exit(EXITCODES.ERROR) # pylint: enable=broad-except - uvloop.run(_run(configs, version)) + def _get_loop_factory(mode: str): + match mode: + case "uvloop": + import uvloop + + logger.info("Using event loop: uvloop") + return uvloop.new_event_loop + case "asyncio": + logger.info("Using event loop: asyncio") + return asyncio.new_event_loop + + case _: + raise ValueError(f"Unknown loop mode: {mode}") + + with asyncio.Runner(loop_factory=_get_loop_factory(mode="uvloop")) as runner: + runner.run(_run(configs, version)) def signal_handler(__: int, _) -> None: From 70f15425e876e46a200fe2c4a8f776928e21cd54 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 25 Mar 2026 10:42:52 +0100 Subject: [PATCH 34/68] fix: fix config refresh, remove config scheduler, small adaptions --- logprep/ng/connector/confluent_kafka/input.py | 14 ++--- logprep/ng/manager.py | 21 +++---- logprep/ng/runner.py | 19 ++++-- logprep/ng/util/configuration.py | 59 +++++++++++++++---- logprep/ng/util/worker/worker.py | 27 +-------- logprep/run_ng.py | 22 +++---- pyproject.toml | 11 +++- 7 files changed, 98 insertions(+), 75 deletions(-) diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index 5211072b5..a05588132 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -28,14 +28,12 @@ auto.offset.reset: "earliest" """ -# pylint: enable=line-too-long import logging import os import typing from functools import cached_property, partial from socket import getfqdn -from types import MappingProxyType -from typing import Union +from types import MappingProxyType # pylint: 
disable=no-name-in-module import msgspec from attrs import define, field, validators @@ -81,7 +79,7 @@ DEFAULT_RETURN = 0 -logger = logging.getLogger("KafkaInput") +logger = logging.getLogger("KafkaInput") # pylint: disable=no-member class ConfluentKafkaInput(Input): @@ -396,7 +394,9 @@ def _stats_callback(self, stats_raw: str) -> None: ) def _commit_callback( - self, error: Union[KafkaException, None], topic_partitions: list[TopicPartition] + self, + error: KafkaException | None, + topic_partitions: list[TopicPartition], ) -> None: """Callback used to indicate success or failure of asynchronous and automatic commit requests. This callback is served upon calling consumer.poll() @@ -461,8 +461,6 @@ async def _get_raw_event(self, timeout: float) -> Message | None: # type: ignor message = await consumer.poll(timeout=timeout) except RuntimeError as error: raise FatalInputError(self, str(error)) from error - except Exception as error: # remove this - pass if message is None: return None if message.value() is None or message.partition() is None or message.offset() is None: @@ -602,7 +600,7 @@ async def _get_memberid(self) -> str | None: member_id = None try: consumer = await self.get_consumer() - member_id = consumer._consumer.memberid() + member_id = consumer._consumer.memberid() # pylint: disable=protected-access except RuntimeError as error: logger.error("Failed to retrieve member ID: %s", error) return member_id diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 58ca56c6f..124827347 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -22,7 +22,7 @@ from logprep.ng.util.worker.types import SizeLimitedQueue from logprep.ng.util.worker.worker import Worker, WorkerOrchestrator -logger = logging.getLogger("PipelineManager") +logger = logging.getLogger("PipelineManager") # pylint: disable=no-member BATCH_SIZE = 2_500 @@ -40,6 +40,8 @@ def __init__(self, configuration: Configuration, shutdown_timeout_s: float) -> N self._shutdown_timeout_s = 
shutdown_timeout_s async def setup(self): + """Setup the pipeline manager.""" + self._event_backlog = SetEventBacklog() self._input_connector = cast(Input, Factory.create(self.configuration.input)) @@ -74,7 +76,7 @@ async def setup(self): self._queues = [] self._orchestrator = self._create_orchestrator() - def _create_orchestrator(self) -> WorkerOrchestrator: + def _create_orchestrator(self) -> WorkerOrchestrator: # pylint: disable=too-many-locals process_queue = SizeLimitedQueue[LogEvent](maxsize=MAX_QUEUE_SIZE) send_to_default_queue = SizeLimitedQueue[LogEvent](maxsize=MAX_QUEUE_SIZE) send_to_extras_queue = SizeLimitedQueue[LogEvent](maxsize=MAX_QUEUE_SIZE) @@ -194,14 +196,13 @@ async def run(self) -> None: try: await self._orchestrator.run() except CancelledError: - # TODO cancelling() > 0 is no safe discriminator; improve - current_task = asyncio.current_task() - if current_task and current_task.cancelling() > 0: - logger.debug("PipelineManager.run has been cancelled. Shutting down") - await self._shut_down() - else: - logger.error("Orchestrator has been cancelled. Shutting down") - await self._shut_down() + logger.debug("PipelineManager.run cancelled. Shutting down.") + await self._shut_down() + raise + except Exception: + logger.exception("PipelineManager.run failed. 
Shutting down.") + await self._shut_down() + raise async def _shut_down(self) -> None: """Shut down runner components, and required runner attributes.""" diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 7033fb17b..6d43a61a8 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -50,15 +50,21 @@ def __init__(self, configuration: Configuration) -> None: async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None]: self.config.schedule_config_refresh() + + self._running_config_version = self.config.version refresh_interval = self.config.config_refresh_interval + while True: self.config.refresh() if self.config.version != self._running_config_version: - yield self.config + logger.info(f"Detected new config version: {self.config.version}") + self._running_config_version = self.config.version refresh_interval = self.config.config_refresh_interval + yield self.config + if refresh_interval is not None: try: await asyncio.sleep( @@ -85,13 +91,14 @@ async def run(self) -> None: tg.create_task(TerminateTaskGroup.raise_on_event(self._stop_event)) async def start_pipeline(config: Configuration) -> asyncio.Task: - pipeline_manager = PipelineManager( + logger.debug(">>>>> Starting pipeline") + self._pipeline_manager = PipelineManager( config, shutdown_timeout_s=GRACEFUL_SHUTDOWN_TIMEOUT ) - await pipeline_manager.setup() + await self._pipeline_manager.setup() return tg.create_task( - pipeline_manager.run(), + self._pipeline_manager.run(), name="pipeline_manager", ) @@ -136,7 +143,7 @@ def setup_logging(self) -> None: """ warnings.simplefilter("always", DeprecationWarning) - logging.captureWarnings(True) + logging.captureWarnings(True) # pylint: disable=no-member log_config = DEFAULT_LOG_CONFIG | asdict(self.config.logger) os.environ["LOGPREP_LOG_CONFIG"] = json.dumps(log_config) - logging.config.dictConfig(log_config) + logging.config.dictConfig(log_config) # pylint: disable=no-member diff --git a/logprep/ng/util/configuration.py 
b/logprep/ng/util/configuration.py index 434147514..ca5c81091 100644 --- a/logprep/ng/util/configuration.py +++ b/logprep/ng/util/configuration.py @@ -190,6 +190,7 @@ group.id: test" """ +import asyncio import json import logging import os @@ -197,7 +198,6 @@ from copy import deepcopy from importlib.metadata import version from itertools import chain -from logging.config import dictConfig from pathlib import Path from typing import Any, Iterable, List, Optional, Sequence, Tuple @@ -232,21 +232,21 @@ ) from logprep.util.rule_loader import RuleLoader -logger = logging.getLogger("Config") +logger = logging.getLogger("Config") # pylint: disable=no-member class MyYAML(YAML): """helper class to dump yaml with ruamel.yaml""" def dump(self, data: Any, stream: Any | None = None, **kw: Any) -> Any: - inefficient = False if stream is None: - inefficient = True stream = StringIO() - YAML.dump(self, data, stream, **kw) - if inefficient: + YAML.dump(self, data, stream, **kw) return stream.getvalue() + YAML.dump(self, data, stream, **kw) + return None + yaml = MyYAML(pure=True) @@ -340,7 +340,7 @@ class LoggerConfig: compatible with :func:`logging.config.dictConfig`. """ - _LOG_LEVELS = ( + _log_levels = ( logging.NOTSET, # 0 logging.DEBUG, # 10 logging.INFO, # 20 @@ -358,7 +358,7 @@ class LoggerConfig: default="INFO", validator=[ validators.instance_of(str), - validators.in_([logging.getLevelName(level) for level in _LOG_LEVELS]), + validators.in_([logging.getLevelName(level) for level in _log_levels]), ], eq=False, ) @@ -443,7 +443,7 @@ def setup_logging(self) -> None: log_config = asdict(self) os.environ["LOGPREP_LOG_CONFIG"] = json.dumps(log_config) - dictConfig(log_config) + logging.config.dictConfig(log_config) def _set_loggers_levels(self) -> None: """Normalize per-logger configuration and preserve explicit levels. 
@@ -673,6 +673,22 @@ class Configuration: _config_failure: bool = field(default=False, repr=False, eq=False, init=False) + _background_tasks: set = field( + factory=set, + validator=validators.instance_of(set), + repr=False, + eq=False, + init=False, + ) + + _reload_lock: asyncio.Lock = field( + factory=asyncio.Lock, + validator=validators.instance_of(asyncio.Lock), + repr=False, + eq=False, + init=False, + ) + _unserializable_fields = ( "_getter", "_configs", @@ -680,6 +696,8 @@ class Configuration: "_scheduler", "_metrics", "_unserializable_fields", + "_reload_lock", + "_background_tasks", ) @define(kw_only=True) @@ -943,7 +961,28 @@ def schedule_config_refresh(self) -> None: if scheduler.jobs: scheduler.cancel_job(scheduler.jobs[0]) if isinstance(refresh_interval, int): - scheduler.every(refresh_interval).seconds.do(self.reload) + + async def _reload_wrapper() -> None: + if self._reload_lock.locked(): + logger.warning( + "config reload already running; skipping scheduled config reload run", + ) + return + + async with self._reload_lock: + try: + await self.reload() + except Exception: + logger.exception("config reload failed") + raise + + def _schedule_reload() -> None: + task = asyncio.create_task(_reload_wrapper()) + self._background_tasks.add(task) + task.add_done_callback(self._background_tasks.discard) + + scheduler.every(refresh_interval).seconds.do(_schedule_reload) + logger.info("Config refresh interval is set to: %s seconds", refresh_interval) def refresh(self) -> None: diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 1584257a2..5e2fca9e1 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -19,7 +19,7 @@ from logprep.ng.util.worker.types import AsyncHandler, SizeLimitedQueue -logger = logging.getLogger("Worker") +logger = logging.getLogger("Worker") # pylint: disable=no-member T = TypeVar("T") Input = TypeVar("Input") @@ -194,7 +194,6 @@ async def run(self, stop_event: 
asyncio.Event) -> None: while not stop_event.is_set(): item = await self.in_queue.get() await self.add(item) - # TODO is this await really necessary? await asyncio.sleep(0.0) else: while not stop_event.is_set(): @@ -203,7 +202,6 @@ async def run(self, stop_event: asyncio.Event) -> None: if item is not None: await self.add(item) - # TODO is this await really necessary? await asyncio.sleep(0.0) except asyncio.CancelledError: @@ -213,29 +211,6 @@ async def run(self, stop_event: asyncio.Event) -> None: await self.flush() -class TransferWorker(Worker[T, T]): - def __init__( - self, - name: str, - batch_size: int, - batch_interval_s: float, - in_queue: asyncio.Queue[T] | AsyncIterator[T], - out_queue: SizeLimitedQueue[T] | None = None, - ) -> None: - super().__init__( - name=name, - batch_size=batch_size, - batch_interval_s=batch_interval_s, - in_queue=in_queue, - out_queue=out_queue, - handler=self.__handle_noop, - ) - - async def __handle_noop(self, batch: list[T]) -> list[T]: - await asyncio.sleep(0) - return [e for e in batch if e is not None] - - class WorkerOrchestrator: """ Orchestrates a chain of workers. diff --git a/logprep/run_ng.py b/logprep/run_ng.py index e4c6fae5c..ed3c3eb92 100644 --- a/logprep/run_ng.py +++ b/logprep/run_ng.py @@ -3,7 +3,6 @@ import asyncio import logging -import os import signal import sys from multiprocessing import set_start_method @@ -20,7 +19,7 @@ init_yaml_loader_tags("safe", "rt") -logger = logging.getLogger("root") +logger = logging.getLogger("root") # pylint: disable=no-member def _print_version(config: "Configuration") -> None: @@ -68,16 +67,16 @@ def run(configs: tuple[str], version=None) -> None: CONFIG is a path to configuration file (filepath or URL). 
""" - async def _run(configs: tuple[str], version=None): - configuration = await _get_configuration(configs) + async def _run(configs_: tuple[str], version_=None): + configuration = await _get_configuration(configs_) _runner = Runner(configuration) _runner.setup_logging() - if version: + if version_: _print_version(configuration) for v in get_versions_string(configuration).split("\n"): logger.info(v) logger.debug(f"Metric export enabled: {configuration.metrics.enabled}") - logger.debug(f"Config path: {configs}") + logger.debug(f"Config path: {configs_}") try: if "pytest" not in sys.modules: # needed for not blocking tests signal.signal(signal.SIGTERM, signal_handler) @@ -91,10 +90,8 @@ async def _run(configs: tuple[str], version=None): except ExceptionGroup as error_group: logger.exception(f"Multiple errors occurred: {error_group}") except Exception as error: - if os.environ.get("DEBUG", False): - logger.exception(f"A critical error occurred: {error}") # pragma: no cover - else: - logger.critical(f"A critical error occurred: {error}") + logger.exception(f"A critical error occurred: {error}") + if _runner: _runner.stop() sys.exit(EXITCODES.ERROR) @@ -103,14 +100,11 @@ async def _run(configs: tuple[str], version=None): def _get_loop_factory(mode: str): match mode: case "uvloop": - import uvloop + import uvloop # pylint: disable=import-outside-toplevel - logger.info("Using event loop: uvloop") return uvloop.new_event_loop case "asyncio": - logger.info("Using event loop: asyncio") return asyncio.new_event_loop - case _: raise ValueError(f"Unknown loop mode: {mode}") diff --git a/pyproject.toml b/pyproject.toml index e353dff0a..97cbd5d81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -172,7 +172,16 @@ max-line-length=100 no-docstring-rgx="^test_|^.*TestCase|^_|^Test" [tool.pylint."MESAGES CONTROL"] -disable="too-few-public-methods,unsupported-membership-test" +disable = [ + "too-few-public-methods", + "unsupported-membership-test", + 
"too-many-positional-arguments", + "too-many-arguments", + "too-many-branches", + "too-many-instance-attributes", + "too-many-lines", + "line-too-long", +] [tool.pylint.DESIGN] min-public-methods=1 From 443816586ba5cc2e76e3773344399baf1cdb4787 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 25 Mar 2026 10:51:21 +0100 Subject: [PATCH 35/68] refactor: remove loop_factory --- logprep/run_ng.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/logprep/run_ng.py b/logprep/run_ng.py index ed3c3eb92..248bf0684 100644 --- a/logprep/run_ng.py +++ b/logprep/run_ng.py @@ -8,6 +8,7 @@ from multiprocessing import set_start_method import click +import uvloop from logprep.ng.runner import Runner from logprep.ng.util.configuration import Configuration, InvalidConfigurationError @@ -97,18 +98,7 @@ async def _run(configs_: tuple[str], version_=None): sys.exit(EXITCODES.ERROR) # pylint: enable=broad-except - def _get_loop_factory(mode: str): - match mode: - case "uvloop": - import uvloop # pylint: disable=import-outside-toplevel - - return uvloop.new_event_loop - case "asyncio": - return asyncio.new_event_loop - case _: - raise ValueError(f"Unknown loop mode: {mode}") - - with asyncio.Runner(loop_factory=_get_loop_factory(mode="uvloop")) as runner: + with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner: runner.run(_run(configs, version)) From 6cee075281e610c8b84e8d3dce67a87e9ff50fa9 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 25 Mar 2026 11:25:18 +0100 Subject: [PATCH 36/68] feat: add asyncio exception handler for unhandled errors --- logprep/ng/connector/confluent_kafka/input.py | 2 +- logprep/ng/manager.py | 2 +- logprep/ng/runner.py | 4 +-- logprep/ng/util/__init__.py | 0 logprep/ng/util/async_helpers.py | 33 +++++++++++++++++++ logprep/ng/util/configuration.py | 2 +- logprep/ng/util/defaults.py | 2 +- .../util/{logging.py => logprep_logging.py} | 0 logprep/ng/util/worker/__init__.py | 0 logprep/ng/util/worker/worker.py | 
2 +- logprep/run_ng.py | 7 +++- 11 files changed, 46 insertions(+), 8 deletions(-) create mode 100644 logprep/ng/util/__init__.py rename logprep/ng/util/{logging.py => logprep_logging.py} (100%) create mode 100644 logprep/ng/util/worker/__init__.py diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index a05588132..5765c0d4f 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -79,7 +79,7 @@ DEFAULT_RETURN = 0 -logger = logging.getLogger("KafkaInput") # pylint: disable=no-member +logger = logging.getLogger("KafkaInput") class ConfluentKafkaInput(Input): diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 124827347..934f17ab3 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -22,7 +22,7 @@ from logprep.ng.util.worker.types import SizeLimitedQueue from logprep.ng.util.worker.worker import Worker, WorkerOrchestrator -logger = logging.getLogger("PipelineManager") # pylint: disable=no-member +logger = logging.getLogger("PipelineManager") BATCH_SIZE = 2_500 diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 6d43a61a8..6e91e7335 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -143,7 +143,7 @@ def setup_logging(self) -> None: """ warnings.simplefilter("always", DeprecationWarning) - logging.captureWarnings(True) # pylint: disable=no-member + logging.captureWarnings(True) log_config = DEFAULT_LOG_CONFIG | asdict(self.config.logger) os.environ["LOGPREP_LOG_CONFIG"] = json.dumps(log_config) - logging.config.dictConfig(log_config) # pylint: disable=no-member + logging.config.dictConfig(log_config) diff --git a/logprep/ng/util/__init__.py b/logprep/ng/util/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/logprep/ng/util/async_helpers.py b/logprep/ng/util/async_helpers.py index 875062148..972d723b8 100644 --- a/logprep/ng/util/async_helpers.py +++ b/logprep/ng/util/async_helpers.py @@ 
-2,6 +2,7 @@ import asyncio from collections.abc import AsyncGenerator, AsyncIterable, AsyncIterator, Callable +from logging import Logger from typing import Awaitable, TypeVar T = TypeVar("T") @@ -111,3 +112,35 @@ async def restart_task_on_iter( await cancel_task_and_wait(task, cancel_timeout_s) task = await task_factory(data) yield task + + +def asyncio_exception_handler( + loop: asyncio.AbstractEventLoop, # pylint: disable=unused-argument + context: dict, + logger: Logger, +) -> None: + """ + Handle unhandled exceptions reported by the asyncio event loop. + + Covers exceptions from background tasks, callbacks, and loop internals. + Does not handle exceptions from awaited coroutines (e.g. runner.run()). + + Args: + loop: The current event loop. + context: Asyncio error context (may contain message, exception, task/future). + logger: Logger used to record the error. + """ + + msg = context.get("message", "Unhandled exception in event loop") + exception = context.get("exception") + task = context.get("task") or context.get("future") + + logger.error(f"[asyncio] {msg}") + + if task: + logger.error(f"[asyncio] Task: {task!r}") + + if exception: + logger.error("[asyncio] Exception:", exc_info=exception) + else: + logger.error("[asyncio] Context: %s", context) diff --git a/logprep/ng/util/configuration.py b/logprep/ng/util/configuration.py index ca5c81091..3872a2291 100644 --- a/logprep/ng/util/configuration.py +++ b/logprep/ng/util/configuration.py @@ -232,7 +232,7 @@ ) from logprep.util.rule_loader import RuleLoader -logger = logging.getLogger("Config") # pylint: disable=no-member +logger = logging.getLogger("Config") class MyYAML(YAML): diff --git a/logprep/ng/util/defaults.py b/logprep/ng/util/defaults.py index b3060ecf3..a97bebfe9 100644 --- a/logprep/ng/util/defaults.py +++ b/logprep/ng/util/defaults.py @@ -36,7 +36,7 @@ class EXITCODES(IntEnum): "version": 1, "formatters": { "logprep": { - "class": "logprep.ng.util.logging.LogprepFormatter", + "class": 
"logprep.ng.util.logprep_logging.LogprepFormatter", "format": DEFAULT_LOG_FORMAT, "datefmt": DEFAULT_LOG_DATE_FORMAT, } diff --git a/logprep/ng/util/logging.py b/logprep/ng/util/logprep_logging.py similarity index 100% rename from logprep/ng/util/logging.py rename to logprep/ng/util/logprep_logging.py diff --git a/logprep/ng/util/worker/__init__.py b/logprep/ng/util/worker/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 5e2fca9e1..532a7fbd5 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -19,7 +19,7 @@ from logprep.ng.util.worker.types import AsyncHandler, SizeLimitedQueue -logger = logging.getLogger("Worker") # pylint: disable=no-member +logger = logging.getLogger("Worker") T = TypeVar("T") Input = TypeVar("Input") diff --git a/logprep/run_ng.py b/logprep/run_ng.py index 248bf0684..0f13f0bd8 100644 --- a/logprep/run_ng.py +++ b/logprep/run_ng.py @@ -5,12 +5,14 @@ import logging import signal import sys +from functools import partial from multiprocessing import set_start_method import click import uvloop from logprep.ng.runner import Runner +from logprep.ng.util.async_helpers import asyncio_exception_handler from logprep.ng.util.configuration import Configuration, InvalidConfigurationError from logprep.util.defaults import EXITCODES from logprep.util.helper import get_versions_string @@ -20,7 +22,7 @@ init_yaml_loader_tags("safe", "rt") -logger = logging.getLogger("root") # pylint: disable=no-member +logger = logging.getLogger("root") def _print_version(config: "Configuration") -> None: @@ -99,6 +101,9 @@ async def _run(configs_: tuple[str], version_=None): # pylint: enable=broad-except with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner: + handler = partial(asyncio_exception_handler, logger=logger) + loop = runner.get_loop() + loop.set_exception_handler(handler) runner.run(_run(configs, version)) From 
dea4dbd4870db49ae5fcae95c37454b32274aef9 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 25 Mar 2026 14:08:36 +0100 Subject: [PATCH 37/68] feat: improve config refresh sync/async --- logprep/ng/runner.py | 59 +++++++++++++++++--------------- logprep/ng/util/configuration.py | 54 +++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 37 deletions(-) diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 6e91e7335..23947d7fd 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -54,33 +54,38 @@ async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None self._running_config_version = self.config.version refresh_interval = self.config.config_refresh_interval - while True: - self.config.refresh() - - if self.config.version != self._running_config_version: - logger.info(f"Detected new config version: {self.config.version}") - - self._running_config_version = self.config.version - refresh_interval = self.config.config_refresh_interval - - yield self.config - - if refresh_interval is not None: - try: - await asyncio.sleep( - # realistic bad case: starting to sleep just a moment before scheduled time - # unlikely worst case: starting to sleep even after scheduled time - # (if yield takes some time and interval is short) - # --> compensate bad case by giving an upper boundary to the deviation - refresh_interval - * MAX_CONFIG_REFRESH_INTERVAL_DEVIATION_PERCENT - ) - except asyncio.CancelledError: - logger.debug("Config refresh cancelled. 
Exiting...") - raise - else: - logger.debug("Config refresh has been disabled.") - break + try: + while True: + self.config.refresh() + + if self.config.version != self._running_config_version: + logger.info(f"Detected new config version: {self.config.version}") + + self._running_config_version = self.config.version + refresh_interval = self.config.config_refresh_interval + + yield self.config + + if refresh_interval is not None: + try: + await asyncio.sleep( + # realistic bad case: starting to sleep just a moment before scheduled time + # unlikely worst case: starting to sleep even after scheduled time + # (if yield takes some time and interval is short) + # --> compensate bad case by giving an upper boundary to the deviation + refresh_interval + * MAX_CONFIG_REFRESH_INTERVAL_DEVIATION_PERCENT + ) + except asyncio.CancelledError: + logger.debug("Config refresh cancelled. Exiting...") + raise + else: + logger.debug("Config refresh has been disabled.") + break + except Exception: + raise + finally: + self.config.stop_config_refresh() async def run(self) -> None: """Run the runner and continuously process events until stopped.""" diff --git a/logprep/ng/util/configuration.py b/logprep/ng/util/configuration.py index 3872a2291..d4bb41f71 100644 --- a/logprep/ng/util/configuration.py +++ b/logprep/ng/util/configuration.py @@ -202,6 +202,7 @@ from typing import Any, Iterable, List, Optional, Sequence, Tuple from attrs import asdict, define, field, fields, validators +from numpy.distutils.conv_template import named_re from requests import RequestException from ruamel.yaml import YAML from ruamel.yaml.compat import StringIO @@ -673,9 +674,9 @@ class Configuration: _config_failure: bool = field(default=False, repr=False, eq=False, init=False) - _background_tasks: set = field( - factory=set, - validator=validators.instance_of(set), + _refresh_task: asyncio.Task | None = field( + default=None, + validator=validators.optional(validators.instance_of(asyncio.Task)), repr=False, 
eq=False, init=False, @@ -696,8 +697,8 @@ class Configuration: "_scheduler", "_metrics", "_unserializable_fields", + "_refresh_task", "_reload_lock", - "_background_tasks", ) @define(kw_only=True) @@ -952,6 +953,12 @@ def schedule_config_refresh(self) -> None: if self.config_refresh_interval is None: if scheduler.jobs: scheduler.cancel_job(scheduler.jobs[0]) + + if self._refresh_task is not None: + self._refresh_task.cancel() + + self._refresh_task = None + return self.config_refresh_interval = max( @@ -960,9 +967,17 @@ def schedule_config_refresh(self) -> None: refresh_interval = self.config_refresh_interval if scheduler.jobs: scheduler.cancel_job(scheduler.jobs[0]) + + if self._refresh_task is not None: + self._refresh_task.cancel() + + self._refresh_task = None + if isinstance(refresh_interval, int): async def _reload_wrapper() -> None: + current_task = asyncio.current_task() + if self._reload_lock.locked(): logger.warning( "config reload already running; skipping scheduled config reload run", @@ -972,23 +987,42 @@ async def _reload_wrapper() -> None: async with self._reload_lock: try: await self.reload() + except asyncio.CancelledError: + logger.info("scheduled config reload task cancelled") + raise except Exception: - logger.exception("config reload failed") + logger.exception("scheduled config reload failed") raise + finally: + if self._refresh_task is current_task: + self._refresh_task = None def _schedule_reload() -> None: - task = asyncio.create_task(_reload_wrapper()) - self._background_tasks.add(task) - task.add_done_callback(self._background_tasks.discard) + old_task = self._refresh_task - scheduler.every(refresh_interval).seconds.do(_schedule_reload) + if old_task is not None: + old_task.cancel() + + self._refresh_task = asyncio.create_task(_reload_wrapper()) - logger.info("Config refresh interval is set to: %s seconds", refresh_interval) + scheduler.every(refresh_interval).seconds.do(_schedule_reload) + logger.info(f"Config refresh interval is set to: 
{refresh_interval} seconds") def refresh(self) -> None: """Wrap the scheduler run_pending method hide the implementation details.""" self._scheduler.run_pending() + def stop_config_refresh(self) -> None: + """Stop scheduled config refresh.""" + + self._scheduler.clear() + + task = self._refresh_task + self._refresh_task = None + + if task is not None: + task.cancel() + def _set_attributes_from_configs(self) -> None: for attribute in filter(lambda x: x.repr, fields(self.__class__)): setattr( From 8ac6cd2ba1826e1cb9caf9f2c78b9b01c7a977fe Mon Sep 17 00:00:00 2001 From: David Kaya Date: Thu, 26 Mar 2026 07:18:16 +0100 Subject: [PATCH 38/68] feat: improve config refresh setup and teardown logic + improve types for Worker and some logs --- logprep/ng/runner.py | 4 ++-- logprep/ng/util/configuration.py | 2 ++ logprep/ng/util/worker/worker.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 23947d7fd..8990f9c44 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -84,8 +84,6 @@ async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None break except Exception: raise - finally: - self.config.stop_config_refresh() async def run(self) -> None: """Run the runner and continuously process events until stopped.""" @@ -131,6 +129,8 @@ async def start_pipeline(config: Configuration) -> asyncio.Task: logger.debug("Task group terminated") case _: raise + finally: + self.config.stop_config_refresh() logger.debug("End log processing.") diff --git a/logprep/ng/util/configuration.py b/logprep/ng/util/configuration.py index d4bb41f71..5cbebd6a9 100644 --- a/logprep/ng/util/configuration.py +++ b/logprep/ng/util/configuration.py @@ -1023,6 +1023,8 @@ def stop_config_refresh(self) -> None: if task is not None: task.cancel() + logger.debug("Config refresh task cancelled") + def _set_attributes_from_configs(self) -> None: for attribute in filter(lambda x: x.repr, fields(self.__class__)): 
setattr( diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 532a7fbd5..d0ad36b1b 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -37,7 +37,7 @@ def __init__( batch_size: int, batch_interval_s: float, handler: AsyncHandler[Input, Output], - in_queue: asyncio.Queue[Input] | AsyncIterator[Input], + in_queue: SizeLimitedQueue[Input] | AsyncIterator[Input], out_queue: SizeLimitedQueue[Output] | None = None, ) -> None: self.name = name From 4815394b5bc3e61956d5a502a3fb3f553985f151 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Thu, 26 Mar 2026 07:33:13 +0100 Subject: [PATCH 39/68] refactor: adjust naming to follow Python conventions (shadowing) --- logprep/run_ng.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/logprep/run_ng.py b/logprep/run_ng.py index 0f13f0bd8..7146142fa 100644 --- a/logprep/run_ng.py +++ b/logprep/run_ng.py @@ -72,8 +72,8 @@ def run(configs: tuple[str], version=None) -> None: async def _run(configs_: tuple[str], version_=None): configuration = await _get_configuration(configs_) - _runner = Runner(configuration) - _runner.setup_logging() + runner_ = Runner(configuration) + runner_.setup_logging() if version_: _print_version(configuration) for v in get_versions_string(configuration).split("\n"): @@ -85,7 +85,7 @@ async def _run(configs_: tuple[str], version_=None): signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGINT, signal_handler) logger.debug("Configuration loaded") - await _runner.run() + await runner_.run() except SystemExit as error: logger.debug(f"Exit received with code {error.code}") sys.exit(error.code) @@ -95,8 +95,8 @@ async def _run(configs_: tuple[str], version_=None): except Exception as error: logger.exception(f"A critical error occurred: {error}") - if _runner: - _runner.stop() + if runner_: + runner_.stop() sys.exit(EXITCODES.ERROR) # pylint: enable=broad-except From 
a2c4183d1be0c734ada1f9640c7abb0169c8632a Mon Sep 17 00:00:00 2001 From: David Kaya Date: Mon, 30 Mar 2026 09:15:57 +0200 Subject: [PATCH 40/68] refactor: remove print --- logprep/ng/runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 8990f9c44..42a38e7a1 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -94,7 +94,6 @@ async def run(self) -> None: tg.create_task(TerminateTaskGroup.raise_on_event(self._stop_event)) async def start_pipeline(config: Configuration) -> asyncio.Task: - logger.debug(">>>>> Starting pipeline") self._pipeline_manager = PipelineManager( config, shutdown_timeout_s=GRACEFUL_SHUTDOWN_TIMEOUT ) From 50ea0b8d346b6b0a04f6a3252dea0b852764dda0 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Mon, 30 Mar 2026 11:27:59 +0200 Subject: [PATCH 41/68] refactor: remove unused import --- logprep/ng/util/configuration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/logprep/ng/util/configuration.py b/logprep/ng/util/configuration.py index 5cbebd6a9..9c8c6e7e1 100644 --- a/logprep/ng/util/configuration.py +++ b/logprep/ng/util/configuration.py @@ -202,7 +202,6 @@ from typing import Any, Iterable, List, Optional, Sequence, Tuple from attrs import asdict, define, field, fields, validators -from numpy.distutils.conv_template import named_re from requests import RequestException from ruamel.yaml import YAML from ruamel.yaml.compat import StringIO From 6b6990cbdbf60ed8376ca2309fa1a084889938b4 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 31 Mar 2026 11:00:20 +0200 Subject: [PATCH 42/68] refactor: simplify config refresh --- logprep/ng/runner.py | 70 ++++++++-------- logprep/ng/util/configuration.py | 136 ++----------------------------- 2 files changed, 45 insertions(+), 161 deletions(-) diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 42a38e7a1..975beaa18 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -14,7 +14,7 @@ from logprep.ng.manager import 
PipelineManager from logprep.ng.util.async_helpers import TerminateTaskGroup, restart_task_on_iter from logprep.ng.util.configuration import Configuration -from logprep.ng.util.defaults import DEFAULT_LOG_CONFIG +from logprep.ng.util.defaults import DEFAULT_LOG_CONFIG, MIN_CONFIG_REFRESH_INTERVAL logger = logging.getLogger("Runner") @@ -46,44 +46,48 @@ def __init__(self, configuration: Configuration) -> None: self._task_group = asyncio.TaskGroup() self._stop_event = asyncio.Event() - self._pipeline_manager: PipelineManager | None = None - async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None]: - self.config.schedule_config_refresh() - self._running_config_version = self.config.version refresh_interval = self.config.config_refresh_interval - try: - while True: - self.config.refresh() + if refresh_interval is None: + logger.debug("Config refresh has been disabled.") + return + + loop = asyncio.get_running_loop() + next_run = loop.time() + refresh_interval + + while True: + sleep_time = next_run - loop.time() + if sleep_time < 0: + sleep_time = 0.0 + + try: + await asyncio.sleep(sleep_time) + except asyncio.CancelledError: + logger.debug("Config refresh cancelled. Exiting...") + raise + try: + await self.config.reload() + except asyncio.CancelledError: + logger.debug("Config reload cancelled. 
Exiting...") + raise + except Exception: + logger.exception("scheduled config reload failed") + raise + else: if self.config.version != self._running_config_version: logger.info(f"Detected new config version: {self.config.version}") - self._running_config_version = self.config.version - refresh_interval = self.config.config_refresh_interval - yield self.config - if refresh_interval is not None: - try: - await asyncio.sleep( - # realistic bad case: starting to sleep just a moment before scheduled time - # unlikely worst case: starting to sleep even after scheduled time - # (if yield takes some time and interval is short) - # --> compensate bad case by giving an upper boundary to the deviation - refresh_interval - * MAX_CONFIG_REFRESH_INTERVAL_DEVIATION_PERCENT - ) - except asyncio.CancelledError: - logger.debug("Config refresh cancelled. Exiting...") - raise - else: - logger.debug("Config refresh has been disabled.") - break - except Exception: - raise + refresh_interval = self.config.config_refresh_interval + if refresh_interval is None: + logger.debug("Config refresh has been disabled.") + break + + next_run += refresh_interval async def run(self) -> None: """Run the runner and continuously process events until stopped.""" @@ -94,13 +98,13 @@ async def run(self) -> None: tg.create_task(TerminateTaskGroup.raise_on_event(self._stop_event)) async def start_pipeline(config: Configuration) -> asyncio.Task: - self._pipeline_manager = PipelineManager( + pipeline_manager = PipelineManager( config, shutdown_timeout_s=GRACEFUL_SHUTDOWN_TIMEOUT ) - await self._pipeline_manager.setup() + await pipeline_manager.setup() return tg.create_task( - self._pipeline_manager.run(), + pipeline_manager.run(), name="pipeline_manager", ) @@ -128,8 +132,6 @@ async def start_pipeline(config: Configuration) -> asyncio.Task: logger.debug("Task group terminated") case _: raise - finally: - self.config.stop_config_refresh() logger.debug("End log processing.") diff --git 
a/logprep/ng/util/configuration.py b/logprep/ng/util/configuration.py index 9c8c6e7e1..d674dfc97 100644 --- a/logprep/ng/util/configuration.py +++ b/logprep/ng/util/configuration.py @@ -190,7 +190,6 @@ group.id: test" """ -import asyncio import json import logging import os @@ -206,7 +205,6 @@ from ruamel.yaml import YAML from ruamel.yaml.compat import StringIO from ruamel.yaml.scanner import ScannerError -from schedule import Scheduler from logprep.abc.getter import Getter from logprep.factory import Factory @@ -663,41 +661,14 @@ class Configuration: validator=validators.instance_of(tuple), factory=tuple, repr=False, eq=False ) - _scheduler: Scheduler = field( - factory=Scheduler, - validator=validators.instance_of(Scheduler), - repr=False, - eq=False, - init=False, - ) - _config_failure: bool = field(default=False, repr=False, eq=False, init=False) - _refresh_task: asyncio.Task | None = field( - default=None, - validator=validators.optional(validators.instance_of(asyncio.Task)), - repr=False, - eq=False, - init=False, - ) - - _reload_lock: asyncio.Lock = field( - factory=asyncio.Lock, - validator=validators.instance_of(asyncio.Lock), - repr=False, - eq=False, - init=False, - ) - _unserializable_fields = ( "_getter", "_configs", "_config_failure", - "_scheduler", "_metrics", "_unserializable_fields", - "_refresh_task", - "_reload_lock", ) @define(kw_only=True) @@ -894,8 +865,16 @@ async def reload(self) -> None: errors: List[Exception] = [] try: new_config = await Configuration.from_sources(self.config_paths) + refresh_interval = ( + MIN_CONFIG_REFRESH_INTERVAL + if self.config_refresh_interval is None + else max( + self.config_refresh_interval, + MIN_CONFIG_REFRESH_INTERVAL, + ) + ) if new_config.config_refresh_interval is None: - new_config.config_refresh_interval = self.config_refresh_interval + new_config.config_refresh_interval = refresh_interval self._configs = new_config._configs # pylint: disable=protected-access self._set_attributes_from_configs() 
self._set_version_info_metric() @@ -925,105 +904,8 @@ def _set_config_refresh_interval(self, config_refresh_interval: int | None) -> N return config_refresh_interval = max(config_refresh_interval, MIN_CONFIG_REFRESH_INTERVAL) self.config_refresh_interval = config_refresh_interval - self.schedule_config_refresh() self._metrics.config_refresh_interval += config_refresh_interval - def schedule_config_refresh(self) -> None: - """ - Schedules a periodic configuration refresh based on the specified interval. - - Cancels any existing scheduled configuration refresh job and schedules a new one - using the current :code:`config_refresh_interval`. - The refresh job will call the :code:`reload` method at the specified interval - in seconds on invoking the :code:`refresh` method. - - Notes - ----- - - Only one configuration refresh job is scheduled at a time - - Any existing job is cancelled before scheduling a new one. - - The interval must be an integer representing seconds. - - Examples - -------- - >>> self.schedule_config_refresh() - Config refresh interval is set to: 60 seconds - """ - scheduler = self._scheduler - if self.config_refresh_interval is None: - if scheduler.jobs: - scheduler.cancel_job(scheduler.jobs[0]) - - if self._refresh_task is not None: - self._refresh_task.cancel() - - self._refresh_task = None - - return - - self.config_refresh_interval = max( - self.config_refresh_interval, MIN_CONFIG_REFRESH_INTERVAL - ) - refresh_interval = self.config_refresh_interval - if scheduler.jobs: - scheduler.cancel_job(scheduler.jobs[0]) - - if self._refresh_task is not None: - self._refresh_task.cancel() - - self._refresh_task = None - - if isinstance(refresh_interval, int): - - async def _reload_wrapper() -> None: - current_task = asyncio.current_task() - - if self._reload_lock.locked(): - logger.warning( - "config reload already running; skipping scheduled config reload run", - ) - return - - async with self._reload_lock: - try: - await self.reload() - except 
asyncio.CancelledError: - logger.info("scheduled config reload task cancelled") - raise - except Exception: - logger.exception("scheduled config reload failed") - raise - finally: - if self._refresh_task is current_task: - self._refresh_task = None - - def _schedule_reload() -> None: - old_task = self._refresh_task - - if old_task is not None: - old_task.cancel() - - self._refresh_task = asyncio.create_task(_reload_wrapper()) - - scheduler.every(refresh_interval).seconds.do(_schedule_reload) - logger.info(f"Config refresh interval is set to: {refresh_interval} seconds") - - def refresh(self) -> None: - """Wrap the scheduler run_pending method hide the implementation details.""" - self._scheduler.run_pending() - - def stop_config_refresh(self) -> None: - """Stop scheduled config refresh.""" - - self._scheduler.clear() - - task = self._refresh_task - self._refresh_task = None - - if task is not None: - task.cancel() - - logger.debug("Config refresh task cancelled") - def _set_attributes_from_configs(self) -> None: for attribute in filter(lambda x: x.repr, fields(self.__class__)): setattr( From d8422c15cfbf45fa1b2b9c1e7017edb75f37c858 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 31 Mar 2026 11:05:30 +0200 Subject: [PATCH 43/68] refactor: annotation --- logprep/ng/util/async_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/logprep/ng/util/async_helpers.py b/logprep/ng/util/async_helpers.py index 972d723b8..842fd2f1c 100644 --- a/logprep/ng/util/async_helpers.py +++ b/logprep/ng/util/async_helpers.py @@ -115,7 +115,7 @@ async def restart_task_on_iter( def asyncio_exception_handler( - loop: asyncio.AbstractEventLoop, # pylint: disable=unused-argument + _: asyncio.AbstractEventLoop, context: dict, logger: Logger, ) -> None: @@ -126,7 +126,7 @@ def asyncio_exception_handler( Does not handle exceptions from awaited coroutines (e.g. runner.run()). Args: - loop: The current event loop. + _: The current event loop. Currently not used. 
context: Asyncio error context (may contain message, exception, task/future). logger: Logger used to record the error. """ From 99ef937e8a31f60f61a25284405373ce53ea4e09 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 31 Mar 2026 11:07:35 +0200 Subject: [PATCH 44/68] refactor: improve exception handler --- logprep/ng/util/async_helpers.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/logprep/ng/util/async_helpers.py b/logprep/ng/util/async_helpers.py index 842fd2f1c..016d58517 100644 --- a/logprep/ng/util/async_helpers.py +++ b/logprep/ng/util/async_helpers.py @@ -124,23 +124,21 @@ def asyncio_exception_handler( Covers exceptions from background tasks, callbacks, and loop internals. Does not handle exceptions from awaited coroutines (e.g. runner.run()). - - Args: - _: The current event loop. Currently not used. - context: Asyncio error context (may contain message, exception, task/future). - logger: Logger used to record the error. """ msg = context.get("message", "Unhandled exception in event loop") exception = context.get("exception") task = context.get("task") or context.get("future") - logger.error(f"[asyncio] {msg}") + logger.error(f"{msg}") if task: - logger.error(f"[asyncio] Task: {task!r}") + logger.error(f"Task: {task!r}") + + if isinstance(task, asyncio.Task): + logger.error(f"Task name: {task.get_name()}") if exception: - logger.error("[asyncio] Exception:", exc_info=exception) + logger.error(f"Unhandled exception: {exception!r}", exc_info=exception) else: - logger.error("[asyncio] Context: %s", context) + logger.error(f"Context: {context!r}") From 74a1b4358916cf28d5aee7414060f70ea24aa136 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 06:42:37 +0200 Subject: [PATCH 45/68] fix: correct kafka delivery semantics and unify async shutdown lifecycle across components - unify component lifecycle by introducing async setup/shut_down across NG components - remove legacy _shut_down pattern and simplify base 
Component shutdown logic - align Connector/Input/Output/Processor lifecycle interfaces - fix kafka output delivery semantics by setting DELIVERED only via on_delivery callback - improve kafka error handling (BufferError retry, KafkaException -> FAILED) - ensure proper resource cleanup (consumer unsubscribe/close, producer flush, opensearch context close) - improve worker shutdown by cancelling only unfinished tasks # Conflicts: # logprep/ng/connector/opensearch/output.py --- logprep/abc/component.py | 9 +-- logprep/abc/connector.py | 10 +++ logprep/ng/abc/component.py | 8 +- logprep/ng/abc/input.py | 10 +++ logprep/ng/abc/output.py | 14 +++- logprep/ng/abc/processor.py | 7 ++ logprep/ng/connector/confluent_kafka/input.py | 26 +++--- .../ng/connector/confluent_kafka/output.py | 79 +++++++++++++------ logprep/ng/connector/file/input.py | 4 +- logprep/ng/connector/http/input.py | 4 +- logprep/ng/manager.py | 7 +- logprep/ng/util/worker/worker.py | 10 ++- 12 files changed, 131 insertions(+), 57 deletions(-) diff --git a/logprep/abc/component.py b/logprep/abc/component.py index 6fa96c84a..4de66a42b 100644 --- a/logprep/abc/component.py +++ b/logprep/abc/component.py @@ -142,11 +142,7 @@ def _clear_properties(self) -> None: if hasattr(self, "__dict__"): self.__dict__.clear() - def _shut_down(self) -> None: - self._clear_scheduled_jobs() - self._clear_properties() - - async def shut_down(self): + def shut_down(self): """Stop processing of this component. Optional: Called when stopping the pipeline @@ -154,7 +150,8 @@ async def shut_down(self): """ if not self._is_shut_down: self._is_shut_down = True - self._shut_down() + self._clear_scheduled_jobs() + self._clear_properties() def health(self) -> bool: """Check the health of the component. 
diff --git a/logprep/abc/connector.py b/logprep/abc/connector.py index 7749687a3..839ac61cf 100644 --- a/logprep/abc/connector.py +++ b/logprep/abc/connector.py @@ -48,3 +48,13 @@ class Metrics(NgComponent.Metrics): ) ) """Number of errors that occurred while processing events""" + + async def setup(self) -> None: + """Set up the connector.""" + + await super().setup() + + async def shut_down(self) -> None: + """Shutdown the connector and cleanup resources.""" + + await super().shut_down() diff --git a/logprep/ng/abc/component.py b/logprep/ng/abc/component.py index 551a7a391..0bb618e56 100644 --- a/logprep/ng/abc/component.py +++ b/logprep/ng/abc/component.py @@ -16,9 +16,13 @@ class NgComponent(Component): # This is unclean from an interface perspective, but works if the worlds doen't mix. async def setup(self) -> None: - return super().setup() + """Set up the ng component.""" + + super().setup() async def shut_down(self) -> None: - return super().shut_down() + """Shut down ng component and cleanup resources.""" + + super().shut_down() # pylint: enable=invalid-overridden-method,useless-parent-delegation diff --git a/logprep/ng/abc/input.py b/logprep/ng/abc/input.py index 547759467..9f908e0d0 100644 --- a/logprep/ng/abc/input.py +++ b/logprep/ng/abc/input.py @@ -504,3 +504,13 @@ def _add_hmac_to( } add_fields_to(event_dict, new_field) return event_dict + + async def setup(self) -> None: + """Set up the input connector.""" + + await super().setup() + + async def shut_down(self) -> None: + """Shut down input components and cleanup resources.""" + + await super().shut_down() diff --git a/logprep/ng/abc/output.py b/logprep/ng/abc/output.py index 979945560..ab4f71050 100644 --- a/logprep/ng/abc/output.py +++ b/logprep/ng/abc/output.py @@ -144,7 +144,13 @@ def wrapper(self, *args, **kwargs): return wrapper - def _shut_down(self) -> None: - """Shut down the output connector.""" - self.flush() - return super()._shut_down() + async def setup(self) -> None: + """Set up the 
output connector.""" + + await super().setup() + + async def shut_down(self) -> None: + """Shut down the output connector and cleanup resources.""" + + await self.flush() + await super().shut_down() diff --git a/logprep/ng/abc/processor.py b/logprep/ng/abc/processor.py index cb730960b..d22602ef9 100644 --- a/logprep/ng/abc/processor.py +++ b/logprep/ng/abc/processor.py @@ -256,6 +256,13 @@ def _write_target_field(self, event: dict, rule: "Rule", result: Any) -> None: ) async def setup(self) -> None: + """Set up the processor.""" + await super().setup() for rule in self.rules: _ = rule.metrics # initialize metrics to show them on startup + + async def shut_down(self) -> None: + """Shut down the processor and run required cleanups""" + + await super().shut_down() diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index 5765c0d4f..f605d5108 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -346,7 +346,7 @@ async def get_consumer(self, max_workers: int = 4) -> AIOConsumer: return self._consumer - def _error_callback(self, error: KafkaException) -> None: + async def _error_callback(self, error: KafkaException) -> None: """Callback for generic/global error events, these errors are typically to be considered informational since the client will automatically try to recover. This callback is served upon calling client.poll() @@ -359,7 +359,7 @@ def _error_callback(self, error: KafkaException) -> None: self.metrics.number_of_errors += 1 logger.error("%s: %s", self.describe(), error) - def _stats_callback(self, stats_raw: str) -> None: + async def _stats_callback(self, stats_raw: str) -> None: """Callback for statistics data. 
This callback is triggered by poll() or flush every `statistics.interval.ms` (needs to be configured separately) @@ -393,7 +393,7 @@ def _stats_callback(self, stats_raw: str) -> None: "assignment_size", DEFAULT_RETURN ) - def _commit_callback( + async def _commit_callback( self, error: KafkaException | None, topic_partitions: list[TopicPartition], @@ -605,12 +605,6 @@ async def _get_memberid(self) -> str | None: logger.error("Failed to retrieve member ID: %s", error) return member_id - async def shut_down(self) -> None: - """Close consumer, which also commits kafka offsets.""" - consumer = await self.get_consumer() - await consumer.close() - super()._shut_down() - def health(self) -> bool: """Check the health of the component. @@ -635,7 +629,8 @@ async def acknowledge(self, events: list[LogEvent]): logger.debug("acknowledge called") async def setup(self): - """Set the component up.""" + """Set the confluent kafka input connector.""" + await super().setup() try: @@ -649,3 +644,14 @@ async def setup(self): ) except KafkaException as error: raise FatalInputError(self, f"Could not setup kafka consumer: {error}") from error + + async def shut_down(self) -> None: + """Shut down the confluent kafka input connector and cleanup resources.""" + + consumer = await self.get_consumer() + + if consumer is not None: + await consumer.unsubscribe() + await consumer.close() + + await super().shut_down() diff --git a/logprep/ng/connector/confluent_kafka/output.py b/logprep/ng/connector/confluent_kafka/output.py index e4356f08f..3ce1a75e8 100644 --- a/logprep/ng/connector/confluent_kafka/output.py +++ b/logprep/ng/connector/confluent_kafka/output.py @@ -285,14 +285,12 @@ def describe(self) -> str: async def store_batch( self, events: Sequence[Event], target: str | None = None - ) -> tuple[Sequence[Event], Sequence[Event]]: + ) -> Sequence[Event]: store_target = target if target is not None else self.config.topic for event in events: await self.store_custom(event, store_target) - 
return ( - [e for e in events if e.state == EventStateType.DELIVERED], - [e for e in events if e.state == EventStateType.FAILED], - ) + + return events async def store(self, event: Event) -> None: """Store a document in the producer topic. @@ -326,14 +324,52 @@ async def store_custom(self, event: Event, target: str) -> None: value=self._encoder.encode(document), on_delivery=partial(self.on_delivery, event), ) - logger.debug("Produced message %s to topic %s", str(document), target) - self._producer.poll(self.config.send_timeout) - self._producer.flush() + except BufferError: - # block program until buffer is empty or timeout is reached self._producer.flush(timeout=self.config.flush_timeout) logger.debug("Buffer full, flushing") + try: + self._producer.produce( + topic=target, + value=self._encoder.encode(document), + on_delivery=partial(self.on_delivery, event), + ) + except BufferError as err: + event.state.current_state = EventStateType.FAILED + event.errors.append(err) + logger.error("Message delivery failed after retry: %s", err) + self.metrics.number_of_errors += 1 + return + + except KafkaException as err: + event.state.current_state = EventStateType.FAILED + event.errors.append(err) + logger.error("Kafka exception during produce: %s", err) + self.metrics.number_of_errors += 1 + return + + logger.debug("Produced message %s to topic %s", str(document), target) + self._producer.poll(self.config.send_timeout) + + def on_delivery(self, event: Event, err: KafkaException, msg: Message) -> None: + """Callback for delivery reports.""" + + if err is not None: + event.state.current_state = EventStateType.FAILED + event.errors.append(err) + logger.error("Message delivery failed: %s", err) + self.metrics.number_of_errors += 1 + return + + event.state.current_state = EventStateType.DELIVERED + logger.debug( + "Message delivered to '%s' partition %s, offset %s", + msg.topic(), + msg.partition(), + msg.offset(), + ) + async def flush(self) -> None: """ensures that all 
messages are flushed. According to https://confluent-kafka-python.readthedocs.io/en/latest/#confluent_kafka.Producer.flush @@ -364,24 +400,17 @@ def health(self) -> bool: return super().health() async def setup(self) -> None: - """Set the component up.""" + """Set the confluent kafka output connector.""" + try: await super().setup() except KafkaException as error: raise FatalOutputError(self, f"Could not setup kafka producer: {error}") from error - def on_delivery(self, event: Event, err: KafkaException, msg: Message) -> None: - """Callback for delivery reports.""" - if err is not None: - event.state.current_state = EventStateType.FAILED - event.errors.append(err) - logger.error("Message delivery failed: %s", err) - self.metrics.number_of_errors += 1 - return - event.state.current_state = EventStateType.DELIVERED - logger.debug( - "Message delivered to '%s' partition %s, offset %s", - msg.topic(), - msg.partition(), - msg.offset(), - ) + async def shut_down(self) -> None: + """Shut down the confluent kafka output connector and cleanup resources.""" + + if "_producer" in self.__dict__: + await self.flush() + + await super().shut_down() diff --git a/logprep/ng/connector/file/input.py b/logprep/ng/connector/file/input.py index e13269c5b..715274745 100644 --- a/logprep/ng/connector/file/input.py +++ b/logprep/ng/connector/file/input.py @@ -191,7 +191,7 @@ async def setup(self) -> None: file_name=self.config.logfile_path, ) - def _shut_down(self) -> None: + async def shut_down(self) -> None: """Raises the Stop Event Flag that will stop the thread that monitors the logfile""" self.stop_flag.set() - return super()._shut_down() + await super().shut_down() diff --git a/logprep/ng/connector/http/input.py b/logprep/ng/connector/http/input.py index f206dd684..7e09bea14 100644 --- a/logprep/ng/connector/http/input.py +++ b/logprep/ng/connector/http/input.py @@ -356,11 +356,11 @@ async def _get_event(self, timeout: float) -> tuple: except queue.Empty: return None, None, None - 
def _shut_down(self): + async def shut_down(self): """Raises Uvicorn HTTP Server internal stop flag and waits to join""" if self.http_server: self.http_server.shut_down() - return super()._shut_down() + await super().shut_down() @cached_property def health_endpoints(self) -> list[str]: diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 934f17ab3..ad697823a 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -197,14 +197,14 @@ async def run(self) -> None: await self._orchestrator.run() except CancelledError: logger.debug("PipelineManager.run cancelled. Shutting down.") - await self._shut_down() + await self.shut_down() raise except Exception: logger.exception("PipelineManager.run failed. Shutting down.") - await self._shut_down() + await self.shut_down() raise - async def _shut_down(self) -> None: + async def shut_down(self) -> None: """Shut down runner components, and required runner attributes.""" logger.debug( @@ -222,6 +222,7 @@ async def _shut_down(self) -> None: if self._sender is not None: await self._sender.shut_down() # self._input_connector.acknowledge() + await self._input_connector.shut_down() len_delivered_events = len(list(self._event_backlog.get(EventStateType.DELIVERED))) if len_delivered_events: diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index d0ad36b1b..a4e7e73bd 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -292,7 +292,7 @@ async def shut_down(self, timeout_s: float) -> None: current_task = asyncio.current_task() tasks_but_current = [t for t in self._worker_tasks if t is not current_task] - logger.debug("waiting for termination of %d tasks", len(tasks_but_current)) + logger.debug(f"waiting for termination of {len(tasks_but_current)} tasks") try: await asyncio.wait_for( @@ -300,11 +300,15 @@ async def shut_down(self, timeout_s: float) -> None: ) except TimeoutError: unfinished_workers = [w for w in tasks_but_current if not w.done()] - if 
len(unfinished_workers) > 0: + if unfinished_workers: logger.debug( "[%d/%d] did not stop gracefully. Cancelling: [%s]", len(unfinished_workers), len(tasks_but_current), ", ".join(map(asyncio.Task.get_name, unfinished_workers)), ) - await asyncio.gather(*tasks_but_current, return_exceptions=True) + + for worker in unfinished_workers: + worker.cancel() + + await asyncio.gather(*unfinished_workers, return_exceptions=True) From 47ad52529121845e4e92f8d051c0a34c6270c123 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 31 Mar 2026 14:20:48 +0200 Subject: [PATCH 46/68] fix: prevent race condition between SIGINT handler and benchmark flow - remove docker compose teardown from SIGINT handler to avoid interfering with active OpenSearch requests - introduce coordinated shutdown via _shutdown_requested flag - add shutdown checkpoints to abort benchmark flow safely - ensure compose teardown happens only in controlled finally blocks - fix intermittent 503 errors during OpenSearch _count caused by concurrent shutdown --- benchmark.py | 115 ++++++++++++++++++++++++++-------------------- run_benchmarks.py | 11 +++-- 2 files changed, 73 insertions(+), 53 deletions(-) diff --git a/benchmark.py b/benchmark.py index 92b3118bd..546a0eab4 100644 --- a/benchmark.py +++ b/benchmark.py @@ -38,12 +38,17 @@ def _handle_sigint(signum, frame): """ - Handle Ctrl+C (SIGINT) and perform graceful shutdown. + Handle Ctrl+C (SIGINT) and request graceful shutdown. + + Avoid tearing down compose services directly from the signal handler, + because the main benchmark flow may still be interacting with them. + Cleanup is handled by the normal control flow / finally blocks. """ + del signum, frame # unused global _shutdown_requested _shutdown_requested = True - print("\n\n⚠ Ctrl+C detected. Shutting down benchmark...") + print("\n\n⚠ Ctrl+C detected. 
Stopping benchmark gracefully...") if _current_logprep_proc is not None: try: @@ -51,18 +56,13 @@ def _handle_sigint(signum, frame): except Exception: pass - if _current_compose_dir is not None and _current_env is not None: - try: - run_cmd( - ["docker", "compose", "down"], - cwd=_current_compose_dir, - env=_current_env, - ignore_error=True, - ) - except Exception: - pass - sys.exit(130) +def raise_if_shutdown_requested() -> None: + """ + Abort current benchmark flow if a shutdown was requested. + """ + if _shutdown_requested: + raise KeyboardInterrupt("Benchmark shutdown requested") # ------------------------- @@ -134,14 +134,10 @@ def print_benchmark_config(args: argparse.Namespace) -> None: for key in sorted(args_dict): value = args_dict[key] - # Format integers with underscore separator if isinstance(value, int): formatted = f"{value:_}" - - # Format list of integers (e.g. runs) elif isinstance(value, list) and all(isinstance(v, int) for v in value): formatted = "[" + ", ".join(f"{v:_}" for v in value) + "]" - else: formatted = value @@ -385,6 +381,7 @@ def wait_for_tcp(host: str, port: int, *, timeout_s: float, interval_s: float = last_err: OSError | None = None while time.time() < deadline: + raise_if_shutdown_requested() try: with socket.create_connection((host, port), timeout=2): return @@ -408,6 +405,7 @@ def wait_for_opensearch(opensearch_url: str, *, timeout_s: float, interval_s: fl last_err: Exception | None = None while time.time() < deadline: + raise_if_shutdown_requested() try: resp = requests.get(f"{opensearch_url}/_cluster/health", timeout=2) if resp.status_code == 200: @@ -436,6 +434,7 @@ def wait_for_kafka_topic( last_err: Exception | None = None while time.time() < deadline: + raise_if_shutdown_requested() try: proc = subprocess.run( [ @@ -569,11 +568,13 @@ def benchmark_run( _current_env = env try: + raise_if_shutdown_requested() ensure_vm_max_map_count() run_cmd(["docker", "compose", "down"], cwd=compose_dir, env=env) run_cmd(["docker", 
"volume", "rm", "compose_opensearch-data"], env=env, ignore_error=True) + raise_if_shutdown_requested() run_cmd( ["docker", "compose", "up", "-d", "--no-deps", *services], cwd=compose_dir, @@ -592,6 +593,8 @@ def benchmark_run( wait_for_tcp("127.0.0.1", 9200, timeout_s=float(sleep_after_compose_up_s)) wait_for_opensearch(opensearch_url, timeout_s=float(sleep_after_compose_up_s)) + raise_if_shutdown_requested() + batch_size = max(event_num // 10, 10) output_config = f'{{"bootstrap.servers": "{bootstrap_servers}"}}' @@ -612,7 +615,9 @@ def benchmark_run( env=env, ) + raise_if_shutdown_requested() time.sleep(sleep_after_generate_s) + raise_if_shutdown_requested() binary = "logprep-ng" if ng == 1 else "logprep" @@ -621,15 +626,18 @@ def benchmark_run( _current_logprep_proc = logprep_proc time.sleep(sleep_after_logprep_start_s) + raise_if_shutdown_requested() print("\n=== OpenSearch snapshot (before measurement) ===") opensearch_debug_snapshot(opensearch_url) + raise_if_shutdown_requested() baseline = opensearch_count_processed(opensearch_url, processed_index) startup_s = time.time() - t_startup t_run = time.time() time.sleep(run_seconds) + raise_if_shutdown_requested() kill_hard(logprep_proc) @@ -637,12 +645,13 @@ def benchmark_run( logprep_proc = None _current_logprep_proc = None - # ensure near-real-time writes are visible to _count before measuring + raise_if_shutdown_requested() opensearch_refresh(opensearch_url, processed_index) print("\n=== OpenSearch snapshot (after run / after refresh) ===") opensearch_debug_snapshot(opensearch_url) + raise_if_shutdown_requested() after = opensearch_count_processed(opensearch_url, processed_index) processed = max(0, after - baseline) @@ -838,36 +847,42 @@ def setup_output_tee(out_path: Path | None) -> None: if __name__ == "__main__": signal.signal(signal.SIGINT, _handle_sigint) - args_ = parse_args() - setup_output_tee(args_.out) - - print_benchmark_config(args_) - - pipeline_config_ = resolve_pipeline_config(args_.ng) - - 
results: list[RunResult] = [] - - benchmark_seconds = args_.runs - for run_idx, seconds in enumerate(benchmark_seconds, start=1): - print(f"----- Run Round {run_idx}: {seconds} seconds -----") - result = benchmark_run( - run_seconds=seconds, - ng=args_.ng, - event_num=args_.event_num, - prometheus_multiproc_dir=args_.prometheus_multiproc_dir, - compose_dir=args_.compose_dir, - pipeline_config=pipeline_config_, - gen_input_dir=args_.gen_input_dir, - bootstrap_servers=args_.bootstrap_servers, - sleep_after_compose_up_s=args_.sleep_after_compose_up_s, - sleep_after_generate_s=args_.sleep_after_generate_s, - sleep_after_logprep_start_s=args_.sleep_after_logprep_start_s, - opensearch_url=args_.opensearch_url, - processed_index=args_.processed_index, - services=args_.services, - ) - results.append(result) - print_single_run_result(result, event_num=args_.event_num) - print() + try: + args_ = parse_args() + setup_output_tee(args_.out) + + print_benchmark_config(args_) + + pipeline_config_ = resolve_pipeline_config(args_.ng) + + results: list[RunResult] = [] + + benchmark_seconds = args_.runs + for run_idx, seconds in enumerate(benchmark_seconds, start=1): + raise_if_shutdown_requested() + print(f"----- Run Round {run_idx}: {seconds} seconds -----") + result = benchmark_run( + run_seconds=seconds, + ng=args_.ng, + event_num=args_.event_num, + prometheus_multiproc_dir=args_.prometheus_multiproc_dir, + compose_dir=args_.compose_dir, + pipeline_config=pipeline_config_, + gen_input_dir=args_.gen_input_dir, + bootstrap_servers=args_.bootstrap_servers, + sleep_after_compose_up_s=args_.sleep_after_compose_up_s, + sleep_after_generate_s=args_.sleep_after_generate_s, + sleep_after_logprep_start_s=args_.sleep_after_logprep_start_s, + opensearch_url=args_.opensearch_url, + processed_index=args_.processed_index, + services=args_.services, + ) + results.append(result) + print_single_run_result(result, event_num=args_.event_num) + print() + + print_runs_table_and_summary(results) - 
print_runs_table_and_summary(results) + except KeyboardInterrupt: + print("\nBenchmark aborted.") + sys.exit(130) diff --git a/run_benchmarks.py b/run_benchmarks.py index 7a57df5cb..2f5f776f1 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -5,10 +5,10 @@ from datetime import datetime from pathlib import Path -PYTHON_VERSIONS = ["3.11"] # , "3.12", "3.13", "3.14"] +PYTHON_VERSIONS = ["3.11", "3.12", "3.13", "3.14"] MODES = [ - ("nonNG", "0"), ("asyncNG", "1"), + ("nonNG", "0"), ] @@ -32,9 +32,14 @@ def run_benchmarks() -> None: py, "benchmark.py", "--event-num", - "120000", + "250000", "--runs", "30", + "30", + "45", + "45", + "60", + "60", "--ng", ng_flag, "--out", From 47c22c85b70ed7708df49fe604b50bc54aa47970 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 31 Mar 2026 14:29:12 +0200 Subject: [PATCH 47/68] refactor: simplify worker shutdown after timeout - remove docker compose teardown from SIGINT handler to avoid interfering with active OpenSearch requests - introduce coordinated shutdown via _shutdown_requested flag - add shutdown checkpoints to abort benchmark flow safely - ensure compose teardown happens only in controlled finally blocks - fix intermittent 503 errors during OpenSearch _count caused by concurrent shutdown --- logprep/ng/util/worker/worker.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index a4e7e73bd..687be98af 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -302,13 +302,9 @@ async def shut_down(self, timeout_s: float) -> None: unfinished_workers = [w for w in tasks_but_current if not w.done()] if unfinished_workers: logger.debug( - "[%d/%d] did not stop gracefully. Cancelling: [%s]", + "[%d/%d] did not stop gracefully. 
Awaiting cancellation: [%s]", len(unfinished_workers), len(tasks_but_current), ", ".join(map(asyncio.Task.get_name, unfinished_workers)), ) - - for worker in unfinished_workers: - worker.cancel() - await asyncio.gather(*unfinished_workers, return_exceptions=True) From b61b83fda5d2453fe713801304cab87228ba800a Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 31 Mar 2026 16:37:37 +0200 Subject: [PATCH 48/68] fix: clean up exporter port before and after logprep runs --- benchmark.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++ run_benchmarks.py | 2 - 2 files changed, 128 insertions(+), 2 deletions(-) diff --git a/benchmark.py b/benchmark.py index 546a0eab4..a5fdce66f 100644 --- a/benchmark.py +++ b/benchmark.py @@ -35,6 +35,9 @@ _current_compose_dir: Path | None = None _current_env: dict[str, str] | None = None +# Exporter / metrics port used by logprep +EXPORTER_PORT = 8001 + def _handle_sigint(signum, frame): """ @@ -149,6 +152,7 @@ def print_benchmark_config(args: argparse.Namespace) -> None: print(f"{' ↳ mode':30s}: {mode}") print(f"{' ↳ pipeline_config':30s}: {pipeline_config}") + print(f"{'exporter_port':30s}: {EXPORTER_PORT}") print("================================\n") @@ -249,6 +253,118 @@ def kill_hard(proc: subprocess.Popen) -> None: proc.wait() +def is_tcp_port_open(host: str, port: int, timeout: float = 0.5) -> bool: + """ + Return True if a TCP connection to host:port can be established. + + Args: + host: Hostname or IP address. + port: TCP port. + timeout: Socket timeout in seconds. + + Returns: + True if the TCP port accepts connections, otherwise False. + """ + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.settimeout(timeout) + return sock.connect_ex((host, port)) == 0 + + +def find_pids_listening_on_port(port: int) -> list[int]: + """ + Return a list of PIDs listening on the given TCP port. + + Uses lsof and returns an empty list if no processes are found. + + Args: + port: TCP port. + + Returns: + List of PIDs. 
+ """ + result = subprocess.run( + ["lsof", "-ti", f":{port}"], + capture_output=True, + text=True, + check=False, + ) + + if result.returncode not in (0, 1): + raise RuntimeError( + f"Failed to query listener processes on port {port}: {result.stderr.strip()}" + ) + + pids: list[int] = [] + for line in result.stdout.splitlines(): + line = line.strip() + if line.isdigit(): + pids.append(int(line)) + + return pids + + +def kill_processes_listening_on_port( + port: int, + *, + sigterm_wait_s: float = 1.0, + final_wait_s: float = 2.0, +) -> None: + """ + Kill processes listening on the given TCP port. + + First sends SIGTERM, then SIGKILL if the port is still occupied. + + Args: + port: TCP port to free. + sigterm_wait_s: Time to wait after SIGTERM. + final_wait_s: Time to wait after SIGKILL. + + Raises: + RuntimeError: If the port is still in use after cleanup. + """ + pids = find_pids_listening_on_port(port) + if not pids: + return + + print( + f"Port {port} is already in use. Terminating listener processes: {', '.join(map(str, pids))}" + ) + + for pid in pids: + try: + os.kill(pid, signal.SIGTERM) + except ProcessLookupError: + pass + + deadline = time.time() + sigterm_wait_s + while time.time() < deadline: + if not is_tcp_port_open("127.0.0.1", port): + return + time.sleep(0.1) + + remaining_pids = find_pids_listening_on_port(port) + if remaining_pids: + print( + f"Port {port} still in use after SIGTERM. 
Sending SIGKILL to: " + f"{', '.join(map(str, remaining_pids))}" + ) + + for pid in remaining_pids: + try: + os.kill(pid, signal.SIGKILL) + except ProcessLookupError: + pass + + deadline = time.time() + final_wait_s + while time.time() < deadline: + if not is_tcp_port_open("127.0.0.1", port): + return + time.sleep(0.1) + + if is_tcp_port_open("127.0.0.1", port): + raise RuntimeError(f"Port {port} is still in use after cleanup.") + + def opensearch_refresh(opensearch_url: str, processed_index: str) -> None: """ Force a refresh of the processed index so counts reflect recent writes. @@ -621,6 +737,9 @@ def benchmark_run( binary = "logprep-ng" if ng == 1 else "logprep" + # Ensure exporter port is free before starting logprep. + kill_processes_listening_on_port(EXPORTER_PORT) + t_startup = time.time() logprep_proc = popen_cmd([binary, "run", str(pipeline_config)], env=env) _current_logprep_proc = logprep_proc @@ -641,6 +760,9 @@ def benchmark_run( kill_hard(logprep_proc) + # Ensure exporter port is released after forceful process termination. 
+ kill_processes_listening_on_port(EXPORTER_PORT) + window_s = time.time() - t_run logprep_proc = None _current_logprep_proc = None @@ -662,6 +784,12 @@ def benchmark_run( finally: if logprep_proc is not None: kill_hard(logprep_proc) + + try: + kill_processes_listening_on_port(EXPORTER_PORT) + except Exception as exc: + print(f"Warning: failed to clean up exporter port {EXPORTER_PORT}: {exc}") + _current_logprep_proc = None run_cmd(["docker", "compose", "down"], cwd=compose_dir, env=env, ignore_error=True) diff --git a/run_benchmarks.py b/run_benchmarks.py index 2f5f776f1..8636e052d 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -38,8 +38,6 @@ def run_benchmarks() -> None: "30", "45", "45", - "60", - "60", "--ng", ng_flag, "--out", From b97d92446fe79018072ae333c7a532ef2aa7343d Mon Sep 17 00:00:00 2001 From: David Kaya Date: Thu, 2 Apr 2026 06:21:15 +0200 Subject: [PATCH 49/68] refactor: restore _shut_down hook to preserve idempotent and extensible shutdown semantics --- logprep/abc/component.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/logprep/abc/component.py b/logprep/abc/component.py index 4de66a42b..a80910464 100644 --- a/logprep/abc/component.py +++ b/logprep/abc/component.py @@ -142,6 +142,10 @@ def _clear_properties(self) -> None: if hasattr(self, "__dict__"): self.__dict__.clear() + def _shut_down(self) -> None: + self._clear_scheduled_jobs() + self._clear_properties() + def shut_down(self): """Stop processing of this component. @@ -150,8 +154,7 @@ def shut_down(self): """ if not self._is_shut_down: self._is_shut_down = True - self._clear_scheduled_jobs() - self._clear_properties() + self._shut_down() def health(self) -> bool: """Check the health of the component. 
From 99cd7ecace0fe3d1e6e2b893c85dc08671226d5c Mon Sep 17 00:00:00 2001 From: David Kaya Date: Thu, 2 Apr 2026 06:55:35 +0200 Subject: [PATCH 50/68] refactor: review issues --- logprep/ng/connector/confluent_kafka/input.py | 1 - pyproject.toml | 11 +---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index f605d5108..c5dd8005e 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -501,7 +501,6 @@ async def _get_event(self, timeout: float) -> tuple: """ message = await self._get_raw_event(timeout) - # assert None not in (message.value(), message.partition(), message.offset()) if message is None: return None, None, None diff --git a/pyproject.toml b/pyproject.toml index 97cbd5d81..e353dff0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -172,16 +172,7 @@ max-line-length=100 no-docstring-rgx="^test_|^.*TestCase|^_|^Test" [tool.pylint."MESAGES CONTROL"] -disable = [ - "too-few-public-methods", - "unsupported-membership-test", - "too-many-positional-arguments", - "too-many-arguments", - "too-many-branches", - "too-many-instance-attributes", - "too-many-lines", - "line-too-long", -] +disable="too-few-public-methods,unsupported-membership-test" [tool.pylint.DESIGN] min-public-methods=1 From 60621601bb48cc028c7dd6357035107d562f3a7b Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 08:46:57 +0200 Subject: [PATCH 51/68] refactor: remove unsubscribe call, as close() already handles cleanup (unsubscribe only needed for dynamic topic switching during runtime) --- logprep/ng/connector/confluent_kafka/input.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index c5dd8005e..75d330d11 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ 
b/logprep/ng/connector/confluent_kafka/input.py @@ -647,10 +647,7 @@ async def setup(self): async def shut_down(self) -> None: """Shut down the confluent kafka input connector and cleanup resources.""" - consumer = await self.get_consumer() - - if consumer is not None: - await consumer.unsubscribe() - await consumer.close() + if self._consumer is not None: + await self._consumer.close() await super().shut_down() From 6d8bb8100ac203b4af8f21ac5eef0964048d0ae1 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 11:13:24 +0200 Subject: [PATCH 52/68] feat: migrate to async AIOProducer and replace on_delivery callbacks with awaitable delivery futures --- .../ng/connector/confluent_kafka/output.py | 39 ++++--------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/logprep/ng/connector/confluent_kafka/output.py b/logprep/ng/connector/confluent_kafka/output.py index 3ce1a75e8..914b7be9d 100644 --- a/logprep/ng/connector/confluent_kafka/output.py +++ b/logprep/ng/connector/confluent_kafka/output.py @@ -33,6 +33,7 @@ from attrs import define, field, validators from confluent_kafka import KafkaException, Message, Producer # type: ignore from confluent_kafka.admin import AdminClient +from confluent_kafka.aio import AIOProducer from logprep.metrics.metrics import GaugeMetric from logprep.ng.abc.event import Event @@ -235,8 +236,8 @@ def _admin(self) -> AdminClient: return AdminClient(admin_config) @cached_property - def _producer(self) -> Producer: - return Producer(self._kafka_config) + def _producer(self) -> AIOProducer: + return AIOProducer(self._kafka_config) def _error_callback(self, error: KafkaException) -> None: """Callback for generic/global error events, these errors are typically @@ -314,48 +315,22 @@ async def store_custom(self, event: Event, target: str) -> None: target : str Topic to store event data in. 
""" - event.state.current_state = EventStateType.STORING_IN_OUTPUT - document = event.data self.metrics.number_of_processed_events += 1 + try: - self._producer.produce( + delivery_future = await self._producer.produce( topic=target, value=self._encoder.encode(document), - on_delivery=partial(self.on_delivery, event), ) - - except BufferError: - self._producer.flush(timeout=self.config.flush_timeout) - logger.debug("Buffer full, flushing") - - try: - self._producer.produce( - topic=target, - value=self._encoder.encode(document), - on_delivery=partial(self.on_delivery, event), - ) - except BufferError as err: - event.state.current_state = EventStateType.FAILED - event.errors.append(err) - logger.error("Message delivery failed after retry: %s", err) - self.metrics.number_of_errors += 1 - return - + msg = await delivery_future except KafkaException as err: event.state.current_state = EventStateType.FAILED event.errors.append(err) logger.error("Kafka exception during produce: %s", err) self.metrics.number_of_errors += 1 return - - logger.debug("Produced message %s to topic %s", str(document), target) - self._producer.poll(self.config.send_timeout) - - def on_delivery(self, event: Event, err: KafkaException, msg: Message) -> None: - """Callback for delivery reports.""" - - if err is not None: + except Exception as err: event.state.current_state = EventStateType.FAILED event.errors.append(err) logger.error("Message delivery failed: %s", err) From 64656bf95594a473e1328a4a58fbbb3e66e42f99 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 11:36:49 +0200 Subject: [PATCH 53/68] refactor: fix review issue --- logprep/ng/util/worker/worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 687be98af..8cbe75de7 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -292,7 +292,7 @@ async def shut_down(self, timeout_s: float) -> None: 
current_task = asyncio.current_task() tasks_but_current = [t for t in self._worker_tasks if t is not current_task] - logger.debug(f"waiting for termination of {len(tasks_but_current)} tasks") + logger.debug("waiting for termination of %d tasks", len(tasks_but_current)) try: await asyncio.wait_for( From b87ba3bdf1072268177443d76443775d97aac2dc Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 11:46:56 +0200 Subject: [PATCH 54/68] refactor: rename module to logging_helpers to avoid stdlib name clash and align with project naming conventions --- logprep/ng/util/defaults.py | 2 +- logprep/ng/util/{logprep_logging.py => logging_helpers.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename logprep/ng/util/{logprep_logging.py => logging_helpers.py} (100%) diff --git a/logprep/ng/util/defaults.py b/logprep/ng/util/defaults.py index a97bebfe9..62f9cd1aa 100644 --- a/logprep/ng/util/defaults.py +++ b/logprep/ng/util/defaults.py @@ -36,7 +36,7 @@ class EXITCODES(IntEnum): "version": 1, "formatters": { "logprep": { - "class": "logprep.ng.util.logprep_logging.LogprepFormatter", + "class": "logprep.ng.util.logging_helpers.LogprepFormatter", "format": DEFAULT_LOG_FORMAT, "datefmt": DEFAULT_LOG_DATE_FORMAT, } diff --git a/logprep/ng/util/logprep_logging.py b/logprep/ng/util/logging_helpers.py similarity index 100% rename from logprep/ng/util/logprep_logging.py rename to logprep/ng/util/logging_helpers.py From d0d2d72b4e3af1a983d10408d3836314a9df2e84 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 11:51:34 +0200 Subject: [PATCH 55/68] refactor: remove unused constant MAX_CONFIG_REFRESH_INTERVAL_DEVIATION_PERCENT --- logprep/ng/runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index 975beaa18..e4b6ddfe6 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -21,7 +21,6 @@ GRACEFUL_SHUTDOWN_TIMEOUT = 3 HARD_SHUTDOWN_TIMEOUT = 5 -MAX_CONFIG_REFRESH_INTERVAL_DEVIATION_PERCENT = 0.05 
class Runner: From db44fe8f29d5049241a9c396d9e69372e07f4203 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 11:56:32 +0200 Subject: [PATCH 56/68] refactor: fix review issue --- benchmark.py | 284 ++++++++++----------------------------------------- 1 file changed, 54 insertions(+), 230 deletions(-) diff --git a/benchmark.py b/benchmark.py index a5fdce66f..aa49b5805 100644 --- a/benchmark.py +++ b/benchmark.py @@ -1,4 +1,5 @@ # pylint: disable=C0103 + """ Benchmark runner for logprep (logprep-ng and non-ng). @@ -35,23 +36,15 @@ _current_compose_dir: Path | None = None _current_env: dict[str, str] | None = None -# Exporter / metrics port used by logprep -EXPORTER_PORT = 8001 - def _handle_sigint(signum, frame): """ - Handle Ctrl+C (SIGINT) and request graceful shutdown. - - Avoid tearing down compose services directly from the signal handler, - because the main benchmark flow may still be interacting with them. - Cleanup is handled by the normal control flow / finally blocks. + Handle Ctrl+C (SIGINT) and perform graceful shutdown. """ - del signum, frame # unused global _shutdown_requested _shutdown_requested = True - print("\n\n⚠ Ctrl+C detected. Stopping benchmark gracefully...") + print("\n\n⚠ Ctrl+C detected. Shutting down benchmark...") if _current_logprep_proc is not None: try: @@ -59,13 +52,18 @@ def _handle_sigint(signum, frame): except Exception: pass + if _current_compose_dir is not None and _current_env is not None: + try: + run_cmd( + ["docker", "compose", "down"], + cwd=_current_compose_dir, + env=_current_env, + ignore_error=True, + ) + except Exception: + pass -def raise_if_shutdown_requested() -> None: - """ - Abort current benchmark flow if a shutdown was requested. 
- """ - if _shutdown_requested: - raise KeyboardInterrupt("Benchmark shutdown requested") + sys.exit(130) # ------------------------- @@ -137,10 +135,14 @@ def print_benchmark_config(args: argparse.Namespace) -> None: for key in sorted(args_dict): value = args_dict[key] + # Format integers with underscore separator if isinstance(value, int): formatted = f"{value:_}" + + # Format list of integers (e.g. runs) elif isinstance(value, list) and all(isinstance(v, int) for v in value): formatted = "[" + ", ".join(f"{v:_}" for v in value) + "]" + else: formatted = value @@ -152,7 +154,6 @@ def print_benchmark_config(args: argparse.Namespace) -> None: print(f"{' ↳ mode':30s}: {mode}") print(f"{' ↳ pipeline_config':30s}: {pipeline_config}") - print(f"{'exporter_port':30s}: {EXPORTER_PORT}") print("================================\n") @@ -253,118 +254,6 @@ def kill_hard(proc: subprocess.Popen) -> None: proc.wait() -def is_tcp_port_open(host: str, port: int, timeout: float = 0.5) -> bool: - """ - Return True if a TCP connection to host:port can be established. - - Args: - host: Hostname or IP address. - port: TCP port. - timeout: Socket timeout in seconds. - - Returns: - True if the TCP port accepts connections, otherwise False. - """ - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: - sock.settimeout(timeout) - return sock.connect_ex((host, port)) == 0 - - -def find_pids_listening_on_port(port: int) -> list[int]: - """ - Return a list of PIDs listening on the given TCP port. - - Uses lsof and returns an empty list if no processes are found. - - Args: - port: TCP port. - - Returns: - List of PIDs. 
- """ - result = subprocess.run( - ["lsof", "-ti", f":{port}"], - capture_output=True, - text=True, - check=False, - ) - - if result.returncode not in (0, 1): - raise RuntimeError( - f"Failed to query listener processes on port {port}: {result.stderr.strip()}" - ) - - pids: list[int] = [] - for line in result.stdout.splitlines(): - line = line.strip() - if line.isdigit(): - pids.append(int(line)) - - return pids - - -def kill_processes_listening_on_port( - port: int, - *, - sigterm_wait_s: float = 1.0, - final_wait_s: float = 2.0, -) -> None: - """ - Kill processes listening on the given TCP port. - - First sends SIGTERM, then SIGKILL if the port is still occupied. - - Args: - port: TCP port to free. - sigterm_wait_s: Time to wait after SIGTERM. - final_wait_s: Time to wait after SIGKILL. - - Raises: - RuntimeError: If the port is still in use after cleanup. - """ - pids = find_pids_listening_on_port(port) - if not pids: - return - - print( - f"Port {port} is already in use. Terminating listener processes: {', '.join(map(str, pids))}" - ) - - for pid in pids: - try: - os.kill(pid, signal.SIGTERM) - except ProcessLookupError: - pass - - deadline = time.time() + sigterm_wait_s - while time.time() < deadline: - if not is_tcp_port_open("127.0.0.1", port): - return - time.sleep(0.1) - - remaining_pids = find_pids_listening_on_port(port) - if remaining_pids: - print( - f"Port {port} still in use after SIGTERM. 
Sending SIGKILL to: " - f"{', '.join(map(str, remaining_pids))}" - ) - - for pid in remaining_pids: - try: - os.kill(pid, signal.SIGKILL) - except ProcessLookupError: - pass - - deadline = time.time() + final_wait_s - while time.time() < deadline: - if not is_tcp_port_open("127.0.0.1", port): - return - time.sleep(0.1) - - if is_tcp_port_open("127.0.0.1", port): - raise RuntimeError(f"Port {port} is still in use after cleanup.") - - def opensearch_refresh(opensearch_url: str, processed_index: str) -> None: """ Force a refresh of the processed index so counts reflect recent writes. @@ -397,33 +286,6 @@ def opensearch_count_processed(opensearch_url: str, processed_index: str) -> int return int(resp.json()["count"]) -def opensearch_debug_snapshot(opensearch_url: str) -> None: - """ - Print a small OpenSearch state snapshot for debugging. - Never raises (best-effort). - """ - try: - r = requests.get(f"{opensearch_url}/_cat/indices?v", timeout=10) - print("\n--- _cat/indices ---") - print(r.text) - except Exception as e: - print(f"\n--- _cat/indices (failed) ---\n{e}") - - try: - r = requests.get(f"{opensearch_url}/_cat/count?v", timeout=10) - print("\n--- _cat/count ---") - print(r.text) - except Exception as e: - print(f"\n--- _cat/count (failed) ---\n{e}") - - try: - r = requests.get(f"{opensearch_url}/_cat/aliases?v", timeout=10) - print("\n--- _cat/aliases ---") - print(r.text) - except Exception as e: - print(f"\n--- _cat/aliases (failed) ---\n{e}") - - def reset_prometheus_dir(path: str) -> None: """ Recreate PROMETHEUS_MULTIPROC_DIR. @@ -446,8 +308,8 @@ def resolve_pipeline_config(ng: int) -> Path: Pipeline config path. 
""" if ng == 1: - return Path("./examples/exampledata/config/_benchmark_ng_pipeline.yml") - return Path("./examples/exampledata/config/_benchmark_non_ng_pipeline.yml") + return Path("./examples/exampledata/config/ng_pipeline.yml") + return Path("./examples/exampledata/config/pipeline.yml") def read_vm_max_map_count() -> int: @@ -497,7 +359,6 @@ def wait_for_tcp(host: str, port: int, *, timeout_s: float, interval_s: float = last_err: OSError | None = None while time.time() < deadline: - raise_if_shutdown_requested() try: with socket.create_connection((host, port), timeout=2): return @@ -521,7 +382,6 @@ def wait_for_opensearch(opensearch_url: str, *, timeout_s: float, interval_s: fl last_err: Exception | None = None while time.time() < deadline: - raise_if_shutdown_requested() try: resp = requests.get(f"{opensearch_url}/_cluster/health", timeout=2) if resp.status_code == 200: @@ -550,7 +410,6 @@ def wait_for_kafka_topic( last_err: Exception | None = None while time.time() < deadline: - raise_if_shutdown_requested() try: proc = subprocess.run( [ @@ -684,13 +543,11 @@ def benchmark_run( _current_env = env try: - raise_if_shutdown_requested() ensure_vm_max_map_count() run_cmd(["docker", "compose", "down"], cwd=compose_dir, env=env) run_cmd(["docker", "volume", "rm", "compose_opensearch-data"], env=env, ignore_error=True) - raise_if_shutdown_requested() run_cmd( ["docker", "compose", "up", "-d", "--no-deps", *services], cwd=compose_dir, @@ -709,8 +566,6 @@ def benchmark_run( wait_for_tcp("127.0.0.1", 9200, timeout_s=float(sleep_after_compose_up_s)) wait_for_opensearch(opensearch_url, timeout_s=float(sleep_after_compose_up_s)) - raise_if_shutdown_requested() - batch_size = max(event_num // 10, 10) output_config = f'{{"bootstrap.servers": "{bootstrap_servers}"}}' @@ -731,49 +586,30 @@ def benchmark_run( env=env, ) - raise_if_shutdown_requested() time.sleep(sleep_after_generate_s) - raise_if_shutdown_requested() binary = "logprep-ng" if ng == 1 else "logprep" - # Ensure 
exporter port is free before starting logprep. - kill_processes_listening_on_port(EXPORTER_PORT) - t_startup = time.time() logprep_proc = popen_cmd([binary, "run", str(pipeline_config)], env=env) _current_logprep_proc = logprep_proc time.sleep(sleep_after_logprep_start_s) - raise_if_shutdown_requested() - - print("\n=== OpenSearch snapshot (before measurement) ===") - opensearch_debug_snapshot(opensearch_url) - raise_if_shutdown_requested() baseline = opensearch_count_processed(opensearch_url, processed_index) startup_s = time.time() - t_startup t_run = time.time() time.sleep(run_seconds) - raise_if_shutdown_requested() + window_s = time.time() - t_run kill_hard(logprep_proc) - - # Ensure exporter port is released after forceful process termination. - kill_processes_listening_on_port(EXPORTER_PORT) - - window_s = time.time() - t_run logprep_proc = None _current_logprep_proc = None - raise_if_shutdown_requested() + # ensure near-real-time writes are visible to _count before measuring opensearch_refresh(opensearch_url, processed_index) - print("\n=== OpenSearch snapshot (after run / after refresh) ===") - opensearch_debug_snapshot(opensearch_url) - - raise_if_shutdown_requested() after = opensearch_count_processed(opensearch_url, processed_index) processed = max(0, after - baseline) @@ -784,12 +620,6 @@ def benchmark_run( finally: if logprep_proc is not None: kill_hard(logprep_proc) - - try: - kill_processes_listening_on_port(EXPORTER_PORT) - except Exception as exc: - print(f"Warning: failed to clean up exporter port {EXPORTER_PORT}: {exc}") - _current_logprep_proc = None run_cmd(["docker", "compose", "down"], cwd=compose_dir, env=env, ignore_error=True) @@ -975,42 +805,36 @@ def setup_output_tee(out_path: Path | None) -> None: if __name__ == "__main__": signal.signal(signal.SIGINT, _handle_sigint) - try: - args_ = parse_args() - setup_output_tee(args_.out) - - print_benchmark_config(args_) - - pipeline_config_ = resolve_pipeline_config(args_.ng) - - results: 
list[RunResult] = [] - - benchmark_seconds = args_.runs - for run_idx, seconds in enumerate(benchmark_seconds, start=1): - raise_if_shutdown_requested() - print(f"----- Run Round {run_idx}: {seconds} seconds -----") - result = benchmark_run( - run_seconds=seconds, - ng=args_.ng, - event_num=args_.event_num, - prometheus_multiproc_dir=args_.prometheus_multiproc_dir, - compose_dir=args_.compose_dir, - pipeline_config=pipeline_config_, - gen_input_dir=args_.gen_input_dir, - bootstrap_servers=args_.bootstrap_servers, - sleep_after_compose_up_s=args_.sleep_after_compose_up_s, - sleep_after_generate_s=args_.sleep_after_generate_s, - sleep_after_logprep_start_s=args_.sleep_after_logprep_start_s, - opensearch_url=args_.opensearch_url, - processed_index=args_.processed_index, - services=args_.services, - ) - results.append(result) - print_single_run_result(result, event_num=args_.event_num) - print() - - print_runs_table_and_summary(results) + args_ = parse_args() + setup_output_tee(args_.out) + + print_benchmark_config(args_) + + pipeline_config_ = resolve_pipeline_config(args_.ng) + + results: list[RunResult] = [] + + benchmark_seconds = args_.runs + for run_idx, seconds in enumerate(benchmark_seconds, start=1): + print(f"----- Run Round {run_idx}: {seconds} seconds -----") + result = benchmark_run( + run_seconds=seconds, + ng=args_.ng, + event_num=args_.event_num, + prometheus_multiproc_dir=args_.prometheus_multiproc_dir, + compose_dir=args_.compose_dir, + pipeline_config=pipeline_config_, + gen_input_dir=args_.gen_input_dir, + bootstrap_servers=args_.bootstrap_servers, + sleep_after_compose_up_s=args_.sleep_after_compose_up_s, + sleep_after_generate_s=args_.sleep_after_generate_s, + sleep_after_logprep_start_s=args_.sleep_after_logprep_start_s, + opensearch_url=args_.opensearch_url, + processed_index=args_.processed_index, + services=args_.services, + ) + results.append(result) + print_single_run_result(result, event_num=args_.event_num) + print() - except 
KeyboardInterrupt: - print("\nBenchmark aborted.") - sys.exit(130) + print_runs_table_and_summary(results) From bcba7a99a06eb9df2af8766a3c67667d47a045b7 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 12:11:15 +0200 Subject: [PATCH 57/68] refactor: guard cached _search_context on shutdown and remove unused @override decorator for consistency --- logprep/ng/connector/opensearch/output.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/logprep/ng/connector/opensearch/output.py b/logprep/ng/connector/opensearch/output.py index 9aa5efbe2..df99a74cf 100644 --- a/logprep/ng/connector/opensearch/output.py +++ b/logprep/ng/connector/opensearch/output.py @@ -45,7 +45,6 @@ helpers, ) from opensearchpy.serializer import JSONSerializer -from typing_extensions import override from logprep.abc.exceptions import LogprepException from logprep.ng.abc.event import Event @@ -356,6 +355,7 @@ async def health(self) -> bool: # type: ignore # TODO: fix mypy issue return False return super().health() and resp.get("status") in self.config.desired_cluster_status - @override async def shut_down(self): - await self._search_context.close() + if "_search_context" in self.__dict__: + await self._search_context.close() + await super().shut_down() From f669bca46326a87aba9b98886ce69589f2064471 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 12:37:03 +0200 Subject: [PATCH 58/68] fix: make callbacks async --- logprep/ng/connector/confluent_kafka/input.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index 75d330d11..6cdb13915 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -414,9 +414,9 @@ async def _commit_callback( if `error` is not None """ if error is not None: - # self.metrics.commit_failures += 1 + self.metrics.commit_failures += 1 raise InputWarning(self, 
f"Could not commit offsets for {topic_partitions}: {error}") - # self.metrics.commit_success += 1 + self.metrics.commit_success += 1 for topic_partition in topic_partitions: offset = topic_partition.offset if offset in SPECIAL_OFFSETS: From e600bf560a732e9d542e8c8767dd8a18e4cba059 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 12:37:30 +0200 Subject: [PATCH 59/68] fix: set event state to STORING_IN_OUTPUT --- logprep/ng/connector/confluent_kafka/output.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/logprep/ng/connector/confluent_kafka/output.py b/logprep/ng/connector/confluent_kafka/output.py index 914b7be9d..de1d852a3 100644 --- a/logprep/ng/connector/confluent_kafka/output.py +++ b/logprep/ng/connector/confluent_kafka/output.py @@ -315,6 +315,8 @@ async def store_custom(self, event: Event, target: str) -> None: target : str Topic to store event data in. """ + event.state.current_state = EventStateType.STORING_IN_OUTPUT + document = event.data self.metrics.number_of_processed_events += 1 From a4530abd555b4350c05667dfa83a02f2b390718d Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 25 Mar 2026 10:42:52 +0100 Subject: [PATCH 60/68] fix: fix config refresh, remove config scheduler, small adaptions --- logprep/ng/util/worker/worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 8cbe75de7..d9ece8db4 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -19,7 +19,7 @@ from logprep.ng.util.worker.types import AsyncHandler, SizeLimitedQueue -logger = logging.getLogger("Worker") +logger = logging.getLogger("Worker") # pylint: disable=no-member T = TypeVar("T") Input = TypeVar("Input") From 5c4cf30c7022871fd7367e0801e9be76cf113578 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 31 Mar 2026 09:36:43 +0200 Subject: [PATCH 61/68] WIP --- benchmark.py | 1 + config_manipulator.py | 34 ++++++++++++++++++++++++++++++++ 
logprep/ng/manager.py | 24 +++++++++++----------- logprep/ng/pipeline.py | 4 ++++ logprep/ng/sender.py | 6 +----- logprep/ng/util/async_helpers.py | 20 ++++++++++++++++++- run_benchmarks.py | 9 +++------ run_config_manipulation.py | 25 +++++++++++++++++++++++ 8 files changed, 100 insertions(+), 23 deletions(-) create mode 100644 config_manipulator.py create mode 100644 run_config_manipulation.py diff --git a/benchmark.py b/benchmark.py index aa49b5805..cbc258511 100644 --- a/benchmark.py +++ b/benchmark.py @@ -591,6 +591,7 @@ def benchmark_run( binary = "logprep-ng" if ng == 1 else "logprep" t_startup = time.time() + logprep_proc = popen_cmd([binary, "run", str(pipeline_config)], env=env) _current_logprep_proc = logprep_proc diff --git a/config_manipulator.py b/config_manipulator.py new file mode 100644 index 000000000..4e6bea929 --- /dev/null +++ b/config_manipulator.py @@ -0,0 +1,34 @@ +import sys +import time +from pathlib import Path + +from ruamel.yaml import YAML + + +def set_yaml_value(file_path: Path, key: str, value) -> None: + yaml = YAML() + yaml.preserve_quotes = True + yaml.width = 10_000 + + with file_path.open("r", encoding="utf-8") as f: + data = yaml.load(f) + + old_value = data.get(key, "") + data[key] = value + + print(f"Updated '{key}': {old_value} -> {value}") + + with file_path.open("w", encoding="utf-8") as f: + yaml.dump(data, f) + + +if __name__ == "__main__": + delay = int(sys.argv[1]) if len(sys.argv) > 1 else 0 + + if delay > 0: + print(f"Sleeping for {delay} seconds...") + time.sleep(delay) + + config_path = Path("examples/exampledata/config/_benchmark_ng_pipeline.yml") + + set_yaml_value(config_path, "version", 3) diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index ad697823a..57b2f4b9e 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -17,8 +17,8 @@ from logprep.ng.event.set_event_backlog import SetEventBacklog from logprep.ng.pipeline import Pipeline from logprep.ng.sender import Sender +from 
logprep.ng.util.async_helpers import report_event_state from logprep.ng.util.configuration import Configuration -from logprep.ng.util.events import partition_by_state from logprep.ng.util.worker.types import SizeLimitedQueue from logprep.ng.util.worker.worker import Worker, WorkerOrchestrator @@ -90,10 +90,14 @@ def _create_orchestrator(self) -> WorkerOrchestrator: # pylint: disable=too-man acknowledge_queue, ] + async def _report_event_state(batch: list[LogEvent]) -> list[LogEvent]: + return await report_event_state(logger, batch) + async def transfer_batch(batch: list[LogEvent]) -> list[LogEvent]: for event in batch: event.state.current_state = EventStateType.RECEIVED + _ = await _report_event_state(batch) return batch input_worker: Worker[LogEvent, LogEvent] = Worker( @@ -119,6 +123,8 @@ async def _handle(event: LogEvent): await send_to_error_queue.put(event) await asyncio.gather(*map(_handle, batch)) + + _ = await _report_event_state(batch) return batch processing_worker: Worker[LogEvent, LogEvent] = Worker( @@ -130,6 +136,7 @@ async def _handle(event: LogEvent): ) async def _send_extras_handler(batch: list[LogEvent]) -> list[LogEvent]: + _ = await _report_event_state(batch) return await self._sender.send_extras(batch) extra_output_worker: Worker[LogEvent, LogEvent] = Worker( @@ -142,6 +149,7 @@ async def _send_extras_handler(batch: list[LogEvent]) -> list[LogEvent]: ) async def _send_default_output_handler(batch: list[LogEvent]) -> list[LogEvent]: + _ = await _report_event_state(batch) return await self._sender.send_default_output(batch) output_worker: Worker[LogEvent, LogEvent] = Worker( @@ -153,22 +161,16 @@ async def _send_default_output_handler(batch: list[LogEvent]) -> list[LogEvent]: handler=_send_default_output_handler, ) - async def _report_event_state(batch: list[LogEvent]) -> list[LogEvent]: - events_by_state = partition_by_state(batch) - logger.info( - "Finished processing %d events: %s", - len(batch), - ", ".join(f"#{state}={len(events)}" for 
state, events in events_by_state.items()), - ) - return batch + async def _send_error_output_handler(batch: list[LogEvent]) -> list[LogEvent]: + _ = await _report_event_state(batch) + return await self._sender._send_and_flush_failed_events(batch) error_worker: Worker[LogEvent, LogEvent] = Worker( name="error_worker", batch_size=BATCH_SIZE, batch_interval_s=BATCH_INTERVAL_S, in_queue=send_to_error_queue, - # TODO implement handling and sending failed events - handler=_report_event_state, + handler=_send_error_output_handler, ) acknowledge_worker: Worker[LogEvent, LogEvent] = Worker( diff --git a/logprep/ng/pipeline.py b/logprep/ng/pipeline.py index eac8f22ef..fef2f2b56 100644 --- a/logprep/ng/pipeline.py +++ b/logprep/ng/pipeline.py @@ -19,6 +19,10 @@ def _process_event(event: LogEvent | None, processors: list[Processor]) -> LogEv if not event.data: break processor.process(event) + + # TODO: Debug - remove this two lines! + event.state.current_state = EventStateType.FAILED + event.errors.append(ValueError("test")) if not event.errors: event.state.current_state = EventStateType.PROCESSED else: diff --git a/logprep/ng/sender.py b/logprep/ng/sender.py index e316fc6db..a5dd5a3fb 100644 --- a/logprep/ng/sender.py +++ b/logprep/ng/sender.py @@ -77,12 +77,8 @@ async def send_default_output(self, batch_events: Sequence[LogEvent]) -> Sequenc return await self._default_output.store_batch(batch_events) # type: ignore async def _send_and_flush_failed_events(self, batch_events: list[LogEvent]) -> None: - failed = [event for event in batch_events if event.state is EventStateType.FAILED] - if not failed: - return - # send in parallel (minimal change vs. 
serial list comprehension) - error_events = await asyncio.gather(*(self._send_failed(event) for event in failed)) + error_events = await asyncio.gather(*(self._send_failed(event) for event in batch_events)) await self._error_output.flush() # type: ignore[union-attr] diff --git a/logprep/ng/util/async_helpers.py b/logprep/ng/util/async_helpers.py index 016d58517..8ebba3a2a 100644 --- a/logprep/ng/util/async_helpers.py +++ b/logprep/ng/util/async_helpers.py @@ -5,6 +5,9 @@ from logging import Logger from typing import Awaitable, TypeVar +from logprep.ng.event.log_event import LogEvent +from logprep.ng.util.events import partition_by_state + T = TypeVar("T") D = TypeVar("D") @@ -115,7 +118,7 @@ async def restart_task_on_iter( def asyncio_exception_handler( - _: asyncio.AbstractEventLoop, + loop: asyncio.AbstractEventLoop, # pylint: disable=unused-argument context: dict, logger: Logger, ) -> None: @@ -124,6 +127,11 @@ def asyncio_exception_handler( Covers exceptions from background tasks, callbacks, and loop internals. Does not handle exceptions from awaited coroutines (e.g. runner.run()). + + Args: + loop: The current event loop. + context: Asyncio error context (may contain message, exception, task/future). + logger: Logger used to record the error. 
""" msg = context.get("message", "Unhandled exception in event loop") @@ -142,3 +150,13 @@ def asyncio_exception_handler( logger.error(f"Unhandled exception: {exception!r}", exc_info=exception) else: logger.error(f"Context: {context!r}") + + +async def report_event_state(logger: Logger, batch: list[LogEvent]) -> list[LogEvent]: + events_by_state = partition_by_state(batch) + logger.info( + "Finished processing %d events: %s", + len(batch), + ", ".join(f"#{state}={len(events)}" for state, events in events_by_state.items()), + ) + return batch diff --git a/run_benchmarks.py b/run_benchmarks.py index 8636e052d..d243e6ba5 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -5,10 +5,10 @@ from datetime import datetime from pathlib import Path -PYTHON_VERSIONS = ["3.11", "3.12", "3.13", "3.14"] +PYTHON_VERSIONS = ["3.11"] # , "3.12", "3.13", "3.14"] MODES = [ ("asyncNG", "1"), - ("nonNG", "0"), + # ("nonNG", "0"), ] @@ -32,12 +32,9 @@ def run_benchmarks() -> None: py, "benchmark.py", "--event-num", - "250000", + "50000", "--runs", "30", - "30", - "45", - "45", "--ng", ng_flag, "--out", diff --git a/run_config_manipulation.py b/run_config_manipulation.py new file mode 100644 index 000000000..a965d06ce --- /dev/null +++ b/run_config_manipulation.py @@ -0,0 +1,25 @@ +import subprocess +import sys +import time + + +def main(delay_seconds: int): + python_executable = sys.executable # ensures correct interpreter (venv-safe) + + # Delay before starting script + print(f"Waiting {delay_seconds} seconds before starting config manipulation...") + time.sleep(delay_seconds) + + # Start script + print("Starting config manipulation...") + proc = subprocess.Popen([python_executable, "config_manipulator.py"]) + + print("Process are now running.") + + proc.wait() + + print("Processes finished.") + + +if __name__ == "__main__": + main(delay_seconds=5) From 42d99da4048aea49097260961b6570313e98734a Mon Sep 17 00:00:00 2001 From: David Kaya Date: Thu, 2 Apr 2026 06:53:25 +0200 Subject: 
[PATCH 62/68] WIP --- logprep/ng/pipeline.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/logprep/ng/pipeline.py b/logprep/ng/pipeline.py index fef2f2b56..eac8f22ef 100644 --- a/logprep/ng/pipeline.py +++ b/logprep/ng/pipeline.py @@ -19,10 +19,6 @@ def _process_event(event: LogEvent | None, processors: list[Processor]) -> LogEv if not event.data: break processor.process(event) - - # TODO: Debug - remove this two lines! - event.state.current_state = EventStateType.FAILED - event.errors.append(ValueError("test")) if not event.errors: event.state.current_state = EventStateType.PROCESSED else: From 5ce902db6714942a0a0b8fedc44d632d4453d4d7 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Tue, 7 Apr 2026 15:12:15 +0200 Subject: [PATCH 63/68] WIP --- logprep/ng/connector/confluent_kafka/input.py | 3 +- .../ng/connector/confluent_kafka/output.py | 28 +++++++++++++++---- logprep/ng/manager.py | 3 +- logprep/ng/pipeline.py | 1 + logprep/ng/util/async_helpers.py | 2 +- run_benchmarks.py | 4 +-- 6 files changed, 30 insertions(+), 11 deletions(-) diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index 6cdb13915..a901447cf 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -341,8 +341,7 @@ async def get_consumer(self, max_workers: int = 4) -> AIOConsumer: """ if self._consumer is None: - consumer = AIOConsumer(self._kafka_config, max_workers=max_workers) - self._consumer = consumer + self._consumer = AIOConsumer(self._kafka_config, max_workers=max_workers) return self._consumer diff --git a/logprep/ng/connector/confluent_kafka/output.py b/logprep/ng/connector/confluent_kafka/output.py index de1d852a3..2a6613872 100644 --- a/logprep/ng/connector/confluent_kafka/output.py +++ b/logprep/ng/connector/confluent_kafka/output.py @@ -198,6 +198,12 @@ class Config(Output.Config): - Regularly rotate your Kafka credentials and secrets. 
""" + __slots__ = ["_producer"] + + def __init__(self, name: str, configuration: "ConfluentKafkaOutput.Config"): + super().__init__(name, configuration) + self._producer: AIOProducer | None = None + @property def config(self) -> Config: """Provides the properly typed rule configuration object""" @@ -236,8 +242,20 @@ def _admin(self) -> AdminClient: return AdminClient(admin_config) @cached_property - def _producer(self) -> AIOProducer: - return AIOProducer(self._kafka_config) + async def get_producer(self) -> AIOProducer: + """ + Configures and returns the asynchronous Kafka producer. + + Returns + ------- + AIOProducer + The pre-configured aiokafka producer object. + """ + + if self._producer is None: + self._producer = AIOProducer(self._kafka_config) + + return self._producer def _error_callback(self, error: KafkaException) -> None: """Callback for generic/global error events, these errors are typically @@ -353,7 +371,7 @@ async def flush(self) -> None: flush without the timeout parameter will block until all messages are delivered. This ensures no messages will get lost on shutdown. 
""" - remaining_messages = self._producer.flush() + remaining_messages = await self._producer.flush() if remaining_messages: self.metrics.number_of_errors += 1 logger.error( @@ -387,7 +405,7 @@ async def setup(self) -> None: async def shut_down(self) -> None: """Shut down the confluent kafka output connector and cleanup resources.""" - if "_producer" in self.__dict__: - await self.flush() + if self._producer is not None: + await self._producer.close() await super().shut_down() diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index 57b2f4b9e..fe308bf0a 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -163,7 +163,8 @@ async def _send_default_output_handler(batch: list[LogEvent]) -> list[LogEvent]: async def _send_error_output_handler(batch: list[LogEvent]) -> list[LogEvent]: _ = await _report_event_state(batch) - return await self._sender._send_and_flush_failed_events(batch) + await self._sender._send_and_flush_failed_events(batch) + return batch error_worker: Worker[LogEvent, LogEvent] = Worker( name="error_worker", diff --git a/logprep/ng/pipeline.py b/logprep/ng/pipeline.py index eac8f22ef..ccb9d2546 100644 --- a/logprep/ng/pipeline.py +++ b/logprep/ng/pipeline.py @@ -19,6 +19,7 @@ def _process_event(event: LogEvent | None, processors: list[Processor]) -> LogEv if not event.data: break processor.process(event) + event.errors.append(ValueError("test")) if not event.errors: event.state.current_state = EventStateType.PROCESSED else: diff --git a/logprep/ng/util/async_helpers.py b/logprep/ng/util/async_helpers.py index 8ebba3a2a..51fefec54 100644 --- a/logprep/ng/util/async_helpers.py +++ b/logprep/ng/util/async_helpers.py @@ -12,7 +12,7 @@ D = TypeVar("D") -TaskFactory = Callable[[D], asyncio.Task[T] | Awaitable[asyncio.Task[T]]] +TaskFactory = Callable[[D], Awaitable[asyncio.Task[T]]] class TerminateTaskGroup(Exception): diff --git a/run_benchmarks.py b/run_benchmarks.py index d243e6ba5..f8d7facac 100644 --- a/run_benchmarks.py +++ 
b/run_benchmarks.py @@ -32,9 +32,9 @@ def run_benchmarks() -> None: py, "benchmark.py", "--event-num", - "50000", + "3210", "--runs", - "30", + "60", "--ng", ng_flag, "--out", From 8307ed749396dff5d41ffa7d31202b08edecdf48 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Wed, 8 Apr 2026 15:43:17 +0200 Subject: [PATCH 64/68] WIP --- logprep/ng/connector/confluent_kafka/output.py | 13 +++++++------ logprep/ng/manager.py | 2 +- logprep/ng/util/worker/worker.py | 16 +++++++--------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/logprep/ng/connector/confluent_kafka/output.py b/logprep/ng/connector/confluent_kafka/output.py index 2a6613872..aa55d3b32 100644 --- a/logprep/ng/connector/confluent_kafka/output.py +++ b/logprep/ng/connector/confluent_kafka/output.py @@ -241,8 +241,7 @@ def _admin(self) -> AdminClient: admin_config[key] = value return AdminClient(admin_config) - @cached_property - async def get_producer(self) -> AIOProducer: + def get_producer(self) -> AIOProducer: """ Configures and returns the asynchronous Kafka producer. @@ -339,7 +338,8 @@ async def store_custom(self, event: Event, target: str) -> None: self.metrics.number_of_processed_events += 1 try: - delivery_future = await self._producer.produce( + producer = self.get_producer() + delivery_future = await producer.produce( topic=target, value=self._encoder.encode(document), ) @@ -371,7 +371,8 @@ async def flush(self) -> None: flush without the timeout parameter will block until all messages are delivered. This ensures no messages will get lost on shutdown. 
""" - remaining_messages = await self._producer.flush() + producer = self.get_producer() + remaining_messages = await producer.flush() if remaining_messages: self.metrics.number_of_errors += 1 logger.error( @@ -405,7 +406,7 @@ async def setup(self) -> None: async def shut_down(self) -> None: """Shut down the confluent kafka output connector and cleanup resources.""" + await super().shut_down() + if self._producer is not None: await self._producer.close() - - await super().shut_down() diff --git a/logprep/ng/manager.py b/logprep/ng/manager.py index fe308bf0a..1126db363 100644 --- a/logprep/ng/manager.py +++ b/logprep/ng/manager.py @@ -162,7 +162,7 @@ async def _send_default_output_handler(batch: list[LogEvent]) -> list[LogEvent]: ) async def _send_error_output_handler(batch: list[LogEvent]) -> list[LogEvent]: - _ = await _report_event_state(batch) + await _report_event_state(batch) await self._sender._send_and_flush_failed_events(batch) return batch diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index d9ece8db4..8df040fb8 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -155,6 +155,7 @@ async def flush(self) -> None: Drains and processes the current buffer regardless of size or timer state. 
""" + batch_to_flush: list[Input] | None = None async with self._buffer_lock: if self._batch_buffer: @@ -252,8 +253,7 @@ def run_workers(self) -> None: self._add_worker_task(t) def _add_worker_task(self, task: asyncio.Task[Any]) -> None: - """Track a worker task and fail-fast on exceptions.""" - self._worker_tasks.add(task) + self.exceptions_ = """Track a worker task and fail-fast on exceptions.""" def _done(t: asyncio.Task[Any]) -> None: self._worker_tasks.discard(t) @@ -267,6 +267,7 @@ def _done(t: asyncio.Task[Any]) -> None: self._stop_event.set() task.add_done_callback(_done) + self._worker_tasks.add(task) async def run(self) -> None: """ @@ -289,22 +290,19 @@ async def shut_down(self, timeout_s: float) -> None: """ self._stop_event.set() - current_task = asyncio.current_task() - tasks_but_current = [t for t in self._worker_tasks if t is not current_task] - - logger.debug("waiting for termination of %d tasks", len(tasks_but_current)) + logger.debug("waiting for termination of %d tasks", len(self._worker_tasks)) try: await asyncio.wait_for( - asyncio.gather(*tasks_but_current, return_exceptions=True), timeout_s + asyncio.gather(*self._worker_tasks, return_exceptions=True), timeout_s ) except TimeoutError: - unfinished_workers = [w for w in tasks_but_current if not w.done()] + unfinished_workers = [w for w in self._worker_tasks if not w.done()] if unfinished_workers: logger.debug( "[%d/%d] did not stop gracefully. 
Awaiting cancellation: [%s]", len(unfinished_workers), - len(tasks_but_current), + len(self._worker_tasks), ", ".join(map(asyncio.Task.get_name, unfinished_workers)), ) await asyncio.gather(*unfinished_workers, return_exceptions=True) From 3cd5ceed8c7ddb0fffe4a9e8946fe3e58c264998 Mon Sep 17 00:00:00 2001 From: David Kaya Date: Thu, 9 Apr 2026 11:23:46 +0200 Subject: [PATCH 65/68] fix: avoid adding None items in worker queue mode as well --- logprep/ng/util/worker/worker.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/logprep/ng/util/worker/worker.py b/logprep/ng/util/worker/worker.py index 8df040fb8..0254ff5da 100644 --- a/logprep/ng/util/worker/worker.py +++ b/logprep/ng/util/worker/worker.py @@ -194,7 +194,10 @@ async def run(self, stop_event: asyncio.Event) -> None: if isinstance(self.in_queue, asyncio.Queue): while not stop_event.is_set(): item = await self.in_queue.get() - await self.add(item) + + if item is not None: + await self.add(item) + await asyncio.sleep(0.0) else: while not stop_event.is_set(): From 615617d5bdf4cd79a9a88cc22f91f7c0b750413f Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Fri, 10 Apr 2026 10:23:38 +0200 Subject: [PATCH 66/68] remove benchmark results and extra scripts --- .../20260309_231610/asyncNG_python3.11.txt | 83 ---- .../20260309_231610/nonNG_python3.11.txt | 83 ---- .../20260309_232743/asyncNG_python3.11.txt | 85 ---- .../20260309_232743/nonNG_python3.11.txt | 83 ---- .../20260310_114644/asyncNG_python3.11.txt | 462 ----------------- .../20260310_114644/asyncNG_python3.12.txt | 463 ----------------- .../20260310_114644/asyncNG_python3.13.txt | 465 ------------------ .../20260310_114644/asyncNG_python3.14.txt | 464 ----------------- .../20260310_114644/nonNG_python3.11.txt | 464 ----------------- .../20260310_114644/nonNG_python3.12.txt | 464 ----------------- .../20260310_114644/nonNG_python3.13.txt | 462 ----------------- 
.../20260310_114644/nonNG_python3.14.txt | 465 ------------------ config_manipulator.py | 34 -- examples/exampledata/config/ng_pipeline.yml | 4 +- run_config_manipulation.py | 25 - 15 files changed, 2 insertions(+), 4104 deletions(-) delete mode 100644 benchmark_results/20260309_231610/asyncNG_python3.11.txt delete mode 100644 benchmark_results/20260309_231610/nonNG_python3.11.txt delete mode 100644 benchmark_results/20260309_232743/asyncNG_python3.11.txt delete mode 100644 benchmark_results/20260309_232743/nonNG_python3.11.txt delete mode 100644 benchmark_results/20260310_114644/asyncNG_python3.11.txt delete mode 100644 benchmark_results/20260310_114644/asyncNG_python3.12.txt delete mode 100644 benchmark_results/20260310_114644/asyncNG_python3.13.txt delete mode 100644 benchmark_results/20260310_114644/asyncNG_python3.14.txt delete mode 100644 benchmark_results/20260310_114644/nonNG_python3.11.txt delete mode 100644 benchmark_results/20260310_114644/nonNG_python3.12.txt delete mode 100644 benchmark_results/20260310_114644/nonNG_python3.13.txt delete mode 100644 benchmark_results/20260310_114644/nonNG_python3.14.txt delete mode 100644 config_manipulator.py delete mode 100644 run_config_manipulation.py diff --git a/benchmark_results/20260309_231610/asyncNG_python3.11.txt b/benchmark_results/20260309_231610/asyncNG_python3.11.txt deleted file mode 100644 index b1c881ae9..000000000 --- a/benchmark_results/20260309_231610/asyncNG_python3.11.txt +++ /dev/null @@ -1,83 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-09T23:17:50.692991 -timestamp (UTC) : 2026-03-09T22:17:50.692995+00:00 -python version : 3.11.14 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 1 - ↳ mode : logprep-ng - ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml -opensearch_url : 
http://localhost:9200 -out : benchmark_results/20260309_231610/asyncNG_python3.11.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config NFlpeREcQX6uhmpZXTdNnA 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability pjUG0ykhQ9yITaSLZuvSOA 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773094728 22:18:48 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed HgKcRCSeT2-KxTAjoHGLGA 1 1 5832 0 1007.9kb 1007.9kb -green open .opensearch-observability pjUG0ykhQ9yITaSLZuvSOA 1 0 0 0 208b 208b -green open .plugins-ml-config NFlpeREcQX6uhmpZXTdNnA 1 0 1 0 3.9kb 3.9kb -yellow open sre KKmiQGjPTEyb2dlSW3Xr5A 1 1 754 0 221kb 221kb -yellow open pseudonyms FEnG8tOxQJuXNj0YPUrfRA 1 1 756 0 76kb 76kb - - ---- _cat/count --- -epoch timestamp count -1773094759 22:19:19 7343 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.051 s -measurement window: 30.000 s -processed (OpenSearch): 5_832 -throughput: 194.40 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 1 -total runtime: 30.000 s -total processed: 5_832 - -throughput (weighted): 194.40 docs/s -throughput (median): 194.40 docs/s -throughput (average): 194.40 docs/s -throughput (min/max): 194.40 / 194.40 docs/s -throughput (std dev): 
0.00 docs/s -================================ diff --git a/benchmark_results/20260309_231610/nonNG_python3.11.txt b/benchmark_results/20260309_231610/nonNG_python3.11.txt deleted file mode 100644 index 151abe767..000000000 --- a/benchmark_results/20260309_231610/nonNG_python3.11.txt +++ /dev/null @@ -1,83 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-09T23:16:11.042971 -timestamp (UTC) : 2026-03-09T22:16:11.042974+00:00 -python version : 3.11.14 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 0 - ↳ mode : logprep - ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260309_231610/nonNG_python3.11.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability 4LufVowAQ6iWBbEmSsisNg 1 0 0 0 208b 208b -green open .plugins-ml-config LIwyBs_0T0K7QQJ4gw0UwQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773094627 22:17:07 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed AumBEANnRreI6jEWzhph1Q 1 1 1283 0 447.5kb 447.5kb -green open .opensearch-observability 4LufVowAQ6iWBbEmSsisNg 1 0 0 0 208b 208b -green open .plugins-ml-config 
LIwyBs_0T0K7QQJ4gw0UwQ 1 0 1 0 3.9kb 3.9kb -yellow open sre ZxkLYFZRTDqppWkN6eKRZg 1 1 162 0 153.1kb 153.1kb -yellow open pseudonyms kiVDDQK_T0C8dW31aTibRw 1 1 165 0 110.2kb 110.2kb - - ---- _cat/count --- -epoch timestamp count -1773094659 22:17:39 1611 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.041 s -measurement window: 30.000 s -processed (OpenSearch): 1_283 -throughput: 42.77 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 1 -total runtime: 30.000 s -total processed: 1_283 - -throughput (weighted): 42.77 docs/s -throughput (median): 42.77 docs/s -throughput (average): 42.77 docs/s -throughput (min/max): 42.77 / 42.77 docs/s -throughput (std dev): 0.00 docs/s -================================ diff --git a/benchmark_results/20260309_232743/asyncNG_python3.11.txt b/benchmark_results/20260309_232743/asyncNG_python3.11.txt deleted file mode 100644 index 4a9e69ba7..000000000 --- a/benchmark_results/20260309_232743/asyncNG_python3.11.txt +++ /dev/null @@ -1,85 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-09T23:29:22.520526 -timestamp (UTC) : 2026-03-09T22:29:22.520530+00:00 -python version : 3.11.14 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 1 - ↳ mode : logprep-ng - ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260309_232743/asyncNG_python3.11.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch 
snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config kD16pAJGRZ2MhuLKsXihAg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability u4eFMth_RJ-cuygSTg7evg 1 0 0 0 208b 208b -green open top_queries-2026.03.09-25320 5kyK9IkeQweUUuHDq10k3A 1 0 8 16 79.8kb 79.8kb - - ---- _cat/count --- -epoch timestamp count -1773095419 22:30:19 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed JHWMU0uiQFWhmimuzOfo0A 1 1 5820 0 771.4kb 771.4kb -green open .opensearch-observability u4eFMth_RJ-cuygSTg7evg 1 0 0 0 208b 208b -green open .plugins-ml-config kD16pAJGRZ2MhuLKsXihAg 1 0 1 0 3.9kb 3.9kb -yellow open sre PgeiHyQTR3y2A0Mf4byXaw 1 1 750 0 231.5kb 231.5kb -yellow open pseudonyms 5cAB-TyRRh-Mp2QT8PV9_Q 1 1 749 0 73.9kb 73.9kb -green open top_queries-2026.03.09-25320 5kyK9IkeQweUUuHDq10k3A 1 0 8 16 79.8kb 79.8kb - - ---- _cat/count --- -epoch timestamp count -1773095450 22:30:50 7328 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.041 s -measurement window: 30.000 s -processed (OpenSearch): 5_820 -throughput: 194.00 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 1 -total runtime: 30.000 s -total processed: 5_820 - -throughput (weighted): 194.00 docs/s -throughput (median): 194.00 docs/s -throughput (average): 194.00 docs/s -throughput (min/max): 194.00 / 194.00 docs/s -throughput (std dev): 0.00 docs/s -================================ diff --git a/benchmark_results/20260309_232743/nonNG_python3.11.txt b/benchmark_results/20260309_232743/nonNG_python3.11.txt deleted file mode 100644 index 
bbbb08e30..000000000 --- a/benchmark_results/20260309_232743/nonNG_python3.11.txt +++ /dev/null @@ -1,83 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-09T23:27:44.332323 -timestamp (UTC) : 2026-03-09T22:27:44.332326+00:00 -python version : 3.11.14 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 0 - ↳ mode : logprep - ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260309_232743/nonNG_python3.11.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability PPo2XTsbS3i5-Md9d2YEYw 1 0 0 0 208b 208b -green open .plugins-ml-config soKZV8HHQDesGw2I-NZ15A 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773095319 22:28:39 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 6IWSH9bcSbiUitvFF4CQPQ 1 1 30007 0 4mb 4mb -green open .opensearch-observability PPo2XTsbS3i5-Md9d2YEYw 1 0 0 0 208b 208b -green open .plugins-ml-config soKZV8HHQDesGw2I-NZ15A 1 0 1 0 3.9kb 3.9kb -yellow open sre 7RFX8KAGToCq_JmWUNTXXA 1 1 3747 0 839.8kb 839.8kb -yellow open pseudonyms NBQlOkAcQ56O3a-lORDaYQ 1 1 3746 0 251.3kb 251.3kb - - ---- _cat/count --- -epoch 
timestamp count -1773095351 22:29:11 37501 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.037 s -measurement window: 30.000 s -processed (OpenSearch): 30_007 -throughput: 1,000.23 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 1 -total runtime: 30.000 s -total processed: 30_007 - -throughput (weighted): 1,000.23 docs/s -throughput (median): 1,000.23 docs/s -throughput (average): 1,000.23 docs/s -throughput (min/max): 1,000.23 / 1,000.23 docs/s -throughput (std dev): 0.00 docs/s -================================ diff --git a/benchmark_results/20260310_114644/asyncNG_python3.11.txt b/benchmark_results/20260310_114644/asyncNG_python3.11.txt deleted file mode 100644 index d16ed0c72..000000000 --- a/benchmark_results/20260310_114644/asyncNG_python3.11.txt +++ /dev/null @@ -1,462 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-10T11:46:44.880438 -timestamp (UTC) : 2026-03-10T10:46:44.880442+00:00 -python version : 3.11.14 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 1 - ↳ mode : logprep-ng - ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260310_114644/asyncNG_python3.11.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open 
.opensearch-observability ljQZHXlgS763a1yot74u5Q 1 0 0 0 208b 208b -green open .plugins-ml-config 4O2BXmbdSlCu2AnWk24AmQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773139659 10:47:39 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed xkYvH-rXTRuodc9RFoN9pQ 1 1 40000 0 4.2mb 4.2mb -green open .opensearch-observability ljQZHXlgS763a1yot74u5Q 1 0 0 0 208b 208b -green open .plugins-ml-config 4O2BXmbdSlCu2AnWk24AmQ 1 0 1 0 3.9kb 3.9kb -yellow open sre vFjaX9g8R0GBtAhD2NgPrA 1 1 5620 0 1.1mb 1.1mb -yellow open pseudonyms LPV1P7acTiOcksbF5kVFZQ 1 1 5623 0 282.7kb 282.7kb - - ---- _cat/count --- -epoch timestamp count -1773139693 10:48:13 51244 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.050 s -measurement window: 30.000 s -processed (OpenSearch): 40_000 -throughput: 1,333.33 docs/s --------------- - ------ Run Round 2: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config IuK839AzSBqJ79-EzPvPRw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability L1BBTE1rRj-eA3JCHeeYJg 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773139759 10:49:19 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 9n6JkGUdROSFGDSfYQAdEw 1 1 40000 0 4.3mb 4.3mb -green open .plugins-ml-config IuK839AzSBqJ79-EzPvPRw 1 0 1 0 
3.9kb 3.9kb -green open .opensearch-observability L1BBTE1rRj-eA3JCHeeYJg 1 0 0 0 208b 208b -yellow open sre 5aygQ7VvSY6s3c61XbycNg 1 1 5618 0 1.1mb 1.1mb -yellow open pseudonyms chsT5gAmRaCPHwZgpu74rw 1 1 5626 0 286.9kb 286.9kb - - ---- _cat/count --- -epoch timestamp count -1773139792 10:49:52 51245 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.037 s -measurement window: 30.000 s -processed (OpenSearch): 40_000 -throughput: 1,333.33 docs/s --------------- - ------ Run Round 3: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability tWuePL_yQNmjYvXyfIw3AA 1 0 0 0 208b 208b -green open .plugins-ml-config zOANW9xsSWK24agvbJjUOQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773139858 10:50:58 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed O5Pu0apVTQ2prxfw_f4Bkw 1 1 40000 0 4.4mb 4.4mb -green open .opensearch-observability tWuePL_yQNmjYvXyfIw3AA 1 0 0 0 208b 208b -green open .plugins-ml-config zOANW9xsSWK24agvbJjUOQ 1 0 1 0 3.9kb 3.9kb -yellow open sre 9xFBenolREOrIpSL65fgUA 1 1 5628 0 1.1mb 1.1mb -yellow open pseudonyms etDOzg1yRp23BdPIqUa86Q 1 1 5633 0 286.2kb 286.2kb - - ---- _cat/count --- -epoch timestamp count -1773139892 10:51:32 51262 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.034 s -measurement window: 30.000 s -processed (OpenSearch): 40_000 -throughput: 1,333.33 docs/s --------------- - ------ Run Round 
4: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability EvjiI8pXRF6HvJ6sqbpSRw 1 0 0 0 208b 208b -green open .plugins-ml-config WuQMFz3RSp2NB-oCNGOMZA 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773139956 10:52:36 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 0QTcrtu7RyiSyRos7efdFQ 1 1 65000 0 5.7mb 5.7mb -green open .opensearch-observability EvjiI8pXRF6HvJ6sqbpSRw 1 0 0 0 208b 208b -green open .plugins-ml-config WuQMFz3RSp2NB-oCNGOMZA 1 0 1 0 3.9kb 3.9kb -yellow open sre 1vnFL02cRqGo0qItm5oEPQ 1 1 6253 0 1.4mb 1.4mb -yellow open pseudonyms AecG8NCBQtC3_R1lb93a3A 1 1 6240 0 352.2kb 352.2kb - - ---- _cat/count --- -epoch timestamp count -1773140006 10:53:26 82496 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.032 s -measurement window: 45.000 s -processed (OpenSearch): 65_000 -throughput: 1,444.44 docs/s --------------- - ------ Run Round 5: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config 7JTeJqsgSYmQmMmjmqQ8jw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability pP0c35tzQLqY-QdoRV0Gaw 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773140072 10:54:32 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index 
uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 MTcPp51gQFaUFonTPxALTg 1 0 9 18 80.6kb 80.6kb -yellow open processed 2xwfYuzxS1iubgIdWK8zdA 1 1 67500 0 5.8mb 5.8mb -green open .plugins-ml-config 7JTeJqsgSYmQmMmjmqQ8jw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability pP0c35tzQLqY-QdoRV0Gaw 1 0 0 0 208b 208b -yellow open sre _A__3FeCSpakdijO5Nfilw 1 1 6246 0 1.4mb 1.4mb -yellow open pseudonyms n_JildKtQ9aJOBnAZN7jhw 1 1 6252 0 359.9kb 359.9kb - - ---- _cat/count --- -epoch timestamp count -1773140122 10:55:22 85391 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.036 s -measurement window: 45.000 s -processed (OpenSearch): 67_500 -throughput: 1,500.00 docs/s --------------- - ------ Run Round 6: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config HFqnEIUfTuG_eHhwtJI0lA 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability lcEdLdTJTvq74deB88jCBw 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773140187 10:56:27 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed D-9ZuAtkQPu2iamUHpBbIQ 1 1 65000 0 5.6mb 5.6mb -green open .plugins-ml-config HFqnEIUfTuG_eHhwtJI0lA 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability lcEdLdTJTvq74deB88jCBw 1 0 0 0 208b 208b -yellow open sre eSHkGeyJRMONZ5Rmheq-9A 1 1 6092 0 1.4mb 1.4mb -yellow open pseudonyms Rq_DQIzhTGSCgiB9sCZCOQ 1 1 6038 0 359.7kb 359.7kb - - ---- _cat/count --- -epoch timestamp count -1773140236 10:57:16 82504 - - ---- 
_cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.035 s -measurement window: 45.000 s -processed (OpenSearch): 65_000 -throughput: 1,444.44 docs/s --------------- - ------ Run Round 7: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config ITAcInvAROytzKYUuPp0Rw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 8B6CWE3gRLSU-KyMWE6b8A 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773140301 10:58:21 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed I6WJzfGTQsG_-uoVeA-irA 1 1 87500 0 7.9mb 7.9mb -green open .plugins-ml-config ITAcInvAROytzKYUuPp0Rw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 8B6CWE3gRLSU-KyMWE6b8A 1 0 0 0 208b 208b -yellow open sre kmCZCRafQRmnQ7qOA7BVQw 1 1 6245 0 1.6mb 1.6mb -yellow open pseudonyms WRr1GJmGTP-uAzRSjcrXAQ 1 1 6252 0 389.5kb 389.5kb - - ---- _cat/count --- -epoch timestamp count -1773140362 10:59:22 110624 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.033 s -measurement window: 60.000 s -processed (OpenSearch): 87_500 -throughput: 1,458.33 docs/s --------------- - ------ Run Round 8: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 dv7JdD5ZQbK6l2SUFJqW1g 1 0 8 16 81kb 81kb -green open .opensearch-observability 
8nEuAtFZRhiIa-oQc4LrWQ 1 0 0 0 208b 208b -green open .plugins-ml-config 688s5erDTFSoLki9p4HaFg 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773140427 11:00:27 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 dv7JdD5ZQbK6l2SUFJqW1g 1 0 8 16 81kb 81kb -yellow open processed sIxo3cZ1Rka_C-sO8MKvlQ 1 1 82500 0 7.6mb 7.6mb -green open .opensearch-observability 8nEuAtFZRhiIa-oQc4LrWQ 1 0 0 0 208b 208b -green open .plugins-ml-config 688s5erDTFSoLki9p4HaFg 1 0 1 0 3.9kb 3.9kb -yellow open sre OiNFT3mdRdGdH73MyYmcyg 1 1 6447 0 1.5mb 1.5mb -yellow open pseudonyms YTAlCVV8T2KWYWOcX9U2JA 1 1 6506 0 427.6kb 427.6kb - - ---- _cat/count --- -epoch timestamp count -1773140488 11:01:28 104386 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.034 s -measurement window: 60.000 s -processed (OpenSearch): 82_500 -throughput: 1,374.99 docs/s --------------- - ------ Run Round 9: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability QME35PLlSmmi3TC6rZqv_Q 1 0 0 0 208b 208b -green open .plugins-ml-config zGGQHOo0T2iUot0aQ4Op5w 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773140556 11:02:36 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed yF17H8PVTA6EdWXOMZjaug 1 1 65000 0 6.3mb 6.3mb 
-green open .opensearch-observability QME35PLlSmmi3TC6rZqv_Q 1 0 0 0 208b 208b -green open .plugins-ml-config zGGQHOo0T2iUot0aQ4Op5w 1 0 1 0 3.9kb 3.9kb -yellow open sre JTHGdLguQEyaV68ixpBLew 1 1 4731 0 1.1mb 1.1mb -yellow open pseudonyms Ss2ViQ40SmeFG7xGYgE7TA 1 1 4697 0 293.8kb 293.8kb - - ---- _cat/count --- -epoch timestamp count -1773140617 11:03:37 82496 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.044 s -measurement window: 60.000 s -processed (OpenSearch): 65_000 -throughput: 1,083.33 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 9 -total runtime: 405.001 s -total processed: 552_500 - -throughput (weighted): 1,364.19 docs/s -throughput (median): 1,374.99 docs/s -throughput (average): 1,367.28 docs/s -throughput (min/max): 1,083.33 / 1,500.00 docs/s -throughput (std dev): 123.47 docs/s -================================ diff --git a/benchmark_results/20260310_114644/asyncNG_python3.12.txt b/benchmark_results/20260310_114644/asyncNG_python3.12.txt deleted file mode 100644 index 89c35c472..000000000 --- a/benchmark_results/20260310_114644/asyncNG_python3.12.txt +++ /dev/null @@ -1,463 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-10T12:20:52.101701 -timestamp (UTC) : 2026-03-10T11:20:52.101704+00:00 -python version : 3.12.12 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 1 - ↳ mode : logprep-ng - ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260310_114644/asyncNG_python3.12.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] -services : ['kafka', 'opensearch'] 
-sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config qURp8X-hQAaKc_KQBzuc4Q 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability RZk55MljQm6XIUiPSH4Dag 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773141706 11:21:46 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed pLH1ngXsRsi4foNDpcr14w 1 1 30000 0 3.5mb 3.5mb -green open .plugins-ml-config qURp8X-hQAaKc_KQBzuc4Q 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability RZk55MljQm6XIUiPSH4Dag 1 0 0 0 208b 208b -yellow open sre y36g6YgjQuO4WohhzjE8iw 1 1 4362 0 941.9kb 941.9kb -yellow open pseudonyms nG2lWUmlRyehxnCPdhGt9g 1 1 4374 0 204.2kb 204.2kb - - ---- _cat/count --- -epoch timestamp count -1773141739 11:22:19 38737 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.036 s -measurement window: 30.000 s -processed (OpenSearch): 30_000 -throughput: 1,000.00 docs/s --------------- - ------ Run Round 2: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config 527LnEIXRXGZabP3jFs55w 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability yxUHZZLZQFKfGVNXSLKgsg 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773141804 11:23:24 1 - - ---- _cat/aliases --- -alias index filter 
routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed yt14um7oQVa6jQYnOjuCzw 1 1 30000 0 3.4mb 3.4mb -green open .opensearch-observability yxUHZZLZQFKfGVNXSLKgsg 1 0 0 0 208b 208b -green open .plugins-ml-config 527LnEIXRXGZabP3jFs55w 1 0 1 0 3.9kb 3.9kb -yellow open sre KddhBiwlQoyELuLXisuaAA 1 1 4386 0 974.4kb 974.4kb -yellow open pseudonyms JRBGgITdQm-8SMEitpXNEA 1 1 4368 0 234.1kb 234.1kb - - ---- _cat/count --- -epoch timestamp count -1773141837 11:23:57 38755 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.034 s -measurement window: 30.000 s -processed (OpenSearch): 30_000 -throughput: 999.99 docs/s --------------- - ------ Run Round 3: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability pjthS6PeS_GkZNFf2ZcFpA 1 0 0 0 208b 208b -green open .plugins-ml-config JX4Dlb_uSCq7T1W8vVUdOw 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773141902 11:25:02 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 _XAzXa-VRZWAMD0dc0z0Fg 1 0 8 16 69.2kb 69.2kb -yellow open processed KVIx75vxQI2AdSPMEwDF5Q 1 1 30000 0 3.3mb 3.3mb -green open .opensearch-observability pjthS6PeS_GkZNFf2ZcFpA 1 0 0 0 208b 208b -green open .plugins-ml-config JX4Dlb_uSCq7T1W8vVUdOw 1 0 1 0 3.9kb 3.9kb -yellow open sre UloRgdo2TlKRN7lHNhpNsQ 1 1 4367 0 924.1kb 924.1kb -yellow 
open pseudonyms j6pld3rXQtuP5zkxRFYnPg 1 1 4374 0 242.7kb 242.7kb - - ---- _cat/count --- -epoch timestamp count -1773141935 11:25:35 38750 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.032 s -measurement window: 30.000 s -processed (OpenSearch): 30_000 -throughput: 1,000.00 docs/s --------------- - ------ Run Round 4: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config 2DK8U6OmSyW2d2e1I_E6xQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability nWDddVeDSDeXHRZm9lmCpQ 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773141999 11:26:39 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed Dy4zsNKMQQGiBXWP4P_fIw 1 1 47500 0 3.9mb 3.9mb -green open .opensearch-observability nWDddVeDSDeXHRZm9lmCpQ 1 0 0 0 208b 208b -green open .plugins-ml-config 2DK8U6OmSyW2d2e1I_E6xQ 1 0 1 0 3.9kb 3.9kb -yellow open sre Nn4SuE-fQSipF5dfmoJ4ew 1 1 4689 0 1mb 1mb -yellow open pseudonyms KHU2D5tDSKGxZbsGCL4q6A 1 1 4682 0 270.5kb 270.5kb - - ---- _cat/count --- -epoch timestamp count -1773142048 11:27:28 60621 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.033 s -measurement window: 45.000 s -processed (OpenSearch): 47_500 -throughput: 1,055.55 docs/s --------------- - ------ Run Round 5: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size 
pri.store.size -green open .opensearch-observability m873XzwBQr67H-si4terAg 1 0 0 0 208b 208b -green open .plugins-ml-config 8HAVu81gSDiVeHkD_No_jA 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773142112 11:28:32 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed F3qPm27QQLmzNV4zqAg_NA 1 1 47500 0 3.9mb 3.9mb -green open .opensearch-observability m873XzwBQr67H-si4terAg 1 0 0 0 208b 208b -green open .plugins-ml-config 8HAVu81gSDiVeHkD_No_jA 1 0 1 0 3.9kb 3.9kb -yellow open sre 0-559WijQ1KCSH1eBrfzuA 1 1 4679 0 1mb 1mb -yellow open pseudonyms lmgqw11gTcWucl8320DrLw 1 1 4698 0 269.8kb 269.8kb - - ---- _cat/count --- -epoch timestamp count -1773142161 11:29:21 60630 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.032 s -measurement window: 45.000 s -processed (OpenSearch): 47_500 -throughput: 1,055.55 docs/s --------------- - ------ Run Round 6: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 6pV90QVTRLeuhHGmVv7Eqw 1 0 8 16 74.3kb 74.3kb -green open .opensearch-observability _nWuczwbTS2EtwLYc1EnIw 1 0 0 0 208b 208b -green open .plugins-ml-config xXQtIHTaQeqIh5J1op25IQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773142226 11:30:26 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open 
top_queries-2026.03.10-25342 6pV90QVTRLeuhHGmVv7Eqw 1 0 8 16 74.3kb 74.3kb -yellow open processed RhnMmdt_TdyPihuDw0S5Uw 1 1 50000 0 4mb 4mb -green open .plugins-ml-config xXQtIHTaQeqIh5J1op25IQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability _nWuczwbTS2EtwLYc1EnIw 1 0 0 0 208b 208b -yellow open sre ORtxJK5qQHWqZTPx2C8XxA 1 1 4693 0 1mb 1mb -yellow open pseudonyms OTqIRQUnT1eeS9MbukaDqQ 1 1 4739 0 285kb 285kb - - ---- _cat/count --- -epoch timestamp count -1773142275 11:31:15 63768 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.035 s -measurement window: 45.000 s -processed (OpenSearch): 50_000 -throughput: 1,111.11 docs/s --------------- - ------ Run Round 7: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability p_AUWtLpRpyMypqizqTOig 1 0 0 0 208b 208b -green open .plugins-ml-config Zpbjtp0RRZ-9lYKtLf0wcA 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773142340 11:32:20 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed BX_pUvliQvCf3PbMVS8vUA 1 1 65000 0 6.2mb 6.2mb -green open .opensearch-observability p_AUWtLpRpyMypqizqTOig 1 0 0 0 208b 208b -green open .plugins-ml-config Zpbjtp0RRZ-9lYKtLf0wcA 1 0 1 0 3.9kb 3.9kb -yellow open sre aExQe77SQR-NGMlm_CFwOQ 1 1 4689 0 1.2mb 1.2mb -yellow open pseudonyms gNJ4yw5QTbqjiviBEDycpQ 1 1 4689 0 328.2kb 328.2kb - - ---- _cat/count --- -epoch timestamp count -1773142401 11:33:21 82494 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT 
--- -run_seconds: 60 -events generated: 120_000 -startup time: 5.035 s -measurement window: 60.000 s -processed (OpenSearch): 65_000 -throughput: 1,083.33 docs/s --------------- - ------ Run Round 8: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config siziilcbQRu_h5HXR8Rxjg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability w_ggeYyIToyP2BD0xCpVig 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773142465 11:34:25 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 sPK_PKFQREOSmS5LVF8pZA 1 0 9 18 101.7kb 101.7kb -yellow open processed kI4K46jhTy-hT-7S-pMZ7w 1 1 62500 0 6mb 6mb -green open .plugins-ml-config siziilcbQRu_h5HXR8Rxjg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability w_ggeYyIToyP2BD0xCpVig 1 0 0 0 208b 208b -yellow open sre TqULOHejStmMB_5l8E_v4A 1 1 4682 0 1.1mb 1.1mb -yellow open pseudonyms QWkkVgh7T3Wmnm3n2R12Bg 1 1 4694 0 291.3kb 291.3kb - - ---- _cat/count --- -epoch timestamp count -1773142527 11:35:27 79388 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.035 s -measurement window: 60.000 s -processed (OpenSearch): 62_500 -throughput: 1,041.67 docs/s --------------- - ------ Run Round 9: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability fqTKW3dSRaSROCbvNIes3A 1 0 0 0 208b 208b -green open .plugins-ml-config rYv2BXNWTxa2RbBoWvIg2A 1 0 1 0 
3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773142591 11:36:31 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed Czlip08tTW-PO0_WrNLBlA 1 1 62500 0 6mb 6mb -green open .opensearch-observability fqTKW3dSRaSROCbvNIes3A 1 0 0 0 208b 208b -green open .plugins-ml-config rYv2BXNWTxa2RbBoWvIg2A 1 0 1 0 3.9kb 3.9kb -yellow open sre gZoTmPBNS--bIYMqzSgvZQ 1 1 4696 0 1.1mb 1.1mb -yellow open pseudonyms 4et-xUB9QxCPWQWEoHn20Q 1 1 4690 0 304.1kb 304.1kb - - ---- _cat/count --- -epoch timestamp count -1773142652 11:37:32 79383 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.032 s -measurement window: 60.000 s -processed (OpenSearch): 62_500 -throughput: 1,041.66 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 9 -total runtime: 405.001 s -total processed: 425_000 - -throughput (weighted): 1,049.38 docs/s -throughput (median): 1,041.67 docs/s -throughput (average): 1,043.21 docs/s -throughput (min/max): 999.99 / 1,111.11 docs/s -throughput (std dev): 38.94 docs/s -================================ diff --git a/benchmark_results/20260310_114644/asyncNG_python3.13.txt b/benchmark_results/20260310_114644/asyncNG_python3.13.txt deleted file mode 100644 index dfac3c584..000000000 --- a/benchmark_results/20260310_114644/asyncNG_python3.13.txt +++ /dev/null @@ -1,465 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-10T12:54:33.437232 -timestamp (UTC) : 2026-03-10T11:54:33.437236+00:00 -python version : 3.13.9 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : 
examples/exampledata/kafka_generate_input_logdata -ng : 1 - ↳ mode : logprep-ng - ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260310_114644/asyncNG_python3.13.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 I_xv31UiQriOJfobYjhmOg 1 0 8 16 41.8kb 41.8kb -green open .opensearch-observability ztMiR5HIRvqpmGZsU_BnBQ 1 0 0 0 208b 208b -green open .plugins-ml-config ji8Y9AnSTAaOfSVm9AMhXg 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773143728 11:55:28 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 I_xv31UiQriOJfobYjhmOg 1 0 8 16 41.8kb 41.8kb -yellow open processed MnfF_3PCTaelpIquQ-D2VA 1 1 30000 0 3.4mb 3.4mb -green open .opensearch-observability ztMiR5HIRvqpmGZsU_BnBQ 1 0 0 0 208b 208b -green open .plugins-ml-config ji8Y9AnSTAaOfSVm9AMhXg 1 0 1 0 3.9kb 3.9kb -yellow open sre wgZ-y1VASkGvBKTnZixmcQ 1 1 4372 0 915.8kb 915.8kb -yellow open pseudonyms o_SYaU7bQg-BqXpo7X1BRA 1 1 4370 0 257.6kb 257.6kb - - ---- _cat/count --- -epoch timestamp count -1773143761 11:56:01 38751 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 
5.032 s -measurement window: 30.000 s -processed (OpenSearch): 30_000 -throughput: 1,000.00 docs/s --------------- - ------ Run Round 2: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability X2ZS_rvBRjuY3T8XGagaWw 1 0 0 0 208b 208b -green open .plugins-ml-config 7XhPz67bTluTSndxZ5AzXg 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773143825 11:57:05 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 6643q4wQRmWk7ZjvgsZ0rQ 1 1 30000 0 3.4mb 3.4mb -green open .opensearch-observability X2ZS_rvBRjuY3T8XGagaWw 1 0 0 0 208b 208b -green open .plugins-ml-config 7XhPz67bTluTSndxZ5AzXg 1 0 1 0 3.9kb 3.9kb -yellow open sre dJYe8r6ARte2dXTaUr4bkw 1 1 4365 0 936.5kb 936.5kb -yellow open pseudonyms jWUGsg__RXKXE4Wmi2a68A 1 1 4368 0 259.6kb 259.6kb - - ---- _cat/count --- -epoch timestamp count -1773143858 11:57:38 38734 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.035 s -measurement window: 30.000 s -processed (OpenSearch): 30_000 -throughput: 1,000.00 docs/s --------------- - ------ Run Round 3: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability YBGrVG7rSSuAa6JjVVXfXw 1 0 0 0 208b 208b -green open .plugins-ml-config eidC_2AuSgmzQ3mcQSfg7Q 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773143921 11:58:41 1 - - ---- _cat/aliases --- -alias index filter routing.index 
routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed DPq2_avaS9C3r1_EJ5nmVg 1 1 30000 0 3.3mb 3.3mb -green open .opensearch-observability YBGrVG7rSSuAa6JjVVXfXw 1 0 0 0 208b 208b -green open .plugins-ml-config eidC_2AuSgmzQ3mcQSfg7Q 1 0 1 0 3.9kb 3.9kb -yellow open sre E2it4BdxRmW9uqS-CRHl2A 1 1 4368 0 914.7kb 914.7kb -yellow open pseudonyms zex0d1kjQvKySA_NX7zqog 1 1 4376 0 239.5kb 239.5kb - - ---- _cat/count --- -epoch timestamp count -1773143954 11:59:14 38745 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.033 s -measurement window: 30.000 s -processed (OpenSearch): 30_000 -throughput: 1,000.00 docs/s --------------- - ------ Run Round 4: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 hz-t4A3wQUOqXFynvq94Tg 1 0 8 16 79.9kb 79.9kb -green open .opensearch-observability vJraBAgiTz-TRcZA-qkxow 1 0 0 0 208b 208b -green open .plugins-ml-config CpvyXPTGTtuSxtNv_4Owmg 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773144017 12:00:17 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 hz-t4A3wQUOqXFynvq94Tg 1 0 8 16 79.9kb 79.9kb -yellow open processed rkucaoTuT-GlrmWwPcTQ0w 1 1 50000 0 4.8mb 4.8mb -green open .plugins-ml-config CpvyXPTGTtuSxtNv_4Owmg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability vJraBAgiTz-TRcZA-qkxow 1 0 0 0 208b 208b 
-yellow open sre MGimxtLFSIeCTkYBrL3WvQ 1 1 4807 0 1.1mb 1.1mb -yellow open pseudonyms m4ugOB34RJqstihgsvM30g 1 1 4707 0 293kb 293kb - - ---- _cat/count --- -epoch timestamp count -1773144067 12:01:07 63751 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.038 s -measurement window: 45.000 s -processed (OpenSearch): 50_000 -throughput: 1,111.11 docs/s --------------- - ------ Run Round 5: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability F8SYagy1TDWVgHRJJUN77w 1 0 0 0 208b 208b -green open .plugins-ml-config z0vOy7RxTym2hFi7y0Buew 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773144130 12:02:10 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed Ynngz-V5Ts29CRZU79LfIg 1 1 47500 0 4.7mb 4.7mb -green open .opensearch-observability F8SYagy1TDWVgHRJJUN77w 1 0 0 0 208b 208b -green open .plugins-ml-config z0vOy7RxTym2hFi7y0Buew 1 0 1 0 3.9kb 3.9kb -yellow open sre qsdSGzxgR6io2o69J1uULQ 1 1 4712 0 1mb 1mb -yellow open pseudonyms ZkY7ZyJ6T5e7arGy0Cdd5A 1 1 4698 0 231.5kb 231.5kb - - ---- _cat/count --- -epoch timestamp count -1773144179 12:02:59 60629 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.032 s -measurement window: 45.000 s -processed (OpenSearch): 47_500 -throughput: 1,055.55 docs/s --------------- - ------ Run Round 6: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- 
-health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config 4mVF2oh0R5eo8X7okMFMRg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability kB9ZgpZuTKe7Jfuigv70-A 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773144243 12:04:03 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed u8sS3SX8TrqtMJjqU1BqOg 1 1 47500 0 4.6mb 4.6mb -green open .plugins-ml-config 4mVF2oh0R5eo8X7okMFMRg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability kB9ZgpZuTKe7Jfuigv70-A 1 0 0 0 208b 208b -yellow open sre 27TIfJxPQAmJdYBGa_V2gw 1 1 4691 0 1.1mb 1.1mb -yellow open pseudonyms eM8L7T3zRa6s3FEC_FMAKw 1 1 4687 0 283.3kb 283.3kb - - ---- _cat/count --- -epoch timestamp count -1773144292 12:04:52 60626 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.033 s -measurement window: 45.000 s -processed (OpenSearch): 47_500 -throughput: 1,055.55 docs/s --------------- - ------ Run Round 7: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability WlSHsARmRvuKZ08uNrjUGg 1 0 0 0 208b 208b -green open .plugins-ml-config 3Yj4LkPFRImpm7VbEHhwTQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773144356 12:05:56 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 
lQ6WSC1dQOOmD1Oimv0BXg 1 1 62500 0 6.1mb 6.1mb -green open .plugins-ml-config 3Yj4LkPFRImpm7VbEHhwTQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability WlSHsARmRvuKZ08uNrjUGg 1 0 0 0 208b 208b -yellow open sre iqMuWQZjQnGjB98-uiS-cA 1 1 4695 0 1.1mb 1.1mb -yellow open pseudonyms t4iNpAx-TU6e6IsTQ1gjTQ 1 1 4552 0 277.1kb 277.1kb - - ---- _cat/count --- -epoch timestamp count -1773144417 12:06:57 79377 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.034 s -measurement window: 60.000 s -processed (OpenSearch): 62_500 -throughput: 1,041.66 docs/s --------------- - ------ Run Round 8: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability dBLmUMg7RoaDKtbBLRtSbw 1 0 0 0 208b 208b -green open .plugins-ml-config 7onXSIPETwCIS0WuVjMYBQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773144481 12:08:01 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 8vYXllK0QhGDJYr9DKurTg 1 1 65000 0 6.2mb 6.2mb -green open .opensearch-observability dBLmUMg7RoaDKtbBLRtSbw 1 0 0 0 208b 208b -green open .plugins-ml-config 7onXSIPETwCIS0WuVjMYBQ 1 0 1 0 3.9kb 3.9kb -yellow open sre MVfehTxTROeCPxHlWNbEHw 1 1 4691 0 1.1mb 1.1mb -yellow open pseudonyms uxw3LtF5RNyNzp9D6mOZGw 1 1 4683 0 278.1kb 278.1kb - - ---- _cat/count --- -epoch timestamp count -1773144542 12:09:02 82501 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.032 s -measurement 
window: 60.000 s -processed (OpenSearch): 65_000 -throughput: 1,083.33 docs/s --------------- - ------ Run Round 9: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 SxW3D0SRQ52N_EF5tMaRwg 1 0 8 16 80.2kb 80.2kb -green open .plugins-ml-config 7dTZoopnS9aS1aFDO_zG-A 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 1hyqTPRzQWKkYsV3swCWsQ 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773144605 12:10:05 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 SxW3D0SRQ52N_EF5tMaRwg 1 0 8 16 80.2kb 80.2kb -yellow open processed A3FCiy-2QnKDOY2wrqW8iw 1 1 65000 0 6.2mb 6.2mb -green open .plugins-ml-config 7dTZoopnS9aS1aFDO_zG-A 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 1hyqTPRzQWKkYsV3swCWsQ 1 0 0 0 208b 208b -yellow open sre N0HqW2xRR3GCrWXaPIO6PQ 1 1 4679 0 1.2mb 1.2mb -yellow open pseudonyms neSFnajFRxGv1mrognFv2g 1 1 4691 0 270.7kb 270.7kb - - ---- _cat/count --- -epoch timestamp count -1773144667 12:11:07 82509 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.033 s -measurement window: 60.000 s -processed (OpenSearch): 65_000 -throughput: 1,083.33 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 9 -total runtime: 405.001 s -total processed: 427_500 - -throughput (weighted): 1,055.55 docs/s -throughput (median): 1,055.55 docs/s -throughput (average): 1,047.84 docs/s -throughput (min/max): 1,000.00 / 1,111.11 docs/s -throughput (std dev): 41.15 docs/s 
-================================ diff --git a/benchmark_results/20260310_114644/asyncNG_python3.14.txt b/benchmark_results/20260310_114644/asyncNG_python3.14.txt deleted file mode 100644 index 7e350dc55..000000000 --- a/benchmark_results/20260310_114644/asyncNG_python3.14.txt +++ /dev/null @@ -1,464 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-10T13:27:59.682681 -timestamp (UTC) : 2026-03-10T12:27:59.682684+00:00 -python version : 3.14.0 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 1 - ↳ mode : logprep-ng - ↳ pipeline_config : examples/exampledata/config/_benchmark_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260310_114644/asyncNG_python3.14.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability Y-yoEoY2SBadAyzjcl610w 1 0 0 0 208b 208b -green open .plugins-ml-config 7em18HdfRESYMOc2Eu4UHg 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773145733 12:28:53 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 8HTmbKfTQPWK1AfbmkjzHQ 1 1 30000 0 3.3mb 3.3mb -green open .plugins-ml-config 7em18HdfRESYMOc2Eu4UHg 1 0 1 0 3.9kb 3.9kb -green open 
.opensearch-observability Y-yoEoY2SBadAyzjcl610w 1 0 0 0 208b 208b -yellow open sre UAcyxVSLSUCY360brDSGYg 1 1 4374 0 993.7kb 993.7kb -yellow open pseudonyms x_-RtrZhSDmDUH00IX2dXw 1 1 4380 0 229kb 229kb - - ---- _cat/count --- -epoch timestamp count -1773145767 12:29:27 38755 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.035 s -measurement window: 30.000 s -processed (OpenSearch): 30_000 -throughput: 999.99 docs/s --------------- - ------ Run Round 2: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 eScOqdCsTja6b4SVZtL-Fg 1 0 8 16 73.3kb 73.3kb -green open .opensearch-observability LOf3NiaFSam63_AVT2NayA 1 0 0 0 208b 208b -green open .plugins-ml-config GtFKSirSQEK7i18i8VNFKg 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773145830 12:30:30 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 eScOqdCsTja6b4SVZtL-Fg 1 0 8 16 73.3kb 73.3kb -yellow open processed mo2nLn40RRazLcF2jP4wng 1 1 30000 0 3.4mb 3.4mb -green open .opensearch-observability LOf3NiaFSam63_AVT2NayA 1 0 0 0 208b 208b -green open .plugins-ml-config GtFKSirSQEK7i18i8VNFKg 1 0 1 0 3.9kb 3.9kb -yellow open sre AwodoNPnSi2c1hXSbnls6g 1 1 4379 0 977.4kb 977.4kb -yellow open pseudonyms MXoFwE2tS_6NTE19Y99mnw 1 1 4369 0 219.1kb 219.1kb - - ---- _cat/count --- -epoch timestamp count -1773145863 12:31:03 38757 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 
120_000 -startup time: 5.032 s -measurement window: 30.000 s -processed (OpenSearch): 30_000 -throughput: 1,000.00 docs/s --------------- - ------ Run Round 3: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config rhuNRvHsQ_q64N5pVnlgxQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability oXS9zgOQSsi4RDuQxvxktg 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773145926 12:32:06 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed KqklODL0SwWgsfhsXiLO0w 1 1 30000 0 3.4mb 3.4mb -green open .plugins-ml-config rhuNRvHsQ_q64N5pVnlgxQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability oXS9zgOQSsi4RDuQxvxktg 1 0 0 0 208b 208b -yellow open sre fEzl_d_tTsy-WEDuNDwmzw 1 1 4364 0 951.2kb 951.2kb -yellow open pseudonyms yB_9FDq4Tji4trPpRoUe5A 1 1 4373 0 242.7kb 242.7kb - - ---- _cat/count --- -epoch timestamp count -1773145960 12:32:40 38738 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.036 s -measurement window: 30.000 s -processed (OpenSearch): 30_000 -throughput: 1,000.00 docs/s --------------- - ------ Run Round 4: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability 4QyYkV6TQtuNzly8V-gehQ 1 0 0 0 208b 208b -green open .plugins-ml-config 1LuTJ8s8SWGPdi94zzuHhw 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773146023 12:33:43 1 - - ---- _cat/aliases --- -alias index 
filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed Te_n8RWsQ0KJvKK1khIrlg 1 1 47500 0 4.7mb 4.7mb -green open .opensearch-observability 4QyYkV6TQtuNzly8V-gehQ 1 0 0 0 208b 208b -green open .plugins-ml-config 1LuTJ8s8SWGPdi94zzuHhw 1 0 1 0 3.9kb 3.9kb -yellow open sre 50KmSfKVRHKT4PpGftZ9bA 1 1 4684 0 1mb 1mb -yellow open pseudonyms qHkqu10XQJiPVNs7bQQIGg 1 1 4686 0 271.3kb 271.3kb - - ---- _cat/count --- -epoch timestamp count -1773146073 12:34:33 60628 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.035 s -measurement window: 45.000 s -processed (OpenSearch): 47_500 -throughput: 1,055.55 docs/s --------------- - ------ Run Round 5: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 IxdUyj8JRyCm26QNggMVHQ 1 0 8 16 61.6kb 61.6kb -green open .opensearch-observability XeqnedYCTDmdhJ0jmIP8-g 1 0 0 0 208b 208b -green open .plugins-ml-config XhLawENBSmaN-FvcZEfifg 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773146136 12:35:36 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 IxdUyj8JRyCm26QNggMVHQ 1 0 8 16 61.6kb 61.6kb -yellow open processed 40Zw8znvQ2m2jAhzAsxJOg 1 1 50000 0 4.7mb 4.7mb -green open .opensearch-observability XeqnedYCTDmdhJ0jmIP8-g 1 0 0 0 208b 208b -green open .plugins-ml-config XhLawENBSmaN-FvcZEfifg 1 0 1 0 
3.9kb 3.9kb -yellow open sre 17hOtqdvRDm3zgwC9Dwhng 1 1 4684 0 1.1mb 1.1mb -yellow open pseudonyms MywuZYAkTWONDN3h3yczsw 1 1 4680 0 273.2kb 273.2kb - - ---- _cat/count --- -epoch timestamp count -1773146186 12:36:26 63759 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.034 s -measurement window: 45.000 s -processed (OpenSearch): 50_000 -throughput: 1,111.11 docs/s --------------- - ------ Run Round 6: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config qZaV-M09Qzi7D5n0Gq6OMA 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability hcHkIfVWQr2m7kaXgBz9Xg 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773146249 12:37:29 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed MnI7YvuGSTmKj-74TR-2_A 1 1 50000 0 4.9mb 4.9mb -green open .plugins-ml-config qZaV-M09Qzi7D5n0Gq6OMA 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability hcHkIfVWQr2m7kaXgBz9Xg 1 0 0 0 208b 208b -yellow open sre PqoCdebnRquRlyy_Phrk7Q 1 1 4679 0 1mb 1mb -yellow open pseudonyms iy2YRNUyQGmIfLscS_xsHQ 1 1 4685 0 255.5kb 255.5kb - - ---- _cat/count --- -epoch timestamp count -1773146298 12:38:18 63741 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.031 s -measurement window: 45.000 s -processed (OpenSearch): 50_000 -throughput: 1,111.11 docs/s --------------- - ------ Run Round 7: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- 
_cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability tUZM9KHbRNy9ikTUUhVSsg 1 0 0 0 208b 208b -green open .plugins-ml-config HuAU0aysShKw1VqY2oqRFw 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773146361 12:39:21 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 QxrQenXFTEib_JDqlwQfMw 1 0 9 18 101.3kb 101.3kb -yellow open processed zp9xCs97QiGaPjxKpOXgfA 1 1 62500 0 6mb 6mb -green open .opensearch-observability tUZM9KHbRNy9ikTUUhVSsg 1 0 0 0 208b 208b -green open .plugins-ml-config HuAU0aysShKw1VqY2oqRFw 1 0 1 0 3.9kb 3.9kb -yellow open sre NY0dnxYyQGOoM_fFp4YdjQ 1 1 4693 0 1.2mb 1.2mb -yellow open pseudonyms okETaS-0RJCzCnlBML2uWQ 1 1 4681 0 275.5kb 275.5kb - - ---- _cat/count --- -epoch timestamp count -1773146422 12:40:22 79379 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.036 s -measurement window: 60.000 s -processed (OpenSearch): 62_500 -throughput: 1,041.66 docs/s --------------- - ------ Run Round 8: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability vmKR2wz2QeyC9U2QcWM1fA 1 0 0 0 208b 208b -green open .plugins-ml-config eKEyvdORQNWUT73s9j-w7g 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773146485 12:41:25 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status 
index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed djqBfC4tQ1-EZVn3f7_8lw 1 1 65000 0 6.2mb 6.2mb -green open .plugins-ml-config eKEyvdORQNWUT73s9j-w7g 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability vmKR2wz2QeyC9U2QcWM1fA 1 0 0 0 208b 208b -yellow open sre UoGzmiCYSneaBq_XYlbwJg 1 1 4817 0 1.2mb 1.2mb -yellow open pseudonyms QMwDKVLNS4SpuR6EJj8ylg 1 1 4737 0 300.2kb 300.2kb - - ---- _cat/count --- -epoch timestamp count -1773146547 12:42:27 82506 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.034 s -measurement window: 60.000 s -processed (OpenSearch): 65_000 -throughput: 1,083.33 docs/s --------------- - ------ Run Round 9: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability ienQTDobRt-Atfck40B57A 1 0 0 0 208b 208b -green open .plugins-ml-config 3CKvJaitQBeWvNg3yhGnxw 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773146610 12:43:30 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed _9ePdVa-TH-pq5fUffGZ3g 1 1 65000 0 6.2mb 6.2mb -green open .opensearch-observability ienQTDobRt-Atfck40B57A 1 0 0 0 208b 208b -green open .plugins-ml-config 3CKvJaitQBeWvNg3yhGnxw 1 0 1 0 3.9kb 3.9kb -yellow open sre Pbnhi4uXRBiWgDljH_cHcQ 1 1 4681 0 1.2mb 1.2mb -yellow open pseudonyms eh06XRVlQOiKWrk9I3NFyg 1 1 4682 0 312.3kb 312.3kb - - ---- _cat/count --- -epoch timestamp count -1773146671 12:44:31 82497 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- 
RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.037 s -measurement window: 60.000 s -processed (OpenSearch): 65_000 -throughput: 1,083.33 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 9 -total runtime: 405.001 s -total processed: 430_000 - -throughput (weighted): 1,061.73 docs/s -throughput (median): 1,055.55 docs/s -throughput (average): 1,054.01 docs/s -throughput (min/max): 999.99 / 1,111.11 docs/s -throughput (std dev): 46.30 docs/s -================================ diff --git a/benchmark_results/20260310_114644/nonNG_python3.11.txt b/benchmark_results/20260310_114644/nonNG_python3.11.txt deleted file mode 100644 index a0477efc9..000000000 --- a/benchmark_results/20260310_114644/nonNG_python3.11.txt +++ /dev/null @@ -1,464 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-10T12:03:48.919675 -timestamp (UTC) : 2026-03-10T11:03:48.919679+00:00 -python version : 3.11.14 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 0 - ↳ mode : logprep - ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260310_114644/nonNG_python3.11.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config e0dNQ2vlSWy-u9bIhSyqzQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 0ixjhV-YRFWedi8wwZ6DpA 1 0 0 0 208b 208b - - ---- 
_cat/count --- -epoch timestamp count -1773140682 11:04:42 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 xys1N9SHQWGwzunQXyTM8g 1 0 9 18 80.9kb 80.9kb -yellow open processed sGRLNDwTSTqIVIGUXvEkZA 1 1 38823 0 5.1mb 5.1mb -green open .plugins-ml-config e0dNQ2vlSWy-u9bIhSyqzQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 0ixjhV-YRFWedi8wwZ6DpA 1 0 0 0 208b 208b -yellow open sre KRCsqvtETYCgaMk5mpBqVA 1 1 4753 0 1mb 1mb -yellow open pseudonyms OLOPiy0PS06gZ-M0Hj4Lkw 1 1 4746 0 269.4kb 269.4kb - - ---- _cat/count --- -epoch timestamp count -1773140717 11:05:17 48332 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.041 s -measurement window: 30.000 s -processed (OpenSearch): 38_823 -throughput: 1,294.10 docs/s --------------- - ------ Run Round 2: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config hcYEqSHOTcighhKWCvQ6Cg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 1E3wgaLTRXiqDKcDzy3_lw 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773140781 11:06:21 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 9TP8NrSVQl6C5ft7Mbc-gA 1 1 33989 0 4.5mb 4.5mb -green open .plugins-ml-config hcYEqSHOTcighhKWCvQ6Cg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 
1E3wgaLTRXiqDKcDzy3_lw 1 0 0 0 208b 208b -yellow open sre jYsPzo3lR_GAKfzQkk71HA 1 1 4249 0 936.5kb 936.5kb -yellow open pseudonyms 8N7__rR9T_qHXab5iPVYnQ 1 1 4262 0 240.2kb 240.2kb - - ---- _cat/count --- -epoch timestamp count -1773140813 11:06:53 42501 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.039 s -measurement window: 30.000 s -processed (OpenSearch): 33_989 -throughput: 1,132.96 docs/s --------------- - ------ Run Round 3: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config ALbsk9XRQdyrzoOvXr6_Xw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability C7IMFIveRWaWed0RkOuAmA 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773140879 11:07:59 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed HOiBSU97SNq9nOvQ_lGmNg 1 1 25988 0 3.4mb 3.4mb -green open .plugins-ml-config ALbsk9XRQdyrzoOvXr6_Xw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability C7IMFIveRWaWed0RkOuAmA 1 0 0 0 208b 208b -yellow open sre eQuHAqoESRurtnLhbJgSPg 1 1 3251 0 729.6kb 729.6kb -yellow open pseudonyms D1VRuZnkSwaSwXgz7fEuEA 1 1 3261 0 232kb 232kb - - ---- _cat/count --- -epoch timestamp count -1773140911 11:08:31 32501 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.046 s -measurement window: 30.000 s -processed (OpenSearch): 25_988 -throughput: 866.26 docs/s --------------- - ------ Run Round 4: 45 seconds ----- - -=== OpenSearch snapshot 
(before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability Y8BA1ot5Tuqnte2qDkwVkw 1 0 0 0 208b 208b -green open .plugins-ml-config it8qid6uSEyb1O2qBRTtMA 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773140980 11:09:40 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 3jpkmJf8S72GNFkT7UjGnw 1 0 9 18 81.6kb 81.6kb -yellow open processed PcQ5ce3CQPWBgM2ryKql9A 1 1 46002 0 4.6mb 4.6mb -green open .plugins-ml-config it8qid6uSEyb1O2qBRTtMA 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability Y8BA1ot5Tuqnte2qDkwVkw 1 0 0 0 208b 208b -yellow open sre 2R49DDPURBirxOPxVVJ4jg 1 1 4007 0 971.3kb 971.3kb -yellow open pseudonyms Vue0yAx2T7GF49ZJapeYLQ 1 1 3994 0 268.8kb 268.8kb - - ---- _cat/count --- -epoch timestamp count -1773141028 11:10:28 57510 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.038 s -measurement window: 45.000 s -processed (OpenSearch): 46_002 -throughput: 1,022.27 docs/s --------------- - ------ Run Round 5: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config ZWljULjpRVivsPA8KhsEAg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability GI_Z4tpERa2TxIRRSvNoIA 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773141092 11:11:32 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - 
---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed h6VzerBpQfGfwdGxbIs6sw 1 1 55995 0 7.5mb 7.5mb -green open .plugins-ml-config ZWljULjpRVivsPA8KhsEAg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability GI_Z4tpERa2TxIRRSvNoIA 1 0 0 0 208b 208b -yellow open sre Nu15yppQRmCrNP_oELyAVA 1 1 4752 0 1.1mb 1.1mb -yellow open pseudonyms LkmhVj4sRmCR03WunwEKNA 1 1 4751 0 309.2kb 309.2kb - - ---- _cat/count --- -epoch timestamp count -1773141140 11:12:20 70001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.038 s -measurement window: 45.000 s -processed (OpenSearch): 55_995 -throughput: 1,244.33 docs/s --------------- - ------ Run Round 6: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability rs3PBvC3ToSxl2LsnMBMMQ 1 0 0 0 208b 208b -green open .plugins-ml-config LPB7fh76RCyVMuiINKYR5w 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773141206 11:13:26 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 2CRTiebITJ2YbaJMs98mQw 1 1 56003 0 5.8mb 5.8mb -green open .opensearch-observability rs3PBvC3ToSxl2LsnMBMMQ 1 0 0 0 208b 208b -green open .plugins-ml-config LPB7fh76RCyVMuiINKYR5w 1 0 1 0 3.9kb 3.9kb -yellow open sre eDRTOdepSl67xuSGcroDvw 1 1 5000 0 1.1mb 1.1mb -yellow open pseudonyms CdUFe96eQVOD3qkht369tg 1 1 5004 0 304kb 304kb - - ---- _cat/count --- -epoch timestamp count -1773141253 11:14:13 70001 - - ---- _cat/aliases --- -alias index filter routing.index 
routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.037 s -measurement window: 45.000 s -processed (OpenSearch): 56_003 -throughput: 1,244.51 docs/s --------------- - ------ Run Round 7: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 U2NwNDP8SdmXYeMJONkXSw 1 0 8 16 74.6kb 74.6kb -green open .opensearch-observability uUFb8qADQ_u9jxwlQZHfOQ 1 0 0 0 208b 208b -green open .plugins-ml-config wpAPHI8WSlS-VyQJw7eUPA 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773141318 11:15:18 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 U2NwNDP8SdmXYeMJONkXSw 1 0 8 16 74.6kb 74.6kb -yellow open processed -TuXzK4LTyW8Nt1Mzdb9Ig 1 1 64649 0 7.3mb 7.3mb -green open .plugins-ml-config wpAPHI8WSlS-VyQJw7eUPA 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability uUFb8qADQ_u9jxwlQZHfOQ 1 0 0 0 208b 208b -yellow open sre vOEtOV_YRLiwt54paX9QfA 1 1 3826 0 1mb 1mb -yellow open pseudonyms b6doPBSMQf2Yc_oCHpthbw 1 1 3863 0 347.4kb 347.4kb - - ---- _cat/count --- -epoch timestamp count -1773141382 11:16:22 80809 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.039 s -measurement window: 60.000 s -processed (OpenSearch): 64_649 -throughput: 1,077.48 docs/s --------------- - ------ Run Round 8: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open 
.opensearch-observability Wt9akI1uQsSDDN3MSs1CtA 1 0 0 0 208b 208b -green open .plugins-ml-config uCwxE8PhST64L1u3P0QG2w 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773141447 11:17:27 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 3LNxvFmZRUW_BlmGMvACNw 1 1 64011 0 8mb 8mb -green open .opensearch-observability Wt9akI1uQsSDDN3MSs1CtA 1 0 0 0 208b 208b -green open .plugins-ml-config uCwxE8PhST64L1u3P0QG2w 1 0 1 0 3.9kb 3.9kb -yellow open sre dp-kx6hDQWWW2uo_PKbHEA 1 1 4051 0 1mb 1mb -yellow open pseudonyms k0Jw3iwQQMyvflX3FkbpUg 1 1 4073 0 385.1kb 385.1kb - - ---- _cat/count --- -epoch timestamp count -1773141512 11:18:32 80015 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.039 s -measurement window: 60.000 s -processed (OpenSearch): 64_011 -throughput: 1,066.85 docs/s --------------- - ------ Run Round 9: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config ZymTSsC2TbOtt1UYIq-rGQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability rjJiI8OxQrWJma4zAR9gcQ 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773141577 11:19:37 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 IPjYfgdlSUGzd2W0tClrKg 1 0 9 18 79.2kb 79.2kb -yellow open processed LNRy0sR1RCO_PD-l70bAYA 1 1 
66002 0 8.3mb 8.3mb -green open .opensearch-observability rjJiI8OxQrWJma4zAR9gcQ 1 0 0 0 208b 208b -green open .plugins-ml-config ZymTSsC2TbOtt1UYIq-rGQ 1 0 1 0 3.9kb 3.9kb -yellow open sre DDOwsXA3TSWNq0rgacnY8A 1 1 3995 0 1.1mb 1.1mb -yellow open pseudonyms bVyv95HkSXOCfRBDlODBWA 1 1 3995 0 343.8kb 343.8kb - - ---- _cat/count --- -epoch timestamp count -1773141640 11:20:40 82510 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.042 s -measurement window: 60.000 s -processed (OpenSearch): 66_002 -throughput: 1,100.03 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 9 -total runtime: 405.001 s -total processed: 451_462 - -throughput (weighted): 1,114.72 docs/s -throughput (median): 1,100.03 docs/s -throughput (average): 1,116.53 docs/s -throughput (min/max): 866.26 / 1,294.10 docs/s -throughput (std dev): 132.43 docs/s -================================ diff --git a/benchmark_results/20260310_114644/nonNG_python3.12.txt b/benchmark_results/20260310_114644/nonNG_python3.12.txt deleted file mode 100644 index 9a706b0b7..000000000 --- a/benchmark_results/20260310_114644/nonNG_python3.12.txt +++ /dev/null @@ -1,464 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-10T12:37:43.976924 -timestamp (UTC) : 2026-03-10T11:37:43.976927+00:00 -python version : 3.12.12 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 0 - ↳ mode : logprep - ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260310_114644/nonNG_python3.12.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] -services : ['kafka', 'opensearch'] 
-sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config xKQmQAbuRzuM4Xz8cy3khg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 2qNHCcNWTB6ika5VkxFjYA 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773142717 11:38:37 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed nc4nyA52QwiO2UJWYONB-A 1 1 28004 0 3.8mb 3.8mb -green open .plugins-ml-config xKQmQAbuRzuM4Xz8cy3khg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 2qNHCcNWTB6ika5VkxFjYA 1 0 0 0 208b 208b -yellow open sre hxVQzqVWSvqEopVDKte1Xw 1 1 3500 0 786.1kb 786.1kb -yellow open pseudonyms u5UrgkSbR_Oj8q0rCmcgxA 1 1 3496 0 224.1kb 224.1kb - - ---- _cat/count --- -epoch timestamp count -1773142749 11:39:09 35001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.043 s -measurement window: 30.000 s -processed (OpenSearch): 28_004 -throughput: 933.46 docs/s --------------- - ------ Run Round 2: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 l4jRWRXNSKCIJC7Pau3C3w 1 0 8 16 76.8kb 76.8kb -green open .plugins-ml-config hcdPw3aZQQKu_NWmJzRInw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability OWRDrvyaSOCu2Agsn654lw 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch 
timestamp count -1773142814 11:40:14 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 l4jRWRXNSKCIJC7Pau3C3w 1 0 8 16 76.8kb 76.8kb -yellow open processed AQkKpCIbSzaNihOePbaTfw 1 1 30006 0 3.6mb 3.6mb -green open .opensearch-observability OWRDrvyaSOCu2Agsn654lw 1 0 0 0 208b 208b -green open .plugins-ml-config hcdPw3aZQQKu_NWmJzRInw 1 0 1 0 3.9kb 3.9kb -yellow open sre dgohKDktSSW3nk8Qrgc-CQ 1 1 3737 0 842kb 842kb -yellow open pseudonyms 5OdFQ75JQcSR5bjScPeeKA 1 1 3757 0 263.3kb 263.3kb - - ---- _cat/count --- -epoch timestamp count -1773142846 11:40:46 37509 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.043 s -measurement window: 30.000 s -processed (OpenSearch): 30_006 -throughput: 1,000.19 docs/s --------------- - ------ Run Round 3: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config vaF7jivvQ3q-LE7leg4ngg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 4Wncf1pvTd-IBDDuSP2D5Q 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773142911 11:41:51 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed NavRA1jdQj2kumJYdHx_PQ 1 1 28002 0 3.3mb 3.3mb -green open .plugins-ml-config vaF7jivvQ3q-LE7leg4ngg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 4Wncf1pvTd-IBDDuSP2D5Q 1 0 0 0 208b 
208b -yellow open sre P7KbJtIRRDuGCbDQF-1NZg 1 1 3498 0 805.9kb 805.9kb -yellow open pseudonyms 1XDdXNx0Ql-4QV56TuGmgA 1 1 3500 0 233.6kb 233.6kb - - ---- _cat/count --- -epoch timestamp count -1773142942 11:42:22 35001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.038 s -measurement window: 30.000 s -processed (OpenSearch): 28_002 -throughput: 933.40 docs/s --------------- - ------ Run Round 4: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability gOy8kOq2QgqTYGSWsVlD1Q 1 0 0 0 208b 208b -green open .plugins-ml-config hT3OFEI2Tv-aUFfd1zoiWQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773143007 11:43:27 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed rBEjkYYuRia1f1SLwJkGcw 1 1 47995 0 6.1mb 6.1mb -green open .opensearch-observability gOy8kOq2QgqTYGSWsVlD1Q 1 0 0 0 208b 208b -green open .plugins-ml-config hT3OFEI2Tv-aUFfd1zoiWQ 1 0 1 0 3.9kb 3.9kb -yellow open sre kPftvXArSziJCu-o6-CN_g 1 1 4233 0 1010.1kb 1010.1kb -yellow open pseudonyms DOzjs81cQ8uvlPMLDE5YfA 1 1 4198 0 293.4kb 293.4kb - - ---- _cat/count --- -epoch timestamp count -1773143054 11:44:14 60001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.040 s -measurement window: 45.000 s -processed (OpenSearch): 47_995 -throughput: 1,066.55 docs/s --------------- - ------ Run Round 5: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - 
---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 DXTP-mHWTiezn0-lXx2_Qg 1 0 8 16 78.2kb 78.2kb -green open .opensearch-observability MkraE3jSTSaY79x-oVi5jA 1 0 0 0 208b 208b -green open .plugins-ml-config wUS0_VB9RjSXGh6udkMLeQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773143119 11:45:19 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 DXTP-mHWTiezn0-lXx2_Qg 1 0 8 16 78.2kb 78.2kb -yellow open processed BpBbCmAwRg6f11vkHJdawQ 1 1 48004 0 6.2mb 6.2mb -green open .opensearch-observability MkraE3jSTSaY79x-oVi5jA 1 0 0 0 208b 208b -green open .plugins-ml-config wUS0_VB9RjSXGh6udkMLeQ 1 0 1 0 3.9kb 3.9kb -yellow open sre y0IMG4UVTQe-1niPwTGm0A 1 1 4251 0 999.4kb 999.4kb -yellow open pseudonyms 15u0c1EvTU2oEbZzHRhhLw 1 1 4246 0 289kb 289kb - - ---- _cat/count --- -epoch timestamp count -1773143166 11:46:06 60009 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.037 s -measurement window: 45.000 s -processed (OpenSearch): 48_004 -throughput: 1,066.75 docs/s --------------- - ------ Run Round 6: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability TxW6AO_NSmy2MfpcFj7C_g 1 0 0 0 208b 208b -green open .plugins-ml-config OZNiJUm1TWyUq2nZIczXZw 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773143231 11:47:11 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== 
OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed XdaP6tdcQ0WWO0ltTDKPsg 1 1 47998 0 6mb 6mb -green open .opensearch-observability TxW6AO_NSmy2MfpcFj7C_g 1 0 0 0 208b 208b -green open .plugins-ml-config OZNiJUm1TWyUq2nZIczXZw 1 0 1 0 3.9kb 3.9kb -yellow open sre ahfuIkBtSPO0BzFbsNjwYQ 1 1 4258 0 966.3kb 966.3kb -yellow open pseudonyms Gk8KZ1W1QvKLnmJd4aD-ug 1 1 4236 0 273kb 273kb - - ---- _cat/count --- -epoch timestamp count -1773143279 11:47:59 60001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.036 s -measurement window: 45.000 s -processed (OpenSearch): 47_998 -throughput: 1,066.62 docs/s --------------- - ------ Run Round 7: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability 2xBpCxrMR3qqSyYEB9e5BQ 1 0 0 0 208b 208b -green open .plugins-ml-config ppET6MUASJWlV8WbPIFSTQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773143343 11:49:03 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 NMsHrRSBQPuRTSRiRg09gw 1 0 9 18 81.5kb 81.5kb -yellow open processed Bd9aTJXFS-SWsfGBobCIdA 1 1 65996 0 8mb 8mb -green open .plugins-ml-config ppET6MUASJWlV8WbPIFSTQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 2xBpCxrMR3qqSyYEB9e5BQ 1 0 0 0 208b 208b -yellow open sre 2hv5ZQovRqq7-UQgHBK9kg 1 1 4107 0 1mb 1mb -yellow open pseudonyms dXvGHJ3hQXumrglDYLwAmA 1 1 4137 0 333.8kb 333.8kb - - 
---- _cat/count --- -epoch timestamp count -1773143407 11:50:07 82510 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.037 s -measurement window: 60.000 s -processed (OpenSearch): 65_996 -throughput: 1,099.93 docs/s --------------- - ------ Run Round 8: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability E9N4pVo9TeS3XZIfgRvnGQ 1 0 0 0 208b 208b -green open .plugins-ml-config ael2q5MuTm6sdHlo-Fu1wQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773143470 11:51:10 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed -hgNVet2TGmiFgQ7Xv8LpA 1 1 68003 0 8.4mb 8.4mb -green open .plugins-ml-config ael2q5MuTm6sdHlo-Fu1wQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability E9N4pVo9TeS3XZIfgRvnGQ 1 0 0 0 208b 208b -yellow open sre Xe66T3d9TVusbE2RdfMMbQ 1 1 4252 0 1.1mb 1.1mb -yellow open pseudonyms AdkJmwWdQlCPZZm_wc-3UQ 1 1 4247 0 321.4kb 321.4kb - - ---- _cat/count --- -epoch timestamp count -1773143534 11:52:14 85002 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.037 s -measurement window: 60.000 s -processed (OpenSearch): 68_003 -throughput: 1,133.38 docs/s --------------- - ------ Run Round 9: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability 
u2XpQWx_Qcet2YoOIZ3Xvw 1 0 0 0 208b 208b -green open .plugins-ml-config M0bJfEV1R_6G3naYoMeLFA 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773143598 11:53:18 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 16L2NGeqQma5afA9wIiDEQ 1 1 68003 0 8.6mb 8.6mb -green open .opensearch-observability u2XpQWx_Qcet2YoOIZ3Xvw 1 0 0 0 208b 208b -green open .plugins-ml-config M0bJfEV1R_6G3naYoMeLFA 1 0 1 0 3.9kb 3.9kb -yellow open sre odTnQ7X1SSSikOWHskCJ1g 1 1 4228 0 1.1mb 1.1mb -yellow open pseudonyms qDhqwhCBT2K1EM7noUDp_g 1 1 4216 0 312kb 312kb - - ---- _cat/count --- -epoch timestamp count -1773143661 11:54:21 85001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.039 s -measurement window: 60.000 s -processed (OpenSearch): 68_003 -throughput: 1,133.38 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 9 -total runtime: 405.001 s -total processed: 432_011 - -throughput (weighted): 1,066.69 docs/s -throughput (median): 1,066.62 docs/s -throughput (average): 1,048.19 docs/s -throughput (min/max): 933.40 / 1,133.38 docs/s -throughput (std dev): 76.53 docs/s -================================ diff --git a/benchmark_results/20260310_114644/nonNG_python3.13.txt b/benchmark_results/20260310_114644/nonNG_python3.13.txt deleted file mode 100644 index d0bf7dccb..000000000 --- a/benchmark_results/20260310_114644/nonNG_python3.13.txt +++ /dev/null @@ -1,462 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-10T13:11:18.131638 -timestamp (UTC) : 2026-03-10T12:11:18.131642+00:00 -python version : 3.13.9 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 
-compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 0 - ↳ mode : logprep - ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260310_114644/nonNG_python3.13.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability GhZ_Q10kTueOTYUFCPDQpQ 1 0 0 0 208b 208b -green open .plugins-ml-config -pTQ-NYXQ--Bu20qR4d-lw 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773144730 12:12:10 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed Rq3pEFoDQ1iVJzubmn4cHw 1 1 27999 0 3.8mb 3.8mb -green open .plugins-ml-config -pTQ-NYXQ--Bu20qR4d-lw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability GhZ_Q10kTueOTYUFCPDQpQ 1 0 0 0 208b 208b -yellow open sre aLe6vgxEQ2qXdnCSykfiyQ 1 1 3502 0 821.4kb 821.4kb -yellow open pseudonyms tdTSBC5sS_S18kLbvUkp4w 1 1 3499 0 182.4kb 182.4kb - - ---- _cat/count --- -epoch timestamp count -1773144762 12:12:42 35001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.046 s -measurement window: 30.000 s -processed (OpenSearch): 27_999 -throughput: 933.30 docs/s 
--------------- - ------ Run Round 2: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability LODCq9KgSAmWAASmavB2Mw 1 0 0 0 208b 208b -green open .plugins-ml-config RPF-s9RnSN-0Q_baaYMiLg 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773144825 12:13:45 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed dVuoA_CITae3FD_Dw1whtw 1 1 29998 0 4.2mb 4.2mb -green open .opensearch-observability LODCq9KgSAmWAASmavB2Mw 1 0 0 0 208b 208b -green open .plugins-ml-config RPF-s9RnSN-0Q_baaYMiLg 1 0 1 0 3.9kb 3.9kb -yellow open sre eKiFJj5-SpKSKGu0hV4X6w 1 1 3750 0 846.1kb 846.1kb -yellow open pseudonyms 7HWhIA1oQsG6Ij-Th1K1Cw 1 1 3752 0 249.3kb 249.3kb - - ---- _cat/count --- -epoch timestamp count -1773144857 12:14:17 37501 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.037 s -measurement window: 30.000 s -processed (OpenSearch): 29_998 -throughput: 999.93 docs/s --------------- - ------ Run Round 3: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 wOqBWSjnQ0eRgg-CS0QoVA 1 0 8 16 75.7kb 75.7kb -green open .plugins-ml-config rGnW4MmYR9iFQo3YHKydJw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability -OBFtrlPSo2Nt7e4dlRCDA 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773144921 12:15:21 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search 
is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 wOqBWSjnQ0eRgg-CS0QoVA 1 0 8 16 75.7kb 75.7kb -yellow open processed 1Yi1lKaEQ8yVNrjrklU-sw 1 1 28001 0 3.5mb 3.5mb -green open .plugins-ml-config rGnW4MmYR9iFQo3YHKydJw 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability -OBFtrlPSo2Nt7e4dlRCDA 1 0 0 0 208b 208b -yellow open sre I19K4-K4RJibM7ytC7bUHw 1 1 3510 0 800.4kb 800.4kb -yellow open pseudonyms LSrz2MgORMCpzJ2Tvpyq-w 1 1 3489 0 225.6kb 225.6kb - - ---- _cat/count --- -epoch timestamp count -1773144953 12:15:53 35009 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.039 s -measurement window: 30.000 s -processed (OpenSearch): 28_001 -throughput: 933.36 docs/s --------------- - ------ Run Round 4: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config jGahqoFIR9yyg3qC6Nx6Eg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability OimVB9IFSdSccQbAVEQqZA 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773145016 12:16:56 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed iLscfogJRf22H0MVBbmgzw 1 1 48003 0 6mb 6mb -green open .plugins-ml-config jGahqoFIR9yyg3qC6Nx6Eg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability OimVB9IFSdSccQbAVEQqZA 1 0 0 0 208b 208b -yellow open sre A52BkQxvSz6B4taMOz6PdQ 1 1 4147 0 996.6kb 996.6kb -yellow open pseudonyms 
6uUArIg1Rl6gQE1ORgVVJg 1 1 4167 0 241.7kb 241.7kb - - ---- _cat/count --- -epoch timestamp count -1773145065 12:17:45 60001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.037 s -measurement window: 45.000 s -processed (OpenSearch): 48_003 -throughput: 1,066.73 docs/s --------------- - ------ Run Round 5: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability _KgaawBWQMe0A76liAne3g 1 0 0 0 208b 208b -green open .plugins-ml-config rI8ghtu8SBaS8s9Msbp88g 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773145128 12:18:48 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed D7p7tkUYTgiqLHQf5aqEQQ 1 1 48008 0 6.4mb 6.4mb -green open .plugins-ml-config rI8ghtu8SBaS8s9Msbp88g 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability _KgaawBWQMe0A76liAne3g 1 0 0 0 208b 208b -yellow open sre NpOm125NQeu4PAsgZhZJ6Q 1 1 4134 0 1mb 1mb -yellow open pseudonyms PZejCcGMRPeHnaKFaHdcGw 1 1 4176 0 265.8kb 265.8kb - - ---- _cat/count --- -epoch timestamp count -1773145176 12:19:36 60001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.039 s -measurement window: 45.000 s -processed (OpenSearch): 48_008 -throughput: 1,066.84 docs/s --------------- - ------ Run Round 6: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size 
-green open .plugins-ml-config doLiPPuZT9-w19PCQeYBOg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability fWgjmJdbTJ-LcPwcC-uBPw 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773145239 12:20:39 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed U5SB1ZSKQumVi-WfIAKB3g 1 1 48006 0 6.1mb 6.1mb -green open .opensearch-observability fWgjmJdbTJ-LcPwcC-uBPw 1 0 0 0 208b 208b -green open .plugins-ml-config doLiPPuZT9-w19PCQeYBOg 1 0 1 0 3.9kb 3.9kb -yellow open sre vcS1BkVzQXmDWWnuY6HUAw 1 1 4064 0 1mb 1mb -yellow open pseudonyms ETFH5v29QKuW1qJMtjsgdQ 1 1 4082 0 297.3kb 297.3kb - - ---- _cat/count --- -epoch timestamp count -1773145287 12:21:27 60001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.038 s -measurement window: 45.000 s -processed (OpenSearch): 48_006 -throughput: 1,066.80 docs/s --------------- - ------ Run Round 7: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability 48HoQbc_QnSHHSC-NtV55g 1 0 0 0 208b 208b -green open .plugins-ml-config eV2Ar8vaQC2Vdu9h8F0RRw 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773145350 12:22:30 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed Nbuxpz_hQlWnbP3uyZ1uig 1 1 64006 0 8.1mb 8.1mb -green open .plugins-ml-config eV2Ar8vaQC2Vdu9h8F0RRw 
1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability 48HoQbc_QnSHHSC-NtV55g 1 0 0 0 208b 208b -yellow open sre vKALGUEmRpSesKGg1uDiXg 1 1 4001 0 1mb 1mb -yellow open pseudonyms xAg7PHPQSwCT8FeRQIWKaQ 1 1 3996 0 264.7kb 264.7kb - - ---- _cat/count --- -epoch timestamp count -1773145413 12:23:33 80001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.037 s -measurement window: 60.000 s -processed (OpenSearch): 64_006 -throughput: 1,066.77 docs/s --------------- - ------ Run Round 8: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability zPdZzI8mQSqvzmd2BY07xw 1 0 0 0 208b 208b -green open .plugins-ml-config n47LCeyTTv2XSqyxH_KBLQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773145476 12:24:36 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 DWKAiZxHQ6ysaZX8YWaaXA 1 0 9 18 131.8kb 131.8kb -yellow open processed 6O884SuQSwyE83onNCeiYA 1 1 66003 0 7.6mb 7.6mb -green open .opensearch-observability zPdZzI8mQSqvzmd2BY07xw 1 0 0 0 208b 208b -green open .plugins-ml-config n47LCeyTTv2XSqyxH_KBLQ 1 0 1 0 3.9kb 3.9kb -yellow open sre wtmyUfvjT_a2gXxtppbZhQ 1 1 4164 0 1.1mb 1.1mb -yellow open pseudonyms 6tY0e0gsTt-l8p44ubzcrA 1 1 4170 0 320.4kb 320.4kb - - ---- _cat/count --- -epoch timestamp count -1773145539 12:25:39 82510 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.036 s -measurement window: 60.000 s 
-processed (OpenSearch): 66_003 -throughput: 1,100.05 docs/s --------------- - ------ Run Round 9: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .plugins-ml-config oZaWa2laRT-9EP6d1C_n0w 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability OuIcREjMQOSPr-RN8iMSMQ 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773145604 12:26:44 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed jHEGAcydRquETyE5Soh4vA 1 1 65205 0 8.4mb 8.4mb -green open .opensearch-observability OuIcREjMQOSPr-RN8iMSMQ 1 0 0 0 208b 208b -green open .plugins-ml-config oZaWa2laRT-9EP6d1C_n0w 1 0 1 0 3.9kb 3.9kb -yellow open sre vKPm6zMoTKmA8XoUup_d0A 1 1 4163 0 1mb 1mb -yellow open pseudonyms Y2Qukw_wR8qPqV8RHdL3SQ 1 1 4144 0 355.2kb 355.2kb - - ---- _cat/count --- -epoch timestamp count -1773145667 12:27:47 81513 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.041 s -measurement window: 60.000 s -processed (OpenSearch): 65_205 -throughput: 1,086.75 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 9 -total runtime: 405.001 s -total processed: 425_229 - -throughput (weighted): 1,049.95 docs/s -throughput (median): 1,066.77 docs/s -throughput (average): 1,035.61 docs/s -throughput (min/max): 933.30 / 1,100.05 docs/s -throughput (std dev): 64.08 docs/s -================================ diff --git a/benchmark_results/20260310_114644/nonNG_python3.14.txt b/benchmark_results/20260310_114644/nonNG_python3.14.txt deleted file mode 100644 index b315844da..000000000 --- 
a/benchmark_results/20260310_114644/nonNG_python3.14.txt +++ /dev/null @@ -1,465 +0,0 @@ - -=== BENCHMARK CONFIGURATION === -timestamp (local) : 2026-03-10T13:44:42.685154 -timestamp (UTC) : 2026-03-10T12:44:42.685156+00:00 -python version : 3.14.0 ----------------------------------------- -bootstrap_servers : 127.0.0.1:9092 -compose_dir : examples/compose -event_num : 120_000 -gen_input_dir : examples/exampledata/kafka_generate_input_logdata -ng : 0 - ↳ mode : logprep - ↳ pipeline_config : examples/exampledata/config/_benchmark_non_ng_pipeline.yml -opensearch_url : http://localhost:9200 -out : benchmark_results/20260310_114644/nonNG_python3.14.txt -processed_index : processed -prometheus_multiproc_dir : /tmp/logprep -runs : [30, 30, 30, 45, 45, 45, 60, 60, 60] -services : ['kafka', 'opensearch'] -sleep_after_compose_up_s : 30 -sleep_after_generate_s : 2 -sleep_after_logprep_start_s : 5 -================================ - ------ Run Round 1: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 wTfhsr4jReihcr52EZm73A 1 0 8 16 79.5kb 79.5kb -green open .opensearch-observability ghFN5G_9RxmxUVeSCMeKpQ 1 0 0 0 208b 208b -green open .plugins-ml-config 0m8zc-2BSHW5TtbZTMK03A 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773146735 12:45:35 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 wTfhsr4jReihcr52EZm73A 1 0 8 16 79.5kb 79.5kb -yellow open processed 3y8pq-1CSiuf8VsU7qJHGg 1 1 28001 0 3.6mb 3.6mb -green open .opensearch-observability ghFN5G_9RxmxUVeSCMeKpQ 1 0 0 0 208b 208b -green open .plugins-ml-config 0m8zc-2BSHW5TtbZTMK03A 1 0 1 0 
3.9kb 3.9kb -yellow open sre MUEKAO8ISD-nIEZlXX7QSQ 1 1 3497 0 818.5kb 818.5kb -yellow open pseudonyms szt3S6hGSwW-IlozBLTmKQ 1 1 3502 0 221.7kb 221.7kb - - ---- _cat/count --- -epoch timestamp count -1773146767 12:46:07 35009 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.041 s -measurement window: 30.000 s -processed (OpenSearch): 28_001 -throughput: 933.36 docs/s --------------- - ------ Run Round 2: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability RLxaHRkLSNGtbCrsno1X_Q 1 0 0 0 208b 208b -green open .plugins-ml-config kcwdGG7_S8SWowp-r41x_w 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773146831 12:47:11 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed WkmzndxHRvaivd1sul1k1w 1 1 29991 0 4.2mb 4.2mb -green open .opensearch-observability RLxaHRkLSNGtbCrsno1X_Q 1 0 0 0 208b 208b -green open .plugins-ml-config kcwdGG7_S8SWowp-r41x_w 1 0 1 0 3.9kb 3.9kb -yellow open sre 0JAF4G7OQKy9vNlkC8c5lA 1 1 3746 0 890.4kb 890.4kb -yellow open pseudonyms ZAjDzx4mQX-NtUTQ4XTI0A 1 1 3763 0 272.3kb 272.3kb - - ---- _cat/count --- -epoch timestamp count -1773146863 12:47:43 37501 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.038 s -measurement window: 30.000 s -processed (OpenSearch): 29_991 -throughput: 999.70 docs/s --------------- - ------ Run Round 3: 30 seconds ----- - -=== OpenSearch snapshot (before measurement) === - 
---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability sAZMJUSCSNixxIPS52aRRQ 1 0 0 0 208b 208b -green open .plugins-ml-config uE8bPiYhSUKAPhuoGdG-rA 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773146932 12:48:52 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed GSQ0Ss9jQem31eIXA9uDYw 1 1 29996 0 4.3mb 4.3mb -green open .opensearch-observability sAZMJUSCSNixxIPS52aRRQ 1 0 0 0 208b 208b -green open .plugins-ml-config uE8bPiYhSUKAPhuoGdG-rA 1 0 1 0 3.9kb 3.9kb -yellow open sre r9CaK135QZ-WtRi1ZCKPRQ 1 1 3756 0 906.4kb 906.4kb -yellow open pseudonyms XNgS-N5OSQKLkrgBKjD58w 1 1 3748 0 271.1kb 271.1kb - - ---- _cat/count --- -epoch timestamp count -1773146964 12:49:24 37501 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 30 -events generated: 120_000 -startup time: 5.039 s -measurement window: 30.000 s -processed (OpenSearch): 29_996 -throughput: 999.86 docs/s --------------- - ------ Run Round 4: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 aqSXnnT7Qgi0yno_WHXWZg 1 0 8 16 41.6kb 41.6kb -green open .plugins-ml-config gxIqBIxGTOmQ-goDqFdRgQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability pfkfRezlQx-Y21t1RdzsqQ 1 0 0 0 208b 208b - - ---- _cat/count --- -epoch timestamp count -1773147028 12:50:28 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health 
status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 aqSXnnT7Qgi0yno_WHXWZg 1 0 8 16 41.6kb 41.6kb -yellow open processed 3ArVWlRGQhKxBDwMdpa0Fg 1 1 48002 0 6.1mb 6.1mb -green open .plugins-ml-config gxIqBIxGTOmQ-goDqFdRgQ 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability pfkfRezlQx-Y21t1RdzsqQ 1 0 0 0 208b 208b -yellow open sre d9sYOegfTF-aVOIn9_9HKg 1 1 4239 0 1006.1kb 1006.1kb -yellow open pseudonyms 3Rf2yN8NSeCQKodiaZDwcg 1 1 4253 0 315.4kb 315.4kb - - ---- _cat/count --- -epoch timestamp count -1773147076 12:51:16 60009 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.038 s -measurement window: 45.000 s -processed (OpenSearch): 48_002 -throughput: 1,066.71 docs/s --------------- - ------ Run Round 5: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability gMjtlp--SYKvyfi479Hisw 1 0 0 0 208b 208b -green open .plugins-ml-config mOpTFlItTzSXaHc11j-9yA 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773147140 12:52:20 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed C-ytG9gMSbiLxEnnpqOHvQ 1 1 43994 0 5.6mb 5.6mb -green open .plugins-ml-config mOpTFlItTzSXaHc11j-9yA 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability gMjtlp--SYKvyfi479Hisw 1 0 0 0 208b 208b -yellow open sre aGsNXkXGR8K9xMUZ-Hjoyw 1 1 3952 0 984.6kb 984.6kb -yellow open pseudonyms l17ZBkFuTfGNQFHx-ICMEA 1 1 3972 0 279.5kb 279.5kb - - ---- _cat/count --- -epoch timestamp count -1773147188 12:53:08 
55001 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.036 s -measurement window: 45.000 s -processed (OpenSearch): 43_994 -throughput: 977.64 docs/s --------------- - ------ Run Round 6: 45 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability mW0DeAqfSBCq8my-61mIsQ 1 0 0 0 208b 208b -green open .plugins-ml-config ZftEcawOTlqAP4oBF26sCA 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773147251 12:54:11 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed 2j57uaIRRAy5JIU-HozJbA 1 1 51515 0 6.4mb 6.4mb -green open .opensearch-observability mW0DeAqfSBCq8my-61mIsQ 1 0 0 0 208b 208b -green open .plugins-ml-config ZftEcawOTlqAP4oBF26sCA 1 0 1 0 3.9kb 3.9kb -yellow open sre djf3PpZ5SMKT4303ndClBw 1 1 4001 0 971.9kb 971.9kb -yellow open pseudonyms EuaoYjKiQf-sMtnIbmTfOw 1 1 4006 0 238.9kb 238.9kb - - ---- _cat/count --- -epoch timestamp count -1773147302 12:55:02 64401 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 45 -events generated: 120_000 -startup time: 5.040 s -measurement window: 45.000 s -processed (OpenSearch): 51_515 -throughput: 1,144.78 docs/s --------------- - ------ Run Round 7: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability s_znwlXOSbm4pJE48LvO3A 1 0 0 0 208b 208b -green open .plugins-ml-config 
AlgnH6BPTo-Rs5xqpZbSHQ 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773147365 12:56:05 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed pA0rbFT4T16FqmKJSdNuvw 1 1 72309 0 8.6mb 8.6mb -green open .opensearch-observability s_znwlXOSbm4pJE48LvO3A 1 0 0 0 208b 208b -green open .plugins-ml-config AlgnH6BPTo-Rs5xqpZbSHQ 1 0 1 0 3.9kb 3.9kb -yellow open sre LXXTW843RqyJ7upcicf1IA 1 1 4132 0 1.1mb 1.1mb -yellow open pseudonyms lHAcPQynS3aSVQmshYDLEg 1 1 4198 0 337.2kb 337.2kb - - ---- _cat/count --- -epoch timestamp count -1773147431 12:57:11 90391 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.043 s -measurement window: 60.000 s -processed (OpenSearch): 72_309 -throughput: 1,205.15 docs/s --------------- - ------ Run Round 8: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open .opensearch-observability KhsGGaqWT3202jGfKuwJnA 1 0 0 0 208b 208b -green open .plugins-ml-config jn7FRHaoSxiN6sTsBFDfVg 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773147494 12:58:14 1 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -yellow open processed dUD2jrHfSJuUWN5AeV1INw 1 1 71444 0 8.9mb 8.9mb -green open .plugins-ml-config jn7FRHaoSxiN6sTsBFDfVg 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability KhsGGaqWT3202jGfKuwJnA 1 0 0 0 208b 208b -yellow 
open sre PvzVblZnTJm9JnGX_wVu2w 1 1 4005 0 1mb 1mb -yellow open pseudonyms D9fQm1IzSIuFeidc53FJdQ 1 1 3991 0 319.9kb 319.9kb - - ---- _cat/count --- -epoch timestamp count -1773147560 12:59:20 89301 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.041 s -measurement window: 60.000 s -processed (OpenSearch): 71_444 -throughput: 1,190.73 docs/s --------------- - ------ Run Round 9: 60 seconds ----- - -=== OpenSearch snapshot (before measurement) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 _wnKuX6KRrCWdoLFY9h4KQ 1 0 8 16 77.7kb 77.7kb -green open .opensearch-observability Gn8XMh3fSlOw8jkT8mrRBg 1 0 0 0 208b 208b -green open .plugins-ml-config rGLw-23MRcCjk4PxFrai8w 1 0 1 0 3.9kb 3.9kb - - ---- _cat/count --- -epoch timestamp count -1773147623 13:00:23 9 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - - -=== OpenSearch snapshot (after run / after refresh) === - ---- _cat/indices --- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size -green open top_queries-2026.03.10-25342 _wnKuX6KRrCWdoLFY9h4KQ 1 0 8 16 77.7kb 77.7kb -yellow open processed 9KGLSbXKRxuS59WWvtmYGA 1 1 62002 0 7.5mb 7.5mb -green open .plugins-ml-config rGLw-23MRcCjk4PxFrai8w 1 0 1 0 3.9kb 3.9kb -green open .opensearch-observability Gn8XMh3fSlOw8jkT8mrRBg 1 0 0 0 208b 208b -yellow open sre EvnF1wdZRu-XSeXlJk-9FQ 1 1 3750 0 1mb 1mb -yellow open pseudonyms SEAC2lkmToauaS59SCts7w 1 1 3763 0 300.6kb 300.6kb - - ---- _cat/count --- -epoch timestamp count -1773147686 13:01:26 77509 - - ---- _cat/aliases --- -alias index filter routing.index routing.search is_write_index - ---- RESULT --- -run_seconds: 60 -events generated: 120_000 -startup time: 5.041 s -measurement window: 60.000 s -processed (OpenSearch): 
62_002 -throughput: 1,033.37 docs/s --------------- - - -=== FINAL BENCHMARK SUMMARY === -runs: 9 -total runtime: 405.001 s -total processed: 437_254 - -throughput (weighted): 1,079.64 docs/s -throughput (median): 1,033.37 docs/s -throughput (average): 1,061.26 docs/s -throughput (min/max): 933.36 / 1,205.15 docs/s -throughput (std dev): 97.57 docs/s -================================ diff --git a/config_manipulator.py b/config_manipulator.py deleted file mode 100644 index 4e6bea929..000000000 --- a/config_manipulator.py +++ /dev/null @@ -1,34 +0,0 @@ -import sys -import time -from pathlib import Path - -from ruamel.yaml import YAML - - -def set_yaml_value(file_path: Path, key: str, value) -> None: - yaml = YAML() - yaml.preserve_quotes = True - yaml.width = 10_000 - - with file_path.open("r", encoding="utf-8") as f: - data = yaml.load(f) - - old_value = data.get(key, "") - data[key] = value - - print(f"Updated '{key}': {old_value} -> {value}") - - with file_path.open("w", encoding="utf-8") as f: - yaml.dump(data, f) - - -if __name__ == "__main__": - delay = int(sys.argv[1]) if len(sys.argv) > 1 else 0 - - if delay > 0: - print(f"Sleeping for {delay} seconds...") - time.sleep(delay) - - config_path = Path("examples/exampledata/config/_benchmark_ng_pipeline.yml") - - set_yaml_value(config_path, "version", 3) diff --git a/examples/exampledata/config/ng_pipeline.yml b/examples/exampledata/config/ng_pipeline.yml index 0ca42bef7..fc8c4dbda 100644 --- a/examples/exampledata/config/ng_pipeline.yml +++ b/examples/exampledata/config/ng_pipeline.yml @@ -2,10 +2,10 @@ version: 2 process_count: 1 timeout: 5.0 restart_count: 2 -config_refresh_interval: 5 +config_refresh_interval: 300 error_backlog_size: 1500000 logger: - level: DEBUG + level: INFO format: "%(asctime)-15s %(hostname)-5s %(name)-10s %(levelname)-8s: %(message)s" datefmt: "%Y-%m-%d %H:%M:%S" loggers: diff --git a/run_config_manipulation.py b/run_config_manipulation.py deleted file mode 100644 index 
a965d06ce..000000000 --- a/run_config_manipulation.py +++ /dev/null @@ -1,25 +0,0 @@ -import subprocess -import sys -import time - - -def main(delay_seconds: int): - python_executable = sys.executable # ensures correct interpreter (venv-safe) - - # Delay before starting script - print(f"Waiting {delay_seconds} seconds before starting config manipulation...") - time.sleep(delay_seconds) - - # Start script - print("Starting config manipulation...") - proc = subprocess.Popen([python_executable, "config_manipulator.py"]) - - print("Process are now running.") - - proc.wait() - - print("Processes finished.") - - -if __name__ == "__main__": - main(delay_seconds=5) From 74107d22c199a2fb1c0704c0c79f4ef71f2f563e Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Fri, 10 Apr 2026 10:35:46 +0200 Subject: [PATCH 67/68] disentangle ng and non-ng --- logprep/abc/connector.py | 18 +++---------- logprep/factory.py | 2 +- logprep/ng/abc/connector.py | 50 +++++++++++++++++++++++++++++++++++++ logprep/ng/abc/input.py | 2 +- logprep/ng/abc/output.py | 2 +- logprep/ng/abc/processor.py | 6 ++--- 6 files changed, 60 insertions(+), 20 deletions(-) create mode 100644 logprep/ng/abc/connector.py diff --git a/logprep/abc/connector.py b/logprep/abc/connector.py index 839ac61cf..342943a76 100644 --- a/logprep/abc/connector.py +++ b/logprep/abc/connector.py @@ -2,19 +2,19 @@ from attrs import define, field +from logprep.abc.component import Component from logprep.metrics.metrics import CounterMetric, HistogramMetric -from logprep.ng.abc.component import NgComponent -class Connector(NgComponent): +class Connector(Component): """Abstract Connector Class to define the Interface""" @define(kw_only=True) - class Config(NgComponent.Config): + class Config(Component.Config): """Configuration for the connector""" @define(kw_only=True) - class Metrics(NgComponent.Metrics): + class Metrics(Component.Metrics): """Tracks statistics about this connector""" 
number_of_processed_events: CounterMetric = field( @@ -48,13 +48,3 @@ class Metrics(NgComponent.Metrics): ) ) """Number of errors that occurred while processing events""" - - async def setup(self) -> None: - """Set up the connector.""" - - await super().setup() - - async def shut_down(self) -> None: - """Shutdown the connector and cleanup resources.""" - - await super().shut_down() diff --git a/logprep/factory.py b/logprep/factory.py index 85be2f92f..7e6e6db33 100644 --- a/logprep/factory.py +++ b/logprep/factory.py @@ -25,7 +25,7 @@ def create(cls, configuration: dict) -> Component: ) # we know configuration has exactly one entry [(component_name, component_configuration_dict)] = configuration.items() - if configuration == {} or component_configuration_dict is None: + if component_configuration_dict is None: raise InvalidConfigurationError( f'The definition of component "{component_name}" is empty.' ) diff --git a/logprep/ng/abc/connector.py b/logprep/ng/abc/connector.py new file mode 100644 index 000000000..d817d94c9 --- /dev/null +++ b/logprep/ng/abc/connector.py @@ -0,0 +1,50 @@ +"""abstract module for connectors""" + +from attrs import define, field + +from logprep.metrics.metrics import CounterMetric, HistogramMetric +from logprep.ng.abc.component import NgComponent as Component + + +class Connector(Component): + """Abstract Connector Class to define the Interface""" + + @define(kw_only=True) + class Config(Component.Config): + """Configuration for the connector""" + + @define(kw_only=True) + class Metrics(Component.Metrics): + """Tracks statistics about this connector""" + + number_of_processed_events: CounterMetric = field( + factory=lambda: CounterMetric( + description="Number of successful events", + name="number_of_processed_events", + ) + ) + """Number of successful events""" + + processing_time_per_event: HistogramMetric = field( + factory=lambda: HistogramMetric( + description="Time in seconds that it took to store an event", + 
name="processing_time_per_event", + ) + ) + """Time in seconds that it took to process an event""" + + number_of_warnings: CounterMetric = field( + factory=lambda: CounterMetric( + description="Number of warnings that occurred while storing events", + name="number_of_warnings", + ) + ) + """Number of warnings that occurred while processing events""" + + number_of_errors: CounterMetric = field( + factory=lambda: CounterMetric( + description="Number of errors that occurred while storing events", + name="number_of_errors", + ) + ) + """Number of errors that occurred while processing events""" diff --git a/logprep/ng/abc/input.py b/logprep/ng/abc/input.py index 9f908e0d0..742223839 100644 --- a/logprep/ng/abc/input.py +++ b/logprep/ng/abc/input.py @@ -20,8 +20,8 @@ from attrs import define, field, validators -from logprep.abc.connector import Connector from logprep.abc.exceptions import LogprepException +from logprep.ng.abc.connector import Connector from logprep.ng.event.event_state import EventStateType from logprep.ng.event.log_event import LogEvent from logprep.processor.base.exceptions import FieldExistsWarning diff --git a/logprep/ng/abc/output.py b/logprep/ng/abc/output.py index ab4f71050..0fdf32bfd 100644 --- a/logprep/ng/abc/output.py +++ b/logprep/ng/abc/output.py @@ -9,8 +9,8 @@ from attrs import define, field, validators -from logprep.abc.connector import Connector from logprep.abc.exceptions import LogprepException +from logprep.ng.abc.connector import Connector from logprep.ng.abc.event import Event from logprep.ng.event.event_state import EventStateType diff --git a/logprep/ng/abc/processor.py b/logprep/ng/abc/processor.py index d22602ef9..cc093994f 100644 --- a/logprep/ng/abc/processor.py +++ b/logprep/ng/abc/processor.py @@ -11,7 +11,7 @@ from logprep.framework.rule_tree.rule_tree import RuleTree from logprep.metrics.metrics import Metric -from logprep.ng.abc.component import NgComponent +from logprep.ng.abc.component import NgComponent as Component 
from logprep.ng.event.log_event import LogEvent from logprep.processor.base.exceptions import ProcessingCriticalError, ProcessingWarning from logprep.util.helper import ( @@ -28,11 +28,11 @@ logger = logging.getLogger("Processor") -class Processor(NgComponent): +class Processor(Component): """Abstract Processor Class to define the Interface""" @define(kw_only=True, slots=False) - class Config(NgComponent.Config): + class Config(Component.Config): """Common Configurations""" rules: list[str] = field( From 2863ad9983d358c7702f01d44a1480233e98ea18 Mon Sep 17 00:00:00 2001 From: Michael Hoff <9436725+mhoff@users.noreply.github.com> Date: Fri, 10 Apr 2026 12:05:46 +0200 Subject: [PATCH 68/68] cleanup, simplify and fix mypy issues --- logprep/connector/http/input.py | 8 +-- logprep/ng/abc/event.py | 11 ++- logprep/ng/abc/input.py | 54 +++++---------- logprep/ng/connector/confluent_kafka/input.py | 67 ++++++++++--------- logprep/ng/connector/http/input.py | 11 +-- logprep/ng/pipeline.py | 4 +- logprep/ng/runner.py | 23 ++++--- 7 files changed, 85 insertions(+), 93 deletions(-) diff --git a/logprep/connector/http/input.py b/logprep/connector/http/input.py index 0bc924cc5..4ee2c13f9 100644 --- a/logprep/connector/http/input.py +++ b/logprep/connector/http/input.py @@ -266,7 +266,7 @@ class HttpEndpoint(ABC): # pylint: disable=too-many-arguments def __init__( self, - messages: mp.Queue, + messages: mp.Queue[dict], original_event_field: dict[str, str] | None, collect_meta: bool, metafield_name: str, @@ -336,7 +336,7 @@ def put_message(self, event: dict, metadata: dict): class JSONHttpEndpoint(HttpEndpoint): """:code:`json` endpoint to get json from request""" - _decoder = msgspec.json.Decoder() + _decoder: msgspec.json.Decoder[dict] = msgspec.json.Decoder() @raise_request_exceptions @basic_auth @@ -360,7 +360,7 @@ async def __call__(self, req, resp, **kwargs): # pylint: disable=arguments-diff class JSONLHttpEndpoint(HttpEndpoint): """:code:`jsonl` endpoint to get jsonl from 
request""" - _decoder = msgspec.json.Decoder() + _decoder: msgspec.json.Decoder[dict] = msgspec.json.Decoder() @raise_request_exceptions @basic_auth @@ -555,7 +555,7 @@ def __attrs_post_init__(self): __slots__: list[str] = ["target", "app", "http_server"] - messages: typing.Optional[Queue] = None + messages: Queue[dict] | None = None _endpoint_registry: Mapping[str, type[HttpEndpoint]] = { "json": JSONHttpEndpoint, diff --git a/logprep/ng/abc/event.py b/logprep/ng/abc/event.py index c8400e975..72a6523c0 100644 --- a/logprep/ng/abc/event.py +++ b/logprep/ng/abc/event.py @@ -9,6 +9,7 @@ from logprep.ng.event.event_state import EventState, EventStateType from logprep.util.helper import ( FieldValue, + Missing, add_fields_to, get_dotted_field_value, pop_dotted_field_value, @@ -21,6 +22,14 @@ class EventMetadata(ABC): """Abstract EventMetadata Class to define the Interface""" + @staticmethod + def from_dict(_: dict): + """ + Constructs a metadata object from the given dict. + Currently implemented as a placeholder for future development. + """ + return EventMetadata() + class Event(ABC): """ @@ -197,7 +206,7 @@ def get_dotted_field_value(self, dotted_field: str) -> Any: """ return get_dotted_field_value(self.data, dotted_field) - def pop_dotted_field_value(self, dotted_field: str) -> FieldValue: + def pop_dotted_field_value(self, dotted_field: str) -> FieldValue | Missing: """ Shortcut method that delegates to the global `pop_dotted_field_value` helper. 
diff --git a/logprep/ng/abc/input.py b/logprep/ng/abc/input.py index 742223839..57eafa182 100644 --- a/logprep/ng/abc/input.py +++ b/logprep/ng/abc/input.py @@ -22,6 +22,7 @@ from logprep.abc.exceptions import LogprepException from logprep.ng.abc.connector import Connector +from logprep.ng.abc.event import EventMetadata from logprep.ng.event.event_state import EventStateType from logprep.ng.event.log_event import LogEvent from logprep.processor.base.exceptions import FieldExistsWarning @@ -232,25 +233,8 @@ def _add_full_event_to_target_field(self) -> bool: """Check and return if the event should be written into one singular field.""" return bool(self.config.preprocessing.add_full_event_to_target_field) - async def _get_raw_event( - self, timeout: float - ) -> bytes | None: # pylint: disable=unused-argument - """Implements the details how to get the raw event - - Parameters - ---------- - timeout : float - timeout - - Returns - ------- - raw_event : bytes - The retrieved raw event - """ - return None - @abstractmethod - async def _get_event(self, timeout: float) -> tuple: + async def _get_event(self, timeout: float) -> tuple[dict, bytes, EventMetadata] | None: """Implements the details how to get the event Parameters @@ -260,22 +244,22 @@ async def _get_event(self, timeout: float) -> tuple: Returns ------- - (event, raw_event, metadata) + (event, raw_event, metadata) | None """ def _produce_failed_event( self, event: dict | None, - raw_event: bytes | None, - metadata: dict | None, + raw_event: bytes, + metadata: EventMetadata, error: Exception, ) -> LogEvent: """Helper method to register the failed event to event backlog.""" error_log_event = LogEvent( - data=event if isinstance(event, dict) else {}, - original=raw_event if raw_event is not None else b"", - metadata=metadata, # type: ignore # TODO: fix mypy issue + data=event if event is not None else {}, + original=raw_event, + metadata=metadata, ) error_log_event.errors.append(error) 
error_log_event.state.current_state = EventStateType.FAILED @@ -296,17 +280,14 @@ async def get_next(self, timeout: float) -> LogEvent | None: input : LogEvent, None Input log data. """ - # self.acknowledge() - event: dict | None = None - raw_event: bytes | None = None - metadata: dict | None = None + event_tuple = await self._get_event(timeout) - try: - event, raw_event, metadata = await self._get_event(timeout) + if event_tuple is None: + return None - if event is None: - return None + event, raw_event, metadata = event_tuple + try: if not isinstance(event, dict): raise CriticalInputError(self, "not a dict", event) @@ -346,11 +327,10 @@ async def get_next(self, timeout: float) -> LogEvent | None: except (FieldExistsWarning, TimeParserException) as error: raise CriticalInputError(self, error.args[0], event) from error except CriticalInputError as error: - # TODO handle failed events self._produce_failed_event( event=event, raw_event=raw_event, - metadata=metadata, # type: ignore + metadata=metadata if metadata is not None else None, error=error, ) return None @@ -358,7 +338,7 @@ async def get_next(self, timeout: float) -> LogEvent | None: log_event = LogEvent( data=event, original=raw_event, - metadata=metadata, # type: ignore # TODO: fix mypy issue + metadata=metadata, ) log_event.state.current_state = EventStateType.RECEIVED @@ -424,8 +404,8 @@ def _add_arrival_timedelta_information_to_event( log_arrival_time = get_dotted_field_value(event, log_arrival_time_target_field) if time_reference and isinstance(log_arrival_time, str) and isinstance(time_reference, str): delta_time_sec = ( - TimeParser.from_string(log_arrival_time).astimezone(UTC) # type: ignore # TODO: fix mypy issue - - TimeParser.from_string(time_reference).astimezone(UTC) # type: ignore # TODO: fix mypy issue + TimeParser.from_string(log_arrival_time).astimezone(UTC) + - TimeParser.from_string(time_reference).astimezone(UTC) ).total_seconds() add_fields_to(event, fields={target_field: 
delta_time_sec}) diff --git a/logprep/ng/connector/confluent_kafka/input.py b/logprep/ng/connector/confluent_kafka/input.py index a901447cf..f4aa377d0 100644 --- a/logprep/ng/connector/confluent_kafka/input.py +++ b/logprep/ng/connector/confluent_kafka/input.py @@ -50,6 +50,7 @@ from confluent_kafka.aio import AIOConsumer from logprep.metrics.metrics import CounterMetric, GaugeMetric +from logprep.ng.abc.event import EventMetadata from logprep.ng.abc.input import ( CriticalInputError, CriticalInputParsingError, @@ -265,17 +266,23 @@ class Config(Input.Config): - Use SSL/mTLS encryption for data in transit. - Configure SASL or mTLS authentication for your Kafka clients. - Regularly rotate your Kafka credentials and secrets. - """ - _last_valid_record: Message | None + max_workers: int = field( + validator=validators.instance_of(int), + default=4, + ) + """ + The maximum number of concurrent worker tasks for message processing. + Should generally not exceed the number of topic partitions. + Defaults to 4. 
+ """ __slots__ = ["_last_valid_record", "_consumer"] def __init__(self, name: str, configuration: "ConfluentKafkaInput.Config") -> None: super().__init__(name, configuration) - self._last_valid_record = None - self._consumer: AIOConsumer | None = None + self._last_valid_record: Message | None = None @property def config(self) -> Config: @@ -308,6 +315,23 @@ def _kafka_config(self) -> dict: ) return DEFAULTS | self.config.kafka_config | injected_config + async def setup(self): + """Set the confluent kafka input connector.""" + + await super().setup() + + try: + self._consumer = AIOConsumer(self._kafka_config, max_workers=self.config.max_workers) + + await self._consumer.subscribe( + [self.config.topic], + on_assign=self._assign_callback, + on_revoke=self._revoke_callback, + on_lost=self._lost_callback, + ) + except KafkaException as error: + raise FatalInputError(self, f"Could not setup kafka consumer: {error}") from error + @cached_property def _admin(self) -> AdminClient: """configures and returns the admin client @@ -330,9 +354,7 @@ async def get_consumer(self, max_workers: int = 4) -> AIOConsumer: Parameters ---------- max_workers : int, optional - The maximum number of concurrent worker tasks for message processing. - Should generally not exceed the number of topic partitions. - Defaults to 4. + Returns ------- @@ -437,7 +459,7 @@ def describe(self) -> str: base_description = super().describe() return f"{base_description} - Kafka Input: {self.config.kafka_config['bootstrap.servers']}" - async def _get_raw_event(self, timeout: float) -> Message | None: # type: ignore # TODO: fix mypy issue + async def _get_raw_event(self, timeout: float) -> Message | None: """Get next raw Message from Kafka. 
Parameters @@ -476,7 +498,7 @@ async def _get_raw_event(self, timeout: float) -> Message | None: # type: ignor return message - async def _get_event(self, timeout: float) -> tuple: + async def _get_event(self, timeout: float) -> tuple[dict, bytes, EventMetadata] | None: """Parse the raw document from Kafka into a json. Parameters @@ -502,7 +524,7 @@ async def _get_event(self, timeout: float) -> tuple: message = await self._get_raw_event(timeout) if message is None: - return None, None, None + return None raw_event = typing.cast(bytes, message.value()) @@ -534,7 +556,7 @@ def _enable_auto_offset_store(self) -> bool: def _enable_auto_commit(self) -> bool: return self.config.kafka_config.get("enable.auto.commit") == "true" - async def batch_finished_callback(self) -> None: # type: ignore # TODO: fix mypy issue + async def batch_finished_callback(self) -> None: """Store offsets for last message referenced by `self._last_valid_records`. Should be called after delivering the current message to the output or error queue. 
""" @@ -545,8 +567,7 @@ async def batch_finished_callback(self) -> None: # type: ignore # TODO: fix my if not self._last_valid_record: return try: - consumer = await self.get_consumer() - await consumer.store_offsets(message=self._last_valid_record) + await self._consumer.store_offsets(message=self._last_valid_record) except KafkaException as error: raise InputWarning(self, f"{error}, {self._last_valid_record}") from error @@ -597,8 +618,7 @@ async def _lost_callback(self, _: AIOConsumer, topic_partitions: list[TopicParti async def _get_memberid(self) -> str | None: member_id = None try: - consumer = await self.get_consumer() - member_id = consumer._consumer.memberid() # pylint: disable=protected-access + member_id = self._consumer._consumer.memberid() # pylint: disable=protected-access except RuntimeError as error: logger.error("Failed to retrieve member ID: %s", error) return member_id @@ -626,23 +646,6 @@ def health(self) -> bool: async def acknowledge(self, events: list[LogEvent]): logger.debug("acknowledge called") - async def setup(self): - """Set the confluent kafka input connector.""" - - await super().setup() - - try: - consumer = await self.get_consumer() - - await consumer.subscribe( - [self.config.topic], - on_assign=self._assign_callback, - on_revoke=self._revoke_callback, - on_lost=self._lost_callback, - ) - except KafkaException as error: - raise FatalInputError(self, f"Could not setup kafka consumer: {error}") from error - async def shut_down(self) -> None: """Shut down the confluent kafka input connector and cleanup resources.""" diff --git a/logprep/ng/connector/http/input.py b/logprep/ng/connector/http/input.py index 7e09bea14..e59755592 100644 --- a/logprep/ng/connector/http/input.py +++ b/logprep/ng/connector/http/input.py @@ -114,6 +114,7 @@ ) from logprep.factory_error import InvalidConfigurationError from logprep.metrics.metrics import CounterMetric, GaugeMetric +from logprep.ng.abc.event import EventMetadata from logprep.ng.abc.input import 
Input from logprep.util import http, rstr from logprep.util.credentials import CredentialsFactory @@ -272,7 +273,7 @@ def __attrs_post_init__(self): __slots__: list[str] = ["target", "app", "http_server"] - messages: typing.Optional[Queue] = None + messages: typing.Optional[Queue[dict]] = None _endpoint_registry: Mapping[str, type[HttpEndpoint]] = { "json": JSONHttpEndpoint, @@ -344,17 +345,17 @@ def _get_asgi_app(endpoints_config: dict) -> falcon.asgi.App: app.add_sink(endpoint, prefix=route_compile_helper(endpoint_path)) return app - async def _get_event(self, timeout: float) -> tuple: + async def _get_event(self, timeout: float) -> tuple[dict, bytes, EventMetadata] | None: """Returns the first message from the queue""" - messages = typing.cast(Queue, self.messages) + messages = typing.cast(Queue[dict], self.messages) self.metrics.message_backlog_size += messages.qsize() try: message = messages.get(timeout=timeout) raw_message = str(message).encode("utf8") - return message, raw_message, None + return message, raw_message, EventMetadata.from_dict({}) except queue.Empty: - return None, None, None + return None async def shut_down(self): """Raises Uvicorn HTTP Server internal stop flag and waits to join""" diff --git a/logprep/ng/pipeline.py b/logprep/ng/pipeline.py index ccb9d2546..715de773d 100644 --- a/logprep/ng/pipeline.py +++ b/logprep/ng/pipeline.py @@ -10,10 +10,8 @@ logger = logging.getLogger("Pipeline") -def _process_event(event: LogEvent | None, processors: list[Processor]) -> LogEvent: +def _process_event(event: LogEvent, processors: list[Processor]) -> LogEvent: """process all processors for one event""" - if event is None or not event.data: - raise ValueError("no event given") event.state.current_state = EventStateType.PROCESSING for processor in processors: if not event.data: diff --git a/logprep/ng/runner.py b/logprep/ng/runner.py index e4b6ddfe6..23b6348e7 100644 --- a/logprep/ng/runner.py +++ b/logprep/ng/runner.py @@ -14,7 +14,7 @@ from 
logprep.ng.manager import PipelineManager from logprep.ng.util.async_helpers import TerminateTaskGroup, restart_task_on_iter from logprep.ng.util.configuration import Configuration -from logprep.ng.util.defaults import DEFAULT_LOG_CONFIG, MIN_CONFIG_REFRESH_INTERVAL +from logprep.ng.util.defaults import DEFAULT_LOG_CONFIG logger = logging.getLogger("Runner") @@ -41,12 +41,12 @@ def __init__(self, configuration: Configuration) -> None: """ self.config = configuration - self._running_config_version: None | str = None - self._task_group = asyncio.TaskGroup() self._stop_event = asyncio.Event() - async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None]: - self._running_config_version = self.config.version + async def _refresh_configuration_gen( + self, initial_config_version: str | None = None + ) -> AsyncGenerator[Configuration, None]: + current_config_version = initial_config_version refresh_interval = self.config.config_refresh_interval if refresh_interval is None: @@ -76,9 +76,9 @@ async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None logger.exception("scheduled config reload failed") raise else: - if self.config.version != self._running_config_version: - logger.info(f"Detected new config version: {self.config.version}") - self._running_config_version = self.config.version + if self.config.version != current_config_version: + logger.info("Detected new config version: %s", self.config.version) + current_config_version = self.config.version yield self.config refresh_interval = self.config.config_refresh_interval @@ -90,10 +90,9 @@ async def _refresh_configuration_gen(self) -> AsyncGenerator[Configuration, None async def run(self) -> None: """Run the runner and continuously process events until stopped.""" - self._running_config_version = self.config.version try: - async with self._task_group as tg: + async with asyncio.TaskGroup() as tg: tg.create_task(TerminateTaskGroup.raise_on_event(self._stop_event)) async def 
start_pipeline(config: Configuration) -> asyncio.Task: @@ -109,7 +108,9 @@ async def start_pipeline(config: Configuration) -> asyncio.Task: try: async for _ in restart_task_on_iter( - source=self._refresh_configuration_gen(), + source=self._refresh_configuration_gen( + initial_config_version=self.config.version + ), task_factory=start_pipeline, cancel_timeout_s=HARD_SHUTDOWN_TIMEOUT, inital_task=await start_pipeline(self.config),