diff --git a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java
index 96fe2e04eea..15686e0e38e 100644
--- a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java
+++ b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java
@@ -36,6 +36,7 @@ public enum Key {
PPL_SYNTAX_LEGACY_PREFERRED("plugins.ppl.syntax.legacy.preferred"),
PPL_SUBSEARCH_MAXOUT("plugins.ppl.subsearch.maxout"),
PPL_JOIN_SUBSEARCH_MAXOUT("plugins.ppl.join.subsearch_maxout"),
+ PPL_DISTRIBUTED_ENABLED("plugins.ppl.distributed.enabled"),
/** Enable Calcite as execution engine */
CALCITE_ENGINE_ENABLED("plugins.calcite.enabled"),
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/ExchangeManager.java b/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/ExchangeManager.java
new file mode 100644
index 00000000000..f42c57ed80a
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/ExchangeManager.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.exchange;
+
+import org.opensearch.sql.planner.distributed.operator.OperatorContext;
+import org.opensearch.sql.planner.distributed.stage.PartitioningScheme;
+
+/**
+ * Manages the lifecycle of exchanges between compute stages. Creates exchange sink and source
+ * operators for inter-stage data transfer.
+ */
+public interface ExchangeManager {
+
+ /**
+ * Creates an exchange sink operator for sending data from one stage to another.
+ *
+ * @param context the operator context
+ * @param targetStageId the downstream stage receiving the data
+ * @param partitioning how the output should be partitioned
+ * @return the exchange sink operator
+ */
+ ExchangeSinkOperator createSink(
+ OperatorContext context, String targetStageId, PartitioningScheme partitioning);
+
+ /**
+ * Creates an exchange source operator for receiving data from an upstream stage.
+ *
+ * @param context the operator context
+ * @param sourceStageId the upstream stage sending the data
+ * @return the exchange source operator
+ */
+ ExchangeSourceOperator createSource(OperatorContext context, String sourceStageId);
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/ExchangeSinkOperator.java b/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/ExchangeSinkOperator.java
new file mode 100644
index 00000000000..29cde441fca
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/ExchangeSinkOperator.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.exchange;
+
+import org.opensearch.sql.planner.distributed.operator.SinkOperator;
+
+/**
+ * A sink operator that sends pages to a downstream compute stage. Implementations handle the
+ * serialization and transport of data between stages (e.g., via OpenSearch transport, Arrow Flight,
+ * or in-memory buffers for local exchanges).
+ */
+public interface ExchangeSinkOperator extends SinkOperator {
+
+ /** Returns the ID of the downstream stage this sink sends data to. */
+ String getTargetStageId();
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/ExchangeSourceOperator.java b/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/ExchangeSourceOperator.java
new file mode 100644
index 00000000000..7c228c68a89
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/ExchangeSourceOperator.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.exchange;
+
+import org.opensearch.sql.planner.distributed.operator.SourceOperator;
+
+/**
+ * A source operator that receives pages from an upstream compute stage. Implementations handle
+ * deserialization and buffering of data received from upstream stages.
+ */
+public interface ExchangeSourceOperator extends SourceOperator {
+
+ /** Returns the ID of the upstream stage this source receives data from. */
+ String getSourceStageId();
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/OutputBuffer.java b/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/OutputBuffer.java
new file mode 100644
index 00000000000..6253ced27b2
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/exchange/OutputBuffer.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.exchange;
+
+import org.opensearch.sql.planner.distributed.page.Page;
+import org.opensearch.sql.planner.distributed.stage.PartitioningScheme;
+
+/**
+ * Buffers output pages from a stage before sending them to downstream consumers via the exchange
+ * layer. Provides back-pressure to prevent producers from overwhelming consumers.
+ *
+ * <p>Serialization format is an implementation detail. The default implementation uses OpenSearch
+ * transport ({@code StreamOutput}). A future implementation can use Arrow IPC ({@code
+ * ArrowRecordBatch}) for zero-copy columnar exchange.
+ */
+public interface OutputBuffer extends AutoCloseable {
+
+ /**
+ * Enqueues a page for delivery to downstream consumers.
+ *
+ * @param page the page to send
+ */
+ void enqueue(Page page);
+
+ /** Signals that no more pages will be enqueued. */
+ void setNoMorePages();
+
+ /** Returns true if the buffer is full and the producer should wait (back-pressure). */
+ boolean isFull();
+
+ /** Returns the total size of buffered data in bytes. */
+ long getBufferedBytes();
+
+ /** Aborts the buffer, discarding any buffered pages. */
+ void abort();
+
+ /** Returns true if all pages have been consumed and no more will be produced. */
+ boolean isFinished();
+
+ /** Returns the partitioning scheme for this buffer's output. */
+ PartitioningScheme getPartitioningScheme();
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/execution/QueryExecution.java b/core/src/main/java/org/opensearch/sql/planner/distributed/execution/QueryExecution.java
new file mode 100644
index 00000000000..f24808ecbfd
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/execution/QueryExecution.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.execution;
+
+import java.util.List;
+import org.opensearch.sql.planner.distributed.stage.StagedPlan;
+
+/**
+ * Represents the execution of a complete distributed query. Manages the lifecycle of all stage
+ * executions and provides query-level statistics.
+ */
+public interface QueryExecution {
+
+ /** Query execution states. */
+ enum State {
+ PLANNING,
+ STARTING,
+ RUNNING,
+ FINISHING,
+ FINISHED,
+ FAILED
+ }
+
+ /** Returns the unique query identifier. */
+ String getQueryId();
+
+ /** Returns the staged execution plan. */
+ StagedPlan getPlan();
+
+ /** Returns the current execution state. */
+ State getState();
+
+ /** Returns all stage executions for this query. */
+ List<StageExecution> getStageExecutions();
+
+ /** Returns execution statistics for this query. */
+ QueryStats getStats();
+
+ /** Cancels the query and all its stage executions. */
+ void cancel();
+
+ /** Statistics for a query execution. */
+ interface QueryStats {
+
+ /** Returns the total number of output rows. */
+ long getTotalRows();
+
+ /** Returns the total elapsed execution time in milliseconds. */
+ long getElapsedTimeMillis();
+
+ /** Returns the time spent planning in milliseconds. */
+ long getPlanningTimeMillis();
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/execution/StageExecution.java b/core/src/main/java/org/opensearch/sql/planner/distributed/execution/StageExecution.java
new file mode 100644
index 00000000000..f6f9f19dc73
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/execution/StageExecution.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.execution;
+
+import java.util.List;
+import java.util.Map;
+import org.opensearch.sql.planner.distributed.split.DataUnit;
+import org.opensearch.sql.planner.distributed.stage.ComputeStage;
+
+/**
+ * Manages the execution of a single compute stage across multiple nodes. Tracks task executions,
+ * handles data unit assignment, and monitors stage completion.
+ */
+public interface StageExecution {
+
+ /** Stage execution states. */
+ enum State {
+ PLANNED,
+ SCHEDULING,
+ RUNNING,
+ FINISHED,
+ FAILED,
+ CANCELLED
+ }
+
+ /** Returns the compute stage being executed. */
+ ComputeStage getStage();
+
+ /** Returns the current execution state. */
+ State getState();
+
+ /**
+ * Adds data units to be processed by this stage.
+ *
+ * @param dataUnits the data units to add
+ */
+ void addDataUnits(List<DataUnit> dataUnits);
+
+ /** Signals that no more data units will be added to this stage. */
+ void noMoreDataUnits();
+
+ /**
+ * Returns task executions grouped by node ID.
+ *
+ * @return map from node ID to list of task executions on that node
+ */
+ Map<String, List<TaskExecution>> getTaskExecutions();
+
+ /** Returns execution statistics for this stage. */
+ StageStats getStats();
+
+ /** Cancels all tasks in this stage. */
+ void cancel();
+
+ /**
+ * Adds a listener to be notified when the stage state changes.
+ *
+ * @param listener the state change listener
+ */
+ void addStateChangeListener(StateChangeListener listener);
+
+ /** Listener for stage state changes. */
+ @FunctionalInterface
+ interface StateChangeListener {
+
+ /**
+ * Called when the stage transitions to a new state.
+ *
+ * @param newState the new state
+ */
+ void onStateChange(State newState);
+ }
+
+ /** Statistics for a stage execution. */
+ interface StageStats {
+
+ /** Returns the total number of rows processed across all tasks. */
+ long getTotalRows();
+
+ /** Returns the total number of bytes processed across all tasks. */
+ long getTotalBytes();
+
+ /** Returns the number of completed tasks. */
+ int getCompletedTasks();
+
+ /** Returns the total number of tasks. */
+ int getTotalTasks();
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/execution/TaskExecution.java b/core/src/main/java/org/opensearch/sql/planner/distributed/execution/TaskExecution.java
new file mode 100644
index 00000000000..4048fab1ec9
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/execution/TaskExecution.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.execution;
+
+import java.util.List;
+import org.opensearch.sql.planner.distributed.split.DataUnit;
+
+/**
+ * Represents the execution of a single task within a stage. Each task processes a subset of data
+ * units on a specific node.
+ */
+public interface TaskExecution {
+
+ /** Task execution states. */
+ enum State {
+ PLANNED,
+ RUNNING,
+ FLUSHING,
+ FINISHED,
+ FAILED,
+ CANCELLED
+ }
+
+ /** Returns the unique identifier for this task. */
+ String getTaskId();
+
+ /** Returns the node ID where this task is executing. */
+ String getNodeId();
+
+ /** Returns the current execution state. */
+ State getState();
+
+ /** Returns the data units assigned to this task. */
+ List<DataUnit> getAssignedDataUnits();
+
+ /** Returns execution statistics for this task. */
+ TaskStats getStats();
+
+ /** Cancels this task. */
+ void cancel();
+
+ /** Statistics for a task execution. */
+ interface TaskStats {
+
+ /** Returns the number of rows processed by this task. */
+ long getProcessedRows();
+
+ /** Returns the number of bytes processed by this task. */
+ long getProcessedBytes();
+
+ /** Returns the number of output rows produced by this task. */
+ long getOutputRows();
+
+ /** Returns the elapsed execution time in milliseconds. */
+ long getElapsedTimeMillis();
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/operator/Operator.java b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/Operator.java
new file mode 100644
index 00000000000..165d1eb1655
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/Operator.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.operator;
+
+import org.opensearch.sql.planner.distributed.page.Page;
+
+/**
+ * Core operator interface using a push/pull model. Operators form a pipeline where data flows as
+ * {@link Page} batches. Each operator declares whether it needs input ({@link #needsInput()}),
+ * accepts input ({@link #addInput(Page)}), and produces output ({@link #getOutput()}).
+ *
+ * <p>Lifecycle:
+ *
+ * <ol>
+ *   <li>Pipeline driver calls {@link #needsInput()} to check readiness
+ *   <li>If ready, driver calls {@link #addInput(Page)} with upstream output
+ *   <li>Driver calls {@link #getOutput()} to pull processed results
+ *   <li>When upstream is done, driver calls {@link #finish()} to signal no more input
+ *   <li>Operator produces remaining buffered output via {@link #getOutput()}
+ *   <li>When {@link #isFinished()} returns true, operator is done
+ *   <li>Driver calls {@link #close()} to release resources
+ * </ol>
+ */
+public interface Operator extends AutoCloseable {
+
+ /** Returns true if this operator is ready to accept input via {@link #addInput(Page)}. */
+ boolean needsInput();
+
+ /**
+ * Provides a page of input data to this operator.
+ *
+ * @param page the input page (must not be null)
+ * @throws IllegalStateException if {@link #needsInput()} returns false
+ */
+ void addInput(Page page);
+
+ /**
+ * Returns the next page of output, or null if no output is available yet. A null return does not
+ * mean the operator is finished — call {@link #isFinished()} to check.
+ */
+ Page getOutput();
+
+ /** Returns true if this operator has completed all processing and will produce no more output. */
+ boolean isFinished();
+
+ /** Signals that no more input will be provided. The operator should flush buffered results. */
+ void finish();
+
+ /** Returns the runtime context for this operator. */
+ OperatorContext getContext();
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/operator/OperatorContext.java b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/OperatorContext.java
new file mode 100644
index 00000000000..ebeba3548a4
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/OperatorContext.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.operator;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Runtime context available to operators during execution. Provides access to memory limits,
+ * cancellation, and operator identity.
+ */
+public class OperatorContext {
+
+ private final String operatorId;
+ private final String stageId;
+ private final long memoryLimitBytes;
+ private final AtomicBoolean cancelled;
+
+ public OperatorContext(String operatorId, String stageId, long memoryLimitBytes) {
+ this.operatorId = operatorId;
+ this.stageId = stageId;
+ this.memoryLimitBytes = memoryLimitBytes;
+ this.cancelled = new AtomicBoolean(false);
+ }
+
+ /** Returns the unique identifier for this operator instance. */
+ public String getOperatorId() {
+ return operatorId;
+ }
+
+ /** Returns the stage ID this operator belongs to. */
+ public String getStageId() {
+ return stageId;
+ }
+
+ /** Returns the memory limit in bytes for this operator. */
+ public long getMemoryLimitBytes() {
+ return memoryLimitBytes;
+ }
+
+ /** Returns true if the query has been cancelled. */
+ public boolean isCancelled() {
+ return cancelled.get();
+ }
+
+ /** Requests cancellation of the query. */
+ public void cancel() {
+ cancelled.set(true);
+ }
+
+ /** Creates a default context for testing. */
+ public static OperatorContext createDefault(String operatorId) {
+ return new OperatorContext(operatorId, "default-stage", Long.MAX_VALUE);
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/operator/OperatorFactory.java b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/OperatorFactory.java
new file mode 100644
index 00000000000..b7f5cf954e8
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/OperatorFactory.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.operator;
+
+/**
+ * Factory for creating {@link Operator} instances. Each factory creates operators for a specific
+ * pipeline position (e.g., filter, project, aggregation). The pipeline uses factories so that
+ * multiple operator instances can be created for parallel execution.
+ */
+public interface OperatorFactory {
+
+ /**
+ * Creates a new operator instance.
+ *
+ * @param context the runtime context for the operator
+ * @return a new operator instance
+ */
+ Operator createOperator(OperatorContext context);
+
+ /** Signals that no more operators will be created from this factory. */
+ void noMoreOperators();
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/operator/SinkOperator.java b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/SinkOperator.java
new file mode 100644
index 00000000000..10c88dd911c
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/SinkOperator.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.operator;
+
+import org.opensearch.sql.planner.distributed.page.Page;
+
+/**
+ * A terminal operator that consumes pages without producing output. Sink operators collect results
+ * (e.g., into a response buffer) or send data to downstream stages (e.g., exchange sinks).
+ *
+ * <p>Sink operators always need input (until finished) and never produce output via {@link
+ * #getOutput()}.
+ */
+public interface SinkOperator extends Operator {
+
+ /** Sink operators do not produce output pages. Always returns null. */
+ @Override
+ default Page getOutput() {
+ return null;
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/operator/SourceOperator.java b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/SourceOperator.java
new file mode 100644
index 00000000000..1edd4f11b3a
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/SourceOperator.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.operator;
+
+import org.opensearch.sql.planner.distributed.page.Page;
+import org.opensearch.sql.planner.distributed.split.DataUnit;
+
+/**
+ * A source operator that reads data from external storage (e.g., Lucene shards). Source operators
+ * do not accept input from upstream operators — they produce data from assigned {@link DataUnit}s.
+ *
+ * <p>The pipeline driver assigns data units via {@link #addDataUnit(DataUnit)} and signals
+ * completion via {@link #noMoreDataUnits()}. The operator reads data from data units and produces
+ * {@link Page} batches via {@link #getOutput()}.
+ */
+public interface SourceOperator extends Operator {
+
+ /**
+ * Assigns a unit of work (e.g., a shard) to this source operator.
+ *
+ * @param dataUnit the data unit to read from
+ */
+ void addDataUnit(DataUnit dataUnit);
+
+ /** Signals that no more data units will be assigned. */
+ void noMoreDataUnits();
+
+ /** Source operators never accept input from upstream. */
+ @Override
+ default boolean needsInput() {
+ return false;
+ }
+
+ /** Source operators never accept input from upstream. */
+ @Override
+ default void addInput(Page page) {
+ throw new UnsupportedOperationException("Source operators do not accept input");
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/operator/SourceOperatorFactory.java b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/SourceOperatorFactory.java
new file mode 100644
index 00000000000..a06617d97d1
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/operator/SourceOperatorFactory.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.operator;
+
+/**
+ * Factory for creating {@link SourceOperator} instances. Source operator factories are used at the
+ * beginning of a pipeline to create operators that read from external storage.
+ */
+public interface SourceOperatorFactory {
+
+ /**
+ * Creates a new source operator instance.
+ *
+ * @param context the runtime context for the operator
+ * @return a new source operator instance
+ */
+ SourceOperator createOperator(OperatorContext context);
+
+ /** Signals that no more operators will be created from this factory. */
+ void noMoreOperators();
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/page/Block.java b/core/src/main/java/org/opensearch/sql/planner/distributed/page/Block.java
new file mode 100644
index 00000000000..f9b7674d064
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/page/Block.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.page;
+
+/**
+ * A column of data within a {@link Page}. Each Block holds values for a single column across all
+ * rows in the page. Designed to align with Apache Arrow's columnar model: a future {@code
+ * ArrowBlock} implementation can wrap Arrow {@code FieldVector} for zero-copy exchange via Arrow
+ * IPC.
+ */
+public interface Block {
+
+ /** Returns the number of values (rows) in this block. */
+ int getPositionCount();
+
+ /**
+ * Returns the value at the given position.
+ *
+ * @param position the row index (0-based)
+ * @return the value, or null if the position is null
+ */
+ Object getValue(int position);
+
+ /**
+ * Returns true if the value at the given position is null.
+ *
+ * @param position the row index (0-based)
+ * @return true if null
+ */
+ boolean isNull(int position);
+
+ /** Returns the estimated memory retained by this block in bytes. */
+ long getRetainedSizeBytes();
+
+ /**
+ * Returns a sub-region of this block.
+ *
+ * @param positionOffset the starting row index
+ * @param length the number of rows in the region
+ * @return a new Block representing the sub-region
+ */
+ Block getRegion(int positionOffset, int length);
+
+ /** Returns the data type of this block's values. */
+ BlockType getType();
+
+ /** Supported block data types, aligned with Arrow's type system. */
+ enum BlockType {
+ BOOLEAN,
+ INT,
+ LONG,
+ FLOAT,
+ DOUBLE,
+ STRING,
+ BYTES,
+ TIMESTAMP,
+ DATE,
+ NULL,
+ UNKNOWN
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/page/Page.java b/core/src/main/java/org/opensearch/sql/planner/distributed/page/Page.java
new file mode 100644
index 00000000000..926a8524bc8
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/page/Page.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.page;
+
+/**
+ * A batch of rows or columns flowing through the operator pipeline. Designed to be columnar-ready:
+ * Phase 5A uses a row-based implementation ({@link RowPage}), but future phases can swap in an
+ * Arrow-backed implementation for zero-copy columnar processing.
+ */
+public interface Page {
+
+ /** Returns the number of rows in this page. */
+ int getPositionCount();
+
+ /** Returns the number of columns in this page. */
+ int getChannelCount();
+
+ /**
+ * Returns the value at the given row and column position.
+ *
+ * @param position the row index (0-based)
+ * @param channel the column index (0-based)
+ * @return the value, or null if the cell is null
+ */
+ Object getValue(int position, int channel);
+
+ /**
+ * Returns a sub-region of this page.
+ *
+ * @param positionOffset the starting row index
+ * @param length the number of rows in the region
+ * @return a new Page representing the sub-region
+ */
+ Page getRegion(int positionOffset, int length);
+
+ /**
+ * Returns the columnar block for the given channel. Default implementation throws
+ * UnsupportedOperationException; columnar Page implementations (e.g., Arrow-backed) override
+ * this.
+ *
+ * @param channel the column index (0-based)
+ * @return the block for the channel
+ */
+ default Block getBlock(int channel) {
+ throw new UnsupportedOperationException(
+ "Columnar access not supported by " + getClass().getSimpleName());
+ }
+
+ /**
+ * Returns the estimated memory retained by this page in bytes. Default implementation estimates
+ * based on position count, channel count, and 8 bytes per value.
+ */
+ default long getRetainedSizeBytes() {
+ return (long) getPositionCount() * getChannelCount() * 8L;
+ }
+
+ /** Returns an empty page with zero rows and the given number of columns. */
+ static Page empty(int channelCount) {
+ return new RowPage(new Object[0][channelCount], channelCount);
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/page/PageBuilder.java b/core/src/main/java/org/opensearch/sql/planner/distributed/page/PageBuilder.java
new file mode 100644
index 00000000000..1b3f0e76daa
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/page/PageBuilder.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.page;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Builds a {@link Page} row by row. Call {@link #beginRow()}, set values via {@link #setValue(int,
+ * Object)}, then {@link #endRow()} to commit. Call {@link #build()} to produce the final Page.
+ */
+public class PageBuilder {
+
+ private final int channelCount;
+ private final List<Object[]> rows;
+ private Object[] currentRow;
+
+ public PageBuilder(int channelCount) {
+ if (channelCount < 0) {
+ throw new IllegalArgumentException("channelCount must be non-negative: " + channelCount);
+ }
+ this.channelCount = channelCount;
+ this.rows = new ArrayList<>();
+ }
+
+ /** Starts a new row. Values default to null. */
+ public void beginRow() {
+ currentRow = new Object[channelCount];
+ }
+
+ /**
+ * Sets a value in the current row.
+ *
+ * @param channel the column index (0-based)
+ * @param value the value to set
+ */
+ public void setValue(int channel, Object value) {
+ if (currentRow == null) {
+ throw new IllegalStateException("beginRow() must be called before setValue()");
+ }
+ if (channel < 0 || channel >= channelCount) {
+ throw new IndexOutOfBoundsException(
+ "Channel " + channel + " out of range [0, " + channelCount + ")");
+ }
+ currentRow[channel] = value;
+ }
+
+ /** Commits the current row to the page. */
+ public void endRow() {
+ if (currentRow == null) {
+ throw new IllegalStateException("beginRow() must be called before endRow()");
+ }
+ rows.add(currentRow);
+ currentRow = null;
+ }
+
+ /** Returns the number of rows added so far. */
+ public int getRowCount() {
+ return rows.size();
+ }
+
+ /** Returns true if no rows have been added. */
+ public boolean isEmpty() {
+ return rows.isEmpty();
+ }
+
+ /** Builds the final Page from all committed rows and resets the builder. */
+ public Page build() {
+ if (currentRow != null) {
+ throw new IllegalStateException("endRow() must be called before build()");
+ }
+ Object[][] data = rows.toArray(new Object[0][]);
+ rows.clear();
+ return new RowPage(data, channelCount);
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/page/RowPage.java b/core/src/main/java/org/opensearch/sql/planner/distributed/page/RowPage.java
new file mode 100644
index 00000000000..568fd93728b
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/page/RowPage.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.page;
+
+import java.util.Arrays;
+
+/**
+ * Simple row-based {@link Page} implementation. Each row is an Object array where the index
+ * corresponds to the column (channel) position. This is the Phase 5A implementation; future phases
+ * will add an Arrow-backed columnar implementation.
+ */
+public class RowPage implements Page {
+
+ private final Object[][] rows;
+ private final int channelCount;
+
+ /**
+ * Creates a RowPage from pre-built row data.
+ *
+ * @param rows 2D array where rows[i][j] is the value at row i, column j
+ * @param channelCount the number of columns
+ */
+ public RowPage(Object[][] rows, int channelCount) {
+ this.rows = rows;
+ this.channelCount = channelCount;
+ }
+
+ @Override
+ public int getPositionCount() {
+ return rows.length;
+ }
+
+ @Override
+ public int getChannelCount() {
+ return channelCount;
+ }
+
+ @Override
+ public Object getValue(int position, int channel) {
+ if (position < 0 || position >= rows.length) {
+ throw new IndexOutOfBoundsException(
+ "Position " + position + " out of range [0, " + rows.length + ")");
+ }
+ if (channel < 0 || channel >= channelCount) {
+ throw new IndexOutOfBoundsException(
+ "Channel " + channel + " out of range [0, " + channelCount + ")");
+ }
+ return rows[position][channel];
+ }
+
+ @Override
+ public Page getRegion(int positionOffset, int length) {
+ if (positionOffset < 0 || positionOffset + length > rows.length) {
+ throw new IndexOutOfBoundsException(
+ "Region ["
+ + positionOffset
+ + ", "
+ + (positionOffset + length)
+ + ") out of range [0, "
+ + rows.length
+ + ")");
+ }
+ Object[][] region = Arrays.copyOfRange(rows, positionOffset, positionOffset + length);
+ return new RowPage(region, channelCount);
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/pipeline/Pipeline.java b/core/src/main/java/org/opensearch/sql/planner/distributed/pipeline/Pipeline.java
new file mode 100644
index 00000000000..2e63d8715d5
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/pipeline/Pipeline.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.pipeline;
+
+import java.util.Collections;
+import java.util.List;
+import org.opensearch.sql.planner.distributed.operator.OperatorFactory;
+import org.opensearch.sql.planner.distributed.operator.SourceOperatorFactory;
+
+/**
+ * An ordered chain of operator factories that defines the processing logic for a compute stage. The
+ * first element is a {@link SourceOperatorFactory} (reads from storage or exchange), followed by
+ * zero or more intermediate {@link OperatorFactory} instances (filter, project, aggregate, etc.).
+ */
+public class Pipeline {
+
+ private final String pipelineId;
+ private final SourceOperatorFactory sourceFactory;
+ private final List<OperatorFactory> operatorFactories;
+
+ /**
+ * Creates a pipeline.
+ *
+ * @param pipelineId unique identifier
+ * @param sourceFactory the source operator factory (first in chain)
+ * @param operatorFactories ordered list of intermediate operator factories
+ */
+ public Pipeline(
+ String pipelineId,
+ SourceOperatorFactory sourceFactory,
+ List<OperatorFactory> operatorFactories) {
+ this.pipelineId = pipelineId;
+ this.sourceFactory = sourceFactory;
+ this.operatorFactories = Collections.unmodifiableList(operatorFactories);
+ }
+
+ public String getPipelineId() {
+ return pipelineId;
+ }
+
+ public SourceOperatorFactory getSourceFactory() {
+ return sourceFactory;
+ }
+
+ public List<OperatorFactory> getOperatorFactories() {
+ return operatorFactories;
+ }
+
+ /** Returns the total number of operators (source + intermediates). */
+ public int getOperatorCount() {
+ return 1 + operatorFactories.size();
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/pipeline/PipelineContext.java b/core/src/main/java/org/opensearch/sql/planner/distributed/pipeline/PipelineContext.java
new file mode 100644
index 00000000000..05a4ae0f7bc
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/pipeline/PipelineContext.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.pipeline;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
/**
 * Runtime state for a pipeline execution. Tracks status and provides cancellation.
 *
 * <p>Cancellation may be requested from a different thread than the driver thread, so the status
 * field is volatile and terminal transitions are guarded: once the pipeline is cancelled, a later
 * {@link #setFinished()} or {@link #setFailed(String)} call no longer overwrites the CANCELLED
 * status (previously a racing cancel could be silently lost).
 */
public class PipelineContext {

  /** Pipeline execution status. */
  public enum Status {
    CREATED,
    RUNNING,
    FINISHED,
    FAILED,
    CANCELLED
  }

  private volatile Status status;
  private final AtomicBoolean cancelled;
  private volatile String failureMessage;

  public PipelineContext() {
    this.status = Status.CREATED;
    this.cancelled = new AtomicBoolean(false);
  }

  /** Returns the current execution status. */
  public Status getStatus() {
    return status;
  }

  /** Marks the pipeline as running. */
  public void setRunning() {
    this.status = Status.RUNNING;
  }

  /** Marks the pipeline finished, unless it was already cancelled. */
  public void setFinished() {
    if (!cancelled.get()) {
      this.status = Status.FINISHED;
    }
  }

  /**
   * Records the failure message and marks the pipeline failed. A cancelled pipeline keeps its
   * CANCELLED status, but the message is still recorded for diagnostics.
   *
   * @param message human-readable failure description
   */
  public void setFailed(String message) {
    this.failureMessage = message;
    if (!cancelled.get()) {
      this.status = Status.FAILED;
    }
  }

  /** Requests cancellation. Idempotent and safe to call from any thread. */
  public void setCancelled() {
    // Set the flag first so concurrent setFinished()/setFailed() calls observe it.
    this.cancelled.set(true);
    this.status = Status.CANCELLED;
  }

  /** Returns true if cancellation has been requested. */
  public boolean isCancelled() {
    return cancelled.get();
  }

  /** Returns the recorded failure message, or null if the pipeline has not failed. */
  public String getFailureMessage() {
    return failureMessage;
  }
}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/pipeline/PipelineDriver.java b/core/src/main/java/org/opensearch/sql/planner/distributed/pipeline/PipelineDriver.java
new file mode 100644
index 00000000000..c4a26267bd5
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/pipeline/PipelineDriver.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.pipeline;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.sql.planner.distributed.operator.Operator;
+import org.opensearch.sql.planner.distributed.operator.OperatorContext;
+import org.opensearch.sql.planner.distributed.operator.OperatorFactory;
+import org.opensearch.sql.planner.distributed.operator.SourceOperator;
+import org.opensearch.sql.planner.distributed.page.Page;
+import org.opensearch.sql.planner.distributed.split.DataUnit;
+
+/**
+ * Executes a pipeline by driving data through a chain of operators. The driver implements a
+ * pull/push loop: it pulls output from upstream operators and pushes it as input to downstream
+ * operators.
+ *
+ * Execution model:
+ *
+ *
+ * Source operator produces pages from data units
+ * Each intermediate operator transforms pages
+ * The last operator (or sink) consumes the final output
+ * When all operators are finished, the pipeline is complete
+ *
+ */
+public class PipelineDriver {
+
+ private static final Logger log = LogManager.getLogger(PipelineDriver.class);
+
+ private final SourceOperator sourceOperator;
+ private final List operators;
+ private final PipelineContext context;
+
+ /**
+ * Creates a PipelineDriver from a Pipeline definition.
+ *
+ * @param pipeline the pipeline to execute
+ * @param operatorContext the context for creating operators
+ * @param dataUnits the data units to assign to the source operator
+ */
+ public PipelineDriver(
+ Pipeline pipeline, OperatorContext operatorContext, List dataUnits) {
+ this.context = new PipelineContext();
+
+ // Create source operator
+ this.sourceOperator = pipeline.getSourceFactory().createOperator(operatorContext);
+ for (DataUnit dataUnit : dataUnits) {
+ this.sourceOperator.addDataUnit(dataUnit);
+ }
+ this.sourceOperator.noMoreDataUnits();
+
+ // Create intermediate operators
+ this.operators = new ArrayList<>();
+ for (OperatorFactory factory : pipeline.getOperatorFactories()) {
+ this.operators.add(factory.createOperator(operatorContext));
+ }
+ }
+
+ /**
+ * Creates a PipelineDriver from pre-built operators (for testing).
+ *
+ * @param sourceOperator the source operator
+ * @param operators the intermediate operators
+ */
+ public PipelineDriver(SourceOperator sourceOperator, List operators) {
+ this.context = new PipelineContext();
+ this.sourceOperator = sourceOperator;
+ this.operators = new ArrayList<>(operators);
+ }
+
+ /**
+ * Runs the pipeline to completion. Drives data from source through all operators until all are
+ * finished or cancellation is requested.
+ *
+ * @return the final output page from the last operator (may be null for sink pipelines)
+ */
+ public Page run() {
+ context.setRunning();
+ Page lastOutput = null;
+
+ try {
+ while (!isFinished() && !context.isCancelled()) {
+ boolean madeProgress = processOnce();
+ if (!madeProgress && !isFinished()) {
+ // No progress and not finished — avoid busy-wait
+ Thread.yield();
+ }
+ }
+
+ // Collect any remaining output from the last operator
+ if (!operators.isEmpty()) {
+ Page output = operators.get(operators.size() - 1).getOutput();
+ if (output != null) {
+ lastOutput = output;
+ }
+ } else {
+ Page output = sourceOperator.getOutput();
+ if (output != null) {
+ lastOutput = output;
+ }
+ }
+
+ if (context.isCancelled()) {
+ context.setCancelled();
+ } else {
+ context.setFinished();
+ }
+ } catch (Exception e) {
+ context.setFailed(e.getMessage());
+ throw new RuntimeException("Pipeline execution failed", e);
+ } finally {
+ closeAll();
+ }
+
+ return lastOutput;
+ }
+
+ /**
+ * Processes one iteration of the pipeline loop. Returns true if any progress was made (data
+ * moved).
+ */
+ boolean processOnce() {
+ boolean madeProgress = false;
+
+ // Drive source → first operator (or collect output if no intermediates)
+ if (!sourceOperator.isFinished()) {
+ Page sourcePage = sourceOperator.getOutput();
+ if (sourcePage != null && sourcePage.getPositionCount() > 0) {
+ if (!operators.isEmpty() && operators.get(0).needsInput()) {
+ operators.get(0).addInput(sourcePage);
+ madeProgress = true;
+ }
+ }
+ } else if (!operators.isEmpty()) {
+ // Source finished — signal finish to first operator
+ Operator first = operators.get(0);
+ if (!first.isFinished()) {
+ first.finish();
+ madeProgress = true;
+ }
+ }
+
+ // Drive through intermediate operators: operator[i] → operator[i+1]
+ for (int i = 0; i < operators.size() - 1; i++) {
+ Operator current = operators.get(i);
+ Operator next = operators.get(i + 1);
+
+ Page output = current.getOutput();
+ if (output != null && output.getPositionCount() > 0 && next.needsInput()) {
+ next.addInput(output);
+ madeProgress = true;
+ }
+
+ if (current.isFinished() && !next.isFinished()) {
+ next.finish();
+ madeProgress = true;
+ }
+ }
+
+ // Drain the last operator's output so it can transition to finished.
+ // Without this, operators that buffer pages (e.g., PassThroughOperator)
+ // would never have getOutput() called, preventing isFinished() from
+ // returning true.
+ if (!operators.isEmpty()) {
+ Operator last = operators.get(operators.size() - 1);
+ Page output = last.getOutput();
+ if (output != null) {
+ madeProgress = true;
+ }
+ }
+
+ return madeProgress;
+ }
+
+ /** Returns true if all operators have finished processing. */
+ public boolean isFinished() {
+ if (!operators.isEmpty()) {
+ return operators.get(operators.size() - 1).isFinished();
+ }
+ return sourceOperator.isFinished();
+ }
+
+ /** Returns the pipeline execution context. */
+ public PipelineContext getContext() {
+ return context;
+ }
+
+ /** Closes all operators, releasing resources. */
+ private void closeAll() {
+ try {
+ sourceOperator.close();
+ } catch (Exception e) {
+ log.warn("Error closing source operator", e);
+ }
+ for (Operator op : operators) {
+ try {
+ op.close();
+ } catch (Exception e) {
+ log.warn("Error closing operator", e);
+ }
+ }
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/planner/CostEstimator.java b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/CostEstimator.java
new file mode 100644
index 00000000000..efc7a39f8bb
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/CostEstimator.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.planner;
+
+import org.apache.calcite.rel.RelNode;
+
+/**
+ * Estimates the cost of executing a RelNode subtree. Used by the physical planner to make decisions
+ * about stage boundaries, exchange types (broadcast vs. hash repartition), and operator placement.
+ *
+ * Phase 5A defines the interface. Phase 5G implements it using Lucene statistics (doc count,
+ * field cardinality, selectivity estimates).
+ */
+public interface CostEstimator {
+
+ /**
+ * Estimates the number of output rows for a RelNode.
+ *
+ * @param relNode the plan node to estimate
+ * @return estimated row count
+ */
+ long estimateRowCount(RelNode relNode);
+
+ /**
+ * Estimates the output size in bytes for a RelNode.
+ *
+ * @param relNode the plan node to estimate
+ * @return estimated size in bytes
+ */
+ long estimateSizeBytes(RelNode relNode);
+
+ /**
+ * Estimates the selectivity of a filter condition (0.0 to 1.0).
+ *
+ * @param relNode the filter node
+ * @return selectivity ratio
+ */
+ double estimateSelectivity(RelNode relNode);
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/planner/FragmentationContext.java b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/FragmentationContext.java
new file mode 100644
index 00000000000..7c8377a1d79
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/FragmentationContext.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.planner;
+
+import java.util.List;
+import org.opensearch.sql.planner.distributed.split.DataUnitSource;
+
+/**
+ * Provides context to the {@link PlanFragmenter} during plan fragmentation. Supplies information
+ * about cluster topology, cost estimates, and data unit discovery needed to make fragmentation
+ * decisions (e.g., broadcast vs. hash repartition, stage parallelism).
+ */
+public interface FragmentationContext {
+
+ /** Returns the list of available data node IDs in the cluster. */
+ List getAvailableNodes();
+
+ /** Returns the cost estimator for sizing stages and choosing exchange types. */
+ CostEstimator getCostEstimator();
+
+ /**
+ * Returns a data unit source for the given table name. Used to discover shards and their
+ * locations during fragmentation.
+ *
+ * @param tableName the table (index) name
+ * @return the data unit source for shard discovery
+ */
+ DataUnitSource getDataUnitSource(String tableName);
+
+ /** Returns the maximum number of tasks per stage (limits parallelism). */
+ int getMaxTasksPerStage();
+
+ /** Returns the node ID of the coordinator node. */
+ String getCoordinatorNodeId();
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/planner/PhysicalPlanner.java b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/PhysicalPlanner.java
new file mode 100644
index 00000000000..13650af5084
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/PhysicalPlanner.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.planner;
+
+import org.apache.calcite.rel.RelNode;
+import org.opensearch.sql.planner.distributed.stage.StagedPlan;
+
+/**
+ * Converts a Calcite logical plan (RelNode) into a distributed execution plan (StagedPlan).
+ * Implementations walk the RelNode tree, decide stage boundaries (where exchanges go), and build
+ * operator pipelines for each stage.
+ */
+public interface PhysicalPlanner {
+
+ /**
+ * Plans a Calcite RelNode tree into a distributed StagedPlan.
+ *
+ * @param relNode the optimized Calcite logical plan
+ * @return the distributed execution plan
+ */
+ StagedPlan plan(RelNode relNode);
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/planner/PlanFragmenter.java b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/PlanFragmenter.java
new file mode 100644
index 00000000000..51799eec462
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/PlanFragmenter.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.planner;
+
+import org.apache.calcite.rel.RelNode;
+import org.opensearch.sql.planner.distributed.stage.StagedPlan;
+
+/**
+ * Fragments an optimized Calcite RelNode tree into a multi-stage distributed execution plan. Walks
+ * the RelNode tree, identifies stage boundaries (where exchanges are needed), and creates {@link
+ * SubPlan} fragments for each stage. Replaces the manual stage creation in the old {@code
+ * DistributedQueryPlanner}.
+ *
+ * Stage boundaries are inserted at:
+ *
+ *
+ * Table scans (leaf stages)
+ * Aggregations requiring repartition (hash exchange)
+ * Joins requiring repartition or broadcast
+ * Sort requiring gather exchange
+ *
+ */
+public interface PlanFragmenter {
+
+ /**
+ * Fragments an optimized RelNode tree into a staged execution plan.
+ *
+ * @param optimizedPlan the Calcite-optimized RelNode tree
+ * @param context fragmentation context providing cluster topology and cost estimates
+ * @return the staged distributed execution plan
+ */
+ StagedPlan fragment(RelNode optimizedPlan, FragmentationContext context);
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/planner/SubPlan.java b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/SubPlan.java
new file mode 100644
index 00000000000..7a9b2070812
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/planner/SubPlan.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.planner;
+
+import java.util.Collections;
+import java.util.List;
+import org.apache.calcite.rel.RelNode;
+import org.opensearch.sql.planner.distributed.stage.PartitioningScheme;
+
+/**
+ * A fragment of the query plan that executes within a single stage. Contains a sub-plan (Calcite
+ * RelNode tree) that can be sent to data nodes for local execution, enabling query pushdown.
+ *
+ * The {@code root} RelNode represents the computation to execute locally on each data node. For
+ * example, a scan stage's SubPlan might contain: Filter → TableScan, allowing the data node to
+ * apply the filter during scanning rather than sending all data to the coordinator.
+ */
+public class SubPlan {
+
+ private final String fragmentId;
+ private final RelNode root;
+ private final PartitioningScheme outputPartitioning;
+ private final List children;
+
+ public SubPlan(
+ String fragmentId,
+ RelNode root,
+ PartitioningScheme outputPartitioning,
+ List children) {
+ this.fragmentId = fragmentId;
+ this.root = root;
+ this.outputPartitioning = outputPartitioning;
+ this.children = Collections.unmodifiableList(children);
+ }
+
+ /** Returns the unique identifier for this plan fragment. */
+ public String getFragmentId() {
+ return fragmentId;
+ }
+
+ /** Returns the root of the sub-plan RelNode tree for data node execution. */
+ public RelNode getRoot() {
+ return root;
+ }
+
+ /** Returns the output partitioning scheme for this fragment. */
+ public PartitioningScheme getOutputPartitioning() {
+ return outputPartitioning;
+ }
+
+ /** Returns child sub-plans that feed data into this fragment. */
+ public List getChildren() {
+ return children;
+ }
+
+ @Override
+ public String toString() {
+ return "SubPlan{"
+ + "id='"
+ + fragmentId
+ + "', partitioning="
+ + outputPartitioning.getExchangeType()
+ + ", children="
+ + children.size()
+ + '}';
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/split/DataUnit.java b/core/src/main/java/org/opensearch/sql/planner/distributed/split/DataUnit.java
new file mode 100644
index 00000000000..52a6738fa16
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/split/DataUnit.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.split;
+
+import java.util.List;
+import java.util.Map;
+
/**
 * A unit of data assigned to a SourceOperator. Each DataUnit represents a portion of data to read —
 * typically one OpenSearch shard. Includes preferred nodes for data locality and estimated size for
 * load balancing.
 *
 * <p>Subclasses provide storage-specific details (e.g., {@code OpenSearchDataUnit} adds index name
 * and shard ID).
 */
public abstract class DataUnit {

  /** Returns a unique identifier for this data unit. */
  public abstract String getDataUnitId();

  /** Returns the nodes where this data unit can be read locally (primary + replicas). */
  public abstract List<String> getPreferredNodes();

  /** Returns the estimated number of rows in this data unit. */
  public abstract long getEstimatedRows();

  /** Returns the estimated size in bytes of this data unit. */
  public abstract long getEstimatedSizeBytes();

  /** Returns storage-specific properties for this data unit. */
  public abstract Map<String, Object> getProperties();

  /**
   * Returns whether this data unit can be read from any node (true) or requires execution on a
   * preferred node (false). Default is true; OpenSearch shard data units override to false because
   * Lucene requires local access.
   */
  public boolean isRemotelyAccessible() {
    return true;
  }
}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/split/DataUnitAssignment.java b/core/src/main/java/org/opensearch/sql/planner/distributed/split/DataUnitAssignment.java
new file mode 100644
index 00000000000..8f0f983bd3d
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/split/DataUnitAssignment.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.split;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Assigns data units to nodes, respecting data locality and load balance. Implementations decide
+ * which node should process each data unit based on preferred nodes, current load, and cluster
+ * topology.
+ */
+public interface DataUnitAssignment {
+
+ /**
+ * Assigns data units to nodes.
+ *
+ * @param dataUnits the data units to assign
+ * @param availableNodes the nodes available for execution
+ * @return a mapping from node ID to the list of data units assigned to that node
+ */
+ Map> assign(List dataUnits, List availableNodes);
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/split/DataUnitSource.java b/core/src/main/java/org/opensearch/sql/planner/distributed/split/DataUnitSource.java
new file mode 100644
index 00000000000..5bfa5ddd623
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/split/DataUnitSource.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.split;
+
+import java.util.List;
+
+/**
+ * Generates {@link DataUnit}s for a source operator. Implementations discover available data units
+ * (e.g., shards) from cluster state and create them with preferred node information.
+ */
+public interface DataUnitSource extends AutoCloseable {
+
+ /**
+ * Returns the next batch of data units, up to the specified maximum batch size. Returns an empty
+ * list if no more data units are available.
+ *
+ * @param maxBatchSize maximum number of data units to return
+ * @return list of data units
+ */
+ List getNextBatch(int maxBatchSize);
+
+ /**
+ * Returns the next batch of data units with a default batch size.
+ *
+ * @return list of data units
+ */
+ default List getNextBatch() {
+ return getNextBatch(1000);
+ }
+
+ /** Returns true if all data units have been generated. */
+ boolean isFinished();
+
+ @Override
+ void close();
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/stage/ComputeStage.java b/core/src/main/java/org/opensearch/sql/planner/distributed/stage/ComputeStage.java
new file mode 100644
index 00000000000..a71eeeca093
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/stage/ComputeStage.java
@@ -0,0 +1,149 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.stage;
+
+import java.util.Collections;
+import java.util.List;
+import org.apache.calcite.rel.RelNode;
+import org.opensearch.sql.planner.distributed.operator.OperatorFactory;
+import org.opensearch.sql.planner.distributed.operator.SourceOperatorFactory;
+import org.opensearch.sql.planner.distributed.split.DataUnit;
+
+/**
+ * A portion of the distributed plan that runs as a pipeline on one or more nodes. Each ComputeStage
+ * contains a pipeline of operators (source + transforms), an output partitioning scheme (how
+ * results flow to the next stage), and metadata about dependencies and parallelism.
+ *
+ * Naming follows the convention: "ComputeStage" (not "Fragment") — a unit of distributed
+ * computation.
+ */
+public class ComputeStage {
+
+ private final String stageId;
+ private final SourceOperatorFactory sourceFactory;
+ private final List operatorFactories;
+ private final PartitioningScheme outputPartitioning;
+ private final List sourceStageIds;
+ private final List dataUnits;
+ private final long estimatedRows;
+ private final long estimatedBytes;
+ private final RelNode planFragment;
+
+ public ComputeStage(
+ String stageId,
+ SourceOperatorFactory sourceFactory,
+ List operatorFactories,
+ PartitioningScheme outputPartitioning,
+ List sourceStageIds,
+ List dataUnits,
+ long estimatedRows,
+ long estimatedBytes) {
+ this(
+ stageId,
+ sourceFactory,
+ operatorFactories,
+ outputPartitioning,
+ sourceStageIds,
+ dataUnits,
+ estimatedRows,
+ estimatedBytes,
+ null);
+ }
+
+ public ComputeStage(
+ String stageId,
+ SourceOperatorFactory sourceFactory,
+ List operatorFactories,
+ PartitioningScheme outputPartitioning,
+ List sourceStageIds,
+ List dataUnits,
+ long estimatedRows,
+ long estimatedBytes,
+ RelNode planFragment) {
+ this.stageId = stageId;
+ this.sourceFactory = sourceFactory;
+ this.operatorFactories = Collections.unmodifiableList(operatorFactories);
+ this.outputPartitioning = outputPartitioning;
+ this.sourceStageIds = Collections.unmodifiableList(sourceStageIds);
+ this.dataUnits = Collections.unmodifiableList(dataUnits);
+ this.estimatedRows = estimatedRows;
+ this.estimatedBytes = estimatedBytes;
+ this.planFragment = planFragment;
+ }
+
+ public String getStageId() {
+ return stageId;
+ }
+
+ public SourceOperatorFactory getSourceFactory() {
+ return sourceFactory;
+ }
+
+ /** Returns the ordered list of intermediate operator factories (after source). */
+ public List getOperatorFactories() {
+ return operatorFactories;
+ }
+
+ /** Returns how this stage's output is partitioned for the downstream stage. */
+ public PartitioningScheme getOutputPartitioning() {
+ return outputPartitioning;
+ }
+
+ /** Returns the IDs of upstream stages that feed data into this stage. */
+ public List getSourceStageIds() {
+ return sourceStageIds;
+ }
+
+ /** Returns the data units assigned to this stage (for source stages with shard assignments). */
+ public List getDataUnits() {
+ return dataUnits;
+ }
+
+ /** Returns the estimated row count for this stage's output. */
+ public long getEstimatedRows() {
+ return estimatedRows;
+ }
+
+ /** Returns the estimated byte size for this stage's output. */
+ public long getEstimatedBytes() {
+ return estimatedBytes;
+ }
+
+ /**
+ * Returns the sub-plan (Calcite RelNode) for data node execution, or null if this stage does not
+ * push down a plan fragment. Enables query pushdown: the data node can execute this sub-plan
+ * locally instead of just scanning raw data.
+ */
+ public RelNode getPlanFragment() {
+ return planFragment;
+ }
+
+ /** Returns true if this is a leaf stage (no upstream dependencies). */
+ public boolean isLeaf() {
+ return sourceStageIds.isEmpty();
+ }
+
+ /** Returns the total operator count (source + intermediates). */
+ public int getOperatorCount() {
+ return 1 + operatorFactories.size();
+ }
+
+ @Override
+ public String toString() {
+ return "ComputeStage{"
+ + "id='"
+ + stageId
+ + "', operators="
+ + getOperatorCount()
+ + ", exchange="
+ + outputPartitioning.getExchangeType()
+ + ", dataUnits="
+ + dataUnits.size()
+ + ", deps="
+ + sourceStageIds
+ + '}';
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/stage/ExchangeType.java b/core/src/main/java/org/opensearch/sql/planner/distributed/stage/ExchangeType.java
new file mode 100644
index 00000000000..3b4a84d47b2
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/stage/ExchangeType.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.stage;
+
/** The strategies for moving data between compute stages. */
public enum ExchangeType {
  /** Funnels every partition into one node (the coordinator); used for the final merge step. */
  GATHER,

  /** Redistributes rows across nodes by hashing key columns; used for distributed joins/aggs. */
  HASH_REPARTITION,

  /** Replicates the full data set to every downstream node; used when one join side is small. */
  BROADCAST,

  /** No data movement — the stage continues on the same node as its upstream stage. */
  NONE
}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/stage/PartitioningScheme.java b/core/src/main/java/org/opensearch/sql/planner/distributed/stage/PartitioningScheme.java
new file mode 100644
index 00000000000..15e97ef7aed
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/stage/PartitioningScheme.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.stage;
+
+import java.util.Collections;
+import java.util.List;
+
+/** Describes how a stage's output data is partitioned across nodes. */
+public class PartitioningScheme {
+
+ private final ExchangeType exchangeType;
+ private final List hashChannels;
+
+ private PartitioningScheme(ExchangeType exchangeType, List hashChannels) {
+ this.exchangeType = exchangeType;
+ this.hashChannels = Collections.unmodifiableList(hashChannels);
+ }
+
+ /** Creates a GATHER partitioning (all data to coordinator). */
+ public static PartitioningScheme gather() {
+ return new PartitioningScheme(ExchangeType.GATHER, List.of());
+ }
+
+ /** Creates a HASH_REPARTITION partitioning on the given column indices. */
+ public static PartitioningScheme hashRepartition(List hashChannels) {
+ return new PartitioningScheme(ExchangeType.HASH_REPARTITION, hashChannels);
+ }
+
+ /** Creates a BROADCAST partitioning (all data to all nodes). */
+ public static PartitioningScheme broadcast() {
+ return new PartitioningScheme(ExchangeType.BROADCAST, List.of());
+ }
+
+ /** Creates a NONE partitioning (no exchange). */
+ public static PartitioningScheme none() {
+ return new PartitioningScheme(ExchangeType.NONE, List.of());
+ }
+
+ public ExchangeType getExchangeType() {
+ return exchangeType;
+ }
+
+ /** Returns the column indices used for hash partitioning. Empty for non-hash schemes. */
+ public List getHashChannels() {
+ return hashChannels;
+ }
+}
diff --git a/core/src/main/java/org/opensearch/sql/planner/distributed/stage/StagedPlan.java b/core/src/main/java/org/opensearch/sql/planner/distributed/stage/StagedPlan.java
new file mode 100644
index 00000000000..d8aed791a9a
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/planner/distributed/stage/StagedPlan.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.stage;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * The complete distributed execution plan as a tree of {@link ComputeStage}s. Created by the
+ * physical planner from a Calcite RelNode tree. Stages are ordered by dependency — leaf stages
+ * (scans) first, root stage (final merge) last.
+ */
+public class StagedPlan {
+
+ private final String planId;
+ private final List stages;
+
+ public StagedPlan(String planId, List stages) {
+ this.planId = planId;
+ this.stages = Collections.unmodifiableList(stages);
+ }
+
+ public String getPlanId() {
+ return planId;
+ }
+
+ /** Returns all stages in dependency order (leaves first, root last). */
+ public List getStages() {
+ return stages;
+ }
+
+ /** Returns the root stage (last in the list — typically the coordinator merge stage). */
+ public ComputeStage getRootStage() {
+ if (stages.isEmpty()) {
+ throw new IllegalStateException("StagedPlan has no stages");
+ }
+ return stages.get(stages.size() - 1);
+ }
+
+ /** Returns leaf stages (stages with no upstream dependencies). */
+ public List getLeafStages() {
+ return stages.stream().filter(ComputeStage::isLeaf).collect(Collectors.toList());
+ }
+
+ /** Returns a stage by its ID. */
+ public ComputeStage getStage(String stageId) {
+ return stages.stream()
+ .filter(s -> s.getStageId().equals(stageId))
+ .findFirst()
+ .orElseThrow(() -> new IllegalArgumentException("Stage not found: " + stageId));
+ }
+
+ /** Returns the total number of stages. */
+ public int getStageCount() {
+ return stages.size();
+ }
+
+ /**
+ * Validates the plan. Returns a list of validation errors, or empty list if valid.
+ *
+ * @return list of error messages
+ */
+ public List validate() {
+ List errors = new ArrayList<>();
+ if (planId == null || planId.isEmpty()) {
+ errors.add("Plan ID is required");
+ }
+ if (stages.isEmpty()) {
+ errors.add("Plan must have at least one stage");
+ }
+
+ // Check that all referenced source stages exist
+ Map stageMap =
+ stages.stream().collect(Collectors.toMap(ComputeStage::getStageId, s -> s));
+ for (ComputeStage stage : stages) {
+ for (String depId : stage.getSourceStageIds()) {
+ if (!stageMap.containsKey(depId)) {
+ errors.add("Stage '" + stage.getStageId() + "' references unknown stage: " + depId);
+ }
+ }
+ }
+
+ return errors;
+ }
+
+ @Override
+ public String toString() {
+ return "StagedPlan{id='" + planId + "', stages=" + stages.size() + '}';
+ }
+}
diff --git a/core/src/test/java/org/opensearch/sql/planner/distributed/page/PageBuilderTest.java b/core/src/test/java/org/opensearch/sql/planner/distributed/page/PageBuilderTest.java
new file mode 100644
index 00000000000..dc5ff6c1840
--- /dev/null
+++ b/core/src/test/java/org/opensearch/sql/planner/distributed/page/PageBuilderTest.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.page;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.DisplayNameGenerator;
+import org.junit.jupiter.api.Test;
+
+@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class)
+class PageBuilderTest {
+
+ @Test
+ void should_build_page_row_by_row() {
+ PageBuilder builder = new PageBuilder(3);
+
+ builder.beginRow();
+ builder.setValue(0, "Alice");
+ builder.setValue(1, 30);
+ builder.setValue(2, 1000.0);
+ builder.endRow();
+
+ builder.beginRow();
+ builder.setValue(0, "Bob");
+ builder.setValue(1, 25);
+ builder.setValue(2, 2000.0);
+ builder.endRow();
+
+ Page page = builder.build();
+ assertEquals(2, page.getPositionCount());
+ assertEquals(3, page.getChannelCount());
+ assertEquals("Alice", page.getValue(0, 0));
+ assertEquals(2000.0, page.getValue(1, 2));
+ }
+
+ @Test
+ void should_track_row_count() {
+ PageBuilder builder = new PageBuilder(2);
+ assertTrue(builder.isEmpty());
+ assertEquals(0, builder.getRowCount());
+
+ builder.beginRow();
+ builder.setValue(0, "A");
+ builder.setValue(1, 1);
+ builder.endRow();
+
+ assertEquals(1, builder.getRowCount());
+ }
+
+ @Test
+ void should_reset_after_build() {
+ PageBuilder builder = new PageBuilder(1);
+ builder.beginRow();
+ builder.setValue(0, "test");
+ builder.endRow();
+
+ Page page = builder.build();
+ assertEquals(1, page.getPositionCount());
+
+ // Builder should be empty after build
+ assertTrue(builder.isEmpty());
+ assertEquals(0, builder.getRowCount());
+ }
+
+ @Test
+ void should_throw_on_set_before_begin() {
+ PageBuilder builder = new PageBuilder(2);
+ assertThrows(IllegalStateException.class, () -> builder.setValue(0, "value"));
+ }
+
+ @Test
+ void should_throw_on_end_before_begin() {
+ PageBuilder builder = new PageBuilder(2);
+ assertThrows(IllegalStateException.class, () -> builder.endRow());
+ }
+
+ @Test
+ void should_throw_on_build_with_uncommitted_row() {
+ PageBuilder builder = new PageBuilder(2);
+ builder.beginRow();
+ builder.setValue(0, "value");
+ assertThrows(IllegalStateException.class, () -> builder.build());
+ }
+
+ @Test
+ void should_throw_on_invalid_channel() {
+ PageBuilder builder = new PageBuilder(2);
+ builder.beginRow();
+ assertThrows(IndexOutOfBoundsException.class, () -> builder.setValue(2, "value"));
+ assertThrows(IndexOutOfBoundsException.class, () -> builder.setValue(-1, "value"));
+ }
+}
diff --git a/core/src/test/java/org/opensearch/sql/planner/distributed/page/RowPageTest.java b/core/src/test/java/org/opensearch/sql/planner/distributed/page/RowPageTest.java
new file mode 100644
index 00000000000..f4443d566d1
--- /dev/null
+++ b/core/src/test/java/org/opensearch/sql/planner/distributed/page/RowPageTest.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.page;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.DisplayNameGenerator;
+import org.junit.jupiter.api.Test;
+
+@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class)
+class RowPageTest {
+
+ @Test
+ void should_create_page_with_rows_and_columns() {
+ Object[][] data = {
+ {"Alice", 30, 1000.0},
+ {"Bob", 25, 2000.0}
+ };
+ RowPage page = new RowPage(data, 3);
+
+ assertEquals(2, page.getPositionCount());
+ assertEquals(3, page.getChannelCount());
+ }
+
+ @Test
+ void should_access_values_by_position_and_channel() {
+ Object[][] data = {
+ {"Alice", 30, 1000.0},
+ {"Bob", 25, 2000.0}
+ };
+ RowPage page = new RowPage(data, 3);
+
+ assertEquals("Alice", page.getValue(0, 0));
+ assertEquals(30, page.getValue(0, 1));
+ assertEquals(1000.0, page.getValue(0, 2));
+ assertEquals("Bob", page.getValue(1, 0));
+ assertEquals(25, page.getValue(1, 1));
+ }
+
+ @Test
+ void should_handle_null_values() {
+ Object[][] data = {{null, 30, null}};
+ RowPage page = new RowPage(data, 3);
+
+ assertNull(page.getValue(0, 0));
+ assertEquals(30, page.getValue(0, 1));
+ assertNull(page.getValue(0, 2));
+ }
+
+ @Test
+ void should_create_sub_region() {
+ Object[][] data = {
+ {"Alice", 30},
+ {"Bob", 25},
+ {"Charlie", 35},
+ {"Diana", 28}
+ };
+ RowPage page = new RowPage(data, 2);
+
+ Page region = page.getRegion(1, 2);
+ assertEquals(2, region.getPositionCount());
+ assertEquals("Bob", region.getValue(0, 0));
+ assertEquals("Charlie", region.getValue(1, 0));
+ }
+
+ @Test
+ void should_create_empty_page() {
+ Page empty = Page.empty(3);
+ assertEquals(0, empty.getPositionCount());
+ assertEquals(3, empty.getChannelCount());
+ }
+
+ @Test
+ void should_throw_on_invalid_position() {
+ Object[][] data = {{"Alice", 30}};
+ RowPage page = new RowPage(data, 2);
+
+ assertThrows(IndexOutOfBoundsException.class, () -> page.getValue(-1, 0));
+ assertThrows(IndexOutOfBoundsException.class, () -> page.getValue(1, 0));
+ }
+
+ @Test
+ void should_throw_on_invalid_channel() {
+ Object[][] data = {{"Alice", 30}};
+ RowPage page = new RowPage(data, 2);
+
+ assertThrows(IndexOutOfBoundsException.class, () -> page.getValue(0, -1));
+ assertThrows(IndexOutOfBoundsException.class, () -> page.getValue(0, 2));
+ }
+
+ @Test
+ void should_throw_on_invalid_region() {
+ Object[][] data = {{"Alice"}, {"Bob"}};
+ RowPage page = new RowPage(data, 1);
+
+ assertThrows(IndexOutOfBoundsException.class, () -> page.getRegion(1, 3));
+ assertThrows(IndexOutOfBoundsException.class, () -> page.getRegion(-1, 1));
+ }
+}
diff --git a/core/src/test/java/org/opensearch/sql/planner/distributed/pipeline/PipelineDriverTest.java b/core/src/test/java/org/opensearch/sql/planner/distributed/pipeline/PipelineDriverTest.java
new file mode 100644
index 00000000000..5a8735f4b6d
--- /dev/null
+++ b/core/src/test/java/org/opensearch/sql/planner/distributed/pipeline/PipelineDriverTest.java
@@ -0,0 +1,232 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.pipeline;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.DisplayNameGenerator;
+import org.junit.jupiter.api.Test;
+import org.opensearch.sql.planner.distributed.operator.Operator;
+import org.opensearch.sql.planner.distributed.operator.OperatorContext;
+import org.opensearch.sql.planner.distributed.operator.SourceOperator;
+import org.opensearch.sql.planner.distributed.page.Page;
+import org.opensearch.sql.planner.distributed.page.PageBuilder;
+import org.opensearch.sql.planner.distributed.split.DataUnit;
+
+@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class)
+class PipelineDriverTest {
+
+ @Test
+ void should_run_source_only_pipeline() {
+ // Given: A source that produces one page
+ MockSourceOperator source = new MockSourceOperator(List.of(createTestPage(3, 2)));
+
+ // When
+ PipelineDriver driver = new PipelineDriver(source, List.of());
+ Page result = driver.run();
+
+ // Then
+ assertTrue(driver.isFinished());
+ assertEquals(PipelineContext.Status.FINISHED, driver.getContext().getStatus());
+ }
+
+ @Test
+ void should_run_source_to_transform_pipeline() {
+ // Given: Source produces a page, transform doubles column 1 values
+ Page inputPage = createTestPage(3, 2);
+ MockSourceOperator source = new MockSourceOperator(List.of(inputPage));
+ PassThroughOperator passThrough = new PassThroughOperator();
+
+ // When
+ PipelineDriver driver = new PipelineDriver(source, List.of(passThrough));
+ driver.run();
+
+ // Then
+ assertTrue(driver.isFinished());
+ assertTrue(passThrough.receivedPages > 0);
+ }
+
+ @Test
+ void should_run_source_to_sink_pipeline() {
+ // Given: Source produces pages, sink collects them
+ Page page1 = createTestPage(2, 2);
+ Page page2 = createTestPage(3, 2);
+ MockSourceOperator source = new MockSourceOperator(List.of(page1, page2));
+ CollectingSinkOperator sink = new CollectingSinkOperator();
+
+ // When
+ PipelineDriver driver = new PipelineDriver(source, List.of(sink));
+ driver.run();
+
+ // Then
+ assertTrue(driver.isFinished());
+ assertEquals(2, sink.collectedPages.size());
+ assertEquals(2, sink.collectedPages.get(0).getPositionCount());
+ assertEquals(3, sink.collectedPages.get(1).getPositionCount());
+ }
+
+ @Test
+ void should_chain_multiple_operators() {
+ // Given: source → passthrough1 → passthrough2 → sink
+ Page inputPage = createTestPage(5, 3);
+ MockSourceOperator source = new MockSourceOperator(List.of(inputPage));
+ PassThroughOperator pass1 = new PassThroughOperator();
+ PassThroughOperator pass2 = new PassThroughOperator();
+ CollectingSinkOperator sink = new CollectingSinkOperator();
+
+ // When
+ PipelineDriver driver = new PipelineDriver(source, List.of(pass1, pass2, sink));
+ driver.run();
+
+ // Then
+ assertTrue(driver.isFinished());
+ assertTrue(pass1.receivedPages > 0);
+ assertTrue(pass2.receivedPages > 0);
+ assertEquals(1, sink.collectedPages.size());
+ }
+
+ private Page createTestPage(int rows, int cols) {
+ PageBuilder builder = new PageBuilder(cols);
+ for (int r = 0; r < rows; r++) {
+ builder.beginRow();
+ for (int c = 0; c < cols; c++) {
+ builder.setValue(c, "r" + r + "c" + c);
+ }
+ builder.endRow();
+ }
+ return builder.build();
+ }
+
+ /** Mock source operator that produces pre-built pages. */
+ static class MockSourceOperator implements SourceOperator {
+ private final List<Page> pages;
+ private int index = 0;
+ private boolean finished = false;
+
+ MockSourceOperator(List<Page> pages) {
+ this.pages = new ArrayList<>(pages);
+ }
+
+ @Override
+ public void addDataUnit(DataUnit dataUnit) {}
+
+ @Override
+ public void noMoreDataUnits() {}
+
+ @Override
+ public Page getOutput() {
+ if (index < pages.size()) {
+ return pages.get(index++);
+ }
+ finished = true;
+ return null;
+ }
+
+ @Override
+ public boolean isFinished() {
+ return finished && index >= pages.size();
+ }
+
+ @Override
+ public void finish() {
+ finished = true;
+ }
+
+ @Override
+ public OperatorContext getContext() {
+ return OperatorContext.createDefault("mock-source");
+ }
+
+ @Override
+ public void close() {}
+ }
+
+ /** Pass-through operator that forwards pages unchanged. */
+ static class PassThroughOperator implements Operator {
+ private Page buffered;
+ private boolean finished = false;
+ int receivedPages = 0;
+
+ @Override
+ public boolean needsInput() {
+ return buffered == null && !finished;
+ }
+
+ @Override
+ public void addInput(Page page) {
+ buffered = page;
+ receivedPages++;
+ }
+
+ @Override
+ public Page getOutput() {
+ Page out = buffered;
+ buffered = null;
+ return out;
+ }
+
+ @Override
+ public boolean isFinished() {
+ return finished && buffered == null;
+ }
+
+ @Override
+ public void finish() {
+ finished = true;
+ }
+
+ @Override
+ public OperatorContext getContext() {
+ return OperatorContext.createDefault("passthrough");
+ }
+
+ @Override
+ public void close() {}
+ }
+
+ /** Sink operator that collects all received pages. */
+ static class CollectingSinkOperator implements Operator {
+ final List<Page> collectedPages = new ArrayList<>();
+ private boolean finished = false;
+
+ @Override
+ public boolean needsInput() {
+ return !finished;
+ }
+
+ @Override
+ public void addInput(Page page) {
+ collectedPages.add(page);
+ }
+
+ @Override
+ public Page getOutput() {
+ return null;
+ }
+
+ @Override
+ public boolean isFinished() {
+ return finished;
+ }
+
+ @Override
+ public void finish() {
+ finished = true;
+ }
+
+ @Override
+ public OperatorContext getContext() {
+ return OperatorContext.createDefault("sink");
+ }
+
+ @Override
+ public void close() {}
+ }
+}
diff --git a/core/src/test/java/org/opensearch/sql/planner/distributed/stage/ComputeStageTest.java b/core/src/test/java/org/opensearch/sql/planner/distributed/stage/ComputeStageTest.java
new file mode 100644
index 00000000000..9b2b5675050
--- /dev/null
+++ b/core/src/test/java/org/opensearch/sql/planner/distributed/stage/ComputeStageTest.java
@@ -0,0 +1,301 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.planner.distributed.stage;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.DisplayNameGenerator;
+import org.junit.jupiter.api.Test;
+import org.opensearch.sql.planner.distributed.operator.Operator;
+import org.opensearch.sql.planner.distributed.operator.OperatorContext;
+import org.opensearch.sql.planner.distributed.operator.OperatorFactory;
+import org.opensearch.sql.planner.distributed.operator.SourceOperator;
+import org.opensearch.sql.planner.distributed.operator.SourceOperatorFactory;
+import org.opensearch.sql.planner.distributed.page.Page;
+import org.opensearch.sql.planner.distributed.split.DataUnit;
+
+@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class)
+class ComputeStageTest {
+
+ @Test
+ void should_create_leaf_stage_with_data_units() {
+ DataUnit du1 = new TestDataUnit("accounts/0", List.of("node-1", "node-2"), 50000L);
+ DataUnit du2 = new TestDataUnit("accounts/1", List.of("node-2", "node-3"), 45000L);
+
+ ComputeStage stage =
+ new ComputeStage(
+ "stage-0",
+ new NoOpSourceFactory(),
+ List.of(),
+ PartitioningScheme.gather(),
+ List.of(),
+ List.of(du1, du2),
+ 95000L,
+ 0L);
+
+ assertEquals("stage-0", stage.getStageId());
+ assertTrue(stage.isLeaf());
+ assertEquals(2, stage.getDataUnits().size());
+ assertEquals(1, stage.getOperatorCount());
+ assertEquals(ExchangeType.GATHER, stage.getOutputPartitioning().getExchangeType());
+ assertEquals(95000L, stage.getEstimatedRows());
+ }
+
+ @Test
+ void should_create_non_leaf_stage_with_dependencies() {
+ ComputeStage stage =
+ new ComputeStage(
+ "stage-1",
+ new NoOpSourceFactory(),
+ List.of(new NoOpOperatorFactory()),
+ PartitioningScheme.none(),
+ List.of("stage-0"),
+ List.of(),
+ 0L,
+ 0L);
+
+ assertFalse(stage.isLeaf());
+ assertEquals(List.of("stage-0"), stage.getSourceStageIds());
+ assertEquals(2, stage.getOperatorCount());
+ }
+
+ @Test
+ void should_create_staged_plan() {
+ ComputeStage scan =
+ new ComputeStage(
+ "scan",
+ new NoOpSourceFactory(),
+ List.of(),
+ PartitioningScheme.gather(),
+ List.of(),
+ List.of(new TestDataUnit("idx/0", List.of("n1"), 1000L)),
+ 1000L,
+ 0L);
+
+ ComputeStage merge =
+ new ComputeStage(
+ "merge",
+ new NoOpSourceFactory(),
+ List.of(),
+ PartitioningScheme.none(),
+ List.of("scan"),
+ List.of(),
+ 1000L,
+ 0L);
+
+ StagedPlan plan = new StagedPlan("plan-1", List.of(scan, merge));
+
+ assertEquals("plan-1", plan.getPlanId());
+ assertEquals(2, plan.getStageCount());
+ assertEquals("merge", plan.getRootStage().getStageId());
+ assertEquals(1, plan.getLeafStages().size());
+ assertEquals("scan", plan.getLeafStages().get(0).getStageId());
+ }
+
+ @Test
+ void should_validate_staged_plan() {
+ StagedPlan validPlan =
+ new StagedPlan(
+ "p1",
+ List.of(
+ new ComputeStage(
+ "s1",
+ new NoOpSourceFactory(),
+ List.of(),
+ PartitioningScheme.gather(),
+ List.of(),
+ List.of(),
+ 0L,
+ 0L)));
+
+ assertTrue(validPlan.validate().isEmpty());
+ }
+
+ @Test
+ void should_detect_invalid_plan() {
+ // Null plan ID
+ StagedPlan nullId = new StagedPlan(null, List.of());
+ assertFalse(nullId.validate().isEmpty());
+
+ // Empty stages
+ StagedPlan noStages = new StagedPlan("p1", List.of());
+ assertFalse(noStages.validate().isEmpty());
+
+ // Reference to non-existent stage
+ StagedPlan badRef =
+ new StagedPlan(
+ "p1",
+ List.of(
+ new ComputeStage(
+ "s1",
+ new NoOpSourceFactory(),
+ List.of(),
+ PartitioningScheme.none(),
+ List.of("nonexistent"),
+ List.of(),
+ 0L,
+ 0L)));
+ assertFalse(badRef.validate().isEmpty());
+ }
+
+ @Test
+ void should_lookup_stage_by_id() {
+ ComputeStage s1 =
+ new ComputeStage(
+ "s1",
+ new NoOpSourceFactory(),
+ List.of(),
+ PartitioningScheme.gather(),
+ List.of(),
+ List.of(),
+ 0L,
+ 0L);
+ StagedPlan plan = new StagedPlan("p1", List.of(s1));
+
+ assertEquals("s1", plan.getStage("s1").getStageId());
+ assertThrows(IllegalArgumentException.class, () -> plan.getStage("nonexistent"));
+ }
+
+ @Test
+ void should_create_partitioning_schemes() {
+ PartitioningScheme gather = PartitioningScheme.gather();
+ assertEquals(ExchangeType.GATHER, gather.getExchangeType());
+ assertTrue(gather.getHashChannels().isEmpty());
+
+ PartitioningScheme hash = PartitioningScheme.hashRepartition(List.of(0, 1));
+ assertEquals(ExchangeType.HASH_REPARTITION, hash.getExchangeType());
+ assertEquals(List.of(0, 1), hash.getHashChannels());
+
+ PartitioningScheme broadcast = PartitioningScheme.broadcast();
+ assertEquals(ExchangeType.BROADCAST, broadcast.getExchangeType());
+
+ PartitioningScheme none = PartitioningScheme.none();
+ assertEquals(ExchangeType.NONE, none.getExchangeType());
+ }
+
+ /** Minimal test stub for DataUnit. */
+ static class TestDataUnit extends DataUnit {
+ private final String id;
+ private final List<String> preferredNodes;
+ private final long estimatedRows;
+
+ TestDataUnit(String id, List<String> preferredNodes, long estimatedRows) {
+ this.id = id;
+ this.preferredNodes = preferredNodes;
+ this.estimatedRows = estimatedRows;
+ }
+
+ @Override
+ public String getDataUnitId() {
+ return id;
+ }
+
+ @Override
+ public List<String> getPreferredNodes() {
+ return preferredNodes;
+ }
+
+ @Override
+ public long getEstimatedRows() {
+ return estimatedRows;
+ }
+
+ @Override
+ public long getEstimatedSizeBytes() {
+ return 0;
+ }
+
+ @Override
+ public Map<String, Object> getProperties() {
+ return Collections.emptyMap();
+ }
+ }
+
+ /** No-op source factory for testing. */
+ static class NoOpSourceFactory implements SourceOperatorFactory {
+ @Override
+ public SourceOperator createOperator(OperatorContext context) {
+ return new SourceOperator() {
+ @Override
+ public void addDataUnit(DataUnit dataUnit) {}
+
+ @Override
+ public void noMoreDataUnits() {}
+
+ @Override
+ public Page getOutput() {
+ return null;
+ }
+
+ @Override
+ public boolean isFinished() {
+ return true;
+ }
+
+ @Override
+ public void finish() {}
+
+ @Override
+ public OperatorContext getContext() {
+ return context;
+ }
+
+ @Override
+ public void close() {}
+ };
+ }
+
+ @Override
+ public void noMoreOperators() {}
+ }
+
+ /** No-op operator factory for testing. */
+ static class NoOpOperatorFactory implements OperatorFactory {
+ @Override
+ public Operator createOperator(OperatorContext context) {
+ return new Operator() {
+ @Override
+ public boolean needsInput() {
+ return false;
+ }
+
+ @Override
+ public void addInput(Page page) {}
+
+ @Override
+ public Page getOutput() {
+ return null;
+ }
+
+ @Override
+ public boolean isFinished() {
+ return true;
+ }
+
+ @Override
+ public void finish() {}
+
+ @Override
+ public OperatorContext getContext() {
+ return context;
+ }
+
+ @Override
+ public void close() {}
+ };
+ }
+
+ @Override
+ public void noMoreOperators() {}
+ }
+}
diff --git a/docs/distributed-engine-architecture.md b/docs/distributed-engine-architecture.md
new file mode 100644
index 00000000000..8ddb4584c5e
--- /dev/null
+++ b/docs/distributed-engine-architecture.md
@@ -0,0 +1,294 @@
+# Distributed PPL Query Engine — Architecture
+
+## High-Level Execution Flow
+
+```
+ PPL Query: "search source=accounts | stats avg(age) by gender"
+ |
+ v
+ +--------------------+
+ | PPL Parser / |
+ | Calcite Planner |
+ +--------------------+
+ |
+ RelNode tree
+ |
+ v
+ +---------------------------------+
+ | DistributedExecutionEngine |
+ | (routing shell) |
+ +---------------------------------+
+ | |
+ distributed=true distributed=false (default)
+ | |
+ v v
+ UnsupportedOperationException +------------------------+
+ (execution not yet implemented) | OpenSearchExecution |
+ | Engine (legacy) |
+ +------------------------+
+```
+
+When `plugins.ppl.distributed.enabled=true`, the engine throws `UnsupportedOperationException`.
+Distributed execution will be implemented in the next phase against the clean H2 interfaces.
+
+---
+
+## Module Layout
+
+```
+sql/
+ ├── core/src/main/java/org/opensearch/sql/planner/distributed/
+ │ │
+ │ ├── operator/ ── Core Operator Framework ──
+ │ │ ├── Operator.java Push/pull interface (Page batches)
+ │ │ ├── SourceOperator.java Reads from storage (extends Operator)
+ │ │ ├── SinkOperator.java Terminal consumer (extends Operator)
+ │ │ ├── OperatorFactory.java Creates Operator instances
+ │ │ ├── SourceOperatorFactory.java Creates SourceOperator instances
+ │ │ └── OperatorContext.java Runtime context (memory, cancellation)
+ │ │
+ │ ├── page/ ── Data Batching (Columnar-Ready) ──
+ │ │ ├── Page.java Columnar-ready batch interface
+ │ │ ├── Block.java Single-column data (Arrow-aligned)
+ │ │ ├── RowPage.java Row-based Page implementation
+ │ │ └── PageBuilder.java Row-by-row Page builder
+ │ │
+ │ ├── pipeline/ ── Pipeline Execution ──
+ │ │ ├── Pipeline.java Ordered chain of OperatorFactories
+ │ │ ├── PipelineDriver.java Drives data through operator chain
+ │ │ └── PipelineContext.java Runtime state (status, cancellation)
+ │ │
+ │ ├── stage/ ── Staged Planning ──
+ │ │ ├── StagedPlan.java Tree of ComputeStages (dependency order)
+ │ │ ├── ComputeStage.java Stage with pipeline + partitioning + planFragment
+ │ │ ├── PartitioningScheme.java Output partitioning (gather, hash, broadcast)
+ │ │ └── ExchangeType.java Enum: GATHER / HASH_REPARTITION / BROADCAST / NONE
+ │ │
+ │ ├── exchange/ ── Inter-Stage Data Transfer ──
+ │ │ ├── ExchangeManager.java Creates sink/source operators
+ │ │ ├── ExchangeSinkOperator.java Sends pages downstream
+ │ │ ├── ExchangeSourceOperator.java Receives pages from upstream
+ │ │ └── OutputBuffer.java Back-pressure buffering for pages
+ │ │
+ │ ├── split/ ── Data Assignment ──
+ │ │ ├── DataUnit.java Abstract: unit of data (shard, file, etc.)
+ │ │ ├── DataUnitSource.java Generates DataUnits (shard discovery)
+ │ │ └── DataUnitAssignment.java Assigns DataUnits to nodes
+ │ │
+ │ ├── planner/ ── Physical Planning Interfaces ──
+ │ │ ├── PhysicalPlanner.java RelNode → StagedPlan
+ │ │ ├── PlanFragmenter.java Auto stage creation from RelNode tree
+ │ │ ├── FragmentationContext.java Context for fragmentation (nodes, costs)
+ │ │ ├── SubPlan.java RelNode fragment for data node pushdown
+ │ │ └── CostEstimator.java Row count / size / selectivity estimation
+ │ │
+ │ └── execution/ ── Execution Lifecycle ──
+ │ ├── QueryExecution.java Full query lifecycle management
+ │ ├── StageExecution.java Per-stage execution tracking
+ │ └── TaskExecution.java Per-task execution tracking
+ │
+ └── opensearch/src/main/java/org/opensearch/sql/opensearch/executor/
+ ├── DistributedExecutionEngine.java Routing shell: legacy vs distributed
+ │
+ └── distributed/
+ ├── TransportExecuteDistributedTaskAction.java Transport handler (data node)
+ ├── ExecuteDistributedTaskAction.java ActionType for routing
+ ├── ExecuteDistributedTaskRequest.java Request wire format
+ ├── ExecuteDistributedTaskResponse.java Response wire format
+ │
+ ├── split/
+ │ └── OpenSearchDataUnit.java DataUnit impl (index + shard + locality)
+ │
+ ├── operator/ ── OpenSearch Operators ──
+ │ ├── LuceneScanOperator.java Direct Lucene _source reads (Weight/Scorer)
+ │ ├── LimitOperator.java Row limit enforcement
+ │ ├── ResultCollector.java Collects pages into row lists
+ │ └── FilterToLuceneConverter.java Filter conditions → Lucene Query
+ │
+ └── pipeline/
+ └── OperatorPipelineExecutor.java Orchestrates pipeline on data node
+```
+
+---
+
+## Class Hierarchy
+
+### DataUnit Model
+
+```
+ DataUnit (abstract class)
+ ├── getDataUnitId() Unique identifier
+ ├── getPreferredNodes() Nodes where data is local
+ ├── getEstimatedRows() Row count estimate
+ ├── getEstimatedSizeBytes() Size estimate
+ ├── getProperties() Storage-specific metadata
+ └── isRemotelyAccessible() Default: true
+ │
+ └── OpenSearchDataUnit (concrete)
+ ├── indexName, shardId
+ └── isRemotelyAccessible() → false (Lucene requires locality)
+
+ DataUnitSource (interface, AutoCloseable)
+ └── getNextBatch(maxBatchSize) → List<DataUnit>
+
+ DataUnitAssignment (interface)
+ └── assign(dataUnits, availableNodes) → Map<String, List<DataUnit>>
+```
+
+### Block / Page Columnar Model
+
+```
+ Page (interface)
+ ├── getPositionCount() Row count
+ ├── getChannelCount() Column count
+ ├── getValue(pos, channel) Cell access
+ ├── getBlock(channel) Columnar access (default: throws)
+ ├── getRetainedSizeBytes() Memory estimate
+ └── getRegion(offset, len) Sub-page slice
+ │
+ └── RowPage (row-based impl)
+
+ Block (interface)
+ ├── getPositionCount() Row count in this column
+ ├── getValue(position) Value at row
+ ├── isNull(position) Null check
+ ├── getRetainedSizeBytes() Memory estimate
+ ├── getRegion(offset, len) Sub-block slice
+ └── getType() → BlockType BOOLEAN, INT, LONG, FLOAT, DOUBLE, STRING, ...
+
+ Future: ArrowBlock wraps Arrow FieldVector for zero-copy exchange
+```
+
+### PlanFragmenter → StagedPlan → ComputeStage
+
+```
+ PlanFragmenter (interface)
+ └── fragment(RelNode, FragmentationContext) → StagedPlan
+ │
+ │ FragmentationContext (interface)
+ │ ├── getAvailableNodes()
+ │ ├── getCostEstimator()
+ │ ├── getDataUnitSource(tableName)
+ │ ├── getMaxTasksPerStage()
+ │ └── getCoordinatorNodeId()
+ │
+ │ SubPlan (class)
+ │ ├── fragmentId
+ │ ├── root: RelNode ← sub-plan for data node execution (pushdown)
+ │ ├── outputPartitioning
+ │ └── children: List<SubPlan>
+ │
+ └── StagedPlan
+ └── List<ComputeStage> (dependency order: leaves → root)
+ ├── stageId
+ ├── SourceOperatorFactory
+ ├── List<OperatorFactory>
+ ├── PartitioningScheme
+ │ ├── ExchangeType: GATHER | HASH_REPARTITION | BROADCAST | NONE
+ │ └── hashChannels: List<Integer>
+ ├── sourceStageIds (upstream dependencies)
+ ├── List<DataUnit> (data assignments)
+ ├── planFragment: RelNode (nullable — sub-plan for pushdown)
+ └── estimatedRows / estimatedBytes
+```
+
+### Exchange Interfaces
+
+```
+ ExchangeManager (interface)
+ ├── createSink(context, targetStageId, partitioning) → ExchangeSinkOperator
+ └── createSource(context, sourceStageId) → ExchangeSourceOperator
+
+ OutputBuffer (interface, AutoCloseable)
+ ├── enqueue(Page) Add page to buffer
+ ├── setNoMorePages() Signal completion
+ ├── isFull() Back-pressure check
+ ├── getBufferedBytes() Buffer size
+ ├── abort() Discard buffered pages
+ ├── isFinished() All pages consumed
+ └── getPartitioningScheme() Output partitioning
+
+ Exchange protocol:
+ Current: OpenSearch transport (Netty TCP, StreamOutput/StreamInput)
+ Future: Arrow IPC (ArrowRecordBatch for zero-copy columnar exchange)
+```
+
+### Execution Lifecycle
+
+```
+ QueryExecution (interface)
+ ├── State: PLANNING → STARTING → RUNNING → FINISHING → FINISHED | FAILED
+ ├── getQueryId()
+ ├── getPlan() → StagedPlan
+ ├── getStageExecutions() → List<StageExecution>
+ ├── getStats() → QueryStats (totalRows, elapsedTime, planningTime)
+ └── cancel()
+
+ StageExecution (interface)
+ ├── State: PLANNED → SCHEDULING → RUNNING → FINISHED | FAILED | CANCELLED
+ ├── getStage() → ComputeStage
+ ├── addDataUnits(List<DataUnit>)
+ ├── noMoreDataUnits()
+ ├── getTaskExecutions() → Map<String, List<TaskExecution>>
+ ├── getStats() → StageStats (totalRows, totalBytes, completedTasks, totalTasks)
+ ├── addStateChangeListener(listener)
+ └── cancel()
+
+ TaskExecution (interface)
+ ├── State: PLANNED → RUNNING → FLUSHING → FINISHED | FAILED | CANCELLED
+ ├── getTaskId(), getNodeId()
+ ├── getAssignedDataUnits() → List<DataUnit>
+ ├── getStats() → TaskStats (processedRows, processedBytes, outputRows, elapsedTime)
+ └── cancel()
+```
+
+### Operator Framework
+
+```
+ Operator (interface)
+ / \
+ SourceOperator SinkOperator
+ (adds DataUnits) (terminal)
+ | |
+ ExchangeSourceOperator ExchangeSinkOperator
+ |
+ LuceneScanOperator (OpenSearch impl)
+
+ Other Operators:
+ ├── LimitOperator (implements Operator)
+ └── (future: FilterOperator, ProjectOperator, AggOperator, etc.)
+
+ Factories:
+ ├── OperatorFactory → creates Operator
+ └── SourceOperatorFactory → creates SourceOperator
+
+ Data Flow:
+ DataUnit → SourceOperator → Page → Operator → Page → ... → SinkOperator
+ ↑
+ OperatorContext (memory, cancellation)
+```
+
+---
+
+## Configuration
+
+| Setting | Default | Description |
+|---------|---------|-------------|
+| `plugins.ppl.distributed.enabled` | `false` | Single toggle: legacy engine (off/default) or distributed (on, not yet implemented) |
+
+**No sub-settings.** When distributed is enabled in the future, the operator pipeline will be the only execution path.
+
+---
+
+## Two Execution Paths (No Fallback)
+
+```
+ plugins.ppl.distributed.enabled = false (default) plugins.ppl.distributed.enabled = true
+ ────────────────────────────────────────────── ──────────────────────────────────────
+ PPL → Calcite → DistributedExecutionEngine PPL → Calcite → DistributedExecutionEngine
+ │ │
+ v v
+ OpenSearchExecutionEngine (legacy) UnsupportedOperationException
+ client.search() (SSB pushdown) (execution not yet implemented)
+ Single-node coordinator
+```
diff --git a/docs/ppl-test-queries.md b/docs/ppl-test-queries.md
new file mode 100644
index 00000000000..a1ff402c796
--- /dev/null
+++ b/docs/ppl-test-queries.md
@@ -0,0 +1,324 @@
+# PPL Test Queries & Index Setup
+
+Quick-reference for manual testing against a live OpenSearch cluster.
+Data files live in `sql/doctest/test_data/*.json`.
+
+---
+
+## Index Setup (Bulk Ingest)
+
+Run these to create and populate all required test indices.
+
+### accounts (4 docs, used by most commands)
+```bash
+curl -s -XPOST 'localhost:9200/accounts/_bulk?refresh=true' -H 'Content-Type: application/json' --data-binary '
+{"index":{"_id":"1"}}
+{"account_number":1,"balance":39225,"firstname":"Amber","lastname":"Duke","age":32,"gender":"M","address":"880 Holmes Lane","employer":"Pyrami","email":"amberduke@pyrami.com","city":"Brogan","state":"IL"}
+{"index":{"_id":"6"}}
+{"account_number":6,"balance":5686,"firstname":"Hattie","lastname":"Bond","age":36,"gender":"M","address":"671 Bristol Street","employer":"Netagy","email":"hattiebond@netagy.com","city":"Dante","state":"TN"}
+{"index":{"_id":"13"}}
+{"account_number":13,"balance":32838,"firstname":"Nanette","lastname":"Bates","age":28,"gender":"F","address":"789 Madison Street","employer":"Quility","city":"Nogal","state":"VA"}
+{"index":{"_id":"18"}}
+{"account_number":18,"balance":4180,"firstname":"Dale","lastname":"Adams","age":33,"gender":"M","address":"467 Hutchinson Court","employer":null,"email":"daleadams@boink.com","city":"Orick","state":"MD"}
+'
+```
+
+### state_country (8 docs, used by join/explain/streamstats)
+```bash
+curl -s -XPOST 'localhost:9200/state_country/_bulk?refresh=true' -H 'Content-Type: application/json' --data-binary '
+{"index":{"_id":"1"}}
+{"name":"Jake","age":70,"state":"California","country":"USA","year":2023,"month":4}
+{"index":{"_id":"2"}}
+{"name":"Hello","age":30,"state":"New York","country":"USA","year":2023,"month":4}
+{"index":{"_id":"3"}}
+{"name":"John","age":25,"state":"Ontario","country":"Canada","year":2023,"month":4}
+{"index":{"_id":"4"}}
+{"name":"Jane","age":20,"state":"Quebec","country":"Canada","year":2023,"month":4}
+{"index":{"_id":"5"}}
+{"name":"Jim","age":27,"state":"B.C","country":"Canada","year":2023,"month":4}
+{"index":{"_id":"6"}}
+{"name":"Peter","age":57,"state":"B.C","country":"Canada","year":2023,"month":4}
+{"index":{"_id":"7"}}
+{"name":"Rick","age":70,"state":"B.C","country":"Canada","year":2023,"month":4}
+{"index":{"_id":"8"}}
+{"name":"David","age":40,"state":"Washington","country":"USA","year":2023,"month":4}
+'
+```
+
+### occupation (6 docs, used by join examples)
+```bash
+curl -s -XPOST 'localhost:9200/occupation/_bulk?refresh=true' -H 'Content-Type: application/json' --data-binary '
+{"index":{"_id":"1"}}
+{"name":"Jake","occupation":"Engineer","country":"England","salary":100000,"year":2023,"month":4}
+{"index":{"_id":"2"}}
+{"name":"Hello","occupation":"Artist","country":"USA","salary":70000,"year":2023,"month":4}
+{"index":{"_id":"3"}}
+{"name":"John","occupation":"Doctor","country":"Canada","salary":120000,"year":2023,"month":4}
+{"index":{"_id":"4"}}
+{"name":"David","occupation":"Doctor","country":"USA","salary":120000,"year":2023,"month":4}
+{"index":{"_id":"5"}}
+{"name":"David","occupation":"Unemployed","country":"Canada","salary":0,"year":2023,"month":4}
+{"index":{"_id":"6"}}
+{"name":"Jane","occupation":"Scientist","country":"Canada","salary":90000,"year":2023,"month":4}
+'
+```
+
+### employees (used by basic queries)
+```bash
+curl -s -XPOST 'localhost:9200/employees/_bulk?refresh=true' -H 'Content-Type: application/json' --data-binary '
+{"index":{"_id":"1"}}
+{"name":"Alice","age":30,"department":"Engineering","salary":90000}
+{"index":{"_id":"2"}}
+{"name":"Bob","age":35,"department":"Marketing","salary":75000}
+{"index":{"_id":"3"}}
+{"name":"Carol","age":28,"department":"Engineering","salary":85000}
+{"index":{"_id":"4"}}
+{"name":"Dave","age":42,"department":"Sales","salary":70000}
+{"index":{"_id":"5"}}
+{"name":"Eve","age":31,"department":"Engineering","salary":95000}
+{"index":{"_id":"6"}}
+{"name":"Frank","age":45,"department":"Marketing","salary":80000}
+{"index":{"_id":"7"}}
+{"name":"Grace","age":27,"department":"Sales","salary":65000}
+{"index":{"_id":"8"}}
+{"name":"Hank","age":38,"department":"Engineering","salary":105000}
+'
+```
+
+### people (used by functions: math, string, datetime, crypto, collection, conversion)
+```bash
+curl -s -XPOST 'localhost:9200/people/_bulk?refresh=true' -H 'Content-Type: application/json' --data-binary '
+{"index":{"_id":"1"}}
+{"name":"Alice","age":30,"city":"Seattle"}
+{"index":{"_id":"2"}}
+{"name":"Bob","age":25,"city":"Portland"}
+{"index":{"_id":"3"}}
+{"name":"Carol","age":35,"city":"Vancouver"}
+'
+```
+
+### products (used by basic queries)
+```bash
+curl -s -XPOST 'localhost:9200/products/_bulk?refresh=true' -H 'Content-Type: application/json' --data-binary '
+{"index":{"_id":"1"}}
+{"name":"Widget","price":9.99,"category":"Tools","stock":100}
+{"index":{"_id":"2"}}
+{"name":"Gadget","price":24.99,"category":"Electronics","stock":50}
+{"index":{"_id":"3"}}
+{"name":"Doohickey","price":4.99,"category":"Tools","stock":200}
+{"index":{"_id":"4"}}
+{"name":"Thingamajig","price":49.99,"category":"Electronics","stock":25}
+{"index":{"_id":"5"}}
+{"name":"Whatchamacallit","price":14.99,"category":"Misc","stock":75}
+{"index":{"_id":"6"}}
+{"name":"Gizmo","price":34.99,"category":"Electronics","stock":30}
+'
+```
+
+### Ingest ALL at once
+```bash
+# NOTE: this loop only prints the index names as a checklist — it does not ingest data.
+# Run each curl block above individually to actually populate the indices.
+for idx in accounts state_country occupation employees people products; do
+  echo "--- $idx ---"
+done
+```
+
+### Enable distributed execution
+```bash
+curl -s -XPUT 'localhost:9200/_cluster/settings' -H 'Content-Type: application/json' -d '{
+ "persistent": {"plugins.ppl.distributed.enabled": true}
+}'
+```
+
+### Disable distributed execution (revert to legacy)
+```bash
+curl -s -XPUT 'localhost:9200/_cluster/settings' -H 'Content-Type: application/json' -d '{
+ "persistent": {"plugins.ppl.distributed.enabled": false}
+}'
+```
+
+---
+
+## PPL Queries by Category
+
+Helper function for running queries:
+```bash
+ppl() { curl -s 'localhost:9200/_plugins/_ppl' -H 'Content-Type: application/json' -d "{\"query\":\"$1\"}" | python3 -m json.tool; }
+```
+
+---
+
+### Join Queries (state_country + occupation)
+
+```bash
+# Inner join
+ppl "source = state_country | inner join left=a right=b ON a.name = b.name occupation | fields a.name, a.age, b.occupation, b.salary"
+
+# Left join
+ppl "source = state_country as a | left join left=a right=b ON a.name = b.name occupation as b | fields a.name, a.age, b.occupation, b.salary"
+
+# Right join (requires plugins.calcite.all_join_types.allowed=true)
+ppl "source = state_country as a | right join left=a right=b ON a.name = b.name occupation as b | fields a.name, a.age, b.occupation, b.salary"
+
+# Semi join
+ppl "source = state_country as a | left semi join left=a right=b ON a.name = b.name occupation as b | fields a.name, a.age, a.country"
+
+# Anti join
+ppl "source = state_country as a | left anti join left=a right=b ON a.name = b.name occupation as b | fields a.name, a.age, a.country"
+
+# Join with filter
+ppl "source = state_country | inner join left=a right=b ON a.name = b.name occupation | where b.salary > 80000 | fields a.name, b.salary"
+
+# Join with sort + limit
+ppl "source = state_country | inner join left=a right=b ON a.name = b.name occupation | sort - b.salary | head 3"
+
+# Join with subsearch
+ppl "source = state_country as a | left join ON a.name = b.name [ source = occupation | where salary > 0 | fields name, country, salary | sort salary | head 3 ] as b | fields a.name, a.age, b.salary"
+
+# Join with stats
+ppl "source = state_country | inner join left=a right=b ON a.name = b.name occupation | stats avg(salary) by span(age, 10) as age_span, b.country"
+```
+
+### Explain (shows distributed plan)
+```bash
+# Explain a join query
+curl -s 'localhost:9200/_plugins/_ppl/_explain' -H 'Content-Type: application/json' \
+ -d '{"query":"source = state_country | inner join left=a right=b ON a.name = b.name occupation | fields a.name, b.salary"}' | python3 -m json.tool
+
+# Explain a simple query
+curl -s 'localhost:9200/_plugins/_ppl/_explain' -H 'Content-Type: application/json' \
+ -d '{"query":"source = accounts | where age > 30 | head 5"}' | python3 -m json.tool
+```
+
+---
+
+### Basic Scan / Filter / Limit (accounts)
+
+```bash
+ppl "source=accounts"
+ppl "source=accounts | head 2"
+ppl "source=accounts | fields firstname, age"
+ppl "source=accounts | where age > 30"
+ppl "source=accounts | where age > 30 | fields firstname, age"
+ppl "source=accounts | where age > 30 | head 2"
+ppl "source=accounts | fields firstname, age | head 3 from 1"
+```
+
+### Sort (accounts)
+```bash
+ppl "source=accounts | sort age | fields firstname, age"
+ppl "source=accounts | sort - balance | fields firstname, balance | head 3"
+ppl "source=accounts | sort + age | fields firstname, age"
+```
+
+### Rename (accounts)
+```bash
+ppl "source=accounts | rename firstname as first_name | fields first_name, age"
+ppl "source=accounts | rename firstname as first_name, lastname as last_name | fields first_name, last_name"
+```
+
+### Where / Filter (accounts)
+```bash
+ppl "source=accounts | where gender = 'M' | fields firstname, gender"
+ppl "source=accounts | where age > 30 AND gender = 'M' | fields firstname, age, gender"
+ppl "source=accounts | where balance > 10000 | fields firstname, balance"
+ppl "source=accounts | where employer IS NOT NULL | fields firstname, employer"
+```
+
+### Dedup (accounts)
+```bash
+ppl "source=accounts | dedup gender | fields account_number, gender | sort account_number"
+ppl "source=accounts | dedup 2 gender | fields account_number, gender | sort account_number"
+```
+
+### Eval (accounts)
+```bash
+ppl "source=accounts | eval doubleAge = age * 2 | fields age, doubleAge"
+ppl "source=accounts | eval greeting = 'Hello ' + firstname | fields firstname, greeting"
+```
+
+### Stats / Aggregation (accounts)
+```bash
+ppl "source=accounts | stats count()"
+ppl "source=accounts | stats avg(age)"
+ppl "source=accounts | stats avg(age) by gender"
+ppl "source=accounts | stats max(age), min(age) by gender"
+ppl "source=accounts | stats count() as cnt by state"
+```
+
+### Parse (accounts)
+```bash
+ppl "source=accounts | parse email '.+@(?<host>.+)' | fields email, host"
+ppl "source=accounts | parse address '\\d+ (?<street>.+)' | fields address, street"
+```
+
+### Regex (accounts)
+```bash
+ppl "source=accounts | regex email=\"@pyrami\\.com$\" | fields account_number, email"
+```
+
+### Fillnull (accounts)
+```bash
+ppl "source=accounts | fields email, employer | fillnull with '' in employer"
+```
+
+### Replace (accounts)
+```bash
+ppl "source=accounts | replace \"IL\" WITH \"Illinois\" IN state | fields state"
+```
+
+---
+
+### Streamstats (state_country)
+```bash
+ppl "source=state_country | streamstats avg(age) as running_avg, count() as running_count by country"
+ppl "source=state_country | streamstats current=false window=2 max(age) as prev_max_age"
+```
+
+### Explain (state_country)
+```bash
+ppl "explain source=state_country | where country = 'USA' OR country = 'England' | stats count() by country"
+```
+
+---
+
+### Functions (people)
+```bash
+ppl "source=people | eval len = LENGTH(name) | fields name, len"
+ppl "source=people | eval upper = UPPER(name) | fields name, upper"
+ppl "source=people | eval abs_val = ABS(-42) | fields name, abs_val | head 1"
+```
+
+---
+
+## Index Summary
+
+| Index | Docs | Used By | Key Fields |
+|-------|------|---------|------------|
+| `accounts` | 4 | head, stats, where, sort, dedup, eval, parse, regex, fillnull, rename, replace, fields, addtotals, transpose, appendpipe, condition, expressions, statistical, aggregations, relevance | account_number, balance, firstname, lastname, age, gender, address, employer, email, city, state |
+| `state_country` | 8 | join, explain, streamstats | name, age, state, country, year, month |
+| `occupation` | 6 | join | name, occupation, country, salary, year, month |
+| `employees` | 8 | basic queries | name, age, department, salary |
+| `people` | 3 | math, string, datetime, crypto, collection, conversion functions | name, age, city |
+| `products` | 6 | basic queries | name, price, category, stock |
+
+### Additional indices in doctest/test_data/ (ingest from file if needed)
+| Index | Data File |
+|-------|-----------|
+| `books` | `doctest/test_data/books.json` |
+| `nyc_taxi` | `doctest/test_data/nyc_taxi.json` |
+| `weblogs` | `doctest/test_data/weblogs.json` |
+| `json_test` | `doctest/test_data/json_test.json` |
+| `otellogs` | `doctest/test_data/otellogs.json` |
+| `mvcombine_data` | `doctest/test_data/mvcombine.json` |
+| `work_information` | `doctest/test_data/work_information.json` |
+| `worker` | `doctest/test_data/worker.json` |
+| `events` | `doctest/test_data/events.json` |
+
+To ingest from file:
+```bash
+curl -s -XPOST 'localhost:9200/<index>/_bulk?refresh=true' \
+  -H 'Content-Type: application/json' \
+  --data-binary @sql/doctest/test_data/<file>.json
+```
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCastFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCastFunctionIT.java
index 9560aa0939a..eb1dfb6190c 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCastFunctionIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCastFunctionIT.java
@@ -196,4 +196,10 @@ public void testCastIpToString() throws IOException {
rows("1.2.3.5"),
rows("::ffff:1234"));
}
+
+  /**
+   * Delegates to the inherited cast-to-IP test so it executes in this Calcite remote suite.
+   *
+   * <p>NOTE(review): the override only calls {@code super} — presumably re-declared to
+   * (re-)enable the test for the Calcite engine; confirm against the base class's annotations.
+   */
+  @Override
+  @Test
+  public void testCastToIP() throws IOException {
+    super.testCastToIP();
+  }
}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/DistributedExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/DistributedExecutionEngine.java
new file mode 100644
index 00000000000..b59c0ec219e
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/DistributedExecutionEngine.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.sql.ast.statement.ExplainMode;
+import org.opensearch.sql.calcite.CalcitePlanContext;
+import org.opensearch.sql.common.response.ResponseListener;
+import org.opensearch.sql.executor.ExecutionContext;
+import org.opensearch.sql.executor.ExecutionEngine;
+import org.opensearch.sql.opensearch.setting.OpenSearchSettings;
+import org.opensearch.sql.planner.physical.PhysicalPlan;
+
+/**
+ * Distributed execution engine that routes queries between legacy single-node execution and
+ * distributed multi-node execution based on configuration.
+ *
+ * When distributed execution is disabled (default), all queries delegate to the legacy {@link
+ * OpenSearchExecutionEngine}. When enabled, queries throw {@link UnsupportedOperationException} —
+ * distributed execution will be implemented in the next phase against the clean H2 interfaces
+ * (ComputeStage, DataUnit, PlanFragmenter, etc.).
+ */
+public class DistributedExecutionEngine implements ExecutionEngine {
+ private static final Logger logger = LogManager.getLogger(DistributedExecutionEngine.class);
+
+ private final OpenSearchExecutionEngine legacyEngine;
+ private final OpenSearchSettings settings;
+
+ public DistributedExecutionEngine(
+ OpenSearchExecutionEngine legacyEngine, OpenSearchSettings settings) {
+ this.legacyEngine = legacyEngine;
+ this.settings = settings;
+ logger.info("Initialized DistributedExecutionEngine");
+ }
+
+ @Override
+ public void execute(PhysicalPlan plan, ResponseListener listener) {
+ execute(plan, ExecutionContext.emptyExecutionContext(), listener);
+ }
+
+ @Override
+ public void execute(
+ PhysicalPlan plan, ExecutionContext context, ResponseListener listener) {
+ if (isDistributedEnabled()) {
+ throw new UnsupportedOperationException("Distributed execution not yet implemented");
+ }
+ legacyEngine.execute(plan, context, listener);
+ }
+
+ @Override
+ public void explain(PhysicalPlan plan, ResponseListener listener) {
+ legacyEngine.explain(plan, listener);
+ }
+
+ @Override
+ public void execute(
+ RelNode plan, CalcitePlanContext context, ResponseListener listener) {
+ if (isDistributedEnabled()) {
+ throw new UnsupportedOperationException("Distributed execution not yet implemented");
+ }
+ legacyEngine.execute(plan, context, listener);
+ }
+
+ @Override
+ public void explain(
+ RelNode plan,
+ ExplainMode mode,
+ CalcitePlanContext context,
+ ResponseListener listener) {
+ if (isDistributedEnabled()) {
+ throw new UnsupportedOperationException("Distributed execution not yet implemented");
+ }
+ legacyEngine.explain(plan, mode, context, listener);
+ }
+
+ private boolean isDistributedEnabled() {
+ return settings.getDistributedExecutionEnabled();
+ }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/ExecuteDistributedTaskAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/ExecuteDistributedTaskAction.java
new file mode 100644
index 00000000000..be04f909c9a
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/ExecuteDistributedTaskAction.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed;
+
+import org.opensearch.action.ActionType;
+
+/**
+ * Transport action for executing distributed query tasks on remote cluster nodes.
+ *
+ * This action enables the DistributedTaskScheduler to send operator pipeline requests to
+ * specific nodes for execution. Each node executes the pipeline locally using direct Lucene access
+ * and returns rows back to the coordinator.
+ */
+public class ExecuteDistributedTaskAction extends ActionType<ExecuteDistributedTaskResponse> {
+
+  /** Action name used for transport routing. */
+  public static final String NAME = "cluster:admin/opensearch/sql/distributed/execute";
+
+  /** Singleton instance; the private constructor prevents any other instantiation. */
+  public static final ExecuteDistributedTaskAction INSTANCE = new ExecuteDistributedTaskAction();
+
+  private ExecuteDistributedTaskAction() {
+    // The reader reference lets the transport layer deserialize responses off the wire.
+    super(NAME, ExecuteDistributedTaskResponse::new);
+  }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/ExecuteDistributedTaskRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/ExecuteDistributedTaskRequest.java
new file mode 100644
index 00000000000..fbae67dd5b7
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/ExecuteDistributedTaskRequest.java
@@ -0,0 +1,184 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+import org.opensearch.action.ActionRequest;
+import org.opensearch.action.ActionRequestValidationException;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+
+/**
+ * Request message for executing distributed query tasks on a remote node.
+ *
+ * Contains the operator pipeline parameters needed for execution: index name, shard IDs, field
+ * names, query limit, and optional filter conditions.
+ */
+@Data
+@EqualsAndHashCode(callSuper = false)
+@AllArgsConstructor
+@NoArgsConstructor
+public class ExecuteDistributedTaskRequest extends ActionRequest {
+
+  /** ID of the execution stage these work units belong to. */
+  private String stageId;
+
+  /** Index name for per-shard execution. */
+  private String indexName;
+
+  /** Shard IDs to execute on the target node. */
+  private List<Integer> shardIds;
+
+  /** Execution mode: always "OPERATOR_PIPELINE". */
+  private String executionMode;
+
+  /** Fields to return when using operator pipeline mode. */
+  private List<String> fieldNames;
+
+  /** Row limit when using operator pipeline mode. */
+  private int queryLimit;
+
+  /**
+   * Filter conditions for operator pipeline. Each entry is a Map with keys: "field" (String), "op"
+   * (String: EQ, NEQ, GT, GTE, LT, LTE), "value" (Object). Multiple conditions are ANDed. Compound
+   * boolean uses "bool" key with "AND"/"OR" and "children" list. Null means match all.
+   */
+  private List<Map<String, Object>> filterConditions;
+
+  /**
+   * Constructor for deserialization from stream; must mirror {@link #writeTo} field-for-field.
+   *
+   * @param in transport stream positioned at the start of this request
+   * @throws IOException on stream read failure
+   */
+  public ExecuteDistributedTaskRequest(StreamInput in) throws IOException {
+    super(in);
+    this.stageId = in.readString();
+    this.indexName = in.readOptionalString();
+
+    // Skip SearchSourceBuilder field (backward compat: always false for new requests)
+    if (in.readBoolean()) {
+      // Consume the SearchSourceBuilder bytes for backward compatibility
+      new org.opensearch.search.builder.SearchSourceBuilder(in);
+    }
+
+    // Deserialize shard IDs (optional, length-prefixed list of vints)
+    if (in.readBoolean()) {
+      int shardCount = in.readVInt();
+      this.shardIds = new java.util.ArrayList<>(shardCount);
+      for (int i = 0; i < shardCount; i++) {
+        this.shardIds.add(in.readVInt());
+      }
+    }
+
+    // Deserialize operator pipeline fields
+    this.executionMode = in.readOptionalString();
+    if (in.readBoolean()) {
+      this.fieldNames = in.readStringList();
+    }
+    this.queryLimit = in.readVInt();
+
+    // Deserialize filter conditions; each condition travels as a generic value.
+    // NOTE: an EMPTY list is written as absent (see writeTo), so it round-trips to null here.
+    if (in.readBoolean()) {
+      int filterCount = in.readVInt();
+      this.filterConditions = new java.util.ArrayList<>(filterCount);
+      for (int i = 0; i < filterCount; i++) {
+        @SuppressWarnings("unchecked")
+        Map<String, Object> condition = (Map<String, Object>) in.readGenericValue();
+        this.filterConditions.add(condition);
+      }
+    }
+  }
+
+  /** Serializes this request to a stream for network transport. */
+  @Override
+  public void writeTo(StreamOutput out) throws IOException {
+    super.writeTo(out);
+    out.writeString(stageId != null ? stageId : "");
+    out.writeOptionalString(indexName);
+
+    // SearchSourceBuilder field — always false for new requests
+    out.writeBoolean(false);
+
+    // Serialize shard IDs
+    if (shardIds != null) {
+      out.writeBoolean(true);
+      out.writeVInt(shardIds.size());
+      for (int shardId : shardIds) {
+        out.writeVInt(shardId);
+      }
+    } else {
+      out.writeBoolean(false);
+    }
+
+    // Serialize operator pipeline fields
+    out.writeOptionalString(executionMode);
+    if (fieldNames != null) {
+      out.writeBoolean(true);
+      out.writeStringCollection(fieldNames);
+    } else {
+      out.writeBoolean(false);
+    }
+    out.writeVInt(queryLimit);
+
+    // Serialize filter conditions (empty is treated the same as null: absent on the wire)
+    if (filterConditions != null && !filterConditions.isEmpty()) {
+      out.writeBoolean(true);
+      out.writeVInt(filterConditions.size());
+      for (Map<String, Object> condition : filterConditions) {
+        out.writeGenericValue(condition);
+      }
+    } else {
+      out.writeBoolean(false);
+    }
+  }
+
+  /**
+   * Validates the request before execution.
+   *
+   * @return true if request is valid for execution
+   */
+  public boolean isValid() {
+    return indexName != null
+        && !indexName.isEmpty()
+        && shardIds != null
+        && !shardIds.isEmpty()
+        && fieldNames != null
+        && !fieldNames.isEmpty()
+        && queryLimit > 0;
+  }
+
+  /** Transport-layer validation hook; returns null when the request is well-formed. */
+  @Override
+  public ActionRequestValidationException validate() {
+    ActionRequestValidationException validationException = null;
+    if (indexName == null || indexName.trim().isEmpty()) {
+      validationException = new ActionRequestValidationException();
+      validationException.addValidationError("Index name cannot be null or empty");
+    }
+    if (shardIds == null || shardIds.isEmpty()) {
+      if (validationException == null) {
+        validationException = new ActionRequestValidationException();
+      }
+      validationException.addValidationError("Shard IDs cannot be null or empty");
+    }
+    if (fieldNames == null || fieldNames.isEmpty()) {
+      if (validationException == null) {
+        validationException = new ActionRequestValidationException();
+      }
+      validationException.addValidationError("Field names cannot be null or empty");
+    }
+    return validationException;
+  }
+
+  @Override
+  public String toString() {
+    return String.format(
+        "ExecuteDistributedTaskRequest{stageId='%s', index='%s', shards=%s, mode='%s'}",
+        stageId, indexName, shardIds, executionMode);
+  }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/ExecuteDistributedTaskResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/ExecuteDistributedTaskResponse.java
new file mode 100644
index 00000000000..fdc77dcc3ba
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/ExecuteDistributedTaskResponse.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+import org.opensearch.action.search.SearchResponse;
+import org.opensearch.core.action.ActionResponse;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+
+/**
+ * Response message containing results from distributed query task execution.
+ *
+ * Contains the execution results, performance metrics, and any error information from executing
+ * WorkUnits on a remote cluster node.
+ *
+ * <p>Phase 1B Serialization: Serializes the SearchResponse (which implements
+ * Writeable) for returning per-shard search results from remote nodes. This prepares for Phase 1C
+ * transport-based execution.
+ */
+@Data
+@EqualsAndHashCode(callSuper = false)
+@AllArgsConstructor
+@NoArgsConstructor
+public class ExecuteDistributedTaskResponse extends ActionResponse {
+
+  /** Results from executing the work units (NOT serialized over transport — see writeTo). */
+  private List<Object> results;
+
+  /** Execution statistics and performance metrics (NOT serialized over transport). */
+  private Map<String, Object> executionStats;
+
+  /** Node ID where the tasks were executed. */
+  private String nodeId;
+
+  /** Whether execution completed successfully. */
+  private boolean success;
+
+  /** Error message if execution failed. */
+  private String errorMessage;
+
+  /** SearchResponse from per-shard execution (Phase 1B). */
+  private SearchResponse searchResponse;
+
+  /** Column names from operator pipeline execution (Phase 5B). */
+  private List<String> pipelineFieldNames;
+
+  /**
+   * Row data from operator pipeline execution (Phase 5B). Invariant: every row holds exactly
+   * pipelineFieldNames.size() values — the wire format has no per-row length (see writeTo).
+   */
+  private List<List<Object>> pipelineRows;
+
+  /** Constructor with original fields for backward compatibility. */
+  public ExecuteDistributedTaskResponse(
+      List<Object> results,
+      Map<String, Object> executionStats,
+      String nodeId,
+      boolean success,
+      String errorMessage) {
+    this.results = results;
+    this.executionStats = executionStats;
+    this.nodeId = nodeId;
+    this.success = success;
+    this.errorMessage = errorMessage;
+  }
+
+  /** Constructor for deserialization from stream; must mirror {@link #writeTo} field-for-field. */
+  public ExecuteDistributedTaskResponse(StreamInput in) throws IOException {
+    super(in);
+    this.nodeId = in.readString();
+    this.success = in.readBoolean();
+    this.errorMessage = in.readOptionalString();
+
+    // Deserialize SearchResponse (implements Writeable)
+    if (in.readBoolean()) {
+      this.searchResponse = new SearchResponse(in);
+    }
+
+    // Deserialize operator pipeline results (Phase 5B). Rows carry no per-row length, so the
+    // column count is derived from the field-name list written just before them.
+    if (in.readBoolean()) {
+      this.pipelineFieldNames = in.readStringList();
+      int rowCount = in.readVInt();
+      this.pipelineRows = new java.util.ArrayList<>(rowCount);
+      int colCount = this.pipelineFieldNames.size();
+      for (int i = 0; i < rowCount; i++) {
+        List<Object> row = new java.util.ArrayList<>(colCount);
+        for (int j = 0; j < colCount; j++) {
+          row.add(in.readGenericValue());
+        }
+        this.pipelineRows.add(row);
+      }
+    }
+
+    // Generic results not serialized over transport; note List.of()/Map.of() are immutable.
+    this.results = List.of();
+    this.executionStats = Map.of();
+  }
+
+  /** Serializes this response to a stream for network transport. */
+  @Override
+  public void writeTo(StreamOutput out) throws IOException {
+    out.writeString(nodeId != null ? nodeId : "");
+    out.writeBoolean(success);
+    out.writeOptionalString(errorMessage);
+
+    // Serialize SearchResponse (implements Writeable)
+    if (searchResponse != null) {
+      out.writeBoolean(true);
+      searchResponse.writeTo(out);
+    } else {
+      out.writeBoolean(false);
+    }
+
+    // Serialize operator pipeline results (Phase 5B). NOTE: the reader reconstructs rows using
+    // pipelineFieldNames.size(), so every row MUST contain exactly that many values; a ragged
+    // row would silently corrupt the stream.
+    if (pipelineFieldNames != null && pipelineRows != null) {
+      out.writeBoolean(true);
+      out.writeStringCollection(pipelineFieldNames);
+      out.writeVInt(pipelineRows.size());
+      for (List<Object> row : pipelineRows) {
+        for (Object value : row) {
+          out.writeGenericValue(value);
+        }
+      }
+    } else {
+      out.writeBoolean(false);
+    }
+  }
+
+  /** Creates a successful response with results. */
+  public static ExecuteDistributedTaskResponse success(
+      String nodeId, List<Object> results, Map<String, Object> stats) {
+    return new ExecuteDistributedTaskResponse(results, stats, nodeId, true, null);
+  }
+
+  /** Creates a failure response with error information. */
+  public static ExecuteDistributedTaskResponse failure(String nodeId, String errorMessage) {
+    return new ExecuteDistributedTaskResponse(List.of(), Map.of(), nodeId, false, errorMessage);
+  }
+
+  /** Creates a successful response containing row data from operator pipeline (Phase 5B). */
+  public static ExecuteDistributedTaskResponse successWithRows(
+      String nodeId, List<String> fieldNames, List<List<Object>> rows) {
+    ExecuteDistributedTaskResponse resp =
+        new ExecuteDistributedTaskResponse(List.of(), Map.of(), nodeId, true, null);
+    resp.setPipelineFieldNames(fieldNames);
+    resp.setPipelineRows(rows);
+    return resp;
+  }
+
+  /** Creates a successful response containing a SearchResponse (Phase 1C). */
+  public static ExecuteDistributedTaskResponse successWithSearch(
+      String nodeId, SearchResponse searchResponse) {
+    ExecuteDistributedTaskResponse resp =
+        new ExecuteDistributedTaskResponse(List.of(), Map.of(), nodeId, true, null);
+    resp.setSearchResponse(searchResponse);
+    return resp;
+  }
+
+  /** Gets the number of results returned. */
+  public int getResultCount() {
+    return results != null ? results.size() : 0;
+  }
+
+  /** Checks if the execution was successful (success flag set AND no error message). */
+  public boolean isSuccessful() {
+    return success && errorMessage == null;
+  }
+
+  @Override
+  public String toString() {
+    return String.format(
+        "ExecuteDistributedTaskResponse{nodeId='%s', success=%s, results=%d, error='%s'}",
+        nodeId, success, getResultCount(), errorMessage);
+  }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/TransportExecuteDistributedTaskAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/TransportExecuteDistributedTaskAction.java
new file mode 100644
index 00000000000..e7b6b4df211
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/TransportExecuteDistributedTaskAction.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed;
+
+import lombok.extern.log4j.Log4j2;
+import org.opensearch.action.support.ActionFilters;
+import org.opensearch.action.support.HandledTransportAction;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.inject.Inject;
+import org.opensearch.core.action.ActionListener;
+import org.opensearch.indices.IndicesService;
+import org.opensearch.sql.opensearch.executor.distributed.pipeline.OperatorPipelineExecutor;
+import org.opensearch.tasks.Task;
+import org.opensearch.transport.TransportService;
+import org.opensearch.transport.client.Client;
+
+/**
+ * Transport action handler for executing distributed query tasks on data nodes.
+ *
+ * This handler runs on each cluster node and processes ExecuteDistributedTaskRequest messages
+ * from the coordinator. It executes the operator pipeline locally using direct Lucene access and
+ * returns results via ExecuteDistributedTaskResponse.
+ *
+ *
Execution Process:
+ *
+ *
+ * Receive OPERATOR_PIPELINE request from coordinator node
+ * Execute LuceneScanOperator + LimitOperator pipeline on assigned shards
+ * Return rows to coordinator
+ *
+ */
+@Log4j2
+public class TransportExecuteDistributedTaskAction
+ extends HandledTransportAction {
+
+ public static final String NAME = "cluster:admin/opensearch/sql/distributed/execute";
+
+ private final ClusterService clusterService;
+ private final Client client;
+ private final IndicesService indicesService;
+
+ @Inject
+ public TransportExecuteDistributedTaskAction(
+ TransportService transportService,
+ ActionFilters actionFilters,
+ ClusterService clusterService,
+ Client client,
+ IndicesService indicesService) {
+ super(
+ ExecuteDistributedTaskAction.NAME,
+ transportService,
+ actionFilters,
+ ExecuteDistributedTaskRequest::new);
+ this.clusterService = clusterService;
+ this.client = client;
+ this.indicesService = indicesService;
+ }
+
+ @Override
+ protected void doExecute(
+ Task task,
+ ExecuteDistributedTaskRequest request,
+ ActionListener listener) {
+
+ String nodeId = clusterService.localNode().getId();
+
+ try {
+ log.info(
+ "[Operator Pipeline] Executing on node: {} for index: {}, shards: {}",
+ nodeId,
+ request.getIndexName(),
+ request.getShardIds());
+
+ OperatorPipelineExecutor.OperatorPipelineResult result =
+ OperatorPipelineExecutor.execute(indicesService, request);
+
+ log.info(
+ "[Operator Pipeline] Completed on node: {} - {} rows", nodeId, result.getRows().size());
+
+ listener.onResponse(
+ ExecuteDistributedTaskResponse.successWithRows(
+ nodeId, result.getFieldNames(), result.getRows()));
+ } catch (Exception e) {
+ log.error("[Operator Pipeline] Failed on node: {}", nodeId, e);
+ listener.onResponse(
+ ExecuteDistributedTaskResponse.failure(
+ nodeId, "Operator pipeline failed: " + e.getMessage()));
+ }
+ }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/FilterToLuceneConverter.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/FilterToLuceneConverter.java
new file mode 100644
index 00000000000..b9345a7c139
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/FilterToLuceneConverter.java
@@ -0,0 +1,320 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed.operator;
+
+import java.util.List;
+import java.util.Map;
+import lombok.extern.log4j.Log4j2;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.index.IndexService;
+import org.opensearch.index.mapper.KeywordFieldMapper;
+import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.index.mapper.NumberFieldMapper;
+import org.opensearch.index.mapper.TextFieldMapper;
+import org.opensearch.index.query.QueryBuilders;
+import org.opensearch.index.query.QueryShardContext;
+
+/**
+ * Converts serialized filter conditions to Lucene queries using the local shard's field mappings.
+ *
+ * Each filter condition is a Map with keys:
+ *
+ *
+ * "field" (String) - field name
+ * "op" (String) - operator: EQ, NEQ, GT, GTE, LT, LTE
+ * "value" (Object) - comparison value
+ *
+ *
+ * Multiple conditions are combined with AND. The converter uses MapperService to resolve field
+ * types and creates appropriate Lucene queries:
+ *
+ *
+ * Keyword fields: TermQuery / TermRangeQuery
+ * Numeric fields: LongPoint / IntPoint / DoublePoint range queries
+ * Text fields: TermQuery on the field directly
+ *
+ */
+@Log4j2
+public class FilterToLuceneConverter {
+
+ private FilterToLuceneConverter() {}
+
+ /**
+ * Converts a list of filter conditions to a single Lucene query.
+ *
+ * @param conditions filter conditions (null or empty means match all)
+ * @param mapperService the shard's mapper service for field type resolution
+ * @return a Lucene Query
+ */
+ public static Query convert(List> conditions, MapperService mapperService) {
+ return convert(conditions, mapperService, null);
+ }
+
+ /**
+ * Converts a list of filter conditions to a single Lucene query with IndexService for
+ * query_string support.
+ *
+ * @param conditions filter conditions (null or empty means match all)
+ * @param mapperService the shard's mapper service for field type resolution
+ * @param indexService the index service for creating SearchExecutionContext (for query_string)
+ * @return a Lucene Query
+ */
+ public static Query convert(
+ List> conditions,
+ MapperService mapperService,
+ IndexService indexService) {
+ if (conditions == null || conditions.isEmpty()) {
+ return new MatchAllDocsQuery();
+ }
+
+ if (conditions.size() == 1) {
+ return convertSingle(conditions.get(0), mapperService, indexService);
+ }
+
+ // Multiple conditions: AND them together
+ BooleanQuery.Builder bool = new BooleanQuery.Builder();
+ for (Map condition : conditions) {
+ Query q = convertSingle(condition, mapperService, indexService);
+ bool.add(q, BooleanClause.Occur.FILTER);
+ }
+ return bool.build();
+ }
+
+ private static Query convertSingle(
+ Map condition, MapperService mapperService, IndexService indexService) {
+ // Handle query_string type (from PPL inline filters)
+ String type = (String) condition.get("type");
+ if ("query_string".equals(type)) {
+ return convertQueryString(condition, indexService);
+ }
+
+ String field = (String) condition.get("field");
+ String op = (String) condition.get("op");
+ Object value = condition.get("value");
+
+ if (field == null || op == null) {
+ log.warn("[Filter] Invalid filter condition: {}", condition);
+ return new MatchAllDocsQuery();
+ }
+
+ // Resolve field type from shard mapping
+ MappedFieldType fieldType = mapperService.fieldType(field);
+ if (fieldType == null) {
+ // Try with .keyword suffix for text fields
+ fieldType = mapperService.fieldType(field + ".keyword");
+ if (fieldType != null) {
+ field = field + ".keyword";
+ } else {
+ log.warn("[Filter] Field '{}' not found in mapping, skipping filter", field);
+ return new MatchAllDocsQuery();
+ }
+ }
+
+ log.debug(
+ "[Filter] Converting: field={}, op={}, value={}, fieldType={}",
+ field,
+ op,
+ value,
+ fieldType.getClass().getSimpleName());
+
+ return switch (op) {
+ case "EQ" -> buildEqualityQuery(field, value, fieldType);
+ case "NEQ" -> buildNegationQuery(buildEqualityQuery(field, value, fieldType));
+ case "GT" -> buildRangeQuery(field, value, fieldType, false, false);
+ case "GTE" -> buildRangeQuery(field, value, fieldType, true, false);
+ case "LT" -> buildRangeQuery(field, value, fieldType, false, true);
+ case "LTE" -> buildRangeQuery(field, value, fieldType, true, true);
+ default -> {
+ log.warn("[Filter] Unknown operator: {}", op);
+ yield new MatchAllDocsQuery();
+ }
+ };
+ }
+
+ private static Query buildEqualityQuery(String field, Object value, MappedFieldType fieldType) {
+ if (fieldType instanceof NumberFieldMapper.NumberFieldType numType) {
+ return buildNumericExactQuery(field, value, numType);
+ } else if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
+ return new TermQuery(new Term(field, value.toString()));
+ } else if (fieldType instanceof TextFieldMapper.TextFieldType) {
+ // For text fields, use the analyzed field
+ return new TermQuery(new Term(field, value.toString().toLowerCase()));
+ } else {
+ // Generic fallback: term query
+ return new TermQuery(new Term(field, value.toString()));
+ }
+ }
+
+ private static Query buildNegationQuery(Query inner) {
+ BooleanQuery.Builder bool = new BooleanQuery.Builder();
+ bool.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+ bool.add(inner, BooleanClause.Occur.MUST_NOT);
+ return bool.build();
+ }
+
+ /**
+ * Builds a range query for the given field and value.
+ *
+ * @param inclusive whether the bound is inclusive (>= or <=)
+ * @param isUpper if true, value is the upper bound; if false, value is the lower bound
+ */
+ private static Query buildRangeQuery(
+ String field, Object value, MappedFieldType fieldType, boolean inclusive, boolean isUpper) {
+
+ if (fieldType instanceof NumberFieldMapper.NumberFieldType numType) {
+ return buildNumericRangeQuery(field, value, numType, inclusive, isUpper);
+ } else if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
+ return buildKeywordRangeQuery(field, value, inclusive, isUpper);
+ } else {
+ // Generic fallback: keyword range
+ return buildKeywordRangeQuery(field, value, inclusive, isUpper);
+ }
+ }
+
+ private static Query buildNumericExactQuery(
+ String field, Object value, NumberFieldMapper.NumberFieldType numType) {
+ String typeName = numType.typeName();
+ return switch (typeName) {
+ case "long" -> LongPoint.newExactQuery(field, toLong(value));
+ case "integer" -> IntPoint.newExactQuery(field, toInt(value));
+ case "double" -> DoublePoint.newExactQuery(field, toDouble(value));
+ case "float" -> FloatPoint.newExactQuery(field, toFloat(value));
+ default -> LongPoint.newExactQuery(field, toLong(value));
+ };
+ }
+
+ private static Query buildNumericRangeQuery(
+ String field,
+ Object value,
+ NumberFieldMapper.NumberFieldType numType,
+ boolean inclusive,
+ boolean isUpper) {
+ String typeName = numType.typeName();
+ return switch (typeName) {
+ case "long" -> buildLongRange(field, toLong(value), inclusive, isUpper);
+ case "integer" -> buildIntRange(field, toInt(value), inclusive, isUpper);
+ case "double" -> buildDoubleRange(field, toDouble(value), inclusive, isUpper);
+ case "float" -> buildFloatRange(field, toFloat(value), inclusive, isUpper);
+ default -> buildLongRange(field, toLong(value), inclusive, isUpper);
+ };
+ }
+
+ private static Query buildLongRange(
+ String field, long value, boolean inclusive, boolean isUpper) {
+ if (isUpper) {
+ long upper = inclusive ? value : value - 1;
+ return LongPoint.newRangeQuery(field, Long.MIN_VALUE, upper);
+ } else {
+ long lower = inclusive ? value : value + 1;
+ return LongPoint.newRangeQuery(field, lower, Long.MAX_VALUE);
+ }
+ }
+
+ private static Query buildIntRange(String field, int value, boolean inclusive, boolean isUpper) {
+ if (isUpper) {
+ int upper = inclusive ? value : value - 1;
+ return IntPoint.newRangeQuery(field, Integer.MIN_VALUE, upper);
+ } else {
+ int lower = inclusive ? value : value + 1;
+ return IntPoint.newRangeQuery(field, lower, Integer.MAX_VALUE);
+ }
+ }
+
+ private static Query buildDoubleRange(
+ String field, double value, boolean inclusive, boolean isUpper) {
+ if (isUpper) {
+ double upper = inclusive ? value : Math.nextDown(value);
+ return DoublePoint.newRangeQuery(field, Double.NEGATIVE_INFINITY, upper);
+ } else {
+ double lower = inclusive ? value : Math.nextUp(value);
+ return DoublePoint.newRangeQuery(field, lower, Double.POSITIVE_INFINITY);
+ }
+ }
+
+ private static Query buildFloatRange(
+ String field, float value, boolean inclusive, boolean isUpper) {
+ if (isUpper) {
+ float upper = inclusive ? value : Math.nextDown(value);
+ return FloatPoint.newRangeQuery(field, Float.NEGATIVE_INFINITY, upper);
+ } else {
+ float lower = inclusive ? value : Math.nextUp(value);
+ return FloatPoint.newRangeQuery(field, lower, Float.POSITIVE_INFINITY);
+ }
+ }
+
+ private static Query buildKeywordRangeQuery(
+ String field, Object value, boolean inclusive, boolean isUpper) {
+ BytesRef bytesVal = new BytesRef(value.toString());
+ if (isUpper) {
+ return new TermRangeQuery(field, null, bytesVal, true, inclusive);
+ } else {
+ return new TermRangeQuery(field, bytesVal, null, inclusive, true);
+ }
+ }
+
+ private static long toLong(Object value) {
+ if (value instanceof Number n) return n.longValue();
+ return Long.parseLong(value.toString());
+ }
+
+ private static int toInt(Object value) {
+ if (value instanceof Number n) return n.intValue();
+ return Integer.parseInt(value.toString());
+ }
+
+ private static double toDouble(Object value) {
+ if (value instanceof Number n) return n.doubleValue();
+ return Double.parseDouble(value.toString());
+ }
+
+ private static float toFloat(Object value) {
+ if (value instanceof Number n) return n.floatValue();
+ return Float.parseFloat(value.toString());
+ }
+
+ /**
+ * Converts a query_string filter to a Lucene query using OpenSearch's QueryStringQueryBuilder.
+ * PPL inline filters (e.g., source=bank gender='F') get converted to query_string syntax like
+ * "gender:F" by the PPL parser. Uses OpenSearch's query builder for proper field type handling
+ * (numeric, keyword, text, etc.).
+ */
+ private static Query convertQueryString(
+ Map condition, IndexService indexService) {
+ String queryText = (String) condition.get("query");
+ if (queryText == null || queryText.isEmpty()) {
+ log.warn("[Filter] Empty query_string condition");
+ return new MatchAllDocsQuery();
+ }
+
+ if (indexService == null) {
+ log.warn("[Filter] IndexService not available, can't convert query_string: {}", queryText);
+ return new MatchAllDocsQuery();
+ }
+
+ try {
+ QueryShardContext queryShardContext =
+ indexService.newQueryShardContext(0, null, () -> 0L, null);
+ Query query = QueryBuilders.queryStringQuery(queryText).toQuery(queryShardContext);
+ log.info("[Filter] Converted query_string '{}' to Lucene query: {}", queryText, query);
+ return query;
+ } catch (Exception e) {
+ log.warn("[Filter] Failed to convert query_string '{}': {}", queryText, e.getMessage());
+ return new MatchAllDocsQuery();
+ }
+ }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/LimitOperator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/LimitOperator.java
new file mode 100644
index 00000000000..3d57d241177
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/LimitOperator.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed.operator;
+
+import org.opensearch.sql.planner.distributed.operator.Operator;
+import org.opensearch.sql.planner.distributed.operator.OperatorContext;
+import org.opensearch.sql.planner.distributed.page.Page;
+
+/**
+ * Operator that caps the total number of rows flowing through the pipeline. Once the running row
+ * count reaches the configured limit, the current page is truncated and further input is refused.
+ */
+public class LimitOperator implements Operator {
+
+  private final int limit;
+  private final OperatorContext context;
+
+  // Rows accepted so far across all pages.
+  private int rowsSeen;
+  // Single-page buffer between addInput() and getOutput().
+  private Page buffered;
+  // Set once upstream signals it has no more pages.
+  private boolean upstreamDone;
+
+  public LimitOperator(int limit, OperatorContext context) {
+    this.limit = limit;
+    this.context = context;
+    this.rowsSeen = 0;
+  }
+
+  @Override
+  public boolean needsInput() {
+    // Accept input only while the buffer is free, the limit is unmet, and upstream is alive.
+    if (buffered != null || upstreamDone) {
+      return false;
+    }
+    return rowsSeen < limit;
+  }
+
+  @Override
+  public void addInput(Page page) {
+    if (page == null) {
+      return;
+    }
+    int budget = limit - rowsSeen;
+    if (budget <= 0) {
+      // Already at the limit; drop the page.
+      return;
+    }
+    int incoming = page.getPositionCount();
+    if (incoming > budget) {
+      // Keep only the first `budget` rows of the page.
+      buffered = page.getRegion(0, budget);
+      rowsSeen = limit;
+    } else {
+      buffered = page;
+      rowsSeen += incoming;
+    }
+  }
+
+  @Override
+  public Page getOutput() {
+    // Hand over the buffered page (may be null) and clear the buffer.
+    Page out = buffered;
+    buffered = null;
+    return out;
+  }
+
+  @Override
+  public boolean isFinished() {
+    if (rowsSeen >= limit) {
+      return true;
+    }
+    return upstreamDone && buffered == null;
+  }
+
+  @Override
+  public void finish() {
+    upstreamDone = true;
+  }
+
+  @Override
+  public OperatorContext getContext() {
+    return context;
+  }
+
+  @Override
+  public void close() {
+    // No resources to release
+  }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/LuceneScanOperator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/LuceneScanOperator.java
new file mode 100644
index 00000000000..b6d14608143
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/LuceneScanOperator.java
@@ -0,0 +1,272 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed.operator;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import lombok.extern.log4j.Log4j2;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.StoredFields;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.common.xcontent.XContentHelper;
+import org.opensearch.common.xcontent.XContentType;
+import org.opensearch.core.common.bytes.BytesArray;
+import org.opensearch.index.engine.Engine;
+import org.opensearch.index.shard.IndexShard;
+import org.opensearch.sql.planner.distributed.operator.OperatorContext;
+import org.opensearch.sql.planner.distributed.operator.SourceOperator;
+import org.opensearch.sql.planner.distributed.page.Page;
+import org.opensearch.sql.planner.distributed.page.PageBuilder;
+import org.opensearch.sql.planner.distributed.split.DataUnit;
+
+/**
+ * Source operator that reads documents directly from Lucene via {@link
+ * IndexShard#acquireSearcher(String)}.
+ *
+ * Uses Lucene's Weight/Scorer pattern to iterate only documents matching the filter query. When
+ * no filter is provided, uses {@link MatchAllDocsQuery} to match all documents.
+ *
+ *
Reads {@code _source} JSON from stored fields and extracts requested field values.
+ */
+@Log4j2
+public class LuceneScanOperator implements SourceOperator {
+
+ private final IndexShard indexShard;
+ private final List fieldNames;
+ private final int batchSize;
+ private final OperatorContext context;
+ private final Query luceneQuery;
+
+ private DataUnit dataUnit;
+ private boolean noMoreDataUnits;
+ private boolean finished;
+ private Engine.Searcher engineSearcher;
+
+ // Weight/Scorer state for filtered iteration
+ private List leaves;
+ private int currentLeafIndex;
+ private StoredFields currentStoredFields;
+ private Scorer currentScorer;
+ private DocIdSetIterator currentDocIdIterator;
+ private Bits currentLiveDocs;
+
+ /**
+ * Creates a LuceneScanOperator with a filter query merged into the scan.
+ *
+ * @param indexShard the shard to read from
+ * @param fieldNames fields to extract from _source
+ * @param batchSize rows per page batch
+ * @param context operator context
+ * @param luceneQuery the Lucene query for filtering (null means match all)
+ */
+ public LuceneScanOperator(
+ IndexShard indexShard,
+ List fieldNames,
+ int batchSize,
+ OperatorContext context,
+ Query luceneQuery) {
+ this.indexShard = indexShard;
+ this.fieldNames = fieldNames;
+ this.batchSize = batchSize;
+ this.context = context;
+ this.luceneQuery = luceneQuery != null ? luceneQuery : new MatchAllDocsQuery();
+ this.finished = false;
+ this.currentLeafIndex = 0;
+ }
+
+ /** Backward-compatible constructor that matches all documents. */
+ public LuceneScanOperator(
+ IndexShard indexShard, List fieldNames, int batchSize, OperatorContext context) {
+ this(indexShard, fieldNames, batchSize, context, null);
+ }
+
+ @Override
+ public void addDataUnit(DataUnit dataUnit) {
+ this.dataUnit = dataUnit;
+ }
+
+ @Override
+ public void noMoreDataUnits() {
+ this.noMoreDataUnits = true;
+ }
+
+ @Override
+ public Page getOutput() {
+ if (finished) {
+ return null;
+ }
+
+ try {
+ // Lazy initialization: acquire searcher and prepare Weight on first call
+ if (engineSearcher == null) {
+ engineSearcher = indexShard.acquireSearcher("distributed-pipeline");
+ leaves = engineSearcher.getIndexReader().leaves();
+ if (leaves.isEmpty()) {
+ finished = true;
+ return null;
+ }
+ advanceToLeaf(0);
+ }
+
+ PageBuilder builder = new PageBuilder(fieldNames.size());
+ int rowsInBatch = 0;
+
+ while (rowsInBatch < batchSize) {
+ // Advance to next matching doc
+ int docId = nextMatchingDoc();
+ if (docId == DocIdSetIterator.NO_MORE_DOCS) {
+ finished = true;
+ return builder.isEmpty() ? null : builder.build();
+ }
+
+ // Read the document's _source
+ org.apache.lucene.document.Document doc = currentStoredFields.document(docId);
+ BytesRef sourceBytes = doc.getBinaryValue("_source");
+
+ if (sourceBytes == null) {
+ continue;
+ }
+
+ Map source =
+ XContentHelper.convertToMap(new BytesArray(sourceBytes), false, XContentType.JSON).v2();
+
+ builder.beginRow();
+ for (int i = 0; i < fieldNames.size(); i++) {
+ builder.setValue(i, getNestedValue(source, fieldNames.get(i)));
+ }
+ builder.endRow();
+ rowsInBatch++;
+ }
+
+ return builder.isEmpty() ? null : builder.build();
+
+ } catch (IOException e) {
+ log.error("Error reading from Lucene shard: {}", indexShard.shardId(), e);
+ finished = true;
+ throw new RuntimeException("Failed to read from Lucene shard", e);
+ }
+ }
+
+ /**
+ * Returns the next matching live document ID using the Weight/Scorer pattern. Advances across
+ * leaf readers (segments) as needed. Skips deleted/soft-deleted documents by checking the
+ * segment's liveDocs bitset — Lucene's Scorer.iterator() does NOT filter deleted docs.
+ */
+ private int nextMatchingDoc() throws IOException {
+ while (currentLeafIndex < leaves.size()) {
+ if (currentDocIdIterator != null) {
+ while (true) {
+ int docId = currentDocIdIterator.nextDoc();
+ if (docId == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ // Skip deleted/soft-deleted docs: liveDocs == null means all docs are live
+ if (currentLiveDocs == null || currentLiveDocs.get(docId)) {
+ return docId;
+ }
+ }
+ }
+ // Move to next leaf
+ currentLeafIndex++;
+ if (currentLeafIndex < leaves.size()) {
+ advanceToLeaf(currentLeafIndex);
+ }
+ }
+ return DocIdSetIterator.NO_MORE_DOCS;
+ }
+
+ /**
+ * Advances to the specified leaf (segment) and creates a Scorer for it. The Scorer uses the
+ * Lucene query to efficiently iterate only matching documents in that segment. Also captures the
+ * segment's liveDocs bitset for filtering deleted/soft-deleted documents.
+ */
+ private void advanceToLeaf(int leafIndex) throws IOException {
+ LeafReaderContext leafCtx = leaves.get(leafIndex);
+ currentStoredFields = leafCtx.reader().storedFields();
+ currentLiveDocs = leafCtx.reader().getLiveDocs();
+
+ // Create Weight/Scorer for filtered iteration using the engine's IndexSearcher
+ // (Engine.Searcher extends IndexSearcher with proper soft-delete handling)
+ Query rewritten = engineSearcher.rewrite(luceneQuery);
+ Weight weight = engineSearcher.createWeight(rewritten, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
+
+ currentScorer = weight.scorer(leafCtx);
+ if (currentScorer != null) {
+ currentDocIdIterator = currentScorer.iterator();
+ } else {
+ // No matching docs in this segment
+ currentDocIdIterator = null;
+ }
+ }
+
+ /**
+ * Navigates a nested map using a dotted field path. For "machine.os1", navigates into
+ * source["machine"]["os1"]. Handles arrays by extracting values from the first element. Falls
+ * back to direct key lookup for non-dotted fields.
+ */
+ @SuppressWarnings("unchecked")
+ private Object getNestedValue(Map source, String fieldName) {
+ // Try direct key first (covers non-dotted names and flattened fields)
+ Object direct = source.get(fieldName);
+ if (direct != null) {
+ return direct;
+ }
+
+ // Navigate dotted path: "machine.os1" → source["machine"]["os1"]
+ if (fieldName.contains(".")) {
+ String[] parts = fieldName.split("\\.");
+ Object current = source;
+ for (String part : parts) {
+ if (current instanceof Map) {
+ current = ((Map) current).get(part);
+ } else if (current instanceof List> list) {
+ // For array fields, extract from the first element
+ if (!list.isEmpty() && list.get(0) instanceof Map) {
+ current = ((Map) list.get(0)).get(part);
+ } else {
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+ return current;
+ }
+
+ return null;
+ }
+
+ @Override
+ public boolean isFinished() {
+ return finished;
+ }
+
+ @Override
+ public void finish() {
+ finished = true;
+ }
+
+ @Override
+ public OperatorContext getContext() {
+ return context;
+ }
+
+ @Override
+ public void close() {
+ if (engineSearcher != null) {
+ engineSearcher.close();
+ engineSearcher = null;
+ }
+ }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/ResultCollector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/ResultCollector.java
new file mode 100644
index 00000000000..c33d82cefd1
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/operator/ResultCollector.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed.operator;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.opensearch.sql.planner.distributed.page.Page;
+
+/**
+ * Collects pages from the operator pipeline into a list of rows. Used on data nodes to gather
+ * pipeline output before serializing into transport response.
+ */
+public class ResultCollector {
+
+  private final List<String> fieldNames;
+  private final List<List<Object>> rows;
+
+  /**
+   * @param fieldNames ordered column names describing each collected row
+   */
+  public ResultCollector(List<String> fieldNames) {
+    this.fieldNames = fieldNames;
+    this.rows = new ArrayList<>();
+  }
+
+  /**
+   * Extracts every position of the page into a row (one value per channel, in channel order) and
+   * appends the rows to the collected results. Null pages are ignored.
+   */
+  public void addPage(Page page) {
+    if (page == null) {
+      return;
+    }
+    int channelCount = page.getChannelCount();
+    for (int pos = 0; pos < page.getPositionCount(); pos++) {
+      List<Object> row = new ArrayList<>(channelCount);
+      for (int ch = 0; ch < channelCount; ch++) {
+        row.add(page.getValue(pos, ch));
+      }
+      rows.add(row);
+    }
+  }
+
+  /** Returns the field names for the collected data. */
+  public List<String> getFieldNames() {
+    return fieldNames;
+  }
+
+  /** Returns all collected rows (the live backing list, not a copy). */
+  public List<List<Object>> getRows() {
+    return rows;
+  }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/pipeline/OperatorPipelineExecutor.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/pipeline/OperatorPipelineExecutor.java
new file mode 100644
index 00000000000..07bd095b082
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/pipeline/OperatorPipelineExecutor.java
@@ -0,0 +1,177 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed.pipeline;
+
+import java.util.List;
+import java.util.Map;
+import lombok.extern.log4j.Log4j2;
+import org.apache.lucene.search.Query;
+import org.opensearch.index.IndexService;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.index.shard.IndexShard;
+import org.opensearch.indices.IndicesService;
+import org.opensearch.sql.opensearch.executor.distributed.ExecuteDistributedTaskRequest;
+import org.opensearch.sql.opensearch.executor.distributed.operator.FilterToLuceneConverter;
+import org.opensearch.sql.opensearch.executor.distributed.operator.LimitOperator;
+import org.opensearch.sql.opensearch.executor.distributed.operator.LuceneScanOperator;
+import org.opensearch.sql.opensearch.executor.distributed.operator.ResultCollector;
+import org.opensearch.sql.planner.distributed.operator.OperatorContext;
+import org.opensearch.sql.planner.distributed.page.Page;
+
+/**
+ * Orchestrates operator pipeline execution on a data node. Creates a LuceneScanOperator for each
+ * assigned shard, pipes output through a LimitOperator, and collects results.
+ *
+ * <p>Filter conditions from the transport request are converted to Lucene queries using the local
+ * shard's field mappings via {@link FilterToLuceneConverter}.
+ */
+@Log4j2
+public class OperatorPipelineExecutor {
+
+  private OperatorPipelineExecutor() {}
+
+  /**
+   * Executes the operator pipeline for the given request.
+   *
+   * @param indicesService used to resolve IndexShard instances and field mappings
+   * @param request contains index name, shard IDs, field names, limit, and filter conditions
+   * @return the collected field names and rows
+   */
+  public static OperatorPipelineResult execute(
+      IndicesService indicesService, ExecuteDistributedTaskRequest request) {
+
+    String indexName = request.getIndexName();
+    List<Integer> shardIds = request.getShardIds();
+    List<String> fieldNames = request.getFieldNames();
+    int queryLimit = request.getQueryLimit();
+    List<Map<String, Object>> filterConditions = request.getFilterConditions();
+
+    log.info(
+        "[Operator Pipeline] Executing on shards {} for index: {}, fields: {}, limit: {},"
+            + " filters: {}",
+        shardIds,
+        indexName,
+        fieldNames,
+        queryLimit,
+        filterConditions != null ? filterConditions.size() : 0);
+
+    // Resolve MapperService for field type lookup
+    IndexService indexService = resolveIndexService(indicesService, indexName);
+    MapperService mapperService = indexService != null ? indexService.mapperService() : null;
+
+    // Convert filter conditions to Lucene query using local field mappings
+    Query luceneQuery = null;
+    if (mapperService != null && filterConditions != null && !filterConditions.isEmpty()) {
+      luceneQuery = FilterToLuceneConverter.convert(filterConditions, mapperService, indexService);
+      log.info("[Operator Pipeline] Lucene filter query: {}", luceneQuery);
+    }
+
+    ResultCollector collector = new ResultCollector(fieldNames);
+    int remainingLimit = queryLimit;
+
+    for (int shardId : shardIds) {
+      if (remainingLimit <= 0) {
+        break;
+      }
+
+      IndexShard indexShard = resolveIndexShard(indicesService, indexName, shardId);
+      if (indexShard == null) {
+        log.warn("[Operator Pipeline] Could not resolve shard {}/{}", indexName, shardId);
+        continue;
+      }
+
+      OperatorContext ctx = OperatorContext.createDefault("lucene-scan-" + shardId);
+
+      try (LuceneScanOperator source =
+          new LuceneScanOperator(indexShard, fieldNames, 1024, ctx, luceneQuery)) {
+
+        LimitOperator limit = new LimitOperator(remainingLimit, ctx);
+        try {
+          // Pull loop: source → limit → collector
+          while (!source.isFinished() && !limit.isFinished()) {
+            Page page = source.getOutput();
+            if (page != null) {
+              limit.addInput(page);
+              Page limited = limit.getOutput();
+              if (limited != null) {
+                collector.addPage(limited);
+              }
+            }
+          }
+
+          // Flush any remaining output from limit
+          limit.finish();
+          Page remaining = limit.getOutput();
+          if (remaining != null) {
+            collector.addPage(remaining);
+          }
+        } finally {
+          // Close the limit operator even when the pull loop throws.
+          limit.close();
+        }
+      } catch (Exception e) {
+        log.error("[Operator Pipeline] Error processing shard {}/{}", indexName, shardId, e);
+        throw new RuntimeException(
+            "Operator pipeline failed on shard " + indexName + "/" + shardId, e);
+      }
+
+      // Shrink the per-shard budget by what has been collected so far.
+      remainingLimit = queryLimit - collector.getRows().size();
+    }
+
+    log.info(
+        "[Operator Pipeline] Completed - collected {} rows from {} shards",
+        collector.getRows().size(),
+        shardIds.size());
+
+    return new OperatorPipelineResult(collector.getFieldNames(), collector.getRows());
+  }
+
+  /** Finds the IndexService for the named index on this node, or null if absent. */
+  private static IndexService resolveIndexService(IndicesService indicesService, String indexName) {
+    for (IndexService indexService : indicesService) {
+      if (indexService.index().getName().equals(indexName)) {
+        return indexService;
+      }
+    }
+    log.warn("[Operator Pipeline] Index {} not found on this node", indexName);
+    return null;
+  }
+
+  /** Finds the given shard of the named index on this node, or null if absent. */
+  private static IndexShard resolveIndexShard(
+      IndicesService indicesService, String indexName, int shardId) {
+    for (IndexService indexService : indicesService) {
+      if (indexService.index().getName().equals(indexName)) {
+        try {
+          return indexService.getShard(shardId);
+        } catch (Exception e) {
+          log.warn(
+              "[Operator Pipeline] Shard {} not found on this node for index: {}",
+              shardId,
+              indexName);
+          return null;
+        }
+      }
+    }
+    log.warn("[Operator Pipeline] Index {} not found on this node", indexName);
+    return null;
+  }
+
+  /** Result of operator pipeline execution containing field names and row data. */
+  public static class OperatorPipelineResult {
+    private final List<String> fieldNames;
+    private final List<List<Object>> rows;
+
+    public OperatorPipelineResult(List<String> fieldNames, List<List<Object>> rows) {
+      this.fieldNames = fieldNames;
+      this.rows = rows;
+    }
+
+    public List<String> getFieldNames() {
+      return fieldNames;
+    }
+
+    public List<List<Object>> getRows() {
+      return rows;
+    }
+  }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/split/OpenSearchDataUnit.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/split/OpenSearchDataUnit.java
new file mode 100644
index 00000000000..c63c0129051
--- /dev/null
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/distributed/split/OpenSearchDataUnit.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed.split;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.opensearch.sql.planner.distributed.split.DataUnit;
+
+/**
+ * An OpenSearch-specific data unit representing a single shard of an index. Requires local Lucene
+ * access (not remotely accessible) because the LuceneScanOperator reads directly from the shard's
+ * IndexShard via {@code acquireSearcher}.
+ */
+public class OpenSearchDataUnit extends DataUnit {
+
+  private final String indexName;
+  private final int shardId;
+  // NOTE(review): element type assumed to be String node IDs (tests use "test-node-1") —
+  // confirm against DataUnit#getPreferredNodes.
+  private final List<String> preferredNodes;
+  private final long estimatedRows;
+  private final long estimatedSizeBytes;
+
+  public OpenSearchDataUnit(
+      String indexName,
+      int shardId,
+      List<String> preferredNodes,
+      long estimatedRows,
+      long estimatedSizeBytes) {
+    this.indexName = indexName;
+    this.shardId = shardId;
+    // Immutable view so callers cannot mutate scheduling hints after construction.
+    this.preferredNodes = Collections.unmodifiableList(preferredNodes);
+    this.estimatedRows = estimatedRows;
+    this.estimatedSizeBytes = estimatedSizeBytes;
+  }
+
+  /** Returns a stable identifier of the form {@code indexName/shardId}. */
+  @Override
+  public String getDataUnitId() {
+    return indexName + "/" + shardId;
+  }
+
+  @Override
+  public List<String> getPreferredNodes() {
+    return preferredNodes;
+  }
+
+  @Override
+  public long getEstimatedRows() {
+    return estimatedRows;
+  }
+
+  @Override
+  public long getEstimatedSizeBytes() {
+    return estimatedSizeBytes;
+  }
+
+  @Override
+  public Map<String, String> getProperties() {
+    return Map.of("indexName", indexName, "shardId", String.valueOf(shardId));
+  }
+
+  /**
+   * OpenSearch shard data units require local Lucene access — they cannot be read remotely.
+   *
+   * @return false
+   */
+  @Override
+  public boolean isRemotelyAccessible() {
+    return false;
+  }
+
+  /** Returns the index name this data unit reads from. */
+  public String getIndexName() {
+    return indexName;
+  }
+
+  /** Returns the shard ID within the index. */
+  public int getShardId() {
+    return shardId;
+  }
+
+  @Override
+  public String toString() {
+    return "OpenSearchDataUnit{"
+        + "index='"
+        + indexName
+        + "', shard="
+        + shardId
+        + ", nodes="
+        + preferredNodes
+        + ", ~rows="
+        + estimatedRows
+        + '}';
+  }
+}
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java
index bd8001f589d..01da8ce050c 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java
@@ -151,6 +151,13 @@ public class OpenSearchSettings extends Settings {
Setting.Property.NodeScope,
Setting.Property.Dynamic);
+  public static final Setting<Boolean> PPL_DISTRIBUTED_ENABLED_SETTING =
+ Setting.boolSetting(
+ Key.PPL_DISTRIBUTED_ENABLED.getKeyValue(),
+ false,
+ Setting.Property.NodeScope,
+ Setting.Property.Dynamic);
+
public static final Setting<Boolean> CALCITE_ENGINE_ENABLED_SETTING =
Setting.boolSetting(
Key.CALCITE_ENGINE_ENABLED.getKeyValue(),
@@ -437,6 +444,12 @@ public OpenSearchSettings(ClusterSettings clusterSettings) {
Key.PPL_JOIN_SUBSEARCH_MAXOUT,
PPL_JOIN_SUBSEARCH_MAXOUT_SETTING,
new Updater(Key.PPL_JOIN_SUBSEARCH_MAXOUT));
+ register(
+ settingBuilder,
+ clusterSettings,
+ Key.PPL_DISTRIBUTED_ENABLED,
+ PPL_DISTRIBUTED_ENABLED_SETTING,
+ new Updater(Key.PPL_DISTRIBUTED_ENABLED));
register(
settingBuilder,
clusterSettings,
@@ -667,6 +680,7 @@ public static List<Setting<?>> pluginSettings() {
.add(PPL_VALUES_MAX_LIMIT_SETTING)
.add(PPL_SUBSEARCH_MAXOUT_SETTING)
.add(PPL_JOIN_SUBSEARCH_MAXOUT_SETTING)
+ .add(PPL_DISTRIBUTED_ENABLED_SETTING)
.add(QUERY_MEMORY_LIMIT_SETTING)
.add(QUERY_SIZE_LIMIT_SETTING)
.add(QUERY_BUCKET_SIZE_SETTING)
@@ -702,4 +716,14 @@ public static List<Setting<?>> pluginNonDynamicSettings() {
public List<Setting<?>> getSettings() {
return pluginSettings();
}
+
+  /**
+   * Indicates whether distributed PPL execution is turned on. The setting defaults to {@code
+   * false}, so distributed execution only runs when an operator opts in explicitly.
+   *
+   * @return true if distributed execution is enabled, false otherwise
+   */
+  public boolean getDistributedExecutionEnabled() {
+    final Boolean enabled = getSettingValue(Key.PPL_DISTRIBUTED_ENABLED);
+    return enabled;
+  }
}
diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/DistributedExecutionEngineTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/DistributedExecutionEngineTest.java
new file mode 100644
index 00000000000..57353b3afd2
--- /dev/null
+++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/DistributedExecutionEngineTest.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import org.apache.calcite.rel.RelNode;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.DisplayNameGenerator;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.mockito.junit.jupiter.MockitoSettings;
+import org.mockito.quality.Strictness;
+import org.opensearch.sql.ast.statement.ExplainMode;
+import org.opensearch.sql.calcite.CalcitePlanContext;
+import org.opensearch.sql.common.response.ResponseListener;
+import org.opensearch.sql.executor.ExecutionContext;
+import org.opensearch.sql.executor.ExecutionEngine;
+import org.opensearch.sql.executor.ExecutionEngine.QueryResponse;
+import org.opensearch.sql.opensearch.setting.OpenSearchSettings;
+import org.opensearch.sql.planner.physical.PhysicalPlan;
+
+@ExtendWith(MockitoExtension.class)
+@MockitoSettings(strictness = Strictness.LENIENT)
+@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class)
+class DistributedExecutionEngineTest {
+
+  @Mock private OpenSearchExecutionEngine legacyEngine;
+  @Mock private OpenSearchSettings settings;
+  @Mock private PhysicalPlan physicalPlan;
+  @Mock private RelNode relNode;
+  @Mock private CalcitePlanContext calciteContext;
+  // Typed listener restores use of the imported QueryResponse (was declared as a raw type).
+  @Mock private ResponseListener<QueryResponse> responseListener;
+  @Mock private ExecutionContext executionContext;
+
+  private DistributedExecutionEngine distributedEngine;
+
+  @BeforeEach
+  void setUp() {
+    distributedEngine = new DistributedExecutionEngine(legacyEngine, settings);
+  }
+
+  @Test
+  void should_use_legacy_engine_when_distributed_execution_disabled() {
+    when(settings.getDistributedExecutionEnabled()).thenReturn(false);
+
+    distributedEngine.execute(physicalPlan, executionContext, responseListener);
+
+    verify(legacyEngine, times(1)).execute(physicalPlan, executionContext, responseListener);
+  }
+
+  @Test
+  void should_throw_when_distributed_enabled_for_physical_plan() {
+    when(settings.getDistributedExecutionEnabled()).thenReturn(true);
+
+    assertThrows(
+        UnsupportedOperationException.class,
+        () -> distributedEngine.execute(physicalPlan, executionContext, responseListener));
+  }
+
+  @Test
+  void should_throw_when_distributed_enabled_for_calcite_relnode() {
+    when(settings.getDistributedExecutionEnabled()).thenReturn(true);
+
+    assertThrows(
+        UnsupportedOperationException.class,
+        () -> distributedEngine.execute(relNode, calciteContext, responseListener));
+  }
+
+  @Test
+  void should_use_legacy_engine_for_calcite_relnode_when_disabled() {
+    when(settings.getDistributedExecutionEnabled()).thenReturn(false);
+
+    distributedEngine.execute(relNode, calciteContext, responseListener);
+
+    verify(legacyEngine, times(1)).execute(relNode, calciteContext, responseListener);
+  }
+
+  @Test
+  void should_delegate_explain_to_legacy_engine() {
+    // NOTE(review): type parameter restored from stripped generics — confirm ExplainResponse
+    // is the nested type in ExecutionEngine (import at top of file is otherwise unused).
+    @SuppressWarnings("unchecked")
+    ResponseListener<ExecutionEngine.ExplainResponse> explainListener =
+        mock(ResponseListener.class);
+
+    distributedEngine.explain(physicalPlan, explainListener);
+
+    verify(legacyEngine, times(1)).explain(physicalPlan, explainListener);
+  }
+
+  @Test
+  void should_delegate_calcite_explain_to_legacy_when_disabled() {
+    @SuppressWarnings("unchecked")
+    ResponseListener<ExecutionEngine.ExplainResponse> explainListener =
+        mock(ResponseListener.class);
+    ExplainMode mode = ExplainMode.STANDARD;
+    when(settings.getDistributedExecutionEnabled()).thenReturn(false);
+
+    distributedEngine.explain(relNode, mode, calciteContext, explainListener);
+
+    verify(legacyEngine, times(1)).explain(relNode, mode, calciteContext, explainListener);
+  }
+
+  @Test
+  void should_throw_for_calcite_explain_when_distributed_enabled() {
+    @SuppressWarnings("unchecked")
+    ResponseListener<ExecutionEngine.ExplainResponse> explainListener =
+        mock(ResponseListener.class);
+    ExplainMode mode = ExplainMode.STANDARD;
+    when(settings.getDistributedExecutionEnabled()).thenReturn(true);
+
+    assertThrows(
+        UnsupportedOperationException.class,
+        () -> distributedEngine.explain(relNode, mode, calciteContext, explainListener));
+  }
+
+  @Test
+  void constructor_should_initialize() {
+    DistributedExecutionEngine engine = new DistributedExecutionEngine(legacyEngine, settings);
+    assertNotNull(engine);
+  }
+}
diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/distributed/TransportExecuteDistributedTaskActionTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/distributed/TransportExecuteDistributedTaskActionTest.java
new file mode 100644
index 00000000000..b66d6e67d5f
--- /dev/null
+++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/distributed/TransportExecuteDistributedTaskActionTest.java
@@ -0,0 +1,119 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.executor.distributed;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.List;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.DisplayNameGenerator;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.mockito.junit.jupiter.MockitoSettings;
+import org.mockito.quality.Strictness;
+import org.opensearch.action.support.ActionFilters;
+import org.opensearch.cluster.node.DiscoveryNode;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.indices.IndicesService;
+import org.opensearch.tasks.Task;
+import org.opensearch.transport.TransportService;
+import org.opensearch.transport.client.Client;
+
+@ExtendWith(MockitoExtension.class)
+@MockitoSettings(strictness = Strictness.LENIENT)
+@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class)
+class TransportExecuteDistributedTaskActionTest {
+
+  @Mock private TransportService transportService;
+  @Mock private ClusterService clusterService;
+  @Mock private ActionFilters actionFilters;
+  @Mock private Client client;
+  @Mock private IndicesService indicesService;
+  // Removed unused @Mock Task field: no test referenced it, and LENIENT strictness hid it.
+
+  private TransportExecuteDistributedTaskAction action;
+
+  @BeforeEach
+  void setUp() {
+    action =
+        new TransportExecuteDistributedTaskAction(
+            transportService, actionFilters, clusterService, client, indicesService);
+
+    // Stub the local node so code paths that read the node ID have a deterministic value.
+    DiscoveryNode localNode = mock(DiscoveryNode.class);
+    when(localNode.getId()).thenReturn("test-node-1");
+    when(clusterService.localNode()).thenReturn(localNode);
+  }
+
+  @Test
+  void action_name_should_be_defined() {
+    assertEquals(
+        "cluster:admin/opensearch/sql/distributed/execute",
+        TransportExecuteDistributedTaskAction.NAME);
+  }
+
+  @Test
+  void should_validate_operator_pipeline_request() {
+    // Given: Valid operator pipeline request
+    ExecuteDistributedTaskRequest request = new ExecuteDistributedTaskRequest();
+    request.setExecutionMode("OPERATOR_PIPELINE");
+    request.setIndexName("test-index");
+    request.setShardIds(List.of(0, 1));
+    request.setFieldNames(List.of("field1", "field2"));
+    request.setQueryLimit(100);
+    request.setStageId("operator-pipeline");
+
+    // Then
+    assertTrue(request.isValid());
+    assertNotNull(request.toString());
+  }
+
+  @Test
+  void should_reject_invalid_request_missing_index() {
+    // Given: Request without index name
+    ExecuteDistributedTaskRequest request = new ExecuteDistributedTaskRequest();
+    request.setExecutionMode("OPERATOR_PIPELINE");
+    request.setShardIds(List.of(0, 1));
+    request.setFieldNames(List.of("field1"));
+    request.setQueryLimit(100);
+
+    // Then: validate() reports the missing field as a non-null validation error
+    assertNotNull(request.validate());
+  }
+
+  @Test
+  void should_reject_invalid_request_missing_shards() {
+    // Given: Request without shard IDs
+    ExecuteDistributedTaskRequest request = new ExecuteDistributedTaskRequest();
+    request.setExecutionMode("OPERATOR_PIPELINE");
+    request.setIndexName("test-index");
+    request.setFieldNames(List.of("field1"));
+    request.setQueryLimit(100);
+
+    // Then
+    assertNotNull(request.validate());
+  }
+
+  @Test
+  void should_reject_invalid_request_missing_fields() {
+    // Given: Request without field names
+    ExecuteDistributedTaskRequest request = new ExecuteDistributedTaskRequest();
+    request.setExecutionMode("OPERATOR_PIPELINE");
+    request.setIndexName("test-index");
+    request.setShardIds(List.of(0, 1));
+    request.setQueryLimit(100);
+
+    // Then
+    assertNotNull(request.validate());
+  }
+}
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java
index d817e13c69f..54ff9cb146a 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java
@@ -90,10 +90,11 @@
import org.opensearch.sql.legacy.plugin.RestSqlAction;
import org.opensearch.sql.legacy.plugin.RestSqlStatsAction;
import org.opensearch.sql.opensearch.client.OpenSearchNodeClient;
+import org.opensearch.sql.opensearch.executor.distributed.ExecuteDistributedTaskResponse;
+import org.opensearch.sql.opensearch.executor.distributed.TransportExecuteDistributedTaskAction;
import org.opensearch.sql.opensearch.setting.OpenSearchSettings;
import org.opensearch.sql.opensearch.storage.OpenSearchDataSourceFactory;
import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine;
-import org.opensearch.sql.plugin.config.OpenSearchPluginModule;
import org.opensearch.sql.plugin.rest.RestPPLQueryAction;
import org.opensearch.sql.plugin.rest.RestPPLStatsAction;
import org.opensearch.sql.plugin.rest.RestQuerySettingsAction;
@@ -225,7 +226,11 @@ public List<RestHandler> getRestHandlers(
new ActionType<>(
TransportWriteDirectQueryResourcesRequestAction.NAME,
WriteDirectQueryResourcesActionResponse::new),
- TransportWriteDirectQueryResourcesRequestAction.class));
+ TransportWriteDirectQueryResourcesRequestAction.class),
+ new ActionHandler<>(
+ new ActionType<>(
+ TransportExecuteDistributedTaskAction.NAME, ExecuteDistributedTaskResponse::new),
+ TransportExecuteDistributedTaskAction.class));
}
@Override
@@ -250,7 +255,7 @@ public Collection<Object> createComponents(
LocalClusterState.state().setPluginSettings((OpenSearchSettings) pluginSettings);
LocalClusterState.state().setClient(client);
ModulesBuilder modules = new ModulesBuilder();
- modules.add(new OpenSearchPluginModule());
+ // Removed OpenSearchPluginModule - SQLPlugin only needs async and direct query services
modules.add(
b -> {
b.bind(NodeClient.class).toInstance((NodeClient) client);
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java
index 8027301073f..9ae5ef567dc 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java
@@ -6,6 +6,7 @@
package org.opensearch.sql.plugin.config;
import lombok.RequiredArgsConstructor;
+import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.inject.AbstractModule;
import org.opensearch.common.inject.Provides;
import org.opensearch.common.inject.Singleton;
@@ -22,12 +23,14 @@
import org.opensearch.sql.monitor.ResourceMonitor;
import org.opensearch.sql.opensearch.client.OpenSearchClient;
import org.opensearch.sql.opensearch.client.OpenSearchNodeClient;
+import org.opensearch.sql.opensearch.executor.DistributedExecutionEngine;
import org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine;
import org.opensearch.sql.opensearch.executor.OpenSearchQueryManager;
import org.opensearch.sql.opensearch.executor.protector.ExecutionProtector;
import org.opensearch.sql.opensearch.executor.protector.OpenSearchExecutionProtector;
import org.opensearch.sql.opensearch.monitor.OpenSearchMemoryHealthy;
import org.opensearch.sql.opensearch.monitor.OpenSearchResourceMonitor;
+import org.opensearch.sql.opensearch.setting.OpenSearchSettings;
import org.opensearch.sql.opensearch.storage.OpenSearchStorageEngine;
import org.opensearch.sql.planner.Planner;
import org.opensearch.sql.planner.optimizer.LogicalPlanOptimizer;
@@ -59,8 +62,17 @@ public StorageEngine storageEngine(OpenSearchClient client, Settings settings) {
@Provides
public ExecutionEngine executionEngine(
- OpenSearchClient client, ExecutionProtector protector, PlanSerializer planSerializer) {
- return new OpenSearchExecutionEngine(client, protector, planSerializer);
+ OpenSearchClient client,
+ ExecutionProtector protector,
+ PlanSerializer planSerializer,
+ ClusterService clusterService) {
+ OpenSearchExecutionEngine legacyEngine =
+ new OpenSearchExecutionEngine(client, protector, planSerializer);
+
+ OpenSearchSettings openSearchSettings =
+ new OpenSearchSettings(clusterService.getClusterSettings());
+
+ return new DistributedExecutionEngine(legacyEngine, openSearchSettings);
}
@Provides
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java
index 27bfe2084f7..5dbb840b07d 100644
--- a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java
+++ b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java
@@ -75,6 +75,8 @@ public TransportPPLQueryAction(
b.bind(org.opensearch.sql.common.setting.Settings.class)
.toInstance(new OpenSearchSettings(clusterService.getClusterSettings()));
b.bind(DataSourceService.class).toInstance(dataSourceService);
+ b.bind(ClusterService.class).toInstance(clusterService);
+ b.bind(TransportService.class).toInstance(transportService);
});
this.injector = Guice.createInjector(modules);
this.pplEnabled =