@@ -86,10 +86,18 @@ impl ArrayRef {
8686 /// Iteratively execute this array until the [`Matcher`] matches, using an explicit work
8787 /// stack.
8888 ///
89- /// The scheduler repeatedly:
90- /// 1. Checks if the current array matches `M` — if so, pops the stack or returns.
91- /// 2. Runs `execute_parent` on each child for child-driven optimizations.
92- /// 3. Calls `execute` which returns an [`ExecutionStep`].
89+ /// Each iteration proceeds through three steps in order:
90+ ///
91+ /// 1. **Done / canonical check** — if `current` satisfies the active done predicate or is
92+ /// canonical, splice it back into the stacked parent (if any) and continue, or return.
93+ /// 2. **`execute_parent` on children** — try each child's `execute_parent` against `current`
94+ /// as the parent (e.g. `Filter(RunEnd)` → `FilterExecuteAdaptor` fires from RunEnd).
95+ /// If there is a stacked parent frame, the rewritten child is spliced back into it so
96+ /// that optimize and further `execute_parent` can fire on the reconstructed parent
97+ /// (e.g. `Slice(RunEnd)` → `RunEnd` spliced into stacked `Filter` → `Filter(RunEnd)`
98+ /// whose `FilterExecuteAdaptor` fires on the next iteration).
99+ /// 3. **`execute`** — call the encoding's own execute step, which either returns `Done` or
100+ /// `ExecuteSlot(i)` to push a child onto the stack for focused execution.
93101 ///
94102 /// Note: the returned array may not match `M`. If execution converges to a canonical form
95103 /// that does not match `M`, the canonical array is returned since no further execution
@@ -103,51 +111,41 @@ impl ArrayRef {
103111 let mut stack: Vec < ( ArrayRef , usize , DonePredicate ) > = Vec :: new ( ) ;
104112
105113 for _ in 0 ..max_iterations ( ) {
106- // Check for termination: use the stack frame's done predicate, or the root matcher .
114+ // Step 1: done / canonical — splice back into stacked parent or return .
107115 let is_done = stack
108116 . last ( )
109117 . map_or ( M :: matches as DonePredicate , |frame| frame. 2 ) ;
110- if is_done ( & current) {
118+ if is_done ( & current) || AnyCanonical :: matches ( & current ) {
111119 match stack. pop ( ) {
112120 None => {
113121 ctx. log ( format_args ! ( "-> {}" , current) ) ;
114122 return Ok ( current) ;
115123 }
116124 Some ( ( parent, slot_idx, _) ) => {
117- current = parent. with_slot ( slot_idx, current) ?;
118- current = current. optimize ( ) ?;
125+ current = parent. with_slot ( slot_idx, current) ?. optimize ( ) ?;
119126 continue ;
120127 }
121128 }
122129 }
123130
124- // If we've reached canonical form, we can't execute any further regardless
125- // of whether the matcher matched.
126- if AnyCanonical :: matches ( & current) {
127- match stack. pop ( ) {
128- None => {
129- ctx. log ( format_args ! ( "-> canonical (unmatched) {}" , current) ) ;
130- return Ok ( current) ;
131- }
132- Some ( ( parent, slot_idx, _) ) => {
133- current = parent. with_slot ( slot_idx, current) ?;
134- current = current. optimize ( ) ?;
135- continue ;
136- }
137- }
138- }
139-
140- // Try execute_parent (child-driven optimized execution)
131+ // Step 2: execute_parent on children (current is the parent).
132+ // If there is a stacked parent frame, splice the rewritten child back into it
133+ // so that optimize and execute_parent can fire naturally on the reconstructed parent
134+ // (e.g. Slice(RunEnd) -RunEndSliceKernel-> RunEnd, spliced back into Filter gives
135+ // Filter(RunEnd), whose FilterExecuteAdaptor fires on the next iteration).
141136 if let Some ( rewritten) = try_execute_parent ( & current, ctx) ? {
142137 ctx. log ( format_args ! (
143138 "execute_parent rewrote {} -> {}" ,
144139 current, rewritten
145140 ) ) ;
146141 current = rewritten. optimize ( ) ?;
142+ if let Some ( ( parent, slot_idx, _) ) = stack. pop ( ) {
143+ current = parent. with_slot ( slot_idx, current) ?. optimize ( ) ?;
144+ }
147145 continue ;
148146 }
149147
150- // Execute the array itself .
148+ // Step 3: execute the encoding's own step .
151149 let result = execute_step ( current, ctx) ?;
152150 let ( array, step) = result. into_parts ( ) ;
153151 match step {
@@ -177,9 +175,6 @@ impl ArrayRef {
177175}
178176
179177/// Execution context for batch CPU compute.
180- ///
181- /// Accumulates a trace of execution steps. Individual steps are logged at TRACE level for
182- /// real-time following, and the full trace is dumped at DEBUG level when the context is dropped.
183178#[ derive( Debug , Clone ) ]
184179pub struct ExecutionCtx {
185180 id : usize ,
@@ -193,8 +188,8 @@ impl ExecutionCtx {
193188 static EXEC_CTX_ID : AtomicUsize = AtomicUsize :: new ( 0 ) ;
194189 let id = EXEC_CTX_ID . fetch_add ( 1 , std:: sync:: atomic:: Ordering :: Relaxed ) ;
195190 Self {
196- id,
197191 session,
192+ id,
198193 ops : Vec :: new ( ) ,
199194 }
200195 }
0 commit comments