Move multi-step training into TrainingConfig with per-step IS correction #39
```diff
@@ -81,13 +81,15 @@ async def _submit_feedback(
             adv_abs_mean_raw=metadata["adv_abs_mean_raw"],
             completion_len=metadata["completion_len"],
             batch_size=metadata["batch_size"],
+            steps_per_batch_applied=metadata["steps_per_batch_applied"],
         )

     return LocalDistillMetrics(
         distill_loss=metadata.get("distill_loss"),
         kl_reg=metadata.get("kl_reg"),
         mean_is_ratio=metadata.get("mean_is_ratio"),
         clip_fraction=metadata.get("clip_fraction"),
+        steps_per_batch_applied=metadata["steps_per_batch_applied"],
     )
```
Comment on lines +84 to 93

Lines 84 and 92 use hard `metadata["steps_per_batch_applied"]` indexing, which raises `KeyError` if the key is missing. This is especially inconsistent in the local branch (lines 88–92), where every other field uses `.get()`.

🛡️ Proposed fix — use `.get()` with default 1:

```diff
             batch_size=metadata["batch_size"],
-            steps_per_batch_applied=metadata["steps_per_batch_applied"],
+            steps_per_batch_applied=metadata.get("steps_per_batch_applied", 1),
         )

     return LocalDistillMetrics(
         distill_loss=metadata.get("distill_loss"),
         kl_reg=metadata.get("kl_reg"),
         mean_is_ratio=metadata.get("mean_is_ratio"),
         clip_fraction=metadata.get("clip_fraction"),
-        steps_per_batch_applied=metadata["steps_per_batch_applied"],
+        steps_per_batch_applied=metadata.get("steps_per_batch_applied", 1),
     )
```
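For context, a minimal illustration of the failure mode the comment describes (the dict literal is hypothetical, standing in for a response from an engine that predates the new field):

```python
# Hypothetical metadata payload without the new key.
metadata = {"batch_size": 8, "distill_loss": 0.42}

# Hard indexing raises KeyError and would surface as a failed request:
# metadata["steps_per_batch_applied"]  -> KeyError

# .get() with a default of 1 degrades gracefully instead:
steps_applied = metadata.get("steps_per_batch_applied", 1)
assert steps_applied == 1
```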
```diff
@@ -190,6 +192,7 @@ def _load_completed_steps(output_dir: str, preference: str) -> list[StepResult]:
             prompt_used=data["prompt_used"],
             response_text=data.get("response_text"),
             timing_s=data.get("timing_s", 0.0),
+            sub_step_count=data.get("sub_step_count", 1),
         ))
     return steps
```
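As a quick illustration of the backward compatibility the `.get()` default buys here (the record below is hypothetical): step files written before this PR lack `sub_step_count`, and resume treats them as single-sub-step runs rather than raising:

```python
import json

# Hypothetical step record from a pre-PR run (no sub_step_count key).
old_record = json.loads('{"prompt_used": "p1", "timing_s": 2.5}')

# Resume logic falls back to 1 sub-step instead of raising KeyError.
assert old_record.get("sub_step_count", 1) == 1
```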
```diff
@@ -362,8 +365,8 @@ async def run_preference_experiment(
     for step in range(resume_from, config.num_steps):
         step_start = time.perf_counter()

-        # Determine feedback string
-        feedback_str = " ".join([pref.feedback_string] * config.feedback_repetitions)
+        # Feedback repetition is a training concern configured via TrainingConfig.
+        feedback_str = pref.feedback_string

         # Collect samples for this step (batch_size >= 1)
         samples: list[FeedbackItem] = []
```
```diff
@@ -398,29 +401,21 @@
         if response_text is None:
             response_text = "I'd be happy to help you with that."

-        # Submit feedback — possibly multiple gradient steps on same batch
+        # Submit feedback for this step. Training engine applies steps_per_batch.
         sdpo_metrics = None
-        sub_steps_completed = 0
         if samples:
-            for sub_step in range(config.steps_per_batch):
-                try:
-                    sdpo_metrics = await _submit_feedback(
-                        config, actual_lora_id, samples,
-                    )
-                    sub_steps_completed += 1
-                except (httpx.HTTPError, KeyError) as e:
-                    logger.warning(
-                        "[%s] Step %d sub-step %d feedback failed: %s",
-                        pref.name, step, sub_step, e,
-                    )
-                    break
-
-            if config.steps_per_batch > 1:
-                logger.info(
-                    "[%s] Step %d: %d sub-steps completed",
-                    pref.name, step, sub_steps_completed,
-                )
+            try:
+                sdpo_metrics = await _submit_feedback(
+                    config, actual_lora_id, samples,
+                )
+            except (httpx.HTTPError, KeyError) as e:
+                logger.warning(
+                    "[%s] Step %d feedback failed: %s",
+                    pref.name, step, e,
+                )
+
+        sub_step_count = sdpo_metrics.steps_per_batch_applied if sdpo_metrics else 1

         # Measure eval
         try:
             eval_metrics = await _measure_eval_metrics(
```
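For intuition only: the PR title says the training engine now applies a per-step importance-sampling (IS) correction when it runs `steps_per_batch` gradient steps on one batch. Nothing below comes from the repository — the function, its signature, the surrogate loss, and the tensor shapes are all assumptions — but a sketch of that pattern, with metric names mirroring `mean_is_ratio`, `clip_fraction`, and `steps_per_batch_applied` from the diffs above, might look like:

```python
import torch
import torch.nn.functional as F

def apply_feedback_batch(model, optimizer, input_ids, target_ids,
                         behavior_logprobs, steps_per_batch=4, clip=0.2):
    """Take several gradient steps on one feedback batch, correcting each
    step with a clipped IS ratio against the policy that sampled the batch."""
    metrics = {"steps_per_batch_applied": 0}
    for sub_step in range(steps_per_batch):
        logits = model(input_ids)                         # [B, T, V]
        logprobs = F.log_softmax(logits, dim=-1)
        token_logprobs = logprobs.gather(
            -1, target_ids.unsqueeze(-1)).squeeze(-1)     # [B, T]

        # Per-step IS ratio: the policy drifts after each update, so the
        # ratio is recomputed every sub-step, not once per batch.
        ratio = torch.exp(token_logprobs.detach() - behavior_logprobs)
        clipped = torch.clamp(ratio, 1.0 - clip, 1.0 + clip)

        loss = -(clipped * token_logprobs).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        metrics = {
            "mean_is_ratio": ratio.mean().item(),
            "clip_fraction": (ratio != clipped).float().mean().item(),
            "steps_per_batch_applied": sub_step + 1,
        }
    return metrics
```

Recomputing the ratio at every sub-step is what distinguishes this from naively replaying the same gradient step on a stale batch.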
```diff
@@ -447,7 +442,7 @@ async def run_preference_experiment(
             ],
             response_text=response_text if needs_generation else None,
             timing_s=timing_s,
-            sub_step_count=sub_steps_completed if sub_steps_completed > 0 else 1,
+            sub_step_count=sub_step_count,
         )

         result.steps.append(step_result)
```
`steps_per_batch` in `TrainingConfig`

The newly added `steps_per_batch` field has no lower-bound validation, but both multi-step trainers now assume at least one iteration and unconditionally read `step_metrics[-1]` (`claas/training/distillation.py` and `claas/training/engine/tinker/engine.py`). Sending `training.steps_per_batch=0` is therefore accepted and then crashes `/v1/feedback` with a server error instead of failing cleanly with a 4xx validation error; this can break eval runs by turning every feedback update into a failed request.
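A minimal sketch of the fix this comment implies, assuming `TrainingConfig` is a Pydantic model (consistent with the 4xx-validation framing; the field name comes from the diff, everything else here is an assumption):

```python
from pydantic import BaseModel, Field

class TrainingConfig(BaseModel):
    # ge=1 makes steps_per_batch=0 fail request validation (a clean 4xx/422)
    # before either trainer ever reaches step_metrics[-1].
    steps_per_batch: int = Field(default=1, ge=1)

# TrainingConfig(steps_per_batch=0) now raises a ValidationError up front.
```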