Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions claas/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ class TrainingConfig:
max_grad_norm: float = 1.0
kl_reg_weight: float = 0.0
teacher_top_k: int = 100
steps_per_batch: int = 4

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Enforce positive steps_per_batch in TrainingConfig

The newly added steps_per_batch field has no lower-bound validation, but both multi-step trainers now assume at least one iteration and unconditionally read step_metrics[-1] (claas/training/distillation.py and claas/training/engine/tinker/engine.py). As a result, sending training.steps_per_batch=0 is currently accepted and then crashes /v1/feedback with a server error instead of failing with a clean 4xx validation error. This can break eval runs by turning every feedback update into a failed request.

Useful? React with 👍 / 👎.

feedback_repetitions: int = 1


class SDPOLossInput(BaseModel):
Expand Down
19 changes: 10 additions & 9 deletions claas/eval/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,23 @@ metrics: # metrics to evaluate per step

num_steps: 20
batch_size: 4
steps_per_batch: 4 # gradient updates per batch
feedback_repetitions: 1 # times to repeat feedback string
training: # forwarded to /v1/feedback training config
learning_rate: 3e-5
alpha: 0.5
is_clip: 5.0
max_grad_norm: 1.0
kl_reg_weight: 0.0
teacher_top_k: 100
collapse_steps: [0, 5, 10, 15, 19] # steps where collapse metric runs
plots: true # generate matplotlib plots
seed: 42
lora_id_prefix: eval
output_dir: ./data/evals/${now:%Y%m%d-%H%M%SZ}

openclaw_url: http://localhost:18789 # OpenClaw gateway (null = use CLaaS API directly)

training: # forwarded to /v1/feedback TrainingConfig
learning_rate: 3e-5
alpha: 0.5
is_clip: 5.0
max_grad_norm: 1.0
kl_reg_weight: 0.0
teacher_top_k: 100
steps_per_batch: 4 # gradient updates per batch
feedback_repetitions: 1 # times to repeat feedback string
```

### Overriding config via CLI
Expand Down
15 changes: 8 additions & 7 deletions claas/eval/configs/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,18 @@ plots: true

num_steps: 20
batch_size: 4
steps_per_batch: 4
feedback_repetitions: 1
seed: 42
lora_id_prefix: eval
output_dir: ./data/evals/${now:%Y%m%d-%H%M%SZ}

openclaw_url: http://localhost:18789

training:
learning_rate: 3e-5
alpha: 0.5
is_clip: 5.0
max_grad_norm: 1.0
kl_reg_weight: 0.0
teacher_top_k: 100
seed: 42
lora_id_prefix: eval
output_dir: ./data/evals/${now:%Y%m%d-%H%M%SZ}

openclaw_url: http://localhost:18789
steps_per_batch: 4
feedback_repetitions: 1
39 changes: 17 additions & 22 deletions claas/eval/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,15 @@ async def _submit_feedback(
adv_abs_mean_raw=metadata["adv_abs_mean_raw"],
completion_len=metadata["completion_len"],
batch_size=metadata["batch_size"],
steps_per_batch_applied=metadata["steps_per_batch_applied"],
)

return LocalDistillMetrics(
distill_loss=metadata.get("distill_loss"),
kl_reg=metadata.get("kl_reg"),
mean_is_ratio=metadata.get("mean_is_ratio"),
clip_fraction=metadata.get("clip_fraction"),
steps_per_batch_applied=metadata["steps_per_batch_applied"],
)
Comment on lines +84 to 93
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Use .get() with a default for steps_per_batch_applied to match resilience of other fields.

Lines 84 and 92 use hard metadata["steps_per_batch_applied"] access. If an older or third-party engine omits this new field, both branches raise KeyError. While the caller catches KeyError (line 409), that discards the entire metrics object — losing distill_loss, kl_mean, etc. for the step.

This is especially inconsistent in the local branch (lines 88–92), where every other field uses .get().

🛡️ Proposed fix — use `.get()` with default 1
             batch_size=metadata["batch_size"],
-            steps_per_batch_applied=metadata["steps_per_batch_applied"],
+            steps_per_batch_applied=metadata.get("steps_per_batch_applied", 1),
         )
 
     return LocalDistillMetrics(
         distill_loss=metadata.get("distill_loss"),
         kl_reg=metadata.get("kl_reg"),
         mean_is_ratio=metadata.get("mean_is_ratio"),
         clip_fraction=metadata.get("clip_fraction"),
-        steps_per_batch_applied=metadata["steps_per_batch_applied"],
+        steps_per_batch_applied=metadata.get("steps_per_batch_applied", 1),
     )
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@claas/eval/runner.py` around lines 84 - 93, The code currently indexes
metadata["steps_per_batch_applied"] in the LocalDistillMetrics return path which
can raise KeyError and drop the entire metrics object; change that access to use
metadata.get("steps_per_batch_applied", 1) so LocalDistillMetrics is constructed
with a default of 1 when the field is absent (keep identical pattern used for
other fields like distill_loss, kl_reg, mean_is_ratio, clip_fraction) to make
the metrics construction resilient.



Expand Down Expand Up @@ -190,6 +192,7 @@ def _load_completed_steps(output_dir: str, preference: str) -> list[StepResult]:
prompt_used=data["prompt_used"],
response_text=data.get("response_text"),
timing_s=data.get("timing_s", 0.0),
sub_step_count=data.get("sub_step_count", 1),
))
return steps

Expand Down Expand Up @@ -362,8 +365,8 @@ async def run_preference_experiment(
for step in range(resume_from, config.num_steps):
step_start = time.perf_counter()

# Determine feedback string
feedback_str = " ".join([pref.feedback_string] * config.feedback_repetitions)
# Feedback repetition is a training concern configured via TrainingConfig.
feedback_str = pref.feedback_string

# Collect samples for this step (batch_size >= 1)
samples: list[FeedbackItem] = []
Expand Down Expand Up @@ -398,29 +401,21 @@ async def run_preference_experiment(
if response_text is None:
response_text = "I'd be happy to help you with that."

# Submit feedback — possibly multiple gradient steps on same batch
# Submit feedback for this step. Training engine applies steps_per_batch.
sdpo_metrics = None
sub_steps_completed = 0
if samples:
for sub_step in range(config.steps_per_batch):
try:
sdpo_metrics = await _submit_feedback(
config, actual_lora_id, samples,
)
sub_steps_completed += 1
except (httpx.HTTPError, KeyError) as e:
logger.warning(
"[%s] Step %d sub-step %d feedback failed: %s",
pref.name, step, sub_step, e,
)
break

if config.steps_per_batch > 1:
logger.info(
"[%s] Step %d: %d sub-steps completed",
pref.name, step, sub_steps_completed,
try:
sdpo_metrics = await _submit_feedback(
config, actual_lora_id, samples,
)
except (httpx.HTTPError, KeyError) as e:
logger.warning(
"[%s] Step %d feedback failed: %s",
pref.name, step, e,
)

sub_step_count = sdpo_metrics.steps_per_batch_applied if sdpo_metrics else 1

# Measure eval
try:
eval_metrics = await _measure_eval_metrics(
Expand All @@ -447,7 +442,7 @@ async def run_preference_experiment(
],
response_text=response_text if needs_generation else None,
timing_s=timing_s,
sub_step_count=sub_steps_completed if sub_steps_completed > 0 else 1,
sub_step_count=sub_step_count,
)

result.steps.append(step_result)
Expand Down
4 changes: 2 additions & 2 deletions claas/eval/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,6 @@ class EvalConfig:
openclaw_url: Optional[str] = None
base_model: str = "Qwen/Qwen3-8B"
batch_size: int = 4
steps_per_batch: int = 4
feedback_repetitions: int = 1
training: TrainingConfig = field(default_factory=TrainingConfig)


Expand All @@ -117,6 +115,7 @@ class LocalDistillMetrics:
kl_reg: float | None
mean_is_ratio: float | None
clip_fraction: float | None
steps_per_batch_applied: int = 1


@dataclass
Expand All @@ -131,6 +130,7 @@ class TinkerDistillMetrics:
adv_abs_mean_raw: float
completion_len: int = 0
batch_size: int = 0
steps_per_batch_applied: int = 1


@dataclass
Expand Down
Loading
Loading