Move multi-step training into TrainingConfig with per-step IS correction #39
```diff
@@ -81,13 +81,15 @@ async def _submit_feedback(
             adv_abs_mean_raw=metadata["adv_abs_mean_raw"],
             completion_len=metadata["completion_len"],
             batch_size=metadata["batch_size"],
+            steps_per_batch_applied=metadata["steps_per_batch_applied"],
         )

     return LocalDistillMetrics(
         distill_loss=metadata.get("distill_loss"),
         kl_reg=metadata.get("kl_reg"),
         mean_is_ratio=metadata.get("mean_is_ratio"),
         clip_fraction=metadata.get("clip_fraction"),
+        steps_per_batch_applied=metadata["steps_per_batch_applied"],
     )
```
Comment on lines +84 to 93

Lines 84 and 92 use hard `metadata["steps_per_batch_applied"]` indexing, which raises `KeyError` if the key is missing. This is especially inconsistent in the local branch (lines 88–92), where every other field uses `.get()`.

🛡️ Proposed fix — use `.get()` with default 1:

```diff
             batch_size=metadata["batch_size"],
-            steps_per_batch_applied=metadata["steps_per_batch_applied"],
+            steps_per_batch_applied=metadata.get("steps_per_batch_applied", 1),
         )

     return LocalDistillMetrics(
         distill_loss=metadata.get("distill_loss"),
         kl_reg=metadata.get("kl_reg"),
         mean_is_ratio=metadata.get("mean_is_ratio"),
         clip_fraction=metadata.get("clip_fraction"),
-        steps_per_batch_applied=metadata["steps_per_batch_applied"],
+        steps_per_batch_applied=metadata.get("steps_per_batch_applied", 1),
     )
```
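For context, a minimal illustration of the failure mode the comment describes (the dict literal is hypothetical, standing in for a response from an engine that predates the new field):

```python
# Hypothetical metadata payload without the new key.
metadata = {"batch_size": 8, "distill_loss": 0.42}

# Hard indexing raises KeyError and would surface as a failed request:
# metadata["steps_per_batch_applied"]  -> KeyError

# .get() with a default of 1 degrades gracefully instead:
steps_applied = metadata.get("steps_per_batch_applied", 1)
assert steps_applied == 1
```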
```diff
@@ -190,6 +192,7 @@ def _load_completed_steps(output_dir: str, preference: str) -> list[StepResult]:
             prompt_used=data["prompt_used"],
             response_text=data.get("response_text"),
             timing_s=data.get("timing_s", 0.0),
+            sub_step_count=data.get("sub_step_count", 1),
         ))
     return steps
```
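As a quick illustration of the backward compatibility the `.get()` default buys here (the record below is hypothetical): step files written before this PR lack `sub_step_count`, and resume treats them as single-sub-step runs rather than raising:

```python
import json

# Hypothetical step record from a pre-PR run (no sub_step_count key).
old_record = json.loads('{"prompt_used": "p1", "timing_s": 2.5}')

# Resume logic falls back to 1 sub-step instead of raising KeyError.
assert old_record.get("sub_step_count", 1) == 1
```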
```diff
@@ -362,8 +365,8 @@ async def run_preference_experiment(
     for step in range(resume_from, config.num_steps):
         step_start = time.perf_counter()

-        # Determine feedback string
-        feedback_str = " ".join([pref.feedback_string] * config.feedback_repetitions)
+        # Feedback repetition is a training concern configured via TrainingConfig.
+        feedback_str = pref.feedback_string

         # Collect samples for this step (batch_size >= 1)
         samples: list[FeedbackItem] = []
```
```diff
@@ -398,29 +401,21 @@
         if response_text is None:
             response_text = "I'd be happy to help you with that."

-        # Submit feedback — possibly multiple gradient steps on same batch
+        # Submit feedback for this step. Training engine applies steps_per_batch.
         sdpo_metrics = None
-        sub_steps_completed = 0
         if samples:
-            for sub_step in range(config.steps_per_batch):
-                try:
-                    sdpo_metrics = await _submit_feedback(
-                        config, actual_lora_id, samples,
-                    )
-                    sub_steps_completed += 1
-                except (httpx.HTTPError, KeyError) as e:
-                    logger.warning(
-                        "[%s] Step %d sub-step %d feedback failed: %s",
-                        pref.name, step, sub_step, e,
-                    )
-                    break
-
-            if config.steps_per_batch > 1:
-                logger.info(
-                    "[%s] Step %d: %d sub-steps completed",
-                    pref.name, step, sub_steps_completed,
-                )
+            try:
+                sdpo_metrics = await _submit_feedback(
+                    config, actual_lora_id, samples,
+                )
+            except (httpx.HTTPError, KeyError) as e:
+                logger.warning(
+                    "[%s] Step %d feedback failed: %s",
+                    pref.name, step, e,
+                )
+
+        sub_step_count = sdpo_metrics.steps_per_batch_applied if sdpo_metrics else 1

         # Measure eval
         try:
             eval_metrics = await _measure_eval_metrics(
```
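For intuition only: the PR title says the training engine now applies a per-step importance-sampling (IS) correction when it runs `steps_per_batch` gradient steps on one batch. Nothing below comes from the repository — the function, its signature, the surrogate loss, and the tensor shapes are all assumptions — but a sketch of that pattern, with metric names mirroring `mean_is_ratio`, `clip_fraction`, and `steps_per_batch_applied` from the diffs above, might look like:

```python
import torch
import torch.nn.functional as F

def apply_feedback_batch(model, optimizer, input_ids, target_ids,
                         behavior_logprobs, steps_per_batch=4, clip=0.2):
    """Take several gradient steps on one feedback batch, correcting each
    step with a clipped IS ratio against the policy that sampled the batch."""
    metrics = {"steps_per_batch_applied": 0}
    for sub_step in range(steps_per_batch):
        logits = model(input_ids)                         # [B, T, V]
        logprobs = F.log_softmax(logits, dim=-1)
        token_logprobs = logprobs.gather(
            -1, target_ids.unsqueeze(-1)).squeeze(-1)     # [B, T]

        # Per-step IS ratio: the policy drifts after each update, so the
        # ratio is recomputed every sub-step, not once per batch.
        ratio = torch.exp(token_logprobs.detach() - behavior_logprobs)
        clipped = torch.clamp(ratio, 1.0 - clip, 1.0 + clip)

        loss = -(clipped * token_logprobs).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        metrics = {
            "mean_is_ratio": ratio.mean().item(),
            "clip_fraction": (ratio != clipped).float().mean().item(),
            "steps_per_batch_applied": sub_step + 1,
        }
    return metrics
```

Recomputing the ratio at every sub-step is what distinguishes this from naively replaying the same gradient step on a stale batch.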
```diff
@@ -447,7 +442,7 @@ async def run_preference_experiment(
             ],
             response_text=response_text if needs_generation else None,
             timing_s=timing_s,
-            sub_step_count=sub_steps_completed if sub_steps_completed > 0 else 1,
+            sub_step_count=sub_step_count,
         )

         result.steps.append(step_result)
```
`steps_per_batch` in `TrainingConfig`

The newly added `steps_per_batch` field has no lower-bound validation, but both multi-step trainers now assume at least one iteration and unconditionally read `step_metrics[-1]` (`claas/training/distillation.py` and `claas/training/engine/tinker/engine.py`). Sending `training.steps_per_batch=0` is therefore accepted and then crashes `/v1/feedback` with a server error instead of failing cleanly with a 4xx validation error; this can break eval runs by turning every feedback update into a failed request.
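A minimal sketch of the fix this comment implies, assuming `TrainingConfig` is a Pydantic model (consistent with the 4xx-validation framing; the field name comes from the diff, everything else here is an assumption):

```python
from pydantic import BaseModel, Field

class TrainingConfig(BaseModel):
    # ge=1 makes steps_per_batch=0 fail request validation (a clean 4xx/422)
    # before either trainer ever reaches step_metrics[-1].
    steps_per_batch: int = Field(default=1, ge=1)

# TrainingConfig(steps_per_batch=0) now raises a ValidationError up front.
```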