diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index 3ff8c3f..f9244b5 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -10,7 +10,7 @@ on: - "docker/finetuning/rocm-silogen-finetuning-worker.Dockerfile" - "requirements.txt" - ".github/workflows/package.yml" - - "tests/" + - "tests/**" jobs: common: diff --git a/src/finetuning/config/data.py b/src/finetuning/config/data.py index 8d9f1bd..dfa3b96 100644 --- a/src/finetuning/config/data.py +++ b/src/finetuning/config/data.py @@ -68,11 +68,14 @@ class ConcatenationDataInput(DataInput): The datasets themselves need to be in the finetuning supported JSONL formats. For SFT this means lines: - {"messages": {"content": "string", "role": "string"}} + {"messages": [{"content": "string", "role": "string"}]} For DPO this means lines of: - - {"prompt_messages": {"content": "string", "role": "string"}, "chosen_messages": {"content": "string", "role": "string"}, "rejected_messages": {"content": "string", "role": "string"}} + { + "prompt_messages": [{"content": "string", "role": "string"}], + "chosen_messages": [{"content": "string", "role": "string"}], + "rejected_messages": [{"content": "string", "role": "string"}] + } """ type: Literal[DataInputType.CONCATENATION] @@ -89,11 +92,14 @@ class WeightedMixDataInput(DataInput): The datasets themselves need to be in the finetuning supported JSONL formats. For SFT this means lines: - {"messages": {"content": "string", "role": "string"}} + {"messages": [{"content": "string", "role": "string"}]} For DPO this means lines of: - - {"prompt_messages": {"content": "string", "role": "string"}, "chosen_messages": {"content": "string", "role": "string"}, "rejected_messages": {"content": "string", "role": "string"}} + { + "prompt_messages": [{"content": "string", "role": "string"}], + "chosen_messages": [{"content": "string", "role": "string"}], + "rejected_messages": [{"content": "string", "role": "string"}] + } """ type: Literal[DataInputType.PRECOMPUTE_WEIGHTED_MIX] diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 1594439..37ea0eb 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -218,7 +218,7 @@ def test_sft_gpt(tmpdir): assert trainer_state["log_history"][-1]["loss"] < 4.0 -def test_sft_mistral_lora(tmpdir): +def test_sft_gemma2_lora(tmpdir): # 0. Config definition: config = """\ data_conf: @@ -267,11 +267,11 @@ def test_sft_mistral_lora(tmpdir): - v_proj - o_proj run_conf: - model: hf-internal-testing/tiny-random-MistralForCausalLM + model: hf-internal-testing/tiny-random-Gemma2ForCausalLM model_args: attn_implementation: "eager" use_cache: False - revision: 7517176934d50d00707900b92ef6a95c782e51a2 + revision: de7c11b6c25d26ddd1bf4324fcf479b61d18e440 resume_from_checkpoint: False """ num_steps = 15 @@ -295,10 +295,10 @@ def test_sft_mistral_lora(tmpdir): # 3. Inspect results! with open(ckpt_dir / f"checkpoint-{num_steps}" / "trainer_state.json") as fi: trainer_state = json.loads(fi.read()) - # At the start, loss should still be around -log(1/32002)~10.3, (Even the tiny Mistral has 32002 units after Chat-ML) - assert np.isclose(trainer_state["log_history"][0]["loss"], 10.3, atol=0.5) - # In current setup, loss goes at least below 2.5 by 15 steps - assert trainer_state["log_history"][-1]["loss"] < 2.5 + # At the start, loss should still be around -log(1/256000)~12.4529327234617, (Even the tiny Gemma2 has 256000 units after Chat-ML) + assert np.isclose(trainer_state["log_history"][0]["loss"], 12.45, atol=0.5) + # In current setup, loss goes at least below 5.0 by 15 steps + assert trainer_state["log_history"][-1]["loss"] < 5.0 # LoRA setup should lead to adapter checkpoint assert (ckpt_dir / "checkpoint-final" / "adapter_model.safetensors").isfile() # And since the basemodel vocab was modified, there is a new basemodel checkpoint too @@ -310,7 +310,7 @@ def intercept_tokenizer_call(*args, overwrite_chat_template=False, **kwargs): @patch("finetuning.dpo.subsetup_tokenizer", intercept_tokenizer_call) -def test_dpo_mistral(tmpdir): +def test_dpo_gemma2(tmpdir): # 0. DPO Config definition: dpo_config = """\ data_conf: @@ -350,11 +350,11 @@ def test_dpo_mistral(tmpdir): peft_conf: peft_type: "NO_PEFT" run_conf: - model: hf-internal-testing/tiny-random-MistralForCausalLM + model: hf-internal-testing/tiny-random-Gemma2ForCausalLM model_args: attn_implementation: "eager" use_cache: False - revision: 7517176934d50d00707900b92ef6a95c782e51a2 + revision: de7c11b6c25d26ddd1bf4324fcf479b61d18e440 resume_from_checkpoint: False """ num_steps = 15 @@ -383,12 +383,12 @@ def test_dpo_mistral(tmpdir): with open(dpo_ckpt_dir / f"checkpoint-{num_steps}" / "trainer_state.json") as fi: trainer_state = json.loads(fi.read()) # The margins start close to zero - assert np.isclose(trainer_state["log_history"][0]["rewards/margins"], 0.0, atol=0.1) - # But they rise to >0.10 - assert trainer_state["log_history"][-1]["rewards/margins"] > 0.10 + assert np.isclose(trainer_state["log_history"][0]["rewards/margins"], 0.0, atol=0.03) + # But they rise to >0.07 + assert trainer_state["log_history"][-1]["rewards/margins"] > 0.07 -def test_sft_and_dpo_mistral(tmpdir): +def test_sft_and_dpo_gemma2(tmpdir): # 0. Config definition: sft_config = """\ data_conf: @@ -427,11 +427,11 @@ def test_sft_and_dpo_mistral(tmpdir): peft_conf: peft_type: "NO_PEFT" run_conf: - model: hf-internal-testing/tiny-random-MistralForCausalLM + model: hf-internal-testing/tiny-random-Gemma2ForCausalLM model_args: attn_implementation: "eager" use_cache: False - revision: 7517176934d50d00707900b92ef6a95c782e51a2 + revision: de7c11b6c25d26ddd1bf4324fcf479b61d18e440 resume_from_checkpoint: False """ num_steps = 15 @@ -455,8 +455,8 @@ def test_sft_and_dpo_mistral(tmpdir): # # 3. Inspect SFT results! with open(sft_ckpt_dir / f"checkpoint-{num_steps}" / "trainer_state.json") as fi: trainer_state = json.loads(fi.read()) - # At the start, loss should still be around -log(1/32002)~10.3, (Even the tiny Mistral has 32002 units after Chat-ML) - assert np.isclose(trainer_state["log_history"][0]["loss"], 10.3, atol=0.5) + # At the start, loss should still be around -log(1/256000)~12.4529327234617, (Even the tiny Gemma2 has 256000 units after Chat-ML) + assert np.isclose(trainer_state["log_history"][0]["loss"], 12.45, atol=0.5) # In current setup, loss goes at least below 4.5 by 15 steps assert trainer_state["log_history"][-1]["loss"] < 4.5 @@ -534,12 +534,12 @@ def test_sft_and_dpo_mistral(tmpdir): with open(dpo_ckpt_dir / f"checkpoint-{num_steps}" / "trainer_state.json") as fi: trainer_state = json.loads(fi.read()) # The margins start close to zero - assert np.isclose(trainer_state["log_history"][0]["rewards/margins"], 0.0, atol=0.1) - # But they rise to >0.15 - assert trainer_state["log_history"][-1]["rewards/margins"] > 0.15 + assert np.isclose(trainer_state["log_history"][0]["rewards/margins"], 0.0, atol=0.03) + # But they rise to >0.07 + assert trainer_state["log_history"][-1]["rewards/margins"] > 0.07 -def test_sft_and_dpo_lora_mistral(tmpdir): +def test_sft_and_dpo_lora_gemma2(tmpdir): # 0. Config definition: sft_config = """\ data_conf: @@ -561,6 +561,7 @@ def test_sft_and_dpo_lora_mistral(tmpdir): gradient_checkpointing_kwargs: use_reentrant: false learning_rate: 0.5 + logging_first_step: true logging_steps: 1 logging_strategy: "steps" lr_scheduler_type: "constant" @@ -578,11 +579,11 @@ def test_sft_and_dpo_lora_mistral(tmpdir): peft_conf: peft_type: "NO_PEFT" run_conf: - model: hf-internal-testing/tiny-random-MistralForCausalLM + model: hf-internal-testing/tiny-random-Gemma2ForCausalLM model_args: attn_implementation: "eager" use_cache: False - revision: 7517176934d50d00707900b92ef6a95c782e51a2 + revision: de7c11b6c25d26ddd1bf4324fcf479b61d18e440 resume_from_checkpoint: False """ num_steps = 15 @@ -606,8 +607,8 @@ def test_sft_and_dpo_lora_mistral(tmpdir): # 3. Inspect SFT results! with open(sft_ckpt_dir / f"checkpoint-{num_steps}" / "trainer_state.json") as fi: trainer_state = json.loads(fi.read()) - # At the start, loss should still be around -log(1/32002)~10.3, (Even the tiny Mistral has 32002 units after Chat-ML) - assert np.isclose(trainer_state["log_history"][0]["loss"], 10.3, atol=0.5) + # At the start, loss should still be around -log(1/256000)~12.4529327234617, (Even the tiny Gemma2 has 256000 units after Chat-ML) + assert np.isclose(trainer_state["log_history"][0]["loss"], 12.45, atol=0.5) # In current setup, loss goes at least below 4.5 by 15 steps assert trainer_state["log_history"][-1]["loss"] < 4.5 @@ -633,6 +634,7 @@ def test_sft_and_dpo_lora_mistral(tmpdir): gradient_checkpointing_kwargs: use_reentrant: false learning_rate: 0.5 + logging_first_step: true logging_steps: 1 logging_strategy: "steps" lr_scheduler_type: "constant" @@ -668,7 +670,7 @@ def test_sft_and_dpo_lora_mistral(tmpdir): resume_from_checkpoint: False """ # note: potential to redefine - num_steps = 15 + num_steps = 20 # 5. setup # Set paths dpotmpdir = tmpdir / "dpo" @@ -695,9 +697,9 @@ def test_sft_and_dpo_lora_mistral(tmpdir): with open(dpo_ckpt_dir / f"checkpoint-{num_steps}" / "trainer_state.json") as fi: trainer_state = json.loads(fi.read()) # The margins start close to zero - assert np.isclose(trainer_state["log_history"][0]["rewards/margins"], 0.0, atol=0.1) - # But they rise to >0.15 - assert trainer_state["log_history"][-1]["rewards/margins"] > 0.15 + assert np.isclose(trainer_state["log_history"][0]["rewards/margins"], 0.0, atol=0.02) + # But they rise to >0.03 + assert trainer_state["log_history"][-1]["rewards/margins"] > 0.03 # LoRA setup should lead to adapter checkpoint assert (dpo_ckpt_dir / "checkpoint-final" / "adapter_model.safetensors").isfile() @@ -707,12 +709,12 @@ def test_generate(tmpdir): # 0. Config definition: generate_config = """\ model_conf: - model: hf-internal-testing/tiny-random-MistralForCausalLM + model: hf-internal-testing/tiny-random-Gemma2ForCausalLM model_args: attn_implementation: "eager" use_cache: False device_map: cpu - revision: 7517176934d50d00707900b92ef6a95c782e51a2 + revision: de7c11b6c25d26ddd1bf4324fcf479b61d18e440 prompt_conf: type: "open-input" input: "Hi my name is " @@ -725,4 +727,4 @@ def test_generate(tmpdir): finetuning.cli.generation_cli([str(tmpdir / "gen_conf.yaml"), str(tmpdir / "generated_output.yaml")]) with open(tmpdir / "generated_output.yaml") as fin: list_of_outputs = yaml.safe_load(fin) - assert list_of_outputs[0]["answer"] == "Bod OFF soon FT" # This is what tiny-random-MistralForCausalLM replies. + assert list_of_outputs[0]["answer"] == "amientosamientosérômeérôme" # This is what tiny-random-Gemma2 replies.