From 74389775c2c31602dbc04b5f653113d5207bd9d0 Mon Sep 17 00:00:00 2001
From: Vatsal Shah
Date: Fri, 18 Oct 2024 18:27:52 +0530
Subject: [PATCH] feat: deprecate Qwen1.5 and gemma; replace them with Qwen2.5
 and gemma2

---
 README.md          |  4 ++--
 training_args.yaml | 10 +++++-----
 utils/constants.py | 30 +++++++++++++++---------------
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 57dded9..89c91de 100644
--- a/README.md
+++ b/README.md
@@ -75,8 +75,8 @@ curl --location 'https://fed-ledger-prod.flock.io/api/v1/tasks/submit-result' \
 --data '{
     "task_id": 29,
     "data":{
-        "hg_repo_id": "Qwen/Qwen1.5-1.8B-Chat",
-        "base_model": "qwen1.5",
+        "hg_repo_id": "Qwen/Qwen2.5-1.5B",
+        "base_model": "qwen2.5",
         "gpu_type": "",
         "revision": ""
     }
diff --git a/training_args.yaml b/training_args.yaml
index 5ec6ff1..5359d3f 100644
--- a/training_args.yaml
+++ b/training_args.yaml
@@ -1,4 +1,4 @@
-Qwen/Qwen1.5-0.5B:
+Qwen/Qwen2.5-0.5B:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
@@ -6,7 +6,7 @@ Qwen/Qwen1.5-0.5B:
   lora_alpha: 16
   lora_dropout: 0.1
 
-Qwen/Qwen1.5-1.8B:
+Qwen/Qwen2.5-1.5B:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
@@ -14,7 +14,7 @@ Qwen/Qwen1.5-1.8B:
   lora_alpha: 8
   lora_dropout: 0.1
 
-Qwen/Qwen1.5-7B:
+Qwen/Qwen2.5-7B:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
@@ -22,7 +22,7 @@ Qwen/Qwen1.5-7B:
   lora_alpha: 8
   lora_dropout: 0.1
 
-google/gemma-2b:
+google/gemma-2-2b:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
@@ -30,7 +30,7 @@ google/gemma-2b:
   lora_alpha: 8
   lora_dropout: 0.1
 
-google/gemma-7b:
+google/gemma-2-9b:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
diff --git a/utils/constants.py b/utils/constants.py
index 42b9aa4..d4317e6 100644
--- a/utils/constants.py
+++ b/utils/constants.py
@@ -13,25 +13,25 @@
 }
 
 model2template = {
-    "Qwen/Qwen1.5-0.5B": qwen_template,
-    "Qwen/Qwen1.5-1.8B": qwen_template,
-    "Qwen/Qwen1.5-7B": qwen_template,
-    "google/gemma-2b": gemma_template,
-    "google/gemma-7b": gemma_template,
+    "Qwen/Qwen2.5-0.5B": qwen_template,
+    "Qwen/Qwen2.5-1.5B": qwen_template,
+    "Qwen/Qwen2.5-7B": qwen_template,
+    "google/gemma-2-2b": gemma_template,
+    "google/gemma-2-9b": gemma_template,
 }
 
 model2size = {
-    "Qwen/Qwen1.5-0.5B": 620_000_000,
-    "Qwen/Qwen1.5-1.8B": 1_840_000_000,
-    "Qwen/Qwen1.5-7B": 7_720_000_000,
-    "google/gemma-2b": 2_510_000_000,
-    "google/gemma-7b": 8_540_000_000,
+    "Qwen/Qwen2.5-0.5B": 494_000_000,
+    "Qwen/Qwen2.5-1.5B": 1_540_000_000,
+    "Qwen/Qwen2.5-7B": 7_620_000_000,
+    "google/gemma-2-2b": 2_610_000_000,
+    "google/gemma-2-9b": 9_240_000_000,
 }
 
 model2base_model = {
-    "Qwen/Qwen1.5-0.5B": "qwen1.5",
-    "Qwen/Qwen1.5-1.8B": "qwen1.5",
-    "Qwen/Qwen1.5-7B": "qwen1.5",
-    "google/gemma-2b": "gemma",
-    "google/gemma-7b": "gemma",
+    "Qwen/Qwen2.5-0.5B": "qwen2.5",
+    "Qwen/Qwen2.5-1.5B": "qwen2.5",
+    "Qwen/Qwen2.5-7B": "qwen2.5",
+    "google/gemma-2-2b": "gemma2",
+    "google/gemma-2-9b": "gemma2",
 }