-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutils.py
More file actions
203 lines (174 loc) · 7.92 KB
/
utils.py
File metadata and controls
203 lines (174 loc) · 7.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import os
from enum import Enum
import packaging.version
import torch
import transformers
from datasets import DatasetDict, load_dataset, load_from_disk
from datasets.builder import DatasetGenerationError
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
)
from peft import LoraConfig
DEFAULT_CHATML_CHAT_TEMPLATE = "{% for message in messages %}\n{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% if loop.last and add_generation_prompt %}{{'<|im_start|>assistant\n' }}{% endif %}{% endfor %}"
DEFAULT_ZEPHYR_CHAT_TEMPLATE = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
class ZephyrSpecialTokens(str, Enum):
user = "<|user|>"
assistant = "<|assistant|>"
system = "<|system|>"
eos_token = "</s>"
bos_token = "<s>"
pad_token = "<pad>"
@classmethod
def list(cls):
return [c.value for c in cls]
class ChatmlSpecialTokens(str, Enum):
user = "<|im_start|>user"
assistant = "<|im_start|>assistant"
system = "<|im_start|>system"
eos_token = "<|im_end|>"
bos_token = "<s>"
pad_token = "<pad>"
@classmethod
def list(cls):
return [c.value for c in cls]
def create_datasets(tokenizer, data_args, training_args, apply_chat_template=False):
def preprocess(samples):
batch = []
for conversation in samples["messages"]:
batch.append(tokenizer.apply_chat_template(conversation, tokenize=False))
return {"content": batch}
raw_datasets = DatasetDict()
for split in data_args.splits.split(","):
try:
# Try first if dataset on a Hub repo
dataset = load_dataset(data_args.dataset_name, split=split)
except DatasetGenerationError:
# If not, check local dataset
dataset = load_from_disk(os.path.join(data_args.dataset_name, split))
if "train" in split:
raw_datasets["train"] = dataset
elif "test" in split:
raw_datasets["test"] = dataset
else:
raise ValueError(f"Split type {split} not recognized as one of test or train.")
if apply_chat_template:
raw_datasets = raw_datasets.map(
preprocess,
batched=True,
remove_columns=raw_datasets["train"].column_names,
)
train_data = raw_datasets["train"]
valid_data = raw_datasets["test"]
print(f"Size of the train set: {len(train_data)}. Size of the validation set: {len(valid_data)}")
print(f"A sample of train dataset: {train_data[0]}")
return train_data, valid_data
def create_and_prepare_model(args, data_args, training_args):
if args.use_unsloth:
from unsloth import FastLanguageModel
bnb_config = None
quant_storage_dtype = None
if (
torch.distributed.is_available()
and torch.distributed.is_initialized()
and torch.distributed.get_world_size() > 1
and args.use_unsloth
):
raise NotImplementedError("Unsloth is not supported in distributed training")
if args.use_4bit_quantization:
compute_dtype = getattr(torch, args.bnb_4bit_compute_dtype)
quant_storage_dtype = getattr(torch, args.bnb_4bit_quant_storage_dtype)
bnb_config = BitsAndBytesConfig(
load_in_4bit=args.use_4bit_quantization,
bnb_4bit_quant_type=args.bnb_4bit_quant_type,
bnb_4bit_compute_dtype=compute_dtype,
bnb_4bit_use_double_quant=args.use_nested_quant,
bnb_4bit_quant_storage=quant_storage_dtype,
)
if compute_dtype == torch.float16 and args.use_4bit_quantization:
major, _ = torch.cuda.get_device_capability()
if major >= 8:
print("=" * 80)
print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
print("=" * 80)
elif args.use_8bit_quantization:
bnb_config = BitsAndBytesConfig(load_in_8bit=args.use_8bit_quantization)
if args.use_unsloth:
# Load model
model, _ = FastLanguageModel.from_pretrained(
model_name=args.model_name_or_path,
max_seq_length=training_args.max_seq_length,
dtype=None,
load_in_4bit=args.use_4bit_quantization,
)
else:
torch_dtype = (
quant_storage_dtype if quant_storage_dtype and quant_storage_dtype.is_floating_point else torch.float32
)
model = AutoModelForCausalLM.from_pretrained(
args.model_name_or_path,
quantization_config=bnb_config,
trust_remote_code=True,
attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
torch_dtype=torch_dtype,
)
peft_config = None
chat_template = None
if args.use_peft_lora and not args.use_unsloth:
peft_config = LoraConfig(
lora_alpha=args.lora_alpha,
lora_dropout=args.lora_dropout,
r=args.lora_r,
bias="none",
task_type="CAUSAL_LM",
target_modules=args.lora_target_modules.split(",")
if args.lora_target_modules != "all-linear"
else args.lora_target_modules,
)
special_tokens = None
chat_template = None
if args.chat_template_format == "chatml":
special_tokens = ChatmlSpecialTokens
chat_template = DEFAULT_CHATML_CHAT_TEMPLATE
elif args.chat_template_format == "zephyr":
special_tokens = ZephyrSpecialTokens
chat_template = DEFAULT_ZEPHYR_CHAT_TEMPLATE
if special_tokens is not None:
tokenizer = AutoTokenizer.from_pretrained(
args.model_name_or_path,
pad_token=special_tokens.pad_token.value,
bos_token=special_tokens.bos_token.value,
eos_token=special_tokens.eos_token.value,
additional_special_tokens=special_tokens.list(),
trust_remote_code=True,
)
tokenizer.chat_template = chat_template
# make embedding resizing configurable?
# Transformers 4.46.0+ defaults uses mean_resizing by default, which fails with QLoRA + FSDP because the
# embedding could be on meta device, therefore, we set mean_resizing=False in that case (i.e. the status quo
# ante). See https://github.com/huggingface/accelerate/issues/1620.
uses_transformers_4_46 = packaging.version.parse(transformers.__version__) >= packaging.version.parse("4.46.0")
uses_fsdp = os.environ.get("ACCELERATE_USE_FSDP", "false").lower() == "true"
if (bnb_config is not None) and uses_fsdp and uses_transformers_4_46:
model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8, mean_resizing=False)
else:
model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)
else:
tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
if args.use_unsloth:
# Do model patching and add fast LoRA weights
model = FastLanguageModel.get_peft_model(
model,
lora_alpha=args.lora_alpha,
lora_dropout=args.lora_dropout,
r=args.lora_r,
target_modules=args.lora_target_modules.split(",")
if args.lora_target_modules != "all-linear"
else args.lora_target_modules,
use_gradient_checkpointing=training_args.gradient_checkpointing,
random_state=training_args.seed,
max_seq_length=training_args.max_seq_length,
)
return model, peft_config, tokenizer