diff --git a/README.md b/README.md
index e451438..ba4097f 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,7 @@ torchrun --nproc_per_node=4 --master_port= train.py \
     --lr_scheduler_type "cosine" \
     --logging_steps 1 \
     --fsdp "full_shard auto_wrap" \
-    --fsdp_transformer_layer_cls_to_wrap 'LLaMADecoderLayer' \
+    --fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
     --tf32 True
 ```
 
diff --git a/chat.py b/chat.py
index ba1f7ee..04b4aa3 100644
--- a/chat.py
+++ b/chat.py
@@ -24,8 +24,8 @@ def load_model(model_name, eight_bit=0, device_map="auto"):
     gpu_count = torch.cuda.device_count()
     print('gpu_count', gpu_count)
 
-    tokenizer = transformers.LLaMATokenizer.from_pretrained(model_name)
-    model = transformers.LLaMAForCausalLM.from_pretrained(
+    tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name)
+    model = transformers.LlamaForCausalLM.from_pretrained(
         model_name,
         #device_map=device_map,
         #device_map="auto",
diff --git a/requirements.txt b/requirements.txt
index d9f02de..8ac4d59 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ numpy
 rouge_score
 fire
 openai
-git+https://github.com/zphang/transformers.git@68d640f7c368bcaaaecfc678f11908ebbd3d6176
+transformers==4.28.1
 torch
 sentencepiece
 tokenizers==0.12.1
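
For context, a minimal sketch of the loading path this patch targets, using the class names as renamed in `transformers==4.28.1` (`LLaMATokenizer`/`LLaMAForCausalLM` became `LlamaTokenizer`/`LlamaForCausalLM`). The checkpoint path and the dtype choice below are placeholders, not part of the patch:

```python
# Loading a LLaMA checkpoint with the post-rename classes from
# transformers 4.28 (LlamaTokenizer / LlamaForCausalLM).
import torch
import transformers

model_name = "path/to/llama-checkpoint"  # placeholder; substitute your checkpoint

tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name)
model = transformers.LlamaForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # assumption: half precision, as commonly used for inference
)
model.eval()
```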