# model_test.py
from transformers import AutoModelForCausalLM, AutoTokenizer
from safetensors import torch as sftorch
from huggingface_hub import hf_hub_download
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys

sys.path.append("./llama_architecture")
from config import LlamaConfig
from model_trnsfmrs import LlamaForCausalLM, LlamaModel

device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")


def test_model(model: LlamaForCausalLM, tokenizer):
    """Run a single forward pass and compare the predicted next token with the expected continuation."""
    model.to(device)
    # Turkish test prompt: "Merhaba nasıl" should be continued with "sın"
    # ("Merhaba nasılsın" = "Hello, how are you").
    input_text = "Merhaba nasıl"
    output_text = "sın"
    input_tokens = tokenizer.encode(input_text)
    inputs = torch.tensor(input_tokens).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(inputs)
    # Take the logits at the last input position: the next-token distribution.
    logits = outputs.flatten(0, 1)[-1]
    print("Logits shape:", logits.shape)
    print("Logits:", logits)
    # The expected token is the first token of the continuation.
    output_token = tokenizer.encode(input_text + output_text)[len(input_tokens)]
    print(f"Expected output token: {tokenizer.decode(output_token)}, token id: {output_token}")
    # Softmax is monotonic, so the argmax matches the argmax over the raw logits.
    predicted_token = torch.softmax(logits, dim=-1).argmax().item()
    print(f"Predicted token: {tokenizer.decode(predicted_token)}, token id: {predicted_token}")
    crossE_loss = F.cross_entropy(logits.unsqueeze(0), torch.tensor([output_token]).to(device))
    print(f"Cross-Entropy Loss: {crossE_loss.item()}")
def load_model(model_name, tokenizer_name, model_file_path="model.safetensors"):
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    # Download the checkpoint from the Hub and read its weights from safetensors.
    model_file = hf_hub_download(repo_id=model_name, filename=model_file_path)
    state_dict = sftorch.load_file(model_file, device=device)

    # Define model configuration
    llama_config = LlamaConfig(
        vocab_size=32768,
        emb_dim=256,
        context_length=256,
        n_heads=128,
        n_layers=20,
        n_kv_groups=64,
        hidden_dim=2048,
    )
    # Define the model and load the downloaded weights into it.
    model = LlamaForCausalLM(llama_config, tokenizer)
    model.load_state_dict(state_dict, strict=False)
    return model, tokenizer
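

# Optional cross-check (a minimal sketch, not part of the original test flow): the
# AutoModelForCausalLM import above suggests comparing against the Hugging Face reference
# implementation. This helper is an illustrative assumption: it loads the same repo through
# transformers and prints its next-token prediction for the same prompt, so the custom
# model's output can be sanity-checked against it.
def compare_with_hf_reference(model_name, tokenizer, input_text="Merhaba nasıl"):
    hf_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    hf_model.eval()
    inputs = torch.tensor(tokenizer.encode(input_text)).unsqueeze(0).to(device)
    with torch.no_grad():
        hf_logits = hf_model(inputs).logits[0, -1]
    hf_token = hf_logits.argmax().item()
    print(f"HF reference prediction: {tokenizer.decode(hf_token)}, token id: {hf_token}")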
def main():
    model_name = input("Enter the model name (e.g., 'TheBloke/Llama-2-7B-Chat-GPTQ'): ")
    tokenizer_name = input("Enter the tokenizer name (e.g., 'TheBloke/Llama-2-7B-Chat-GPTQ'): ")
    model_file_path = input("Enter the model file path ('model.safetensors' by default): ") or "model.safetensors"
    model, tokenizer = load_model(model_name, tokenizer_name, model_file_path)
    test_model(model, tokenizer)


if __name__ == "__main__":
    main()