# config.py - shared imports and the default CONFIG settings dictionary.
import transformers
import torch
import torch.nn as nn
import argparse
import os
import sys
import random
import datasets
from datasets import load_dataset, load_from_disk
import pickle
import numpy as np
import json
import functools
import deepspeed
import logging
import jsonlines
import glob
import copy
import time
from datetime import datetime
from itertools import chain
from pathlib import Path
from tqdm import tqdm
from typing import List, Mapping, Optional, Dict
import torch
import torch.nn.functional as F
import torch.distributed as dist
from deepspeed.runtime.zero import GatheredParameters
from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus
from deepspeed.module_inject.replace_policy import BLOOMLayerPolicy
from torch.utils.data import (
DataLoader,
Dataset,
RandomSampler,
SequentialSampler,
DistributedSampler
)
from huggingface_hub import snapshot_download
from transformers import (
AutoModelForCausalLM,
AutoModel,
GenerationConfig,
AutoTokenizer,
AutoConfig,
DataCollatorForSeq2Seq,
DataCollatorWithPadding,
TrainingArguments,
Seq2SeqTrainingArguments,
Trainer,
Seq2SeqTrainer,
BitsAndBytesConfig
)
# from optimum.bettertransformer import BetterTransformer
from scipy.optimize import curve_fit
# Default settings for the project, keyed by option name.
#
# Key order matches the original declaration so that any consumer iterating
# the mapping sees the same sequence.
# NOTE(review): how these defaults are read or overridden is not visible in
# this file - the grouping comments below are descriptive only; confirm exact
# semantics against the callers.
CONFIG = {
    # Batch sizes and output location.
    "batch_size": 256,
    "micro_batch_size": 8,
    "eval_batch_size": 16,
    "output_dir": "./temp",
    "accumulation_steps": 1,

    # Schedule and sequence/generation limits.
    "epochs": 4,
    "max_steps": -1,  # presumably "no step limit" - TODO confirm with caller
    "max_len": 512,
    "max_new_tokens": 1024,
    "num_beams": 1,
    "learning_rate": 1e-5,
    "warmup_steps": 16,

    # Data / model locations and checkpoint/eval cadence.
    # Empty strings are placeholders left unset by default.
    "student_model_path": "",
    "val_data": "/data/work/processed_cuge_datasets/C3/C3_valid.json",
    "save_steps": 1024,
    "eval_steps": 102400,
    "val_set_size": 0,
    "train_data": "/data/work/processed_cuge_datasets/C3/C3_train.json",
    "pretrain_data": "",
    "model_name": "",
    "model_path": "",

    # Loss-related settings.
    "temperature": 3,
    "loss_type": "topk_normed_ce",
    "alpha": 1e-4,
    "sparsity_pool": 0.1,
    "sparsity_noise": 0.1,
    "lm_loss_weight": 0,
    "loss_weights_2": 0,
    "loss_weights_3": 0,

    # Runtime / model miscellany.
    "deepspeed_config": None,
    "pad_token_id": 0,
    "num_relation_head": 64,
}