conf.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 3 14:44:11 2023
@author: ahannan
"""
import re
import os
import sentencepiece as spm
bpe_flag = True
flag_use_single_out = True
dataset_path = "/stek/corpora/"
dataset_to_use = "LibriSpeech"  # or "Tedlium-v3"
project_name = "sample_pretrain_" + dataset_to_use
# project_name = "sample_ft_" + dataset_to_use
base_path = os.getcwd()
log_path = os.path.join(base_path, "runs", project_name)
os.makedirs(log_path, exist_ok=True)
model_save_path = os.path.join(base_path, "trained_model", project_name)
os.makedirs(model_save_path, exist_ok=True)
############ DataLoader Settings
train_num_workers = 12
test_num_workers = 8
shuffle = True
sample_rate = 16000
n_fft = 512
win_length = 320 # 20 ms window at 16 kHz
hop_length = 160 # 10 ms hop (50% window overlap)
n_mels = 80
n_mfcc = 80
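# Usage sketch (illustrative, not part of this config): a torchaudio front end
# built from the settings above; `torchaudio` and `waveform` are assumptions.
#
#   import torchaudio
#   mel_transform = torchaudio.transforms.MelSpectrogram(
#       sample_rate=sample_rate,  # 16 kHz audio
#       n_fft=n_fft,              # 512-point FFT
#       win_length=win_length,    # 320 samples = 20 ms window
#       hop_length=hop_length,    # 160 samples = 10 ms hop
#       n_mels=n_mels,            # 80 mel bins
#   )
#   # features = mel_transform(waveform)  # -> (channel, n_mels, n_frames)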
############ model parameter setting
batch_size = 64
max_len = 2000
conv_filters = [256, 256]
d_model = 256
n_encoder_layers = 2
n_decoder_layers = 6
n_heads = 4
n_enc_replay = 6
expansion_factor = 4
dim_feed_forward = d_model * expansion_factor
drop_prob = 0.1
depthwise_kernel_size = 31
max_utterance_length = 401
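# Sketch (an assumption, not this repo's model code): a kernel size of 31
# matches the depthwise 1-D convolution used in Conformer-style convolution
# modules; in PyTorch that layer could look like:
#
#   import torch.nn as nn
#   depthwise_conv = nn.Conv1d(
#       d_model, d_model, depthwise_kernel_size,
#       groups=d_model,                            # one filter per channel
#       padding=(depthwise_kernel_size - 1) // 2,  # preserve sequence length
#   )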
############ Token indices used when bpe_flag is False.
src_pad_idx = 0
trg_pad_idx = 30
trg_sos_idx = 1
trg_eos_idx = 31
enc_voc_size = 32
dec_voc_size = 32
sp = spm.SentencePieceProcessor()
if bpe_flag:
    sp.load(os.path.join(base_path, "libri.bpe-256.model"))
    src_pad_idx = 0
    trg_pad_idx = 126
    trg_sos_idx = 1
    trg_eos_idx = 2
    enc_voc_size = sp.get_piece_size()
    dec_voc_size = sp.get_piece_size()
    lexicon = os.path.join(base_path, "librispeech-bpe-256.lex")
    tokens = os.path.join(base_path, "librispeech-bpe-256.tok")
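# Usage sketch (illustrative): with bpe_flag enabled, a transcript can be
# mapped to BPE ids and framed with the sos/eos indices above.
#
#   ids = sp.encode("hello world", out_type=int)  # list of BPE ids
#   target = [trg_sos_idx] + ids + [trg_eos_idx]  # decoder target sequence
#   text = sp.decode(ids)                         # back to "hello world"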
############ optimizer parameter setting
factor = 0.9
adam_b1 = 0.9
adam_b2 = 0.98
adam_eps = 1e-9
warmup = 10000
grad_scaler = False
total_epochs = 50 # 100 - for pretraining # 50 - for fine-tuning # 150 - baseline
clip = 1.0
weight_decay = 1e-6
inf = float('inf')
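# Sketch (an assumption): `factor`, `warmup`, and `d_model` suggest the
# Noam/Transformer learning-rate schedule, typically paired with
# Adam(betas=(adam_b1, adam_b2), eps=adam_eps, weight_decay=weight_decay).
#
#   def noam_lr(step):
#       step = max(step, 1)  # avoid 0 ** -0.5 on the first update
#       return factor * d_model ** -0.5 * min(step ** -0.5,
#                                             step * warmup ** -1.5)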