Binary file added __pycache__/audio_processing.cpython-311.pyc
Binary file added __pycache__/audio_processing.cpython-36.pyc
Binary file added __pycache__/data_utils.cpython-311.pyc
Binary file added __pycache__/data_utils.cpython-36.pyc
Binary file added __pycache__/distributed.cpython-311.pyc
Binary file added __pycache__/distributed.cpython-36.pyc
Binary file added __pycache__/hparams.cpython-311.pyc
Binary file added __pycache__/hparams.cpython-36.pyc
Binary file added __pycache__/layers.cpython-311.pyc
Binary file added __pycache__/layers.cpython-36.pyc
Binary file added __pycache__/logger.cpython-311.pyc
Binary file added __pycache__/logger.cpython-36.pyc
Binary file added __pycache__/loss_function.cpython-311.pyc
Binary file added __pycache__/loss_function.cpython-36.pyc
Binary file added __pycache__/model.cpython-311.pyc
Binary file added __pycache__/model.cpython-36.pyc
Binary file added __pycache__/plotting_utils.cpython-311.pyc
Binary file added __pycache__/plotting_utils.cpython-36.pyc
Binary file added __pycache__/stft.cpython-311.pyc
Binary file added __pycache__/stft.cpython-36.pyc
Binary file added __pycache__/utils.cpython-311.pyc
Binary file added __pycache__/utils.cpython-36.pyc
9 changes: 6 additions & 3 deletions data_utils.py
@@ -7,14 +7,16 @@
from utils import load_wav_to_torch, load_filepaths_and_text
from text import text_to_sequence

print("Trainloader script")

class TextMelLoader(torch.utils.data.Dataset):
"""
1) loads audio,text pairs
2) normalizes text and converts them to sequences of one-hot vectors
3) computes mel-spectrograms from audio files.
"""
def __init__(self, audiopaths_and_text, hparams):
def __init__(self, audiopaths_and_text, hparams, dataset_type):
self.dataset_type = dataset_type
self.audiopaths_and_text = load_filepaths_and_text(audiopaths_and_text)
self.text_cleaners = hparams.text_cleaners
self.max_wav_value = hparams.max_wav_value
@@ -36,9 +38,10 @@ def get_mel_text_pair(self, audiopath_and_text):

def get_mel(self, filename):
if not self.load_mel_from_disk:
audio, sampling_rate = load_wav_to_torch(filename)
audio, sampling_rate = load_wav_to_torch(self.dataset_type ,filename, self.stft.sampling_rate)

if sampling_rate != self.stft.sampling_rate:
raise ValueError("{} {} SR doesn't match target {} SR".format(
raise ValueError("{} SR doesn't match target {} SR".format(
sampling_rate, self.stft.sampling_rate))
audio_norm = audio / self.max_wav_value
audio_norm = audio_norm.unsqueeze(0)
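The call above now hands self.dataset_type and the target sampling rate to load_wav_to_torch, and the TextMelLoader constructor gains a dataset_type argument, so callers (for example the data-loader setup in train.py, not shown in this diff) would presumably pass a label such as 'train' or 'validation'. The matching change to utils.py is not part of this diff; the sketch below is only a guess at the implied signature, assuming the extra arguments are used to resample mismatched audio to the target rate, and the dataset_type handling is hypothetical.

# Hypothetical reconstruction of the load_wav_to_torch() signature implied by the
# call site above; the real utils.py change is not included in this excerpt.
import librosa
import numpy as np
import torch
from scipy.io.wavfile import read


def load_wav_to_torch(dataset_type, full_path, target_sampling_rate):
    """Load a wav file and resample it to the target rate when needed.

    How dataset_type influences loading is not visible in the diff; here it is
    only carried along for the caller's bookkeeping.
    """
    sampling_rate, data = read(full_path)
    data = data.astype(np.float32)  # keep the int16 value range expected by max_wav_value

    # Assumed behaviour: bring mismatched recordings to the target rate so the
    # sampling-rate check in TextMelLoader.get_mel() passes.
    if sampling_rate != target_sampling_rate:
        data = librosa.resample(data, orig_sr=sampling_rate, target_sr=target_sampling_rate)
        sampling_rate = target_sampling_rate

    return torch.FloatTensor(data), sampling_rate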
145 changes: 90 additions & 55 deletions hparams.py
@@ -1,95 +1,130 @@
import tensorflow as tf
# import tensorflow as tf
import logging
from text import symbols

# Set up logging
logging.basicConfig(level=logging.INFO)

print("Hyper Params script")

# Custom HParams class to allow dot notation access
class HParams(dict):
"""Custom class that allows dot notation for dictionary keys."""

def __getattr__(self, name):
"""Override attribute access to allow dot notation."""
if name in self:
return self[name]
else:
raise AttributeError(f"'HParams' object has no attribute '{name}'")

def __setattr__(self, name, value):
"""Override setting attributes."""
self[name] = value

def __delattr__(self, name):
"""Override deleting attributes."""
del self[name]

# Create Hyperparameters using the custom HParams class
def create_hparams(hparams_string=None, verbose=False):
"""Create model hyperparameters. Parse nondefault from given string."""

hparams = tf.contrib.training.HParams(
# Initialize hyperparameters using the custom class
hparams = HParams({
################################
# Experiment Parameters #
################################
epochs=500,
iters_per_checkpoint=1000,
seed=1234,
dynamic_loss_scaling=True,
fp16_run=False,
distributed_run=False,
dist_backend="nccl",
dist_url="tcp://localhost:54321",
cudnn_enabled=True,
cudnn_benchmark=False,
ignore_layers=['embedding.weight'],
'epochs': 1600,
'iters_per_checkpoint': 1000,
'seed': 1234,
'dynamic_loss_scaling': True,
'fp16_run': False,
'distributed_run': False,
'dist_backend': "nccl",
'dist_url': "tcp://localhost:54321",
'cudnn_enabled': True,
'cudnn_benchmark': False,
'ignore_layers': ['embedding.weight'],

################################
# Data Parameters #
################################
load_mel_from_disk=False,
training_files='filelists/ljs_audio_text_train_filelist.txt',
validation_files='filelists/ljs_audio_text_val_filelist.txt',
text_cleaners=['english_cleaners'],
'load_mel_from_disk': False,
'training_files': './datasets/train_datasets/line_index.tsv',
'validation_files': './datasets/validation_datasets/line_index.tsv',
'text_cleaners': ['transliteration_cleaners'],

################################
# Audio Parameters #
################################
max_wav_value=32768.0,
sampling_rate=22050,
filter_length=1024,
hop_length=256,
win_length=1024,
n_mel_channels=80,
mel_fmin=0.0,
mel_fmax=8000.0,
'max_wav_value': 32768.0,
'sampling_rate': 22050,
'filter_length': 1024,
'hop_length': 256,
'win_length': 1024,
'n_mel_channels': 80,
'mel_fmin': 0.0,
'mel_fmax': 8000.0,

################################
# Model Parameters #
################################
n_symbols=len(symbols),
symbols_embedding_dim=512,
'n_symbols': len(symbols),
'symbols_embedding_dim': 512,

# Encoder parameters
encoder_kernel_size=5,
encoder_n_convolutions=3,
encoder_embedding_dim=512,
'encoder_kernel_size': 5,
'encoder_n_convolutions': 3,
'encoder_embedding_dim': 512,

# Decoder parameters
n_frames_per_step=1, # currently only 1 is supported
decoder_rnn_dim=1024,
prenet_dim=256,
max_decoder_steps=1000,
gate_threshold=0.5,
p_attention_dropout=0.1,
p_decoder_dropout=0.1,
'n_frames_per_step': 1, # currently only 1 is supported
'decoder_rnn_dim': 1024,
'prenet_dim': 256,
'max_decoder_steps': 1000,
'gate_threshold': 0.5,
'p_attention_dropout': 0.1,
'p_decoder_dropout': 0.1,

# Attention parameters
attention_rnn_dim=1024,
attention_dim=128,
'attention_rnn_dim': 1024,
'attention_dim': 128,

# Location Layer parameters
attention_location_n_filters=32,
attention_location_kernel_size=31,
'attention_location_n_filters': 32,
'attention_location_kernel_size': 31,

# Mel-post processing network parameters
postnet_embedding_dim=512,
postnet_kernel_size=5,
postnet_n_convolutions=5,
'postnet_embedding_dim': 512,
'postnet_kernel_size': 5,
'postnet_n_convolutions': 5,

################################
# Optimization Hyperparameters #
################################
use_saved_learning_rate=False,
learning_rate=1e-3,
weight_decay=1e-6,
grad_clip_thresh=1.0,
batch_size=64,
mask_padding=True # set model's padded outputs to padded values
)
'use_saved_learning_rate': False,
'learning_rate': 1e-3,
'weight_decay': 1e-6,
'grad_clip_thresh': 1.0,
'batch_size': 32,
'mask_padding': True # set model's padded outputs to padded values
})

# If a hparams string is provided, parse it
if hparams_string:
tf.logging.info('Parsing command line hparams: %s', hparams_string)
hparams.parse(hparams_string)
logging.info('Parsing command line hparams: %s', hparams_string)
# Assuming hparams_string is in a format where key=value pairs are provided (like 'epochs=1000')
hparams_list = hparams_string.split(',')
for param in hparams_list:
key, value = param.split('=')
if key in hparams:
hparams[key] = type(hparams[key])(value) # Convert to the correct type
else:
logging.warning("Unknown parameter: %s", key)

# If verbose, log the final parsed hyperparameters
if verbose:
tf.logging.info('Final parsed hparams: %s', hparams.values())
logging.info('Final parsed hparams: %s', hparams)

return hparams
return hparams
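The HParams class above replaces tf.contrib.training.HParams with a plain dict subclass whose __getattr__, __setattr__ and __delattr__ forward to the dictionary. A short sketch of how that behaves in practice, using a couple of illustrative keys rather than the full set defined above:

# Minimal check of the dot-notation behaviour of the dict-based HParams class;
# the class itself is taken from the diff above, the keys here are illustrative.
from hparams import HParams

hp = HParams({'sampling_rate': 22050, 'hop_length': 256})

assert hp.sampling_rate == 22050   # __getattr__ falls back to the dict entry
hp.hop_length = 275                # __setattr__ writes into the dict
assert hp['hop_length'] == 275     # plain dict access still works
del hp.hop_length                  # __delattr__ removes the key

try:
    hp.missing_key
except AttributeError as err:
    print(err)                     # 'HParams' object has no attribute 'missing_key'

Because __getattr__ is only consulted after normal attribute lookup fails, ordinary dict methods such as keys() and items() keep working unchanged.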
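create_hparams() now parses overrides itself instead of calling hparams.parse(). A sketch of the expected round trip, assuming an override string in the comma-separated key=value form the loop above handles:

# Illustrative round trip through create_hparams() with a "key=value,key=value"
# override string, parsed by the split(',')/split('=') loop above.
from hparams import create_hparams

hparams = create_hparams()                    # defaults from the dictionary above
print(hparams.batch_size, hparams.epochs)     # 32 1600

hparams = create_hparams("epochs=500,learning_rate=0.0005", verbose=True)
assert hparams.epochs == 500                  # cast back via type(hparams[key])(value)
assert abs(hparams.learning_rate - 5e-4) < 1e-12

# Caveat: type(hparams[key])(value) only round-trips flat scalars. bool("False")
# is True, so flags such as fp16_run cannot be switched off this way, and
# list-valued keys such as ignore_layers or text_cleaners are not handled.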