7 changes: 4 additions & 3 deletions egs/mini_librispeech/s5/cmd.sh
100644 → 100755
@@ -10,6 +10,7 @@
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd="queue.pl --mem 2G"
export decode_cmd="queue.pl --mem 4G"
export mkgraph_cmd="queue.pl --mem 8G"
export train_cmd="queue.pl --mem 2G --config conf/queue_no_k20.conf --allow-k10-k20 true"
export decode_cmd="queue.pl --mem 4G --config conf/queue_no_k20.conf --allow-k10-k20 true"
export mkgraph_cmd="queue.pl --mem 8G --config conf/queue_no_k20.conf --allow-k10-k20 true"
export cuda_cmd="queue.pl --gpu 1 --config conf/queue_no_k20.conf --allow-k10-k20 true"
16 changes: 16 additions & 0 deletions egs/mini_librispeech/s5/conf/queue_no_k20.conf
@@ -0,0 +1,16 @@
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -l 'hostname=!a08*&!a09*&!a10*&!c04*&!b18*&!b19*&!b20*'
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0 -q all.q
option gpu=* -l gpu=$0 -q g.q
default allow_k20=true
option allow_k20=true
option allow_k20=false -l 'hostname=!g01*&!g02*&!b06*'
default allow_k10_k20=true
option allow_k10_k20=true
option allow_k10_k20=false -l 'hostname=!b0*&!b10*&!g01*&!g02'
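
For reference, a hedged sketch of how queue.pl consumes this config: each --name value option on the command line is matched against an "option name=..." line, and the corresponding qsub flags (with $0 replaced by the value) are appended to the base "command" line above. The log path and job command below are placeholders, not part of this PR.

# With --allow-k10-k20 false, queue.pl should add the hostname restriction from
# "option allow_k10_k20=false ..."; with the default "true" it adds nothing extra.
queue.pl --mem 2G --config conf/queue_no_k20.conf --allow-k10-k20 false \
  exp/example/log/example.log echo "hello"
# Roughly equivalent to submitting through:
#   qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* \
#     -l 'hostname=!a08*&!a09*&!a10*&!c04*&!b18*&!b19*&!b20*' \
#     -l mem_free=2G,ram_free=2G -q all.q \
#     -l 'hostname=!b0*&!b10*&!g01*&!g02'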
68 changes: 68 additions & 0 deletions egs/mini_librispeech/s5/local/fvector/add_output_node.py
@@ -0,0 +1,68 @@
#!/usr/bin/env python

from __future__ import print_function
import argparse
import logging
import os
import pprint
import sys
import shutil
import traceback

def get_args():
parser = argparse.ArgumentParser(description="Add the 'S' and 'b' output nodes, "
"which are used in the PLDA objective function.",
epilog="Called by local/fvector/run_fvector.sh")
parser.add_argument("--input-dim", type=int, required=True,
help="The input dimension of fvector network.")
parser.add_argument("--output-dim", type=int, required=True,
help="The output dimension of fvector network which is used to "
"compute the dimension of S matrix.")
parser.add_argument("--s-scale", type=float, default=0.2,
help="Scaling factor on the output 's' (s is a symmetric matrix "
"used for scoring).")
parser.add_argument("--b-scale", type=float, default=0.2,
help="Scaling factor on output 'b' (b is a scalar offset used in scoring).")
parser.add_argument("--config-file", type=str, required=True,
help="The file is needed to be modified. It's always is configs/final.config")

print(' '.join(sys.argv), file=sys.stderr)
print(sys.argv, file=sys.stderr)
args = parser.parse_args()
return args


def main():
args = get_args()

f = open(args.config_file, "a")
# The s output
# The 's' output is a symmetric matrix stored as its upper triangle,
# so it has output_dim * (output_dim + 1) / 2 entries (integer division).
s_dim = args.output_dim * (args.output_dim + 1) // 2

print('component name=x-s type=ConstantFunctionComponent input-dim={0} output-dim={1} '
'output-mean=0 output-stddev=0 '.format(
args.input_dim, s_dim), file=f)
print('component-node name=x-s component=x-s input=IfDefined(input)',
file=f)
print('component name=x-s-scale type=FixedScaleComponent dim={0} scale={1}'.format(
s_dim, args.s_scale), file=f)
print('component-node name=x-s-scale component=x-s-scale input=x-s',
file=f)
print('output-node name=s input=x-s-scale', file=f)

# now the 'b' output, which is just a scalar.
b_dim = 1
print('component name=x-b type=ConstantFunctionComponent input-dim={0} output-dim=1 '
'output-mean=0 output-stddev=0 '.format(args.input_dim), file=f)
print('component-node name=x-b component=x-b input=IfDefined(input)', file=f)
print('component name=x-b-scale type=FixedScaleComponent dim=1 scale={0}'.format(
args.b_scale), file=f)
print('component-node name=x-b-scale component=x-b-scale input=x-b',
file=f)
print('output-node name=b input=x-b-scale', file=f)
f.close()



if __name__ == "__main__":
main()
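
A hedged usage sketch of this script as it is invoked from run_fvector.sh below; with --output-dim 200 the 's' output has dimension 200*201/2 = 20100 (the upper triangle of a symmetric 200x200 matrix) and 'b' is a single scalar, so the script appends roughly the following lines to final.config:

python local/fvector/add_output_node.py --input-dim 400 --output-dim 200 \
  --config-file exp/fvector/configs/final.config

# Appended to final.config (s-scale and b-scale default to 0.2):
#   component name=x-s type=ConstantFunctionComponent input-dim=400 output-dim=20100 output-mean=0 output-stddev=0
#   component-node name=x-s component=x-s input=IfDefined(input)
#   component name=x-s-scale type=FixedScaleComponent dim=20100 scale=0.2
#   component-node name=x-s-scale component=x-s-scale input=x-s
#   output-node name=s input=x-s-scale
#   component name=x-b type=ConstantFunctionComponent input-dim=400 output-dim=1 output-mean=0 output-stddev=0
#   component-node name=x-b component=x-b input=IfDefined(input)
#   component name=x-b-scale type=FixedScaleComponent dim=1 scale=0.2
#   component-node name=x-b-scale component=x-b-scale input=x-b
#   output-node name=b input=x-b-scale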
52 changes: 52 additions & 0 deletions egs/mini_librispeech/s5/local/fvector/generate_sin_cos_matrix.py
@@ -0,0 +1,52 @@
#!/usr/bin/env python

from __future__ import print_function
import argparse
import logging
import os
import pprint
import shutil
import sys
import traceback

sys.path.insert(0, 'steps')
import libs.common as common_lib

def get_args():
parser = argparse.ArgumentParser(description="Generate sin_transform.mat "
"and cos_transform.mat for the frequency-domain raw-waveform setup.",
epilog="Called by local/fvector/run_fvector.sh")
parser.add_argument("--feat-dim", type=int, required=True,
help="The dimension of input.")
parser.add_argument("--add-bias", type=str,
help="If true, add a column for fft matrix.",
default=True, choices=["True","False"])
parser.add_argument("--half-range", type=str,
help="If true, generate half fft matrix.",
default=True, choices=["True","False"])
parser.add_argument("--dir", type=str, required=True,
help="The output directory.")

print(' '.join(sys.argv), file=sys.stderr)
print(sys.argv, file=sys.stderr)
args = parser.parse_args()
return args


def main():
args = get_args()

feat_dim = args.feat_dim
# Round the FFT size up to the next power of two >= feat_dim.
num_fft_bins = 2 ** (args.feat_dim - 1).bit_length()
add_bias = (args.add_bias == "True")
half_range = (args.half_range == "True")

common_lib.write_sin_cos_transform_matrix(feat_dim, num_fft_bins,
"{0}/configs/cos_transform.mat".format(args.dir),
compute_cosine=True, add_bias=add_bias, half_range=half_range)
common_lib.write_sin_cos_transform_matrix(feat_dim, num_fft_bins,
"{0}/configs/sin_transform.mat".format(args.dir),
compute_cosine=False, add_bias=add_bias, half_range=half_range)

if __name__ == "__main__":
main()
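
For intuition, a minimal sketch of the transform this script requests. The real implementation is common_lib.write_sin_cos_transform_matrix in this branch's steps/libs/common.py, so the function below is an assumption about its behaviour (the add-bias column is omitted), not a copy of it.

import numpy as np

def sin_cos_transform(feat_dim, num_fft_bins, compute_cosine=True, half_range=True):
    # Map a feat_dim-sample frame (implicitly zero-padded to num_fft_bins)
    # to the real (cosine) or imaginary (sine) part of its DFT.
    num_rows = num_fft_bins // 2 + 1 if half_range else num_fft_bins
    k = np.arange(num_rows).reshape(-1, 1)   # output frequency bin
    n = np.arange(feat_dim).reshape(1, -1)   # input sample index
    angle = 2.0 * np.pi * k * n / num_fft_bins
    return np.cos(angle) if compute_cosine else np.sin(angle)

# For --feat-dim 400, num_fft_bins = 512 (next power of two >= 400), so with
# half_range=True the cosine matrix is 257 x 400.
print(sin_cos_transform(400, 512).shape)   # (257, 400)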
78 changes: 78 additions & 0 deletions egs/mini_librispeech/s5/local/fvector/run_fvector.sh
@@ -0,0 +1,78 @@
#!/bin/bash

. ./cmd.sh
set -e

stage=3
train_stage=-10
data=data/train_clean_5
noise_data=data/noise
egs_dir=exp/fvector/egs
fvector_dir=exp/fvector
use_gpu=true

. ./path.sh
. ./cmd.sh
. ./utils/parse_options.sh

if [ $stage -le 3 ]; then
#dump egs
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $egs_dir/storage ]; then
utils/create_split_dir.pl \
/export/b{11,12,13}/$USER/kaldi-data/egs/minilibrispeech-$(date +'%m_%d_%H_%M')/s5/$egs_dir/storage $egs_dir/storage
fi

steps/nnet3/fvector/get_egs.sh --cmd "$train_cmd" \
--nj 8 \
--stage 0 \
--egs-per-iter 12500 \
--egs-per-iter-diagnostic 10000 \
--num-diagnostic-percent 5 \
--frame-length 25 \
--left-padding 1 \
--right-padding 1 \
"$data" "$noise_data" "$egs_dir"
fi

if [ $stage -le 4 ]; then
#prepare configs
echo "$0: creating neural net configs using the xconfig parser";
#options
input_dim=400
num_filters=100

mkdir -p $fvector_dir/configs

cat <<EOF > $fvector_dir/configs/network.xconfig
input dim=$input_dim name=input
# Each eg contains 8 frames.  Do frequency-domain feature learning, and then
# use a TDNN to collapse the frames into a single vector.
preprocess-fft-abs-lognorm-affine-log-layer name=raw0 cos-transform-file=$fvector_dir/configs/cos_transform.mat sin-transform-file=$fvector_dir/configs/sin_transform.mat num-filters=$num_filters half-fft-range=true
conv-relu-batchnorm-layer name=cnn1 height-in=$num_filters height-out=$[$num_filters/2] time-offsets=-1,0,1 height-offsets=-1,0,1 num-filters-out=64 height-subsample-out=2 learning-rate-factor=0.34 max-change=0.25

relu-batchnorm-layer name=tdnn0 input=cnn1 dim=625
relu-batchnorm-layer name=tdnn1 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn2 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn3 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn4 input=Append(0,1) dim=625
output-layer name=output input=tdnn4 dim=200 include-log-softmax=False param-stddev=0.04 bias-stddev=1.0
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $fvector_dir/configs/network.xconfig --config-dir $fvector_dir/configs/
# Modify the final.config and generate sin.mat/cos.mat manually
python local/fvector/add_output_node.py --input-dim 400 --output-dim 200 --config-file $fvector_dir/configs/final.config
python local/fvector/generate_sin_cos_matrix.py \
--feat-dim 400 --dir $fvector_dir
fi

if [ $stage -le 5 ]; then
#training
steps/nnet3/xvector/train.sh --cmd "$train_cmd" \
--initial-effective-lrate 0.002 \
--final-effective-lrate 0.0002 \
--max-param-change 0.2 \
--minibatch-size 16 \
Owner commented:

    Isn't it too small? Are you sure the training objective converges?

Author (@LvHang, May 22, 2018) replied:

    I will increase it. For now, with 40 epochs of data, the log-likelihood on the validation set is -0.29. I will try increasing it to 64 and 128 first.

--num-epochs 8 --use-gpu $use_gpu --stage $train_stage \
--num-jobs-initial 1 --num-jobs-final 5 \
--egs-dir $egs_dir \
$fvector_dir
fi
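
A hedged usage note: because the script sources utils/parse_options.sh, every variable set above that line can be overridden from the command line; the values shown here are the script's own defaults.

# Run from egs/mini_librispeech/s5/.
local/fvector/run_fvector.sh --stage 3 --train-stage -10 \
  --data data/train_clean_5 --noise-data data/noise \
  --egs-dir exp/fvector/egs --fvector-dir exp/fvector \
  --use-gpu true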
79 changes: 79 additions & 0 deletions egs/mini_librispeech/s5/local/fvector/run_fvector_separate.sh
@@ -0,0 +1,79 @@
#!/bin/bash

. ./cmd.sh
set -e

stage=5
train_stage=-10
data=data/train_clean_5
noise_data=data/noise
egs_dir=exp/fvector/egs
fvector_dir=exp/fvector
use_gpu=true

. ./path.sh
. ./cmd.sh
. ./utils/parse_options.sh

if [ $stage -le 3 ]; then
#dump egs
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $egs_dir/storage ]; then
utils/create_split_dir.pl \
/export/b{11,12,13}/$USER/kaldi-data/egs/minilibrispeech-$(date +'%m_%d_%H_%M')/s5/$egs_dir/storage $egs_dir/storage
fi

steps/nnet3/fvector/get_egs_separate.sh --cmd "$train_cmd" \
--nj 8 \
--stage 3 \
--egs-per-iter 100000 \
--egs-per-iter-diagnostic 10000 \
--num-diagnostic-percent 5 \
"$data" "$noise_data" "$egs_dir"
fi

if [ $stage -le 4 ]; then
#prepare configs
echo "$0: creating neural net configs using the xconfig parser";
#options
input_dim=400
num_filters=200

mkdir -p $fvector_dir/configs

cat <<EOF > $fvector_dir/configs/network.xconfig
input dim=$input_dim name=input
# Each eg contains 8 frames.  Do frequency-domain feature learning, and then
# use a TDNN to collapse the frames into a single vector.
preprocess-fft-abs-lognorm-affine-log-layer name=raw0 cos-transform-file=$fvector_dir/configs/cos_transform.mat sin-transform-file=$fvector_dir/configs/sin_transform.mat num-filters=$num_filters half-fft-range=true
conv-relu-batchnorm-layer name=cnn1 height-in=$num_filters height-out=$[$num_filters/2] time-offsets=-1,0,1 height-offsets=-1,0,1 num-filters-out=64 height-subsample-out=2 learning-rate-factor=0.34 max-change=0.25

relu-batchnorm-layer name=tdnn0 input=cnn1 dim=625
relu-batchnorm-layer name=tdnn1 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn2 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn3 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn4 input=Append(0,1) dim=625
output-layer name=output input=tdnn4 dim=200 include-log-softmax=False param-stddev=0.04 bias-stddev=1.0
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $fvector_dir/configs/network.xconfig --config-dir $fvector_dir/configs/
# Modify the final.config and generate sin.mat/cos.mat manually
python local/fvector/add_output_node.py --input-dim 400 --output-dim 200 --config-file $fvector_dir/configs/final.config
python local/fvector/generate_sin_cos_matrix.py \
--feat-dim 400 --dir $fvector_dir
fi

if [ $stage -le 5 ]; then
#training
steps/nnet3/xvector/train_separate.sh --cmd "$train_cmd" \
--initial-effective-lrate 0.002 \
--final-effective-lrate 0.0002 \
--max-param-change 0.2 \
--minibatch-size 16 \
--left-padding 1 \
--right-padding 1 \
--max-snr 20 \
--min-snr 10 \
--num-epochs 8 --use-gpu $use_gpu --stage $train_stage \
--num-jobs-initial 1 --num-jobs-final 3 \
Owner commented:

    Did you try increasing num-jobs-final?

Owner commented:

    I am not sure what the difference is among your training scripts. Also, "_separate" is not a good name for a script; use a more informative suffix.

Author replied:

    num-jobs-final is limited by the number of egs. Since mini_librispeech is small, it only has 4 egs, so for now I use num-jobs-final=4.
    I will think about a better suffix name.
--egs-dir $egs_dir \
$fvector_dir
fi