-
Notifications
You must be signed in to change notification settings - Fork 1
Fvector #17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Fvector #17
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| # Default configuration | ||
| command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -l 'hostname=!a08*&!a09*&!a10*&!c04*&!b18*&!b19*&!b20*' | ||
| option mem=* -l mem_free=$0,ram_free=$0 | ||
| option mem=0 # Do not add anything to qsub_opts | ||
| option num_threads=* -pe smp $0 | ||
| option num_threads=1 # Do not add anything to qsub_opts | ||
| option max_jobs_run=* -tc $0 | ||
| default gpu=0 | ||
| option gpu=0 -q all.q | ||
| option gpu=* -l gpu=$0 -q g.q | ||
| default allow_k20=true | ||
| option allow_k20=true | ||
| option allow_k20=false -l 'hostname=!g01*&!g02*&!b06*' | ||
| default allow_k10_k20=true | ||
| option allow_k10_k20=true | ||
| option allow_k10_k20=false -l 'hostname=!b0*&!b10*&!g01*&!g02' |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| #!/usr/bin/env/python | ||
|
|
||
| from __future__ import print_function | ||
| import argparse | ||
| import logging | ||
| import os | ||
| import pprint | ||
| import sys | ||
| import shutil | ||
| import traceback | ||
|
|
||
def get_args():
    """Parse command-line options for add_output_node.py.

    Returns:
        argparse.Namespace with attributes: input_dim (int),
        output_dim (int), s_scale (float), b_scale (float),
        config_file (str).
    """
    parser = argparse.ArgumentParser(
        description="Add the S and b output node "
        "which is used in plda object function.",
        epilog="Called by local/fvector/run_fvector.sh")
    parser.add_argument("--input-dim", type=int, required=True,
                        help="The input dimension of fvector network.")
    parser.add_argument("--output-dim", type=int, required=True,
                        help="The output dimension of fvector network which is used to "
                        "compute the dimension of S matrix.")
    parser.add_argument("--s-scale", type=float, default=0.2,
                        help="Scaling factor on the output 's' (s is a symmetric matrix "
                        "used for scoring).")
    parser.add_argument("--b-scale", type=float, default=0.2,
                        help="Scaling factor on output 'b' (b is a scalar offset used in scoring).")
    parser.add_argument("--config-file", type=str, required=True,
                        help="The file to be modified; usually configs/final.config.")

    # Echo the invocation to stderr so it appears in experiment logs.
    print(' '.join(sys.argv), file=sys.stderr)
    print(sys.argv, file=sys.stderr)
    args = parser.parse_args()
    return args
|
|
||
|
|
||
| def main(): | ||
| args = get_args() | ||
|
|
||
| f = open(args.config_file, "a") | ||
| # The s output | ||
| s_dim = (args.output_dim) * (args.output_dim+1) / 2 | ||
|
|
||
| print('component name=x-s type=ConstantFunctionComponent input-dim={0} output-dim={1} ' | ||
| 'output-mean=0 output-stddev=0 '.format( | ||
| args.input_dim, s_dim), file=f) | ||
| print('component-node name=x-s component=x-s input=IfDefined(input)', | ||
| file=f) | ||
| print('component name=x-s-scale type=FixedScaleComponent dim={0} scale={1}'.format( | ||
| s_dim, args.s_scale), file=f); | ||
| print('component-node name=x-s-scale component=x-s-scale input=x-s', | ||
| file=f) | ||
| print('output-node name=s input=x-s-scale', file=f) | ||
|
|
||
| # now the 'b' output, which is just a scalar. | ||
| b_dim = 1 | ||
| print('component name=x-b type=ConstantFunctionComponent input-dim={0} output-dim=1 ' | ||
| 'output-mean=0 output-stddev=0 '.format(args.input_dim), file=f) | ||
| print('component-node name=x-b component=x-b input=IfDefined(input)', file=f) | ||
| print('component name=x-b-scale type=FixedScaleComponent dim=1 scale={0}'.format( | ||
| args.b_scale), file=f); | ||
| print('component-node name=x-b-scale component=x-b-scale input=x-b', | ||
| file=f) | ||
| print('output-node name=b input=x-b-scale', file=f) | ||
| f.close() | ||
|
|
||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| main() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| #!/usr/bin/env/python | ||
|
|
||
| from __future__ import print_function | ||
| import argparse | ||
| import logging | ||
| import os | ||
| import pprint | ||
| import shutil | ||
| import sys | ||
| import traceback | ||
|
|
||
| sys.path.insert(0, 'steps') | ||
| import libs.common as common_lib | ||
|
|
||
def get_args():
    """Parse command-line options for generate_sin_cos_matrix.py.

    The --add-bias and --half-range options arrive as the strings "True" or
    "False"; they are converted to real booleans here, because the string
    "False" is truthy in Python and would otherwise silently enable the
    option downstream.

    Returns:
        argparse.Namespace with attributes: feat_dim (int),
        add_bias (bool), half_range (bool), dir (str).
    """
    parser = argparse.ArgumentParser(
        description="Generate sine_transform.mat "
        "and cosine_transform.mat for frequency domain raw waveform setup.",
        epilog="Called by local/fvector/run_fvector.sh")
    parser.add_argument("--feat-dim", type=int, required=True,
                        help="The dimension of input.")
    parser.add_argument("--add-bias", type=str,
                        help="If true, add a column for fft matrix.",
                        default="True", choices=["True", "False"])
    parser.add_argument("--half-range", type=str,
                        help="If true, generate half fft matrix.",
                        default="True", choices=["True", "False"])
    parser.add_argument("--dir", type=str, required=True,
                        help="The output directory.")

    # Echo the invocation to stderr so it appears in experiment logs.
    print(' '.join(sys.argv), file=sys.stderr)
    print(sys.argv, file=sys.stderr)
    args = parser.parse_args()
    # Convert the string flags into genuine booleans.
    args.add_bias = args.add_bias == "True"
    args.half_range = args.half_range == "True"
    return args
|
|
||
|
|
||
def main():
    """Write the cosine and sine DFT transform matrices under
    <dir>/configs/."""
    args = get_args()

    feat_dim = args.feat_dim
    # Number of FFT bins: the smallest power of two >= feat_dim.
    num_fft_bins = 2 ** (args.feat_dim - 1).bit_length()
    # Accept either real booleans or the strings "True"/"False" so this
    # works whether or not get_args() already converted the flags; the raw
    # string "False" is truthy and must not be passed through unchanged.
    add_bias = str(args.add_bias) == "True"
    half_range = str(args.half_range) == "True"

    common_lib.write_sin_cos_transform_matrix(
        feat_dim, num_fft_bins,
        "{0}/configs/cos_transform.mat".format(args.dir),
        compute_cosine=True, add_bias=add_bias, half_range=half_range)
    common_lib.write_sin_cos_transform_matrix(
        feat_dim, num_fft_bins,
        "{0}/configs/sin_transform.mat".format(args.dir),
        compute_cosine=False, add_bias=add_bias, half_range=half_range)


if __name__ == "__main__":
    main()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
#!/bin/bash

# Train an fvector extractor: dump training examples (egs), build the
# network config with the xconfig parser, then train with the xvector
# training script.  All variables below can be overridden on the command
# line via utils/parse_options.sh.

set -e

stage=3
train_stage=-10
data=data/train_clean_5
noise_data=data/noise
egs_dir=exp/fvector/egs
fvector_dir=exp/fvector
use_gpu=true

. ./path.sh
. ./cmd.sh              # defines $train_cmd (sourced once; duplicate sourcing removed)
. ./utils/parse_options.sh

if [ $stage -le 3 ]; then
  # Stage 3: dump egs.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $egs_dir/storage ]; then
    # On the CLSP grid, spread the egs over several file systems.
    utils/create_split_dir.pl \
      /export/b{11,12,13}/$USER/kaldi-data/egs/minilibrispeech-$(date +'%m_%d_%H_%M')/s5/$egs_dir/storage $egs_dir/storage
  fi

  steps/nnet3/fvector/get_egs.sh --cmd "$train_cmd" \
    --nj 8 \
    --stage 0 \
    --egs-per-iter 12500 \
    --egs-per-iter-diagnostic 10000 \
    --num-diagnostic-percent 5 \
    --frame-length 25 \
    --left-padding 1 \
    --right-padding 1 \
    "$data" "$noise_data" "$egs_dir"
fi

if [ $stage -le 4 ]; then
  # Stage 4: prepare configs.
  echo "$0: creating neural net configs using the xconfig parser"
  input_dim=400
  num_filters=100

  mkdir -p $fvector_dir/configs

  # Note: $((...)) replaces the deprecated $[...] arithmetic form.
  cat <<EOF > $fvector_dir/configs/network.xconfig
input dim=$input_dim name=input
# Each eg contains 8 frames, do Frequency-domain feature learning, and then
# use TDNN model split it into one vector
preprocess-fft-abs-lognorm-affine-log-layer name=raw0 cos-transform-file=$fvector_dir/configs/cos_transform.mat sin-transform-file=$fvector_dir/configs/sin_transform.mat num-filters=$num_filters half-fft-range=true
conv-relu-batchnorm-layer name=cnn1 height-in=$num_filters height-out=$((num_filters/2)) time-offsets=-1,0,1 height-offsets=-1,0,1 num-filters-out=64 height-subsample-out=2 learning-rate-factor=0.34 max-change=0.25

relu-batchnorm-layer name=tdnn0 input=cnn1 dim=625
relu-batchnorm-layer name=tdnn1 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn2 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn3 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn4 input=Append(0,1) dim=625
output-layer name=output input=tdnn4 dim=200 include-log-softmax=False param-stddev=0.04 bias-stddev=1.0
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file $fvector_dir/configs/network.xconfig --config-dir $fvector_dir/configs/
  # Append the 's'/'b' output nodes and generate the sin/cos DFT matrices.
  python local/fvector/add_output_node.py --input-dim 400 --output-dim 200 --config-file $fvector_dir/configs/final.config
  python local/fvector/generate_sin_cos_matrix.py \
    --feat-dim 400 --dir $fvector_dir
fi

if [ $stage -le 5 ]; then
  # Stage 5: train the network.
  steps/nnet3/xvector/train.sh --cmd "$train_cmd" \
    --initial-effective-lrate 0.002 \
    --final-effective-lrate 0.0002 \
    --max-param-change 0.2 \
    --minibatch-size 16 \
    --num-epochs 8 --use-gpu $use_gpu --stage $train_stage \
    --num-jobs-initial 1 --num-jobs-final 5 \
    --egs-dir $egs_dir \
    $fvector_dir
fi
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
#!/bin/bash

# Variant of run_fvector.sh that uses the "separate" egs-dumping and
# training scripts (get_egs_separate.sh / train_separate.sh).
# NOTE(review): "_separate" is not an informative suffix — consider
# renaming these scripts to describe what is actually different.

set -e

stage=5
train_stage=-10
data=data/train_clean_5
noise_data=data/noise
egs_dir=exp/fvector/egs
fvector_dir=exp/fvector
use_gpu=true

. ./path.sh
. ./cmd.sh              # defines $train_cmd (sourced once; duplicate sourcing removed)
. ./utils/parse_options.sh

if [ $stage -le 3 ]; then
  # Stage 3: dump egs.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $egs_dir/storage ]; then
    # On the CLSP grid, spread the egs over several file systems.
    utils/create_split_dir.pl \
      /export/b{11,12,13}/$USER/kaldi-data/egs/minilibrispeech-$(date +'%m_%d_%H_%M')/s5/$egs_dir/storage $egs_dir/storage
  fi

  steps/nnet3/fvector/get_egs_separate.sh --cmd "$train_cmd" \
    --nj 8 \
    --stage 3 \
    --egs-per-iter 100000 \
    --egs-per-iter-diagnostic 10000 \
    --num-diagnostic-percent 5 \
    "$data" "$noise_data" "$egs_dir"
fi

if [ $stage -le 4 ]; then
  # Stage 4: prepare configs.
  echo "$0: creating neural net configs using the xconfig parser"
  input_dim=400
  num_filters=200

  mkdir -p $fvector_dir/configs

  # Note: $((...)) replaces the deprecated $[...] arithmetic form.
  cat <<EOF > $fvector_dir/configs/network.xconfig
input dim=$input_dim name=input
# Each eg contains 8 frames, do Frequency-domain feature learning, and then
# use TDNN model split it into one vector
preprocess-fft-abs-lognorm-affine-log-layer name=raw0 cos-transform-file=$fvector_dir/configs/cos_transform.mat sin-transform-file=$fvector_dir/configs/sin_transform.mat num-filters=$num_filters half-fft-range=true
conv-relu-batchnorm-layer name=cnn1 height-in=$num_filters height-out=$((num_filters/2)) time-offsets=-1,0,1 height-offsets=-1,0,1 num-filters-out=64 height-subsample-out=2 learning-rate-factor=0.34 max-change=0.25

relu-batchnorm-layer name=tdnn0 input=cnn1 dim=625
relu-batchnorm-layer name=tdnn1 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn2 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn3 input=Append(0,1,2) dim=625
relu-batchnorm-layer name=tdnn4 input=Append(0,1) dim=625
output-layer name=output input=tdnn4 dim=200 include-log-softmax=False param-stddev=0.04 bias-stddev=1.0
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file $fvector_dir/configs/network.xconfig --config-dir $fvector_dir/configs/
  # Append the 's'/'b' output nodes and generate the sin/cos DFT matrices.
  python local/fvector/add_output_node.py --input-dim 400 --output-dim 200 --config-file $fvector_dir/configs/final.config
  python local/fvector/generate_sin_cos_matrix.py \
    --feat-dim 400 --dir $fvector_dir
fi

if [ $stage -le 5 ]; then
  # Stage 5: train the network.
  # NOTE(review): --num-jobs-final is limited by the number of egs
  # archives; on minilibrispeech only ~4 archives are produced, so values
  # above that are ineffective.
  steps/nnet3/xvector/train_separate.sh --cmd "$train_cmd" \
    --initial-effective-lrate 0.002 \
    --final-effective-lrate 0.0002 \
    --max-param-change 0.2 \
    --minibatch-size 16 \
    --left-padding 1 \
    --right-padding 1 \
    --max-snr 20 \
    --min-snr 10 \
    --num-epochs 8 --use-gpu $use_gpu --stage $train_stage \
    --num-jobs-initial 1 --num-jobs-final 3 \
    --egs-dir $egs_dir \
    $fvector_dir
fi
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't it too small? Are you sure the training objective converges?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will increase it. For now, with 40 epochs of data, the log-likelihood on the validation set is -0.29. I will try increasing it to 64 and then 128 first.