|
| 1 | +#!/bin/bash |
| 2 | + |
| 3 | +# Copyright 2012 Johns Hopkins University (author: Daniel Povey) |
| 4 | +# 2015 Guoguo Chen |
| 5 | +# 2017 Hainan Xu |
| 6 | +# 2017 Xiaohui Zhang |
| 7 | + |
| 8 | +# This script trains a backward LM on the swbd and Fisher LM-training data and uses it |
| 9 | +# to rescore either decoded lattices or lattices that have already been rescored with |
| 10 | +# a forward RNNLM. To run it, you must first run the forward RNNLM |
| 11 | +# recipe at local/rnnlm/run_tdnn_lstm.sh |
| 12 | + |
| 13 | +# rnnlm/train_rnnlm.sh: best iteration (out of 35) was 34, linking it to final iteration. |
| 14 | +# rnnlm/train_rnnlm.sh: train/dev perplexity was 41.8 / 55.1. |
| 15 | +# Train objf: -5.18 -4.46 -4.26 -4.18 -4.12 -4.07 -4.04 -4.00 -3.99 -3.98 -3.95 -3.93 -3.91 -3.90 -3.88 -3.87 -3.86 -3.85 -3.83 -3.82 -3.82 -3.81 -3.79 -3.79 -3.78 -3.77 -3.76 -3.77 -3.75 -3.74 -3.74 -3.73 -3.72 -3.71 -3.71 |
| 16 | +# Dev objf: -10.32 -4.89 -4.57 -4.45 -4.37 -4.33 -4.29 -4.26 -4.24 -4.22 -4.18 -4.17 -4.15 -4.14 -4.13 -4.12 -4.11 -4.10 -4.09 -4.08 -4.07 -4.06 -4.06 -4.05 -4.05 -4.05 -4.04 -4.04 -4.03 -4.03 -4.02 -4.02 -4.02 -4.01 -4.01 |
| 17 | + |
| 18 | +# %WER 11.1 | 1831 21395 | 89.9 6.4 3.7 1.0 11.1 46.3 | exp/nnet3/tdnn_lstm_1a_adversarial0.3_epochs12_ld5_sp/decode_eval2000_sw1_fsh_fg_looped/score_13_0.0/eval2000_hires.ctm.swbd.filt.sys |
| 19 | +# %WER 9.9 | 1831 21395 | 91.0 5.8 3.2 0.9 9.9 43.2 | exp/nnet3/tdnn_lstm_1a_adversarial0.3_epochs12_ld5_sp/decode_eval2000_sw1_fsh_fg_looped_rnnlm_1e/score_11_0.0/eval2000_hires.ctm.swbd.filt.sys |
| 20 | +# %WER 9.5 | 1831 21395 | 91.4 5.5 3.1 0.9 9.5 42.5 | exp/nnet3/tdnn_lstm_1a_adversarial0.3_epochs12_ld5_sp/decode_eval2000_sw1_fsh_fg_looped_rnnlm_1e_back/score_11_0.0/eval2000_hires.ctm.swbd.filt.sys |
| 21 | + |
| 22 | +# %WER 15.9 | 4459 42989 | 85.7 9.7 4.6 1.6 15.9 51.6 | exp/nnet3/tdnn_lstm_1a_adversarial0.3_epochs12_ld5_sp/decode_eval2000_sw1_fsh_fg_looped/score_10_0.0/eval2000_hires.ctm.filt.sys |
| 23 | +# %WER 14.4 | 4459 42989 | 87.0 8.7 4.3 1.5 14.4 49.4 | exp/nnet3/tdnn_lstm_1a_adversarial0.3_epochs12_ld5_sp/decode_eval2000_sw1_fsh_fg_looped_rnnlm_1e/score_11_0.0/eval2000_hires.ctm.filt.sys |
| 24 | +# %WER 13.9 | 4459 42989 | 87.6 8.4 4.0 1.5 13.9 48.6 | exp/nnet3/tdnn_lstm_1a_adversarial0.3_epochs12_ld5_sp/decode_eval2000_sw1_fsh_fg_looped_rnnlm_1e_back/score_10_0.0/eval2000_hires.ctm.filt.sys |
| 25 | + |
| 26 | +# Begin configuration section (defaults set before utils/parse_options.sh is sourced below; presumably overridable from the command line). |
| 27 | + |
| 28 | +dir=exp/rnnlm_lstm_1e_backward |
| 29 | +embedding_dim=1024 |
| 30 | +lstm_rpd=256 |
| 31 | +lstm_nrpd=256 |
| 32 | +stage=-10 |
| 33 | +train_stage=-10 |
| 34 | + |
| 35 | +# variables for lattice rescoring (only read by the final rescoring stage) |
| 36 | +run_lat_rescore=true |
| 37 | +ac_model_dir=exp/nnet3/tdnn_lstm_1a_adversarial0.3_epochs12_ld5_sp |
| 38 | +decode_dir_suffix_forward=rnnlm_1e |
| 39 | +decode_dir_suffix_backward=rnnlm_1e_back |
| 40 | +ngram_order=4 # approximates lattice rescoring by limiting the maximum n-gram order: |
| 41 | + # when set, histories in the lattice that share the same n-gram |
| 42 | + # history are merged, which prevents the lattice from |
| 43 | + # exploding exponentially |
| 44 | + |
| 45 | +. ./cmd.sh |
| 46 | +. ./utils/parse_options.sh |
| 47 | + |
| 48 | +text=data/train_nodev/text |
| 49 | +fisher_text=data/local/lm/fisher/text1.gz |
| 50 | +lexicon=data/local/dict_nosp/lexiconp.txt |
| 51 | +text_dir=data/rnnlm/text_nosp_1e_back |
| 52 | +mkdir -p $dir/config |
| 53 | +set -e |
| 54 | + |
| 55 | +for f in $text $lexicon; do |
| 56 | + [ ! -f "$f" ] && \ |
| 57 | + echo "$0: expected file $f to exist" >&2 && exit 1 |
| 58 | +done |
| 59 | + |
| 60 | +if [ $stage -le 0 ]; then |
| 61 | + mkdir -p $text_dir |
| 62 | + echo -n >$text_dir/dev.txt |
| 63 | + # drop the first field of each line, reverse the word order (this is a backward LM), and hold out one in every 50 lines as dev data. |
| 64 | + cat $text | cut -d ' ' -f2- | awk '{for(i=NF;i>0;i--) printf("%s ", $i); print""}' | awk -v text_dir=$text_dir '{if(NR%50 == 0) { print >(text_dir"/dev.txt"); } else {print;}}' >$text_dir/swbd.txt |
| 65 | + cat > $dir/config/hesitation_mapping.txt <<EOF |
| 66 | +hmm hum |
| 67 | +mmm um |
| 68 | +mm um |
| 69 | +mhm um-hum |
| 70 | +EOF |
| 71 | + gunzip -c $fisher_text | awk 'NR==FNR{a[$1]=$2;next}{for (n=1;n<=NF;n++) if ($n in a) $n=a[$n];print $0}' \ |
| 72 | + $dir/config/hesitation_mapping.txt - | awk '{for(i=NF;i>0;i--) printf("%s ", $i); print""}' > $text_dir/fisher.txt |
| 73 | +fi |
| 74 | + |
| 75 | +if [ $stage -le 1 ]; then |
| 76 | + cp data/lang/words.txt $dir/config/ |
| 77 | + n=`cat $dir/config/words.txt | wc -l` |
| 78 | + echo "<brk> $n" >> $dir/config/words.txt |
| 79 | + |
| 80 | + # words that are not present in words.txt but are in the training or dev data will be |
| 81 | + # mapped to <unk> (the symbol written to oov.txt just below) during training. |
| 82 | + echo "<unk>" >$dir/config/oov.txt |
| 83 | + |
| 84 | + cat > $dir/config/data_weights.txt <<EOF |
| 85 | +swbd 3 1.0 |
| 86 | +fisher 1 1.0 |
| 87 | +EOF |
| 88 | + |
| 89 | + rnnlm/get_unigram_probs.py --vocab-file=$dir/config/words.txt \ |
| 90 | + --unk-word="<unk>" \ |
| 91 | + --data-weights-file=$dir/config/data_weights.txt \ |
| 92 | + $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt |
| 93 | + |
| 94 | + # choose features from the unigram probabilities and the vocabulary; the result is written to features.txt |
| 95 | + rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \ |
| 96 | + --use-constant-feature=true \ |
| 97 | + --special-words='<s>,</s>,<brk>,<unk>,[noise],[laughter],[vocalized-noise]' \ |
| 98 | + $dir/config/words.txt > $dir/config/features.txt |
| 99 | + |
| 100 | + cat >$dir/config/xconfig <<EOF |
| 101 | +input dim=$embedding_dim name=input |
| 102 | +relu-renorm-layer name=tdnn1 dim=$embedding_dim input=Append(0, IfDefined(-1)) |
| 103 | +fast-lstmp-layer name=lstm1 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd |
| 104 | +relu-renorm-layer name=tdnn2 dim=$embedding_dim input=Append(0, IfDefined(-3)) |
| 105 | +fast-lstmp-layer name=lstm2 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd |
| 106 | +relu-renorm-layer name=tdnn3 dim=$embedding_dim input=Append(0, IfDefined(-3)) |
| 107 | +output-layer name=output include-log-softmax=false dim=$embedding_dim |
| 108 | +EOF |
| 109 | + rnnlm/validate_config_dir.sh $text_dir $dir/config |
| 110 | +fi |
| 111 | + |
| 112 | +if [ $stage -le 2 ]; then |
| 113 | + rnnlm/prepare_rnnlm_dir.sh $text_dir $dir/config $dir |
| 114 | +fi |
| 115 | + |
| 116 | +if [ $stage -le 3 ]; then |
| 117 | + rnnlm/train_rnnlm.sh --num-jobs-initial 1 --num-jobs-final 3 \ |
| 118 | + --stage $train_stage --num-epochs 10 --cmd "$train_cmd" $dir |
| 119 | +fi |
| 120 | + |
| 121 | +LM=sw1_fsh_fg # using the 4-gram const arpa file as old lm |
| 122 | +if [ $stage -le 4 ] && $run_lat_rescore; then |
| 123 | + echo "$0: Perform lattice-rescoring on $ac_model_dir" |
| 124 | + |
| 125 | + for decode_set in eval2000; do |
| 126 | + decode_dir=${ac_model_dir}/decode_${decode_set}_${LM}_looped |
| 127 | + if [ ! -d ${decode_dir}_${decode_dir_suffix_forward}_0.45 ]; then |
| 128 | + echo "$0: Must run the forward recipe first at local/rnnlm/run_tdnn_lstm.sh (missing ${decode_dir}_${decode_dir_suffix_forward}_0.45)" |
| 129 | + exit 1 |
| 130 | + fi |
| 131 | + |
| 132 | + # Lattice rescoring: reads the forward-rescored directory checked above and writes the backward-rescored directory |
| 133 | + rnnlm/lmrescore_back.sh \ |
| 134 | + --cmd "$decode_cmd --mem 4G" \ |
| 135 | + --weight 0.45 --max-ngram-order $ngram_order \ |
| 136 | + data/lang_$LM $dir \ |
| 137 | + data/${decode_set}_hires ${decode_dir}_${decode_dir_suffix_forward}_0.45 \ |
| 138 | + ${decode_dir}_${decode_dir_suffix_backward}_0.45 |
| 139 | + done |
| 140 | +fi |
| 141 | + |
| 142 | +exit 0 |
0 commit comments