#!/bin/bash

# Copyright 2018 Tien-Hong Lo

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# Script for system combination using the outputs of several neural networks.
# This calls nnet3-compute, matrix-sum and latgen-faster-mapped to create a
# system combination.
set -euo pipefail

# begin configuration section.
cmd=run.pl

# Neural network options.
stage=0
iter=final                   # model name to decode with, i.e. $srcdir/$iter.mdl
nj=30                        # number of parallel jobs
output_name="output"         # name of the nnet3 output node to read posteriors from
ivector_scale=1.0
apply_exp=false              # Apply exp, i.e. write likelihoods instead of log-likelihoods.
compress=false               # Specifies whether the output should be compressed before
                             # dumping to disk.
use_gpu=false
skip_diagnostics=false
extra_left_context=0
extra_right_context=0
extra_left_context_initial=-1
extra_right_context_final=-1
online_ivector_dir=
frame_subsampling_factor=1
frames_per_chunk=150
average=true                 # if true, matrix-sum averages (rather than sums) the outputs

# Decode options.
beam=15.0                    # prune the lattices prior to MBR decoding, for speed.
max_active=7000
min_active=200
acwt=0.1                     # Just a default value, used for adaptation and beam-pruning.
post_decode_acwt=1.0         # can be used in 'chain' systems to scale acoustics by 10 so
                             # the regular scoring script works.
lattice_beam=8.0             # Beam we use in lattice generation.
num_threads=1                # if >1, will use latgen-faster-mapped-parallel
min_lmwt=5
max_lmwt=15
parallel_opts="--num-threads 3"
scoring_opts=
minimize=false
skip_scoring=false

word_determinize=false       # If set to true, the output lattice does not retain
                             # alternate paths for a sequence of words (with alternate
                             # pronunciations).  Setting it to true is the default in
                             # steps/nnet3/decode.sh.  However, setting this to false
                             # is useful for generating semi-supervised training
                             # supervision and frame-level confidences.
write_compact=true           # If set to false, writes the lattice in non-compact format,
                             # retaining the acoustic scores on each arc.  This is
                             # required to be false for LM rescoring of undeterminized
                             # lattices (when --word-determinize is false).

# Safe defaults for variables that are only assigned conditionally later on;
# without these, 'set -u' aborts with "unbound variable" when they are
# expanded (e.g. when --online-ivector-dir is not given, or --iter is empty).
extra_files=
ivector_opts=
iter_opt=
# end configuration section.
| 72 | + |
if [ -f ./path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;


if [ $# -lt 5 ]; then
  echo "Usage: $0 [options] <data-dir> <graph-dir> <nnet3-dir> <nnet3-dir2> [<nnet3-dir3> ... ] <output-dir>"
  echo "e.g.: local/socal/score_fusion.sh --nj 8 \\"
  echo "--online-ivector-dir exp/nnet3/ivectors_test_eval92 \\"
  echo " data/test_eval92_hires exp/nnet3/tdnn/graph exp/nnet3/tdnn/output exp/nnet3/tdnn1/output .. \\"
  echo " exp/nnet3/tdnn_comb/decode_dev"
  echo "main options (for others, see top of script file)"
  echo " --config <config-file> # config containing options"
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd <cmd> # Command to run in parallel with"
  echo " --iter <iter> # Iteration of model to decode; default is final."
  exit 1;
fi

# Log the full command line for reproducibility.
echo "$0 $*"

data=$1
graphdir=$2
dir=${!#}                     # last argument to the script: the output dir
shift 2
# Remaining arguments minus the trailing output dir: the systems to combine.
# Quoted slice keeps paths containing spaces intact (the old unquoted
# 'model_dirs=( $@ )' word-split them).
model_dirs=("${@:1:$#-1}")
num_sys=${#model_dirs[@]}     # number of systems to combine
| 100 | + |
# Sanity-check the graph directory.
for f in $graphdir/words.txt $graphdir/phones/word_boundary.int; do
  if [ ! -f "$f" ]; then
    echo "$0: file $f does not exist"
    exit 1
  fi
done

# i-vector options: only set when an online i-vector dir was supplied.
# Initialized empty so later unconditional expansions survive 'set -u'.
extra_files=
ivector_opts=
if [ -n "$online_ivector_dir" ]; then
  extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
  ivector_period=$(cat "$online_ivector_dir/ivector_period") || exit 1
  ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period"
fi

frame_subsampling_opt=
if [ "$frame_subsampling_factor" -ne 1 ]; then
  # e.g. for 'chain' systems
  frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor"
fi

# Convert $dir to an absolute pathname.
# NOTE(review): fdir is not referenced later in this script — possibly kept
# for parity with steps/nnet3/decode.sh; confirm before removing.
fdir=$(perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' "$dir" "$PWD")

# Possibly use the multi-threaded decoder.
thread_string=
if [ "$num_threads" -gt 1 ]; then
  thread_string="-parallel --num-threads=$num_threads"
fi

mkdir -p $dir/temp
| 127 | + |
# Build, for each system, the nnet3-compute command (as an rspecifier) that
# produces its per-frame output matrices; also verify the systems are
# combinable (same tree, same frame-subsampling factor).
for ((i = 0; i < num_sys; i++)); do
  srcdir=${model_dirs[$i]}

  model=$srcdir/$iter.mdl
  if [ ! -f "$model" ]; then
    # (fixed: the old message referred to a non-existent $iter.raw)
    echo "$0: Error: no such file $model."
    exit 1
  fi

  # Check that all systems share the same tree: identical transition models
  # imply identical trees.  cmp -s compares the dumps byte-for-byte and is
  # robust, unlike parsing 'diff -q' output with awk.
  show-transitions $graphdir/phones.txt "$model" > $dir/temp/transition.${i}.txt
  if ! cmp -s $dir/temp/transition.0.txt $dir/temp/transition.${i}.txt; then
    echo "$0: the tree must be the same for all systems."
    exit 1   # fixed: was 'exit 0', which reported success on a fatal mismatch
  fi

  # Check that all systems share the same frame-subsampling-factor.
  if [ "$frame_subsampling_factor" -ne "$(cat $srcdir/frame_subsampling_factor)" ]; then
    echo "$0: frame_subsampling_factor must be the same for all systems."
    exit 1   # fixed: was 'exit 0'
  fi

  # $extra_files is deliberately unquoted: it is a space-separated list.
  for f in $data/feats.scp "$model" ${extra_files:-}; do
    if [ ! -f "$f" ]; then
      echo "$0: no such file $f"
      exit 1
    fi
  done

  if [ -n "$output_name" ] && [ "$output_name" != "output" ]; then
    echo "$0: Using output-name $output_name"
    model="nnet3-copy --edits='remove-output-nodes name=output;rename-node old-name=$output_name new-name=output' $model - |"
  fi

  ## Set up features.
  if [ -f $srcdir/final.mat ]; then
    echo "$0: Error: lda feature type is no longer supported."
    exit 1
  fi

  sdata=$data/split$nj
  cmvn_opts=$(cat $srcdir/cmvn_opts) || exit 1

  feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"

  # Post-process the nnet output: exponentiate (likelihoods) or just
  # optionally compress before it is consumed downstream.
  if $apply_exp; then
    output_wspecifier="ark:| copy-matrix --apply-exp ark:- ark:-"
  else
    output_wspecifier="ark:| copy-feats --compress=$compress ark:- ark:-"
  fi

  gpu_opt="--use-gpu=no"
  gpu_queue_opt=   # NOTE(review): set below but not passed to $cmd — confirm intent
  if $use_gpu; then
    gpu_queue_opt="--gpu 1"
    gpu_opt="--use-gpu=yes"
  fi

  echo "$i $model"
  models[$i]="ark,s,cs:nnet3-compute $gpu_opt ${ivector_opts:-} $frame_subsampling_opt \
      --frames-per-chunk=$frames_per_chunk \
      --extra-left-context=$extra_left_context \
      --extra-right-context=$extra_right_context \
      --extra-left-context-initial=$extra_left_context_initial \
      --extra-right-context-final=$extra_right_context_final \
      '$model' '$feats' '$output_wspecifier' |"
done
| 192 | + |
# Remove the temp dir used only for the tree-identity check above.
rm -rf $dir/temp

# Split data into nj jobs (skipped if an up-to-date split already exists).
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs


# All nnets are assumed (and checked above) to share the same tree and
# frame-subsampling factor, so any one model can serve for decoding.
mkdir -p $dir/log

if [ -f "$model" ]; then
  echo "$0: $model exists, copy model to $dir/../"
  cp "$model" $dir/../
fi

if [ -f $srcdir/frame_shift ]; then
  cp $srcdir/frame_shift $dir/../
  echo "$0: $srcdir/frame_shift exists, copy $srcdir/frame_shift to $dir/../"
elif [ -f $srcdir/frame_subsampling_factor ]; then
  cp $srcdir/frame_subsampling_factor $dir/../
  echo "$0: $srcdir/frame_subsampling_factor exists, copy $srcdir/frame_subsampling_factor to $dir/../"
fi

# Build the lattice write-specifier: optionally phone-determinize (for
# non-compact lattices) and optionally rescale acoustics before gzipping.
lat_wspecifier="ark:|"
extra_opts=
if ! $write_compact; then
  extra_opts="--determinize-lattice=false"
  lat_wspecifier="ark:| lattice-determinize-phone-pruned --beam=$lattice_beam --acoustic-scale=$acwt --minimize=$minimize --word-determinize=$word_determinize --write-compact=false $model ark:- ark:- |"
fi

if [ "$post_decode_acwt" == 1.0 ]; then
  lat_wspecifier="$lat_wspecifier gzip -c >$dir/lat.JOB.gz"
else
  lat_wspecifier="$lat_wspecifier lattice-scale --acoustic-scale=$post_decode_acwt --write-compact=$write_compact ark:- ark:- | gzip -c >$dir/lat.JOB.gz"
fi


if [ $stage -le 0 ]; then
  # Sum/average the per-system nnet outputs and decode the combined scores.
  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
    matrix-sum --average=$average "${models[@]}" ark:- \| \
    latgen-faster-mapped$thread_string --lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=true \
      --minimize=$minimize --max-active=$max_active --min-active=$min_active --beam=$beam \
      --word-symbol-table=$graphdir/words.txt ${extra_opts} "$model" \
      $graphdir/HCLG.fst ark:- "$lat_wspecifier"
fi

if [ $stage -le 1 ]; then
  if ! $skip_diagnostics; then
    iter_opt=
    if [ -n "$iter" ]; then iter_opt="--iter $iter"; fi
    steps/diagnostic/analyze_lats.sh --cmd "$cmd" $iter_opt $graphdir $dir
  fi
fi

if ! $skip_scoring; then
  if [ $stage -le 2 ]; then
    if [ ! -x local/score.sh ]; then
      echo "Not scoring because local/score.sh does not exist or not executable."
      exit 1
    fi
    echo "score best paths"
    # Append rather than overwrite, so a caller-supplied --scoring-opts
    # survives (the old code clobbered it).
    scoring_opts="$scoring_opts --min_lmwt $min_lmwt"
    local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
    echo "score confidence and timing with sclite"
  fi
fi


exit 0
| 262 | + |