@@ -10,6 +10,13 @@ word_ins_penalty=0.5
# Range of language-model weights swept when producing CTMs; the scoring
# stage runs one job per value (LMWT=$min_lmwt:$max_lmwt).
min_lmwt=7
max_lmwt=17
# Model file; empty by default.
model=
resolve_overlaps=false  # If true, the words decoded in the regions where
                        # two segments A and B are overlapping are resolved
                        # such that only words before the mid-point of the
                        # overlapping region are taken to be hypothesized for
                        # segment A, and only words after the mid-point of
                        # the overlapping region are taken to be
                        # hypothesized for segment B.

# end configuration section.

@@ -43,10 +50,22 @@ for f in $lang/words.txt $model $data/segments $data/reco2file_and_channel $dir/
4350 [ ! -f $f ] && echo " $0 : expecting file $f to exist" && exit 1;
4451done
4552
# If the parent of $dir records a frame-subsampling factor, pass a matching
# --frame-shift to lattice-to-ctm-conf. When the file is absent,
# frame_shift_opt is left unset and expands to nothing in the pipeline.
if [ -f "$dir/../frame_subsampling_factor" ]; then
  factor=$(cat "$dir/../frame_subsampling_factor") || exit 1
  # Compute 0.01 * factor numerically. Gluing the digits onto "0.0" only
  # gives the right value for single-digit factors (10 would become 0.010).
  frame_shift=$(awk -v f="$factor" 'BEGIN { printf "%g", 0.01 * f }') || exit 1
  frame_shift_opt="--frame-shift=$frame_shift"
  echo "$0: $dir/../frame_subsampling_factor exists, using $frame_shift_opt"
fi
58+
# Basename of the data directory; used to name the per-LMWT CTM files.
name=$(basename "$data")  # e.g. eval2000

# Directory that receives the per-LMWT scoring logs.
mkdir -p "$dir/scoring/log"
4962
# Filter spliced into the CTM pipeline between int2sym.pl and convert_ctm.pl.
# The default is a plain pass-through.
resolve_overlaps_cmd="cat"

# Compare as a string instead of executing the value: an empty or unset
# $resolve_overlaps would expand to an empty command, which succeeds and
# would silently switch overlap resolution on.
if [ "$resolve_overlaps" = true ]; then
  resolve_overlaps_cmd="utils/ctm/resolve_ctm_overlaps.py $data/segments - -"
fi
68+
5069if [ $stage -le 0 ]; then
5170 if [ ! -f $lang /phones/word_boundary.int ] ; then
5271 $cmd LMWT=$min_lmwt :$max_lmwt $dir /scoring/log/get_ctm.LMWT.log \
@@ -56,8 +75,9 @@ if [ $stage -le 0 ]; then
5675 lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
5776 lattice-prune --beam=$beam ark:- ark:- \| \
5877 lattice-align-words-lexicon $lang /phones/align_lexicon.int $model ark:- ark:- \| \
59- lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \
78+ lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr ark:- - \| \
6079 utils/int2sym.pl -f 5 $lang /words.txt \| tee $dir /score_LMWT/$name .utt.ctm \| \
80+ $resolve_overlaps_cmd \| \
6181 utils/convert_ctm.pl $data /segments $data /reco2file_and_channel \
6282 ' >' $dir /score_LMWT/$name .ctm || exit 1;
6383 else
@@ -68,8 +88,9 @@ if [ $stage -le 0 ]; then
6888 lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
6989 lattice-prune --beam=$beam ark:- ark:- \| \
7090 lattice-align-words $lang /phones/word_boundary.int $model ark:- ark:- \| \
71- lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \
91+ lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr ark:- - \| \
7292 utils/int2sym.pl -f 5 $lang /words.txt \| tee $dir /score_LMWT/$name .utt.ctm \| \
93+ $resolve_overlaps_cmd \| \
7394 utils/convert_ctm.pl $data /segments $data /reco2file_and_channel \
7495 ' >' $dir /score_LMWT/$name .ctm || exit 1;
7596 fi
0 commit comments