2929
3030namespace kaldi {
3131
32- void GetEditsSingleHyp ( const std::string &hyp_rspecifier,
32+ void GetEditsSingleHyp ( const std::string &hyp_rspecifier,
3333 const std::string &ref_rspecifier,
3434 const std::string &mode,
3535 std::vector<std::pair<int32, int32> > & edit_word_per_hyp) {
36-
36+
3737 // Both text and integers are loaded as vector of strings,
3838 SequentialTokenVectorReader ref_reader (ref_rspecifier);
3939 RandomAccessTokenVectorReader hyp_reader (hyp_rspecifier);
4040 int32 num_words = 0 , word_errs = 0 , num_ins = 0 , num_del = 0 , num_sub = 0 ;
41-
41+
4242 // Main loop, store WER stats per hyp,
4343 for (; !ref_reader.Done (); ref_reader.Next ()) {
4444 std::string key = ref_reader.Key ();
@@ -54,36 +54,36 @@ void GetEditsSingleHyp( const std::string &hyp_rspecifier,
5454 hyp_sent = hyp_reader.Value (key);
5555 }
5656 num_words = ref_sent.size ();
57- word_errs = LevenshteinEditDistance (ref_sent, hyp_sent,
57+ word_errs = LevenshteinEditDistance (ref_sent, hyp_sent,
5858 &num_ins, &num_del, &num_sub);
5959 edit_word_per_hyp.push_back (std::pair<int32, int32>(word_errs, num_words));
6060 }
6161}
6262
63- void GetEditsDualHyp (const std::string &hyp_rspecifier,
64- const std::string &hyp_rspecifier2,
63+ void GetEditsDualHyp (const std::string &hyp_rspecifier,
64+ const std::string &hyp_rspecifier2,
6565 const std::string &ref_rspecifier,
6666 const std::string &mode,
6767 std::vector<std::pair<int32, int32> > & edit_word_per_hyp,
6868 std::vector<std::pair<int32, int32> > & edit_word_per_hyp2) {
69-
69+
7070 // Both text and integers are loaded as vector of strings,
7171 SequentialTokenVectorReader ref_reader (ref_rspecifier);
7272 RandomAccessTokenVectorReader hyp_reader (hyp_rspecifier);
7373 RandomAccessTokenVectorReader hyp_reader2 (hyp_rspecifier2);
74- int32 num_words = 0 , word_errs = 0 ,
74+ int32 num_words = 0 , word_errs = 0 ,
7575 num_ins = 0 , num_del = 0 , num_sub = 0 ;
76-
76+
7777 // Main loop, store WER stats per hyp,
7878 for (; !ref_reader.Done (); ref_reader.Next ()) {
7979 std::string key = ref_reader.Key ();
8080 const std::vector<std::string> &ref_sent = ref_reader.Value ();
8181 std::vector<std::string> hyp_sent, hyp_sent2;
82- if (mode == " strict" &&
82+ if (mode == " strict" &&
8383 (!hyp_reader.HasKey (key) || !hyp_reader2.HasKey (key))) {
8484 KALDI_ERR << " No hypothesis for key " << key << " in both transcripts "
8585 " comparison is not possible." ;
86- } else if (mode == " present" &&
86+ } else if (mode == " present" &&
8787 (!hyp_reader.HasKey (key) || !hyp_reader2.HasKey (key)))
8888 continue ;
8989
@@ -92,16 +92,16 @@ void GetEditsDualHyp(const std::string &hyp_rspecifier,
9292 // all mode, if a hypothesis is not present, consider as an error
9393 if (hyp_reader.HasKey (key)){
9494 hyp_sent = hyp_reader.Value (key);
95- word_errs = LevenshteinEditDistance (ref_sent, hyp_sent,
95+ word_errs = LevenshteinEditDistance (ref_sent, hyp_sent,
9696 &num_ins, &num_del, &num_sub);
97- }
97+ }
9898 else
9999 word_errs = num_words;
100100 edit_word_per_hyp.push_back (std::pair<int32, int32>(word_errs, num_words));
101101
102102 if (hyp_reader2.HasKey (key)){
103103 hyp_sent2 = hyp_reader2.Value (key);
104- word_errs = LevenshteinEditDistance (ref_sent, hyp_sent2,
104+ word_errs = LevenshteinEditDistance (ref_sent, hyp_sent2,
105105 &num_ins, &num_del, &num_sub);
106106 }
107107 else
@@ -112,7 +112,7 @@ void GetEditsDualHyp(const std::string &hyp_rspecifier,
112112
113113void GetBootstrapWERInterval (
114114 const std::vector<std::pair<int32, int32> > & edit_word_per_hyp,
115- int32 replications,
115+ int32 replications,
116116 BaseFloat *mean, BaseFloat *interval) {
117117 BaseFloat wer_accum = 0.0 , wer_mult_accum = 0.0 ;
118118
@@ -144,7 +144,7 @@ void GetBootstrapWERTwoSystemComparison(
144144 int32 word_errs = 0 ;
145145 for (int32 j = 0 ; j <= edit_word_per_hyp.size (); ++j) {
146146 int32 random_pos = kaldi::RandInt (0 , edit_word_per_hyp.size ());
147- word_errs += edit_word_per_hyp[random_pos].first -
147+ word_errs += edit_word_per_hyp[random_pos].first -
148148 edit_word_per_hyp2[random_pos].first ;
149149 }
150150 if (word_errs > 0 )
@@ -166,7 +166,7 @@ int main(int argc, char *argv[]) {
166166 " Take a reference and a transcription file, in integer or text format,\n "
167167 " and outputs overall WER statistics to standard output along with its\n "
168168 " confidence interval using the bootstrap methos of Bisani and Ney.\n "
169- " If a second transcription file corresponding to the same reference is\n "
169+ " If a second transcription file corresponding to the same reference is\n "
170170 " provided, a bootstrap comparison of the two transcription is performed\n "
171171 " to estimate the probability of improvement.\n "
172172 " \n "
@@ -185,7 +185,7 @@ int main(int argc, char *argv[]) {
185185 " \" strict\" means die if all in ref not also in hyp" );
186186
187187 int32 replications = 10000 ;
188- po.Register (" replications" , &replications,
188+ po.Register (" replications" , &replications,
189189 " Number of replications to compute the intervals" );
190190
191191 po.Read (argc, argv);
@@ -200,7 +200,7 @@ int main(int argc, char *argv[]) {
200200 std::string hyp2_rspecifier = (po.NumArgs () == 3 ?po.GetArg (3 ):" " );
201201
202202 if (mode != " strict" && mode != " present" && mode != " all" ) {
203- KALDI_ERR <<
203+ KALDI_ERR <<
204204 " --mode option invalid: expected \" present\" |\" all\" |\" strict\" , got "
205205 << mode;
206206 }
@@ -213,17 +213,17 @@ int main(int argc, char *argv[]) {
213213 GetEditsDualHyp (hyp_rspecifier, hyp2_rspecifier, ref_rspecifier, mode,
214214 edit_word_per_hyp, edit_word_per_hyp2);
215215
216- // Extract WER for a number of replications of the same size
216+ // Extract WER for a number of replications of the same size
217217 // as the hypothesis extracted
218- BaseFloat mean_wer = 0.0 , interval = 0.0 ,
219- mean_wer2 = 0.0 , interval2 = 0.0 ,
218+ BaseFloat mean_wer = 0.0 , interval = 0.0 ,
219+ mean_wer2 = 0.0 , interval2 = 0.0 ,
220220 p_improv = 0.0 ;
221221
222- GetBootstrapWERInterval (edit_word_per_hyp, replications,
222+ GetBootstrapWERInterval (edit_word_per_hyp, replications,
223223 &mean_wer, &interval);
224224
225225 if (!hyp2_rspecifier.empty ()) {
226- GetBootstrapWERInterval (edit_word_per_hyp2, replications,
226+ GetBootstrapWERInterval (edit_word_per_hyp2, replications,
227227 &mean_wer2, &interval2);
228228
229229 GetBootstrapWERTwoSystemComparison (edit_word_per_hyp, edit_word_per_hyp2,
@@ -233,18 +233,18 @@ int main(int argc, char *argv[]) {
233233 // Print the output,
234234 std::cout.precision (2 );
235235 std::cerr.precision (2 );
236- std::cout << " Set1: %WER " << std::fixed << 100 *mean_wer <<
237- " 95\% Conf Interval [ " << 100 *mean_wer-100 *interval <<
238- " , " << 100 *mean_wer+100 *interval << " ]" << ' \n ' ;
236+ std::cout << " Set1: %WER " << std::fixed << 100 *mean_wer <<
237+ " 95\% Conf Interval [ " << 100 *mean_wer-100 *interval <<
238+ " , " << 100 *mean_wer+100 *interval << " ]" << ' \n ' ;
239239
240240 if (!hyp2_rspecifier.empty ()) {
241- std::cout << " Set2: %WER " << std::fixed << 100 *mean_wer2 <<
242- " 95\% Conf Interval [ " << 100 *mean_wer2-100 *interval2 <<
241+ std::cout << " Set2: %WER " << std::fixed << 100 *mean_wer2 <<
242+ " 95\% Conf Interval [ " << 100 *mean_wer2-100 *interval2 <<
243243 " , " << 100 *mean_wer2+100 *interval2 << " ]" << ' \n ' ;
244244
245- std::cout << " Probability of Set2 improving Set1: " << std::fixed <<
245+ std::cout << " Probability of Set2 improving Set1: " << std::fixed <<
246246 100 *p_improv << ' \n ' ;
247- }
247+ }
248248
249249 return 0 ;
250250 } catch (const std::exception &e) {
0 commit comments