-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Expand file tree
/
Copy pathwer.py
More file actions
201 lines (180 loc) · 6.2 KB
/
wer.py
File metadata and controls
201 lines (180 loc) · 6.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
"""WER print functions.
The functions here are used to print the computed statistics
with human-readable formatting.
They have a file argument, but you can also just use
contextlib.redirect_stdout, which may give a nicer syntax.
Authors
* Aku Rouhe 2020
"""
import sys
from speechbrain.utils import edit_distance
def print_wer_summary(wer_details, file=sys.stdout):
    """Prints out WER summary details in human-readable format.

    This function essentially mirrors the Kaldi compute-wer output format.

    Arguments
    ---------
    wer_details : dict
        Dict of wer summary details,
        see ``speechbrain.utils.edit_distance.wer_summary``
        for format.
    file : stream
        Where to write. (default: sys.stdout)
    """
    # Flag the WER line as partial when some reference sentences were
    # never scored (i.e. missing from the hypotheses).
    partial_note = (
        " [PARTIAL]"
        if wer_details["num_scored_sents"] < wer_details["num_ref_sents"]
        else ""
    )
    wer_line = "%WER {WER:.2f} [ {num_edits} / {num_scored_tokens}, {insertions} ins, {deletions} del, {substitutions} sub ]".format(  # noqa
        **wer_details
    )
    print(wer_line + partial_note, file=file)
    ser_line = "%SER {SER:.2f} [ {num_erroneous_sents} / {num_scored_sents} ]".format(
        **wer_details
    )
    print(ser_line, file=file)
    scored_line = "Scored {num_scored_sents} sentences, {num_absent_sents} not present in hyp.".format(  # noqa
        **wer_details
    )
    print(scored_line, file=file)
def print_alignments(
    details_by_utterance,
    file=sys.stdout,
    empty_symbol="<eps>",
    separator=" ; ",
    print_header=True,
    sample_separator=None,
):
    """Print WER summary and alignments.

    Arguments
    ---------
    details_by_utterance : list
        List of wer details by utterance,
        see ``speechbrain.utils.edit_distance.wer_details_by_utterance``
        for format. Has to have alignments included.
    file : stream
        Where to write. (default: sys.stdout)
    empty_symbol : str
        Symbol to use when aligning to nothing.
    separator : str
        String that separates each token in the output. Note the spaces in the
        default.
    print_header: bool
        Whether to print headers
    sample_separator: str
        A separator to put between samples (optional)
    """
    if print_header:
        _print_alignments_global_header(
            file=file, empty_symbol=empty_symbol, separator=separator
        )
    for utt_dets in details_by_utterance:
        # Skip utterances that were never scored (no hypothesis produced).
        if not utt_dets["scored"]:
            continue
        if print_header:
            _print_alignment_header(utt_dets, file=file)
        _print_alignment(
            utt_dets["alignment"],
            utt_dets["ref_tokens"],
            utt_dets["hyp_tokens"],
            file=file,
            empty_symbol=empty_symbol,
            separator=separator,
        )
        if sample_separator:
            print(sample_separator, file=file)
# The following internal functions are used to
# print out more specific things
def _print_top_wer_utts(top_non_empty, top_empty, file=sys.stdout):
    # Internal helper: list the worst-WER utterances, split by whether
    # the system produced any output for them.
    print("=" * 80, file=file)
    print("UTTERANCES WITH HIGHEST WER", file=file)
    if not top_non_empty:
        print("No utterances which had produced output!", file=file)
    else:
        print(
            "Non-empty hypotheses -- utterances for which output was produced:",
            file=file,
        )
        for utt_dets in top_non_empty:
            print("{key} %WER {WER:.2f}".format(**utt_dets), file=file)
    if not top_empty:
        print("No utterances which had not produced output!", file=file)
    else:
        print(
            "Empty hypotheses -- utterances for which no output was produced:",
            file=file,
        )
        for utt_dets in top_empty:
            print("{key} %WER {WER:.2f}".format(**utt_dets), file=file)
def _print_top_wer_spks(spks_by_wer, file=sys.stdout):
    # Internal helper: list the speakers with the highest WER, one per line.
    for banner_line in ("=" * 80, "SPEAKERS WITH HIGHEST WER"):
        print(banner_line, file=file)
    for spk_dets in spks_by_wer:
        print(f"{spk_dets['speaker']} %WER {spk_dets['WER']:.2f}", file=file)
def _print_alignment(
    alignment, a, b, empty_symbol="<eps>", separator=" ; ", file=sys.stdout
):
    # Internal helper: render one alignment as three rows (Ref / op / Hyp),
    # with each column centered to a common width.
    ref_row = []
    op_row = []
    hyp_row = []
    for op, i, j in alignment:  # i indexes a, j indexes b
        op_str = str(op)
        ref_str = empty_symbol if i is None else str(a[i])
        hyp_str = empty_symbol if j is None else str(b[j])
        # NOTE: the padding does not actually compute printed length,
        # but hopefully we can assume that printed length is
        # at most the str len
        width = max(len(op_str), len(ref_str), len(hyp_str))
        ref_row.append(ref_str.center(width))
        op_row.append(op_str.center(width))
        hyp_row.append(hyp_str.center(width))
    # Print in the order Ref, op, Hyp
    for row in (ref_row, op_row, hyp_row):
        print(separator.join(row), file=file)
def _print_alignments_global_header(
    empty_symbol="<eps>", separator=" ; ", file=sys.stdout
):
    # Internal helper: print a banner explaining the alignment output
    # layout, illustrated by running the real alignment printer on a
    # small artificial example.
    for banner_line in (
        "=" * 80,
        "ALIGNMENTS",
        "",
        "Format:",
        "<utterance-id>, WER DETAILS",
    ):
        print(banner_line, file=file)
    ref_tokens = ["reference", "on", "the", "first", "line"]
    hyp_tokens = ["and", "hypothesis", "on", "the", "third"]
    symbols = edit_distance.EDIT_SYMBOLS
    demo_alignment = [
        (symbols["ins"], None, 0),
        (symbols["sub"], 0, 1),
        (symbols["eq"], 1, 2),
        (symbols["eq"], 2, 3),
        (symbols["sub"], 3, 4),
        (symbols["del"], 4, None),
    ]
    _print_alignment(
        demo_alignment,
        ref_tokens,
        hyp_tokens,
        file=file,
        empty_symbol=empty_symbol,
        separator=separator,
    )
def _print_alignment_header(wer_details, file=sys.stdout):
    # Internal helper: per-utterance banner showing the utterance id and
    # its WER breakdown (edits over reference tokens, ins/del/sub counts).
    header_format = (
        "{key}, %WER {WER:.2f} [ {num_edits} / {num_ref_tokens}, "
        "{insertions} ins, {deletions} del, {substitutions} sub ]"
    )
    print("=" * 80, file=file)
    print(header_format.format(**wer_details), file=file)