forked from kaldi-asr/kaldi
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathopenfst-1.4.1.patch
More file actions
153 lines (138 loc) · 5.48 KB
/
openfst-1.4.1.patch
File metadata and controls
153 lines (138 loc) · 5.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
--- a/src/include/fst/minimize.h
+++ b/src/include/fst/minimize.h
@@ -134,7 +134,14 @@ class CyclicMinimizer {
typedef typename A::Weight Weight;
typedef ReverseArc<A> RevA;
- CyclicMinimizer(const ExpandedFst<A>& fst) {
+ CyclicMinimizer(const ExpandedFst<A>& fst):
+ // tell the Partition data-member to expect multiple repeated
+ // calls to SplitOn with the same element if we are non-deterministic.
+ P_(fst.Properties(kIDeterministic, true) == 0) {
+ if(fst.Properties(kIDeterministic, true) == 0)
+ CHECK(Weight::Properties() & kIdempotent); // this minimization
+ // algorithm for non-deterministic FSTs can only work with idempotent
+ // semirings.
Initialize(fst);
Compute(fst);
}
@@ -315,7 +322,13 @@ class AcyclicMinimizer {
typedef typename A::StateId ClassId;
typedef typename A::Weight Weight;
- AcyclicMinimizer(const ExpandedFst<A>& fst) {
+ AcyclicMinimizer(const ExpandedFst<A>& fst):
+ // tell the Partition data-member to expect multiple repeated
+ // calls to SplitOn with the same element if we are non-deterministic.
+ partition_(fst.Properties(kIDeterministic, true) == 0) {
+ if(fst.Properties(kIDeterministic, true) == 0)
+ CHECK(Weight::Properties() & kIdempotent); // minimization for
+ // non-deterministic FSTs can only work with idempotent semirings.
Initialize(fst);
Refine(fst);
}
@@ -531,13 +544,7 @@ template <class A>
void Minimize(MutableFst<A>* fst,
MutableFst<A>* sfst = 0,
float delta = kDelta) {
- uint64 props = fst->Properties(kAcceptor | kIDeterministic|
- kWeighted | kUnweighted, true);
- if (!(props & kIDeterministic)) {
- FSTERROR() << "FST is not deterministic";
- fst->SetProperties(kError, kError);
- return;
- }
+ uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true);
if (!(props & kAcceptor)) { // weighted transducer
VectorFst< GallicArc<A, GALLIC_LEFT> > gfst;
--- a/src/include/fst/partition.h
+++ b/src/include/fst/partition.h
@@ -43,8 +43,8 @@ class Partition {
friend class PartitionIterator<T>;
struct Element {
- Element() : value(0), next(0), prev(0) {}
- Element(T v) : value(v), next(0), prev(0) {}
+ Element() : value(0), next(0), prev(0) {}
+ Element(T v) : value(v), next(0), prev(0) {}
T value;
Element* next;
@@ -52,9 +52,11 @@ class Partition {
};
public:
- Partition() {}
+ Partition(bool allow_repeated_split):
+ allow_repeated_split_(allow_repeated_split) {}
- Partition(T num_states) {
+ Partition(bool allow_repeated_split, T num_states):
+ allow_repeated_split_(allow_repeated_split) {
Initialize(num_states);
}
@@ -137,16 +139,16 @@ class Partition {
if (class_size_[class_id] == 1) return;
// first time class is split
- if (split_size_[class_id] == 0)
+ if (split_size_[class_id] == 0) {
visited_classes_.push_back(class_id);
-
+ class_split_[class_id] = classes_[class_id];
+ }
// increment size of split (set of element at head of chain)
split_size_[class_id]++;
// update split point
- if (class_split_[class_id] == 0)
- class_split_[class_id] = classes_[class_id];
- if (class_split_[class_id] == elements_[element_id])
+ if (class_split_[class_id] != 0
+ && class_split_[class_id] == elements_[element_id])
class_split_[class_id] = elements_[element_id]->next;
// move to head of chain in same class
@@ -157,24 +159,31 @@ class Partition {
// class indices of the newly created class. Returns the new_class id
// or -1 if no new class was created.
T SplitRefine(T class_id) {
+
+ Element* split_el = class_split_[class_id];
// only split if necessary
- if (class_size_[class_id] == split_size_[class_id]) {
- class_split_[class_id] = 0;
+ //if (class_size_[class_id] == split_size_[class_id]) {
+ if(split_el == NULL) { // we split on everything...
split_size_[class_id] = 0;
return -1;
} else {
-
T new_class = AddClass();
+
+ if(allow_repeated_split_) { // split_size_ is possibly
+ // inaccurate, so work it out exactly.
+ size_t split_count; Element *e;
+ for(split_count=0,e=classes_[class_id];
+ e != split_el; split_count++, e=e->next);
+ split_size_[class_id] = split_count;
+ }
size_t remainder = class_size_[class_id] - split_size_[class_id];
if (remainder < split_size_[class_id]) { // add smaller
- Element* split_el = class_split_[class_id];
classes_[new_class] = split_el;
+ split_el->prev->next = 0;
+ split_el->prev = 0;
class_size_[class_id] = split_size_[class_id];
class_size_[new_class] = remainder;
- split_el->prev->next = 0;
- split_el->prev = 0;
} else {
- Element* split_el = class_split_[class_id];
classes_[new_class] = classes_[class_id];
class_size_[class_id] = remainder;
class_size_[new_class] = split_size_[class_id];
@@ -245,10 +254,16 @@ class Partition {
vector<T> class_size_;
// size of split for each class
+ // in the nondeterministic case, split_size_ is actually an upper
+ // bound on the size of split for each class.
vector<T> split_size_;
// set of visited classes to be used in split refine
vector<T> visited_classes_;
+
+ // true if input fst was deterministic: we can make
+ // certain assumptions in this case that speed up the algorithm.
+ bool allow_repeated_split_;
};