@@ -4,6 +4,30 @@ class GuessSpan(Block):
44 """Block corefud.GuessSpan heuristically fills mention spans, while keeping mention.head"""
55
def process_coref_mention(self, mention):
    """Set ``mention.words`` to the subtree of ``mention.head`` plus gap-filling empty nodes.

    The span starts as ``mention.head.descendants(add_self=True)``.
    Empty nodes listed in ``mention.head.root.empty_nodes`` are then added
    when they lie within the span (empty nodes on the span boundary count
    as within) and either

    * one of their enhanced parents (``deps[i]['parent']``) is in the span, or
    * they lie strictly inside the span and both neighbouring non-empty
      nodes are in the span, i.e. the empty node is not sitting in a gap
      caused by non-empty nodes missing from the mention.

    The result is stored sorted; nothing is returned.
    """
    head = mention.head
    span = head.descendants(add_self=True)
    # TODO add heuristics from corefud.PrintMentions almost_forest=1

    # Bounds between which an empty node counts as "within the span".
    first, last = span[0], span[-1]
    if first.is_empty():
        lower = int(first.ord)
    else:
        lower = first.ord - 1
    upper = int(last.ord) + 1

    tree_root = head.root
    extra = []
    for enode in tree_root.empty_nodes:
        if enode in span:
            continue
        # Stops at the first empty node past the upper bound
        # (presumably empty_nodes is ordered by ord -- TODO confirm).
        if enode.ord > upper:
            break
        if enode.ord <= lower:
            continue

        # An enhanced parent inside the mention pulls the empty node in.
        if any(dep['parent'] in span for dep in enode.deps):
            extra.append(enode)
            continue

        # Otherwise accept it only strictly inside the span, and only if it
        # is not in a gap caused by non-empty nodes outside the mention.
        if lower + 1 < enode.ord < upper - 1:
            idx = int(enode.ord)
            before = tree_root.descendants[idx - 1]
            after = tree_root.descendants[idx]
            if before in span and after in span:
                extra.append(enode)

    mention.words = sorted(span + extra)
0 commit comments