Skip to content

Commit 301b808

Browse files
committed
corefud.GuessSpan: add empty nodes that are causing gaps
1 parent 34aa19d commit 301b808

1 file changed

Lines changed: 26 additions & 2 deletions

File tree

udapi/block/corefud/guessspan.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,30 @@ class GuessSpan(Block):
44
"""Block corefud.GuessSpan heuristically fills mention spans, while keeping mention.head"""
55

66
def process_coref_mention(self, mention):
7-
mention.words = mention.head.descendants(add_self=True)
8-
# TODO add empty nodes that are causing gaps
7+
mwords = mention.head.descendants(add_self=True)
98
# TODO add heuristics from corefud.PrintMentions almost_forest=1
9+
10+
# Add empty nodes that are causing gaps.
11+
# A node "within the span" whose enhanced parent is in the mention
12+
# must be added to the mention as well.
13+
# "within the span" includes also empty nodes "on the boundary".
14+
# However, don't add empty nodes which are in a gap caused by non-empty nodes.
15+
to_add = []
16+
min_ord = int(mwords[0].ord) if mwords[0].is_empty() else mwords[0].ord - 1
17+
max_ord = int(mwords[-1].ord) + 1
18+
root = mention.head.root
19+
for empty in root.empty_nodes:
20+
if empty in mwords:
21+
continue
22+
if empty.ord > max_ord:
23+
break
24+
if empty.ord > min_ord:
25+
if any(enh['parent'] in mwords for enh in empty.deps):
26+
to_add.append(empty)
27+
elif empty.ord > min_ord + 1 and empty.ord < max_ord - 1:
28+
prev_nonempty = root.descendants[int(empty.ord) - 1]
29+
next_nonempty = root.descendants[int(empty.ord)]
30+
if prev_nonempty in mwords and next_nonempty in mwords:
31+
to_add.append(empty)
32+
#else: empty.misc['Mark'] = f'not_in_treelet_of_{mention.entity.eid}'
33+
mention.words = sorted(mwords + to_add)

0 commit comments

Comments
 (0)