Skip to content

Commit a9b6591

Browse files
kybersutr and Barbora Dohnalová authored
Add block for fixing inconsistent parentheses in mention spans (#121)
* add FixParentheses block * nicer code * typo --------- Co-authored-by: Barbora Dohnalová <barca.dohnalova@seznam.cz>
1 parent be5c58e commit a9b6591

1 file changed

Lines changed: 31 additions & 0 deletions

File tree

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from udapi.core.block import Block
2+
3+
4+
class FixParentheses(Block):
    """Repair mention spans that contain only one bracket of a pair.

    For each of the pairs ``()``, ``[]`` and ``{}`` (matched against node
    lemmas): if a mention contains the opening bracket but not the closing one
    (or the other way around) and the missing bracket is the immediate
    neighbour of the mention span, the bracket is added to the span.

    Args:
        mark: if true, ``Mark=1`` is stored in the MISC of every bracket node
            that gets added to a mention.
        **kwargs: passed unchanged to the ``Block`` constructor.
    """

    # (opening, closing) bracket lemmas handled by this block.
    _PAIRS = (('(', ')'), ('[', ']'), ('{', '}'))

    def __init__(self, mark=True, **kwargs):
        super().__init__(**kwargs)
        self.mark = mark

    def process_coref_mention(self, mention):
        """Extend `mention` by a neighbouring bracket that completes a pair.

        The span is extended to the right when only the opening bracket is
        inside the mention, and to the left when only the closing one is.
        Nothing happens when the mention has both or neither bracket of a
        pair, or when the missing bracket is not directly adjacent.
        """
        # Lemmas are collected once; a bracket added below always belongs to
        # the pair being processed, so it cannot affect the later pairs.
        span_lemmas = {word.lemma for word in mention.words}
        tree_lemmas = None  # built lazily, only if some pair is unbalanced

        for opening, closing in self._PAIRS:
            has_opening = opening in span_lemmas
            if has_opening == (closing in span_lemmas):
                continue  # both brackets present, or neither

            if tree_lemmas is None:
                tree_lemmas = {node.lemma for node in mention.head.root.descendants}

            if has_opening:
                missing, edge = closing, mention.words[-1]
            else:
                missing, edge = opening, mention.words[0]

            # Cheap pre-check: the missing bracket must occur in the tree at all.
            if missing not in tree_lemmas:
                continue
            # Skip edge words whose ord has a fractional part
            # (presumably empty nodes -- TODO confirm against udapi).
            if edge.ord != int(edge.ord):
                continue

            neighbour = edge.next_node if has_opening else edge.prev_node
            if not neighbour or neighbour.lemma != missing:
                continue

            # Assign a new, correctly ordered list instead of appending to the
            # list returned by `mention.words`: an opening bracket precedes
            # the span, so it must go first, not last.
            # NOTE(review): assumes `words` is assignable (udapi's
            # CorefMention exposes a setter for it) -- confirm.
            span = list(mention.words)
            if has_opening:
                mention.words = span + [neighbour]
            else:
                mention.words = [neighbour] + span

            if self.mark:
                neighbour.misc['Mark'] = 1

0 commit comments

Comments
 (0)