Skip to content

Commit 5546a65

Browse files
author
Vincent Kriz
committed
Renamed block to proper project name.
1 parent 0261826 commit 5546a65

5 files changed

Lines changed: 33 additions & 8 deletions

File tree

udapi/block/read/conllu.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ def __init__(self, args=None):
2424
# TODO: this should be invoked from the parent class
2525
self.finished = False
2626

27+
# ID filter.
28+
self.sentence_id_filter = None
29+
if 'sentence_id_filter' in args:
30+
self.sentence_id_filter = re.compile(args['sentence_id_filter'])
31+
2732
# Bundles per document.
2833
self.bundles_per_document = float("inf")
2934
if 'bundles_per_document' in args:
@@ -146,8 +151,6 @@ def process_document(self, document):
146151
node = root_node.create_child()
147152
raw_node_attributes = line.split('\t')
148153

149-
150-
151154
for (n_attribute, attribute_name) in enumerate(self.node_attributes):
152155
setattr(node, attribute_name, raw_node_attributes[n_attribute])
153156

@@ -169,6 +172,12 @@ def process_document(self, document):
169172
if len(nodes) == 0:
170173
raise ValueError('Probably two empty lines following each other')
171174

175+
# If specified, check sentence ID to match the sentence ID filter.
176+
if self.sentence_id_filter is not None:
177+
if self.sentence_id_filter.match(root_node.sent_id) is None:
178+
logging.debug('Skipping sentence %s as it does not match the sentence ID filter.', root_node.sent_id)
179+
continue
180+
172181
# Set parents for each node.
173182
nodes[0]._aux['comments'] = '\n'.join(comments)
174183
nodes[0]._aux['descendants'] = nodes[1:]
File renamed without changes.
File renamed without changes.
Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ def __init__(self, args=None):
5454
if 'pos' in args:
5555
self.pos = args['pos'].split(',')
5656

57+
self.suffixed_forms = False
58+
if 'suffixed_form' in args and args['suffixed_forms'] == '1':
59+
self.suffixed_forms == True
60+
5761
def process_node(self, node):
5862
"""
5963
Extract context configuration for verbs according to (Vulic et al., 2016).
@@ -65,29 +69,41 @@ def process_node(self, node):
6569
if str(node.upostag) not in self.pos:
6670
return
6771

72+
node_form = node.form.lower()
73+
if self.suffixed_forms:
74+
node_form = node_form[:-3]
75+
76+
parent_form = node.parent.form.lower()
77+
if self.suffixed_forms:
78+
parent_form = parent_form[:-3]
79+
6880
# Process node's parent.
6981
parent_deprel_orig = node.deprel
7082
parent_deprel_merged = _merge_deprel(parent_deprel_orig)
7183

7284
if parent_deprel_orig in self.pool:
73-
print "%s %s_%sI" % (node.form[:-3].lower(), node.parent.form[:-3].lower(), parent_deprel_orig)
85+
print "%s %s_%sI" % (node_form, parent_form, parent_deprel_orig)
7486

7587
if parent_deprel_orig != parent_deprel_merged and parent_deprel_merged in self.pool:
76-
print "%s %s_%sI" % (node.form[:-3].lower(), node.parent.form[:-3].lower(), parent_deprel_merged)
88+
print "%s %s_%sI" % (node_form, parent_form, parent_deprel_merged)
7789

7890
if parent_deprel_orig in self.pool and parent_deprel_orig == 'conj':
79-
print "%s %s_%s" % (node.form[:-3].lower(), node.parent.form[:-3].lower(), parent_deprel_merged)
91+
print "%s %s_%s" % (node_form, parent_form, parent_deprel_merged)
8092

8193
# Process node's children.
8294
for child in node.children:
8395
child_deprel_orig = child.deprel
8496
child_deprel_merged = _merge_deprel(child_deprel_orig)
8597

98+
child_form = child.form.lower()
99+
if self.suffixed_forms:
100+
child_form = child_form[:-3]
101+
86102
if child_deprel_orig in self.pool:
87-
print "%s %s_%s" % (node.form[:-3].lower(), child.form[:-3].lower(), child_deprel_orig)
103+
print "%s %s_%s" % (node_form, child_form, child_deprel_orig)
88104

89105
if child_deprel_orig != child_deprel_merged and child_deprel_merged in self.pool:
90-
print "%s %s_%s" % (node.form[:-3].lower(), child.form[:-3].lower(), child_deprel_merged)
106+
print "%s %s_%s" % (node_form, child_form, child_deprel_merged)
91107

92108
if 'prep' in self.pool:
93109
has_preposition = False
@@ -97,7 +113,7 @@ def process_node(self, node):
97113
break
98114

99115
if has_preposition:
100-
print "%s %s_%s" % (node.form[:-3].lower(), child.form[:-3].lower(), 'prep')
116+
print "%s %s_%s" % (node_form, child_form, 'prep')
101117

102118

103119

0 commit comments

Comments
 (0)