Skip to content

Commit 7f5ab8c

Browse files
author
Vincent Kriz
committed
Removed loggings from Root. Improved Zellig Harris baseline.
1 parent 9056b83 commit 7f5ab8c

5 files changed

Lines changed: 71 additions & 29 deletions

File tree

udapi/block/zellig_harris/baseline.py

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
#!/usr/bin/env python
22

3-
import logging
4-
53
from udapi.core.block import Block
64

75

@@ -43,6 +41,8 @@ def __init__(self, args=None):
4341
:param args: A dict of optional parameters.
4442
4543
"""
44+
super(Block, self).__init__()
45+
4646
if args is None:
4747
args = {}
4848

@@ -54,10 +54,67 @@ def __init__(self, args=None):
5454
if 'pos' in args:
5555
self.pos = args['pos'].split(',')
5656

57+
self.lemmas = False
58+
if 'lemmas' in args and args['lemmas'] == '1':
59+
self.lemmas = True
60+
5761
self.suffixed_forms = False
5862
if 'suffixed_form' in args and args['suffixed_forms'] == '1':
5963
self.suffixed_forms = True
6064

65+
self.reflexive_verbs = False
66+
if 'reflexive_verbs' in args and args['reflexive_verbs'] == '1':
67+
self.reflexive_verbs = True
68+
69+
def get_word(self, node):
    """
    Format the string representation of the given node according to the block settings.

    The base word is the node's lemma when ``self.lemmas`` is set, otherwise the node's form.
    It is converted to lowercase and, when ``self.suffixed_forms`` is set, its last three
    characters are removed. When ``self.reflexive_verbs`` is set and the node has a child
    with the ``expl`` deprel, the lemma of the first such child is appended as
    ``<word>_<lemma>`` (also lowercased).

    :param node: An input node.
    :return: The node's string representation.

    """
    # If the reflexive pronoun should be appended, take the lemma of the first 'expl' child.
    word_suffix = ''
    if self.reflexive_verbs:
        for child in node.children:
            if child.deprel == 'expl':
                word_suffix = child.lemma
                break

    # Use the node's lemma or its form, converted to lowercase.
    word = node.lemma if self.lemmas else node.form
    word = word.lower()

    # Remove the last 3 chars when the block is applied on a suffixed dataset.
    # This has to happen before the reflexive pronoun is attached; otherwise the slice
    # would cut off the appended '_<pronoun>' part instead of the word's own suffix.
    if self.suffixed_forms:
        word = word[:-3]

    # Append the reflexive pronoun, if found, and keep the whole result lowercase.
    if word_suffix != '':
        word = ('%s_%s' % (word, word_suffix)).lower()

    return word
102+
103+
def print_triple(self, target_node, context_node, relation_name):
    """
    Write one context triple to the standard output.

    Both nodes are converted to strings with ``self.get_word``; the emitted line has
    the shape ``<target> <context>_<relation>`` and is UTF-8 encoded before printing.

    :param target_node: A target word.
    :param context_node: A context word.
    :param relation_name: A relation name.

    """
    # Resolve the target first and the context second, then format the output line.
    words = (self.get_word(target_node), self.get_word(context_node))
    line = '%s %s_%s' % (words[0], words[1], relation_name)

    # A single parenthesised expression prints the same under the Python 2 print statement.
    print(line.encode('utf-8'))
117+
61118
def process_node(self, node):
62119
"""
63120
Extract context configuration for verbs according to (Vulic et al., 2016).
@@ -69,48 +126,37 @@ def process_node(self, node):
69126
if str(node.upostag) not in self.pos:
70127
return
71128

72-
node_form = node.form.lower()
73-
if self.suffixed_forms:
74-
node_form = node_form[:-3]
75-
76-
parent_form = node.parent.form.lower()
77-
if self.suffixed_forms:
78-
parent_form = parent_form[:-3]
79-
80129
# Process node's parent.
81130
parent_deprel_orig = node.deprel
82131
parent_deprel_merged = _merge_deprel(parent_deprel_orig)
83132

84133
if parent_deprel_orig in self.pool:
85-
print "%s %s_%sI" % (node_form, parent_form, parent_deprel_orig)
134+
self.print_triple(node, node.parent, parent_deprel_orig)
86135

87136
if parent_deprel_orig != parent_deprel_merged and parent_deprel_merged in self.pool:
88-
print "%s %s_%sI" % (node_form, parent_form, parent_deprel_merged)
137+
relation_name = '%sI' % parent_deprel_merged
138+
self.print_triple(node, node.parent, relation_name)
89139

90140
if parent_deprel_orig in self.pool and parent_deprel_orig == 'conj':
91-
print "%s %s_%s" % (node_form, parent_form, parent_deprel_merged)
141+
self.print_triple(node, node.parent, parent_deprel_merged)
92142

93143
# Process node's children.
94144
for child in node.children:
95145
child_deprel_orig = child.deprel
96146
child_deprel_merged = _merge_deprel(child_deprel_orig)
97147

98-
child_form = child.form.lower()
99-
if self.suffixed_forms:
100-
child_form = child_form[:-3]
101-
102148
if child_deprel_orig in self.pool:
103-
print "%s %s_%s" % (node_form, child_form, child_deprel_orig)
149+
self.print_triple(node, child, child_deprel_orig)
104150

105151
if child_deprel_orig != child_deprel_merged and child_deprel_merged in self.pool:
106-
print "%s %s_%s" % (node_form, child_form, child_deprel_merged)
152+
self.print_triple(node, child, child_deprel_merged)
107153

108154
if 'prep' in self.pool:
109155
has_preposition = False
110-
for subchild in child.children:
111-
if subchild.deprel == 'case':
156+
for sub_child in child.children:
157+
if sub_child.deprel == 'case':
112158
has_preposition = True
113159
break
114160

115161
if has_preposition:
116-
print "%s %s_%s" % (node_form, child_form, 'prep')
162+
self.print_triple(node, child, 'prep')

udapi/block/zellig_harris/csnouns.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
class CsNouns(Block):
1212
"""
13-
A block for extraction context configurations for English nouns.
13+
A block for extraction context configurations for Czech nouns.
1414
The configurations will be used as the train data for obtaining the word representations using word2vecf.
1515
1616
"""

udapi/block/zellig_harris/csverbs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
class CsVerbs(Block):
1212
"""
13-
A block for extraction context configurations for English nouns.
13+
A block for extraction context configurations for Czech verbs.
1414
The configurations will be used as the train data for obtaining the word representations using word2vecf.
1515
1616
"""

udapi/block/zellig_harris/enverbs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
class EnVerbs(Block):
1212
"""
13-
A block for extraction context configurations for English nouns.
13+
A block for extraction context configurations for English verbs.
1414
The configurations will be used as the train data for obtaining the word representations using word2vecf.
1515
1616
"""

udapi/core/root.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
#!/usr/bin/env python
22

3-
import logging
4-
53
from udapi.core.node import Node
64

75

@@ -90,12 +88,10 @@ def bundle(self, bundle):
9088

9189
@property
9290
def children(self):
93-
logging.debug('davam deti roota')
9491
return self._children
9592

9693
@children.setter
9794
def children(self, children):
98-
logging.debug('nastavujem deti rootovi')
9995
self._children = children
10096

10197
@property

0 commit comments

Comments
 (0)