11#!/usr/bin/env python
22
3- import logging
4-
53from udapi .core .block import Block
64
75
@@ -43,6 +41,8 @@ def __init__(self, args=None):
4341 :param args: A dict of optional parameters.
4442
4543 """
44+ super (Block , self ).__init__ ()
45+
4646 if args is None :
4747 args = {}
4848
@@ -54,10 +54,67 @@ def __init__(self, args=None):
5454 if 'pos' in args :
5555 self .pos = args ['pos' ].split (',' )
5656
57+ self .lemmas = False
58+ if 'lemmas' in args and args ['lemmas' ] == '1' :
59+ self .lemmas = True
60+
5761 self .suffixed_forms = False
5862 if 'suffixed_form' in args and args ['suffixed_forms' ] == '1' :
5963 self .suffixed_forms = True
6064
65+ self .reflexive_verbs = False
66+ if 'reflexive_verbs' in args and args ['reflexive_verbs' ] == '1' :
67+ self .reflexive_verbs = True
68+
def get_word(self, node):
    """
    Build the string representation of the given node according to the block settings.

    The base word is the node's lemma when ``self.lemmas`` is set, otherwise its form.
    The base word is lowercased and, when ``self.suffixed_forms`` is set, its last three
    characters are cut off. When ``self.reflexive_verbs`` is set and the node has a child
    whose deprel is ``expl``, the lowercased lemma of the first such child is appended
    after an underscore.

    :param node: An input node.
    :return: The node's string representation.

    """
    # Use the node's lemma or its form as the base word.
    word = node.lemma if self.lemmas else node.form
    word = word.lower()

    # Cut the 3-character dataset suffix off the base word *before* anything is appended.
    # Stripping after the pronoun was attached would remove the pronoun (e.g. '_se')
    # and leave the dataset suffix in place.
    # NOTE(review): the pronoun lemma is assumed not to carry the dataset suffix - confirm.
    if self.suffixed_forms:
        word = word[:-3]

    # Append the lemma of the first 'expl' child, if requested.
    # No part-of-speech check is made here, so this applies to any node with such a child.
    if self.reflexive_verbs:
        for child in node.children:
            if child.deprel == 'expl':
                # Only the first 'expl' child is considered; a missing or empty lemma
                # adds nothing instead of producing 'word_none'.
                if child.lemma:
                    word = '%s_%s' % (word, child.lemma.lower())
                break

    return word
102+
def print_triple(self, target_node, context_node, relation_name):
    """
    Write one context triple to the standard output according to the block settings.

    The emitted line has the form ``<target> <context>_<relation>`` followed by a newline,
    where both words are produced by ``self.get_word``.

    :param target_node: A target node.
    :param context_node: A context node.
    :param relation_name: A relation name.

    """
    import sys

    target_word = self.get_word(target_node)
    context_word = self.get_word(context_node)

    line = '%s %s_%s\n' % (target_word, context_word, relation_name)

    # On Python 2 the standard output is a byte stream, so unicode text is encoded
    # explicitly. Byte strings are passed through untouched: calling encode() on them
    # would trigger an implicit ASCII decode. On Python 3 the text stream encodes itself.
    if sys.version_info[0] < 3 and not isinstance(line, str):
        line = line.encode('utf-8')

    sys.stdout.write(line)
117+
61118 def process_node (self , node ):
62119 """
63120 Extract context configuration for verbs according to (Vulic et al., 2016).
@@ -69,48 +126,37 @@ def process_node(self, node):
69126 if str (node .upostag ) not in self .pos :
70127 return
71128
72- node_form = node .form .lower ()
73- if self .suffixed_forms :
74- node_form = node_form [:- 3 ]
75-
76- parent_form = node .parent .form .lower ()
77- if self .suffixed_forms :
78- parent_form = parent_form [:- 3 ]
79-
80129 # Process node's parent.
81130 parent_deprel_orig = node .deprel
82131 parent_deprel_merged = _merge_deprel (parent_deprel_orig )
83132
84133 if parent_deprel_orig in self .pool :
85- print "%s %s_%sI" % ( node_form , parent_form , parent_deprel_orig )
134+ self . print_triple ( node , node . parent , parent_deprel_orig )
86135
87136 if parent_deprel_orig != parent_deprel_merged and parent_deprel_merged in self .pool :
88- print "%s %s_%sI" % (node_form , parent_form , parent_deprel_merged )
137+ relation_name = '%sI' % parent_deprel_merged
138+ self .print_triple (node , node .parent , relation_name )
89139
90140 if parent_deprel_orig in self .pool and parent_deprel_orig == 'conj' :
91- print "%s %s_%s" % ( node_form , parent_form , parent_deprel_merged )
141+ self . print_triple ( node , node . parent , parent_deprel_merged )
92142
93143 # Process node's children.
94144 for child in node .children :
95145 child_deprel_orig = child .deprel
96146 child_deprel_merged = _merge_deprel (child_deprel_orig )
97147
98- child_form = child .form .lower ()
99- if self .suffixed_forms :
100- child_form = child_form [:- 3 ]
101-
102148 if child_deprel_orig in self .pool :
103- print "%s %s_%s" % ( node_form , child_form , child_deprel_orig )
149+ self . print_triple ( node , child , child_deprel_orig )
104150
105151 if child_deprel_orig != child_deprel_merged and child_deprel_merged in self .pool :
106- print "%s %s_%s" % ( node_form , child_form , child_deprel_merged )
152+ self . print_triple ( node , child , child_deprel_merged )
107153
108154 if 'prep' in self .pool :
109155 has_preposition = False
110- for subchild in child .children :
111- if subchild .deprel == 'case' :
156+ for sub_child in child .children :
157+ if sub_child .deprel == 'case' :
112158 has_preposition = True
113159 break
114160
115161 if has_preposition :
116- print "%s %s_%s" % ( node_form , child_form , 'prep' )
162+ self . print_triple ( node , child , 'prep' )
0 commit comments