Skip to content

Commit b2bc840

Browse files
author
Vincent Kriz
committed
Improved blocks for contexts extraction.
1 parent 14a74ae commit b2bc840

7 files changed

Lines changed: 126 additions & 118 deletions

File tree

udapi/block/zellig_harris/common.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import sys
23

34

45
def get_node_representation(node, print_lemma=False):
@@ -28,4 +29,5 @@ def print_triple(node_a, relation_name, node_b, print_lemma=False):
2829
node_a = get_node_representation(node_a, print_lemma=print_lemma)
2930
node_b = get_node_representation(node_b, print_lemma=print_lemma)
3031

31-
print("%s %s_%s", node_a, relation_name, node_b)
32+
context = u"%s %s_%s" % (node_a, relation_name, node_b)
33+
print(context)
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import logging
2+
3+
from udapi.core.block import Block
4+
5+
from udapi.block.zellig_harris.common import *
6+
from udapi.block.zellig_harris.queries import *
7+
8+
9+
class Configurations(Block):
    """
    An abstract base block shared by the context-extraction scenarios.

    It parses the common block arguments, provides a generic way to run
    a named query on a node and print the resulting triples, and walks
    over all nodes of a tree. Subclasses must implement ``process_node``.

    """

    def __init__(self, args=None):
        """
        Initialization.

        :param args: A dict of optional parameters. Recognized keys:
            'pos' (a comma-separated string, stored as a list in ``self.pos``),
            'print_lemmas' (the string '1' enables it),
            'verbose' (the string '1' enables it).

        """
        if args is None:
            args = {}

        # Call the constructor of the parent object.
        super(Configurations, self).__init__(args)

        # Process the 'pos' argument.
        self.pos = args['pos'].split(',') if 'pos' in args else []

        # Process the 'print_lemmas' argument.
        self.print_lemmas = args.get('print_lemmas') == '1'

        # Process the 'verbose' argument.
        self.verbose = args.get('verbose') == '1'

    def apply_query(self, query_id, node):
        """
        A generic method for applying a specified query on a specified node.

        The query is looked up by name among this module's globals, called
        with the node, and every returned triple is printed.

        :param query_id: A name of the query function to be called.
        :param node: An input node.
        :raises SystemExit: If no callable with the given name exists.

        """
        if self.verbose:
            logging.info(' - applying query %s', query_id)

        # dict.get() returns None for an unknown name instead of raising,
        # so the lookup result has to be validated explicitly.
        method = globals().get(query_id)
        if not callable(method):
            logging.fatal(' - no such query %s', query_id)
            # Same effect as sys.exit(1), without relying on `sys` being
            # pulled into this module's namespace by a star import.
            raise SystemExit(1)

        try:
            triples = method(node)
        except ValueError as exception:
            # A ValueError from the query is treated as "nothing to extract
            # for this node"; stop here so that the "all conditions passed"
            # message below is not logged as well.
            if self.verbose:
                logging.info(' - no configurations: %s', exception)
            return

        if not triples and self.verbose:
            logging.info(' - no configurations, but all conditions passed.')

        for (node_a, relation_name, node_b) in triples:
            print_triple(node_a, relation_name, node_b,
                         print_lemma=self.print_lemmas)

    def process_tree(self, tree):
        """
        If required, print detailed info about the processed sentence,
        then call ``process_node`` on every descendant of the tree.

        :param tree: A sentence to be processed.

        """
        if self.verbose:
            logging.info('')
            logging.info('---')
            logging.info('Sentence ID : %s', tree.sent_id)
            logging.info('Sentence : %s', ' '.join([node.form for node in tree.descendants()]))
            logging.info('---')

        for node in tree.descendants():
            self.process_node(node)

    def process_node(self, node):
        """
        Extract context configurations for a single node.

        This is an abstract hook; every subclass has to override it.

        :param node: A node to be processed.
        :raises NotImplementedError: Always, unless overridden.

        """
        raise NotImplementedError('Cannot call this abstract method.')

udapi/block/zellig_harris/csnouns.py

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
from udapi.core.block import Block
44

5-
from udapi.block.zellig_harris.common import *
5+
from udapi.block.zellig_harris.configurations import *
66
from udapi.block.zellig_harris.queries import *
77

88

9-
class CsNouns(Block):
9+
class CsNouns(Configurations):
1010
"""
1111
A block for extraction context configurations for Czech nouns.
1212
The configurations will be used as the train data for obtaining the word representations using word2vecf.
@@ -26,43 +26,19 @@ def __init__(self, args=None):
2626
# Call the constructor of the parent object.
2727
super(CsNouns, self).__init__(args)
2828

29-
# Process the 'POS' argument.
30-
self.pos = []
31-
if 'pos' in args:
32-
self.pos = args['pos'].split(',')
33-
34-
# Process the 'print_lemmas' argument.
35-
self.print_lemmas = False
36-
if 'print_lemmas' in args and args['print_lemmas'] == '1':
37-
self.print_lemmas = True
38-
39-
# Process the 'print_lemmas' argument.
40-
self.verbose = False
41-
if 'verbose' in args and args['verbose'] == '1':
42-
self.verbose = True
43-
4429
def process_node(self, node):
4530
"""
46-
Extract context configuration for verbs according to (Vulic et al., 2016).
31+
Extract context configurations for Czech nouns.
4732
4833
:param node: A node to be processed.
4934
5035
"""
51-
# We want to extract contexts only for verbs.
36+
# We want to extract contexts only for the selected parts of speech.
5237
if str(node.upostag) not in self.pos:
5338
return
5439

5540
if self.verbose:
41+
logging.info('')
5642
logging.info('Processing node %s/%s', node.root.sent_id, node)
5743

58-
# Apply the set of queries and extract the configurations.
59-
try:
60-
for (node_a, relation_name, node_b) in en_verb_mydobj(node):
61-
print_triple(node_a, relation_name, node_b,
62-
print_lemma=self.print_lemmas)
63-
except ValueError as exception:
64-
if self.verbose:
65-
logging.info('No configurations for node %s/%s: %s',
66-
node.root.sent_id, node, exception)
67-
68-
pass
44+
self.apply_query('en_verb_mydobj', node)

udapi/block/zellig_harris/csverbs.py

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
from udapi.core.block import Block
44

5-
from udapi.block.zellig_harris.common import *
5+
from udapi.block.zellig_harris.configurations import *
66
from udapi.block.zellig_harris.queries import *
77

88

9-
class CsVerbs(Block):
9+
class CsVerbs(Configurations):
1010
"""
1111
A block for extraction context configurations for Czech verbs.
1212
The configurations will be used as the train data for obtaining the word representations using word2vecf.
@@ -26,24 +26,9 @@ def __init__(self, args=None):
2626
# Call the constructor of the parent object.
2727
super(CsVerbs, self).__init__(args)
2828

29-
# Process the 'POS' argument.
30-
self.pos = []
31-
if 'pos' in args:
32-
self.pos = args['pos'].split(',')
33-
34-
# Process the 'print_lemmas' argument.
35-
self.print_lemmas = False
36-
if 'print_lemmas' in args and args['print_lemmas'] == '1':
37-
self.print_lemmas = True
38-
39-
# Process the 'print_lemmas' argument.
40-
self.verbose = False
41-
if 'verbose' in args and args['verbose'] == '1':
42-
self.verbose = True
43-
4429
def process_node(self, node):
4530
"""
46-
Extract context configuration for verbs according to (Vulic et al., 2016).
31+
Extract context configurations for Czech verbs.
4732
4833
:param node: A node to be processed.
4934
@@ -53,16 +38,7 @@ def process_node(self, node):
5338
return
5439

5540
if self.verbose:
41+
logging.info('')
5642
logging.info('Processing node %s/%s', node.root.sent_id, node)
5743

58-
# Apply the set of queries and extract the configurations.
59-
try:
60-
for (node_a, relation_name, node_b) in en_verb_mydobj(node):
61-
print_triple(node_a, relation_name, node_b,
62-
print_lemma=self.print_lemmas)
63-
except ValueError as exception:
64-
if self.verbose:
65-
logging.info('No configurations for node %s/%s: %s',
66-
node.root.sent_id, node, exception)
67-
68-
pass
44+
self.apply_query('en_verb_mydobj', node)

udapi/block/zellig_harris/ennouns.py

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
from udapi.core.block import Block
44

5-
from udapi.block.zellig_harris.common import *
5+
from udapi.block.zellig_harris.configurations import *
66
from udapi.block.zellig_harris.queries import *
77

88

9-
class EnNouns(Block):
9+
class EnNouns(Configurations):
1010
"""
1111
A block for extraction context configurations for English nouns.
1212
The configurations will be used as the train data for obtaining the word representations using word2vecf.
@@ -26,24 +26,9 @@ def __init__(self, args=None):
2626
# Call the constructor of the parent object.
2727
super(EnNouns, self).__init__(args)
2828

29-
# Process the 'POS' argument.
30-
self.pos = []
31-
if 'pos' in args:
32-
self.pos = args['pos'].split(',')
33-
34-
# Process the 'print_lemmas' argument.
35-
self.print_lemmas = False
36-
if 'print_lemmas' in args and args['print_lemmas'] == '1':
37-
self.print_lemmas = True
38-
39-
# Process the 'print_lemmas' argument.
40-
self.verbose = False
41-
if 'verbose' in args and args['verbose'] == '1':
42-
self.verbose = True
43-
4429
def process_node(self, node):
4530
"""
46-
Extract context configuration for verbs according to (Vulic et al., 2016).
31+
Extract context configurations for English nouns.
4732
4833
:param node: A node to be processed.
4934
@@ -53,16 +38,7 @@ def process_node(self, node):
5338
return
5439

5540
if self.verbose:
41+
logging.info('')
5642
logging.info('Processing node %s/%s', node.root.sent_id, node)
5743

58-
# Apply the set of queries and extract the configurations.
59-
try:
60-
for (node_a, relation_name, node_b) in en_verb_mydobj(node):
61-
print_triple(node_a, relation_name, node_b,
62-
print_lemma=self.print_lemmas)
63-
except ValueError as exception:
64-
if self.verbose:
65-
logging.info('No configurations for node %s/%s: %s',
66-
node.root.sent_id, node, exception)
67-
68-
pass
44+
self.apply_query('en_verb_mydobj', node)

udapi/block/zellig_harris/enverbs.py

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
from udapi.core.block import Block
44

5-
from udapi.block.zellig_harris.common import *
5+
from udapi.block.zellig_harris.configurations import *
66
from udapi.block.zellig_harris.queries import *
77

88

9-
class EnVerbs(Block):
9+
class EnVerbs(Configurations):
1010
"""
1111
A block for extraction context configurations for English verbs.
1212
The configurations will be used as the train data for obtaining the word representations using word2vecf.
@@ -26,24 +26,9 @@ def __init__(self, args=None):
2626
# Call the constructor of the parent object.
2727
super(EnVerbs, self).__init__(args)
2828

29-
# Process the 'POS' argument.
30-
self.pos = []
31-
if 'pos' in args:
32-
self.pos = args['pos'].split(',')
33-
34-
# Process the 'print_lemmas' argument.
35-
self.print_lemmas = False
36-
if 'print_lemmas' in args and args['print_lemmas'] == '1':
37-
self.print_lemmas = True
38-
39-
# Process the 'print_lemmas' argument.
40-
self.verbose = False
41-
if 'verbose' in args and args['verbose'] == '1':
42-
self.verbose = True
43-
4429
def process_node(self, node):
4530
"""
46-
Extract context configuration for verbs according to (Vulic et al., 2016).
31+
Extract context configurations for English verbs.
4732
4833
:param node: A node to be processed.
4934
@@ -53,16 +38,7 @@ def process_node(self, node):
5338
return
5439

5540
if self.verbose:
41+
logging.info('')
5642
logging.info('Processing node %s/%s', node.root.sent_id, node)
5743

58-
# Apply the set of queries and extract the configurations.
59-
try:
60-
for (node_a, relation_name, node_b) in en_verb_mydobj(node):
61-
print_triple(node_a, relation_name, node_b,
62-
print_lemma=self.print_lemmas)
63-
except ValueError as exception:
64-
if self.verbose:
65-
logging.info('No configurations for node %s/%s: %s',
66-
node.root.sent_id, node, exception)
67-
68-
pass
44+
self.apply_query('en_verb_mydobj', node)

udapi/block/zellig_harris/queries.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ def en_verb_mydobj(node):
2424
triples.append((node, 'dobj', child_node))
2525

2626
return triples
27+

0 commit comments

Comments
 (0)