Skip to content

Commit 14a74ae

Browse files
author
Vincent Kriz
committed
Merge remote-tracking branch 'origin/master' into zellig-harris-transformations
2 parents e0fad04 + 08d2244 commit 14a74ae

36 files changed

Lines changed: 215 additions & 194 deletions

.travis.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
language: python
22
python:
3-
- "2.6"
4-
- "2.7"
5-
- "3.2"
3+
- "3.3"
64
- "3.4"
75
- "3.5"
86
install:

bin/udapy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22

33
import logging
44
import argparse

setup.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22

33
from setuptools import setup, find_packages
44

5+
# python_requires is supported by pip only from November 2016,
6+
# so let's check the Python version also the old way.
7+
import sys
8+
if sys.version_info < (3, 3):
9+
raise SystemExit('Udapi requires Python 3.3 or higher.')
10+
511
setup(
612
name='udapi-python',
713
version='0.1',
@@ -12,4 +18,5 @@
1218
packages=find_packages(),
1319
scripts=['bin/udapy'],
1420
tests_require=['pytest'],
21+
python_requires='>=3.3'
1522
)

udapi/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +0,0 @@
1-
#!/usr/bin/env python
2-

udapi/block/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +0,0 @@
1-
#!/usr/bin/env python
2-

udapi/block/demo/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +0,0 @@
1-
#!/usr/bin/env python
2-
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
from udapi.core.block import Block
22

3+
34
class RehangPrepositions(Block):
45

5-
def process_node( self, node ):
6+
def process_node(self, node):
67

7-
if str(node.upostag) == "ADP": # TODO: why the hell is str needed
8+
if str(node.upostag) == "ADP": # TODO: why the hell is str needed
89

910
origparent = node.parent
1011
node.set_parent(origparent.parent)
1112
origparent.set_parent(node)
12-

udapi/block/dummy.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from udapi.core.block import Block
22

3+
34
class Dummy(Block):
4-
def process_tree(self,root):
5+
6+
def process_tree(self, root):
57
for node in root.descendants():
68
# print node.lemma
79
pass
8-

udapi/block/read/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +0,0 @@
1-
#!/usr/bin/env python
2-

udapi/block/read/conllu.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
#!/usr/bin/env python
2-
31
import logging
4-
import codecs
52
import re
63
import bz2
74

@@ -14,6 +11,7 @@ class Conllu(BaseReader):
1411
A reader of the Conll-u files.
1512
1613
"""
14+
1715
def __init__(self, args=None):
1816
if args is None:
1917
args = {}
@@ -55,15 +53,14 @@ def __init__(self, args=None):
5553
# Use bz2 lib when bz2 file is given.
5654
if filename_extension == 'bz2':
5755
logging.info('Opening BZ2 file %s', self.filename)
58-
self.file_handler = bz2.BZ2File(self.filename)
56+
self.file_handler = bz2.open(
57+
self.filename, 'rt', encoding='utf-8')
5958
else:
6059
logging.info('Opening regular file %s', self.filename)
61-
self.file_handler = open(self.filename, 'rb')
60+
self.file_handler = open(self.filename, 'rt', encoding='utf-8')
6261
else:
6362
raise ValueError('No file to process')
6463

65-
self.file_handler = codecs.getreader('utf8')(self.file_handler)
66-
6764
# Remember total number of bundles
6865
self.total_number_of_bundles = 0
6966

@@ -92,7 +89,8 @@ def process_document(self, document):
9289
number_of_processed_bundles = -1
9390
number_of_loaded_bundles = 0
9491

95-
# Compile a set of regular expressions that will be searched over the lines.
92+
# Compile a set of regular expressions that will be searched over the
93+
# lines.
9694
re_comment_like = re.compile(r'^#')
9795
re_sentence_id = re.compile(r'^# sent_id (\S+)')
9896
re_multiword_tokens = re.compile(r'^\d+-')
@@ -105,7 +103,8 @@ def process_document(self, document):
105103

106104
# If we can not add next bundle, return document.
107105
if number_of_loaded_bundles >= self.bundles_per_document:
108-
logging.debug('Reached number of requested bundles (%d)', self.bundles_per_document)
106+
logging.debug(
107+
'Reached number of requested bundles (%d)', self.bundles_per_document)
109108
return document
110109

111110
# Obtain a raw bundle.
@@ -127,7 +126,8 @@ def process_document(self, document):
127126
raw_bundle_check = False
128127

129128
if not raw_bundle_check:
130-
raise RuntimeError('Detected an invalid bundle: %r' % raw_bundle)
129+
raise RuntimeError(
130+
'Detected an invalid bundle: %r' % raw_bundle)
131131

132132
# Initialize the data structures.
133133
root_node = Root()
@@ -144,7 +144,8 @@ def process_document(self, document):
144144
match = re_sentence_id.search(line)
145145
if match is not None:
146146
sent_id = match.group(1)
147-
logging.debug('Matched sent_id keyword with value %s', sent_id)
147+
logging.debug(
148+
'Matched sent_id keyword with value %s', sent_id)
148149
root_node.sent_id = sent_id
149150
continue
150151

@@ -159,15 +160,17 @@ def process_document(self, document):
159160
logging.debug('Skipping multi-word tokens %s', line)
160161
continue
161162

162-
# Otherwise the line is a tab-separated list of node attributes.
163+
# Otherwise the line is a tab-separated list of node
164+
# attributes.
163165
node = root_node.create_child()
164166
raw_node_attributes = line.split('\t')
165167
for (n_attribute, attribute_name) in enumerate(self.node_attributes):
166168
if attribute_name == 'feats':
167169
attribute_name = 'raw_feats'
168170
if attribute_name == 'deps':
169171
attribute_name = 'raw_deps'
170-
setattr(node, attribute_name, raw_node_attributes[n_attribute])
172+
setattr(node, attribute_name,
173+
raw_node_attributes[n_attribute])
171174

172175
nodes.append(node)
173176

@@ -185,9 +188,11 @@ def process_document(self, document):
185188

186189
# At least one node should be parsed.
187190
if len(nodes) == 0:
188-
raise ValueError('Probably two empty lines following each other.')
191+
raise ValueError(
192+
'Probably two empty lines following each other.')
189193

190-
# If specified, check sentence ID to match the sentence ID filter.
194+
# If specified, check sentence ID to match the sentence ID
195+
# filter.
191196
if self.sentence_id_filter is not None:
192197
if self.sentence_id_filter.match(root_node.sent_id) is None:
193198
logging.debug('Skipping sentence %s as it does not match the sentence ID filter.',

0 commit comments

Comments
 (0)