Skip to content

Commit 11bd256

Browse files
committed
Voice and Polarity of some Czech verb forms.
1 parent 361f676 commit 11bd256

1 file changed

Lines changed: 5 additions & 4 deletions

File tree

udapi/block/ud/cs/fixmorpho.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -369,15 +369,16 @@ def process_node(self, node):
369 369
node.upos = 'AUX'
370 370
# In 19th century data, the conditional auxiliaries are tagged as SCONJ.
371 371
# 'by' = 'J,-S---3------B-'
372-
# Fix it.
373-
if node.upos in ['SCONJ', 'PART'] and re.fullmatch(r'(by|bych|bys|bychom|byste)', node.form.lower()):
372+
# Fix it. And also make sure that the right features are present.
373+
if node.upos in ['AUX', 'SCONJ', 'PART'] and re.fullmatch(r'(by|bych|bys|bychom|byste)', node.form.lower()):
374 374
node.upos = 'AUX'
375 375
node.lemma = 'být'
376 376
node.feats['VerbForm'] = 'Fin'
377 377
node.feats['Mood'] = 'Cnd'
378 378
node.feats['Tense'] = ''
379 379
node.feats['Aspect'] = 'Imp'
380-
node.feats['Voice'] = 'Act'
380+
node.feats['Voice'] = '' ###!!! Maybe we should use Voice=Act with all non-passive verbal forms but we do not do it at present.
381+
node.feats['Polarity'] = ''
381 382
if node.form.lower() == 'by':
382 383
node.feats['Person'] = '' # theoretically sometimes also 2nd, although mostly 3rd
383 384
node.feats['Number'] = ''
@@ -397,7 +398,7 @@ def process_node(self, node):
397 398
# have it, too. Passive infinitives are always periphrastic.
398 399
# (This is not done in the PDT tagset, but we should add it.)
399 400
if node.feats['VerbForm'] == 'Inf':
400-
node.feats['Voice'] = 'Act'
401+
node.feats['Voice'] = '' ###!!! 'Act' is currently not permitted by ud.cs.MarkFeatsBugs and not used in older data (13th to 18th century)
401 402
# Same for imperatives.
402 403
elif node.feats['Mood'] == 'Imp':
403 404
node.feats['Voice'] = 'Act'

0 commit comments

Comments
 (0)