@@ -369,15 +369,16 @@ def process_node(self, node):
369369 node .upos = 'AUX'
370370 # In 19th century data, the conditional auxiliaries are tagged as SCONJ.
371371 # 'by' = 'J,-S---3------B-'
372- # Fix it.
373- if node .upos in ['SCONJ' , 'PART' ] and re .fullmatch (r'(by|bych|bys|bychom|byste)' , node .form .lower ()):
372+ # Fix it. And also make sure that the right features are present.
373+ if node .upos in ['AUX' , ' SCONJ' , 'PART' ] and re .fullmatch (r'(by|bych|bys|bychom|byste)' , node .form .lower ()):
374374 node .upos = 'AUX'
375375 node .lemma = 'být'
376376 node .feats ['VerbForm' ] = 'Fin'
377377 node .feats ['Mood' ] = 'Cnd'
378378 node .feats ['Tense' ] = ''
379379 node .feats ['Aspect' ] = 'Imp'
380- node .feats ['Voice' ] = 'Act'
380+ node .feats ['Voice' ] = '' ###!!! Maybe we should use Voice=Act with all non-passive verbal forms but we do not do it at present.
381+ node .feats ['Polarity' ] = ''
381382 if node .form .lower () == 'by' :
382383 node .feats ['Person' ] = '' # theoretically sometimes also 2nd, although mostly 3rd
383384 node .feats ['Number' ] = ''
@@ -397,7 +398,7 @@ def process_node(self, node):
397398 # have it, too. Passive infinitives are always periphrastic.
398399 # (This is not done in the PDT tagset, but we should add it.)
399400 if node .feats ['VerbForm' ] == 'Inf' :
400- node .feats ['Voice' ] = 'Act'
401+ node .feats ['Voice' ] = '' ###!!! 'Act' is currently not permitted by ud.cs.MarkFeatsBugs and not used in older data (13th to 18th century)
401402 # Same for imperatives.
402403 elif node .feats ['Mood' ] == 'Imp' :
403404 node .feats ['Voice' ] = 'Act'
0 commit comments