Skip to content

Commit 9e68b89

Browse files
committed
Czech MWTs "naňž, oňž, proňž, zaňž".
1 parent e30ed90 commit 9e68b89

1 file changed

Lines changed: 15 additions & 0 deletions

File tree

udapi/block/ud/cs/addmwt.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,21 @@
7777
'shape': 'subtree',
7878
}
7979

80+
# Static rules for the contracted forms 'naňž', 'oňž', 'proňž', 'zaňž'.
# The entries are registered in the MWTS dictionary, which already exists
# at this point in the module.
# Each form is split into the preposition followed by 'nějž',
# e.g. naňž -> na + nějž (lemmas: the preposition itself and 'jenž').
for prep in ('na', 'o', 'pro', 'za'):
    MWTS['{}ňž'.format(prep)] = dict(
        # Space-separated values: first the preposition, then the pronoun.
        form='{} nějž'.format(prep),
        lemma='{} jenž'.format(prep),
        upos='ADP PRON',
        xpos='RR--4---------- P4ZS4---------2',
        feats='AdpType=Prep|Case=Acc Case=Acc|Gender=Masc,Neut|Number=Sing|PrepCase=Pre|PronType=Rel',
        # '*' is a placeholder handled by the base AddMwt block
        # NOTE(review): presumably "keep the original deprel" -- confirm.
        deprel='case *',
        # NOTE(review): presumably the 0-based index of the head word
        # (here the pronoun) -- confirm against the base AddMwt block.
        main=1,
        shape='subtree',
    )
94+
8095
class AddMwt(udapi.block.ud.addmwt.AddMwt):
8196
"""Detect and mark MWTs (split them into words and add the words to the tree)."""
8297

0 commit comments

Comments
 (0)