Skip to content

Commit f747fa0

Browse files
committed
"Skirzěňž" et al.
1 parent 9e68b89 commit f747fa0

1 file changed

Lines changed: 23 additions & 0 deletions

File tree

udapi/block/ud/cs/addmwt.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,17 @@
7676
'main': 1,
7777
'shape': 'subtree',
7878
}
79+
# Static rules for 'skirzěň', 'skrzěň', 'skrzeň': each contraction is split
# into the preposition itself followed by the pronoun form 'něj'.
# The rules are added to the already existing dictionary MWTS.
for prep in ('skirzě', 'skrzě', 'skrze'):
    # Build a fresh dict on every iteration so that no two entries alias.
    MWTS[prep + 'ň'] = dict(
        form=prep + ' něj',
        lemma='skrz on',
        upos='ADP PRON',
        xpos='RV--4---------- PEZS4--3-------',
        feats='AdpType=Voc|Case=Acc Case=Acc|Gender=Masc,Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs',
        deprel='case *',
        main=1,
        shape='subtree',
    )
7990

8091
# Define static rules for 'naňž', 'oňž', 'proňž', 'zaňž'.
8192
# Add them to the already existing dictionary MWTS.
@@ -91,6 +102,18 @@
91102
'main': 1,
92103
'shape': 'subtree',
93104
}
105+
# Additional contractions in Old Czech with vocalization:
# 'skirzěňž', 'skrzěňž', 'skrzeňž' are each split into the preposition
# itself followed by the relative pronoun form 'nějž'.
for prep in ('skirzě', 'skrzě', 'skrze'):
    # Build a fresh dict on every iteration so that no two entries alias.
    MWTS[prep + 'ňž'] = dict(
        form=prep + ' nějž',
        lemma='skrz jenž',
        upos='ADP PRON',
        xpos='RV--4---------- P4ZS4---------2',
        feats='AdpType=Voc|Case=Acc Case=Acc|Gender=Masc,Neut|Number=Sing|PrepCase=Pre|PronType=Rel',
        deprel='case *',
        main=1,
        shape='subtree',
    )
94117

95118
class AddMwt(udapi.block.ud.addmwt.AddMwt):
96119
"""Detect and mark MWTs (split them into words and add the words to the tree)."""

0 commit comments

Comments
 (0)