33
44
55class IndexClusters (Block ):
6- """Re-index the coreference cluster IDs. The final cluster IDs are of the "c<ID>" form,
6+ """Re-index the coreference cluster IDs. The final cluster IDs are of the "e<ID>" form,
77 where <ID> are ordinal numbers starting from the one specified by the `start` parameter.
88 This block can be applied on multiple documents within one udapy call.
99 For example, to re-index ClusterId in all conllu files in the current directory
@@ -13,11 +13,14 @@ class IndexClusters(Block):
1313 Parameters:
1414 -----------
1515 start : int
16- the starting index (by default 1)
16+ the starting index (default=1)
17+ prefix : str
18+ prefix of the IDs before the number (default="e")
1719 """
1820
19- def __init__ (self , start = 1 ):
21+ def __init__ (self , start = 1 , prefix = 'e' ):
2022 self .start = start
23+ self .prefix = prefix
2124
2225 def process_document (self , doc ):
2326 clusters = doc .coref_clusters
@@ -26,7 +29,7 @@ def process_document(self, doc):
2629 new_clusters = {}
2730 for idx , cid in enumerate (clusters , self .start ):
2831 cluster = clusters [cid ]
29- new_cid = "c" + str (idx )
32+ new_cid = self . prefix + str (idx )
3033 cluster .cluster_id = new_cid
3134 new_clusters [new_cid ] = cluster
3235 self .start = idx + 1
0 commit comments