Skip to content

Commit fe4dfcf

Browse files
committed
corefud.IndexClusters now uses prefix="e" by default
1 parent b38e7e4 commit fe4dfcf

1 file changed

Lines changed: 7 additions & 4 deletions

File tree

udapi/block/corefud/indexclusters.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44

55
class IndexClusters(Block):
6-
"""Re-index the coreference cluster IDs. The final cluster IDs are of the "c<ID>" form,
6+
"""Re-index the coreference cluster IDs. The final cluster IDs are of the "e<ID>" form,
77
where <ID> is an ordinal number, starting from the value specified by the `start` parameter.
88
This block can be applied on multiple documents within one udapy call.
99
For example, to re-index ClusterId in all conllu files in the current directory
@@ -13,11 +13,14 @@ class IndexClusters(Block):
1313
Parameters:
1414
-----------
1515
start : int
16-
the starting index (by default 1)
16+
the starting index (default=1)
17+
prefix : str
18+
prefix of the IDs before the number (default="e")
1719
"""
1820

19-
def __init__(self, start=1):
21+
def __init__(self, start=1, prefix='e'):
2022
self.start = start
23+
self.prefix = prefix
2124

2225
def process_document(self, doc):
2326
clusters = doc.coref_clusters
@@ -26,7 +29,7 @@ def process_document(self, doc):
2629
new_clusters = {}
2730
for idx, cid in enumerate(clusters, self.start):
2831
cluster = clusters[cid]
29-
new_cid = "c" + str(idx)
32+
new_cid = self.prefix + str(idx)
3033
cluster.cluster_id = new_cid
3134
new_clusters[new_cid] = cluster
3235
self.start = idx + 1

0 commit comments

Comments
 (0)