Skip to content

Commit 7772c39

Browse files
committed
Debugging corefud.MergeSameSpan.
1 parent ee89d8f commit 7772c39

1 file changed

Lines changed: 7 additions & 0 deletions

File tree

udapi/block/corefud/mergesamespan.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,12 @@ def process_tree(self, tree):
2424
for mA, mB in itertools.combinations(mentions, 2):
2525
if self.same_cluster_only and mA.cluster != mB.cluster:
2626
continue
27+
# Reduce non-determinism in which mention is removed:
28+
# If the mentions belong to different entities, sort them by entity (cluster) ids.
29+
if mA.cluster.cluster_id > mB.cluster.cluster_id:
30+
mX = mA
31+
mA = mB
32+
mB = mX
2733

2834
sA, sB = set(mA.words), set(mB.words)
2935
if sA != sB:
@@ -40,6 +46,7 @@ def process_tree(self, tree):
4046
# m.cluster = mA.cluster
4147
# Remove mention B. It may have been removed earlier because of
4248
# another duplicate; that is the purpose of the try-except.
49+
###!!! TODO: If we remove a singleton, we are destroying the cluster. Then we must also handle possible bridging and split antecedents pointing to that cluster!
4350
for wb in sB:
4451
try:
4552
wb._mentions.remove(mB)

0 commit comments

Comments
 (0)