@@ -20,19 +20,21 @@ class Normalize(Block):
2020 util.Eval node='node.misc["NonExistentAttribute"] = None'
2121 """
2222
23- def __init__ (self , feats = True , misc = True , sent_id = False , start_sent_id = 1 , sent_id_prefix = "" , ** kwargs ):
23+ def __init__ (self , feats = True , misc = True , sent_id = False , empty_node_ord = False , start_sent_id = 1 , sent_id_prefix = "" , ** kwargs ):
2424 """
2525 Args:
2626 `feats`: normalize the ordering of FEATS. Default=True.
2727 `misc`: normalize the ordering of MISC. Default=True.
2828 `sent_id`: normalize sent_id so it forms a sequence of integers. Default=False.
29+ `empty_node_ord`: normalize ord attributes of empty nodes. Default=False.
2930 `start_sent_id`: the first sent_id number
3031 `sent_id_prefix`: a string to be prepended before the integer sent_id. Default=empty string.
3132 """
3233 super ().__init__ (** kwargs )
3334 self .feats = feats
3435 self .misc = misc
3536 self .sent_id = sent_id
37+ self .empty_node_ord = empty_node_ord
3638 self .next_sent_id = start_sent_id
3739 self .sent_id_prefix = sent_id_prefix
3840 if sent_id_prefix or start_sent_id != 1 :
@@ -49,6 +51,20 @@ def process_bundle(self, bundle):
4951 self .process_tree (tree )
5052
def process_tree(self, tree):
    """Normalize one tree: optionally renumber empty nodes, then process each node.

    When `self.empty_node_ord` is true, every empty node gets the ord
    "<ord of the closest preceding non-empty node>.<k>", where k counts the
    empty nodes seen since that non-empty node, starting at 1. Empty nodes
    that precede the first non-empty node are numbered "0.<k>".
    Afterwards `self.process_node` is called on every node in
    `tree.descendants`, regardless of the `empty_node_ord` setting.

    Args:
        tree: the tree to normalize; it must provide `descendants` and
            `descendants_and_empty`.
    """
    if self.empty_node_ord:
        node_ord, empty_ord = 0, 0
        # The deps of the whole sentence need to be read only once,
        # before the first ord is rewritten.
        deps_loaded = False
        for node in tree.descendants_and_empty:
            if node.is_empty():
                empty_ord += 1
                new_empty_ord = f"{node_ord}.{empty_ord}"
                if str(node.ord) != new_empty_ord:
                    if not deps_loaded:
                        # Make sure all nodes in this sentence have deserialized
                        # enhanced deps before any ord changes.
                        # NOTE(review): presumably the serialized deps refer to
                        # the old ords - confirm against Node.deps.
                        for n in tree.descendants_and_empty:
                            n.deps
                        deps_loaded = True
                    node.ord = new_empty_ord
            else:
                # A non-empty node restarts the numbering of the empty nodes after it.
                empty_ord = 0
                node_ord = node.ord
    for node in tree.descendants:
        self.process_node(node)
5470
0 commit comments