@@ -25,19 +25,15 @@ def is_root_reachable_by_deps(self, node, parents_to_ignore=None):
2525 proc_node , path = stack .pop ()
2626 # root is reachable
2727 if proc_node == node .root :
28- break
28+ return True
2929 # path forms a cycle, the root cannot be reached through this branch
30- if proc_node in path :
31- continue
32- for dep in proc_node .deps :
33- # the root cannot be reached through ignored nodes
34- if dep ['parent' ] in parents_to_ignore :
35- continue
36- # process the parent recursively
37- stack .append ((dep ['parent' ], path + [proc_node ]))
38- else :
39- return False
40- return True
30+ if proc_node not in path :
31+ for dep in proc_node .deps :
32+ # the root cannot be reached through ignored nodes
33+ if dep ['parent' ] not in parents_to_ignore :
34+ # process the parent recursively
35+ stack .append ((dep ['parent' ], path + [proc_node ]))
36+ return False
4137
4238 def _deps_ignore_nodes (self , node , parents_to_ignore ):
4339 """ Retrieve deps from the node, recursively ignoring specified parents.
@@ -46,18 +42,16 @@ def _deps_ignore_nodes(self, node, parents_to_ignore):
4642 stack = [(node , [])]
4743 while stack :
4844 proc_node , skipped_nodes = stack .pop ()
49- # if there is a cycle of skipped nodes, ground the subtree to the root
50- if proc_node in skipped_nodes :
51- newdeps .append ({'parent' : node .root , 'deprel' : 'root' })
52- continue
53- for dep in proc_node .deps :
54- # keep deps with a parent that shouldn't be ignored
55- if not dep ['parent' ] in parents_to_ignore :
56- newdeps .append (dep )
57- continue
58- # process the ignored parent recursively
59- stack .append ((dep ['parent' ], skipped_nodes + [proc_node ]))
60- return newdeps
45+ if proc_node not in skipped_nodes :
46+ for dep in proc_node .deps :
47+ if dep ['parent' ] in parents_to_ignore :
48+ # process the ignored parent recursively
49+ stack .append ((dep ['parent' ], skipped_nodes + [proc_node ]))
50+ else :
51+ # keep deps with a parent that shouldn't be ignored
52+ newdeps .append (dep )
53+ # If no deps survive (e.g. the ignored parents only form a cycle), fall back to a single 'root' dep attached to node.root.
54+ return newdeps if newdeps else [{'parent' : node .root , 'deprel' : 'root' }]
6155
6256 def process_document (self , doc ):
6357 # This block should work both with coreference loaded (deserialized) and not.
@@ -67,17 +61,14 @@ def process_document(self, doc):
6761 if self .empty :
6862 for node in root .descendants :
6963 # process only the nodes dependent on empty nodes
70- if not '.' in node .raw_deps :
71- continue
72- # just remove empty parents if the root remains reachable
73- if self .is_root_reachable_by_deps (node , root .empty_nodes ):
74- node .deps = [dep for dep in node .deps if not dep ['parent' ] in root .empty_nodes ]
75- # otherwise propagate to non-empty ancestors
76- else :
77- newdeps = self ._deps_ignore_nodes (node , root .empty_nodes )
78- newdeps_sorted = sorted (set ((dep ['parent' ].ord , dep ['deprel' ]) for dep in newdeps ))
79- node .raw_deps = '|' .join (f"{ p } :{ r } " for p , r in newdeps_sorted )
80-
64+ if '.' in node .raw_deps :
65+ # just remove empty parents if the root remains reachable
66+ if self .is_root_reachable_by_deps (node , root .empty_nodes ):
67+ node .deps = [dep for dep in node .deps if not dep ['parent' ] in root .empty_nodes ]
68+ # otherwise propagate to non-empty ancestors
69+ else :
70+ node .deps = self ._deps_ignore_nodes (node , root .empty_nodes )
71+ # The Functor cleanup below must run for every node, even when '.' is not in node.raw_deps.
8172 if '.' in node .misc ['Functor' ].split (':' )[0 ]:
8273 del node .misc ['Functor' ]
8374 root .empty_nodes = []
0 commit comments