Skip to content

Commit 30d59d4

Browse files
authored
Refactor+fix corefud.Delete (#124)
* refactor corefud.Delete It may be subjective, but I find this easier to understand the code. * corefud.Delete bugfix: Functor may refere even to nodes outside DEPS * simplify the corefud.Delete code by using deps instead of raw_deps Fix node.raw_deps, so that it always sorts and removes duplicates, so that the output CoNLL-U files are valid. Add tests.
1 parent c068a44 commit 30d59d4

File tree

4 files changed

+48
-37
lines changed

4 files changed

+48
-37
lines changed

udapi/block/corefud/delete.py

Lines changed: 26 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,15 @@ def is_root_reachable_by_deps(self, node, parents_to_ignore=None):
2525
proc_node, path = stack.pop()
2626
# root is reachable
2727
if proc_node == node.root:
28-
break
28+
return True
2929
# path forms a cycle, the root cannot be reached through this branch
30-
if proc_node in path:
31-
continue
32-
for dep in proc_node.deps:
33-
# the root cannot be reached through ignored nodes
34-
if dep['parent'] in parents_to_ignore:
35-
continue
36-
# process the parent recursively
37-
stack.append((dep['parent'], path + [proc_node]))
38-
else:
39-
return False
40-
return True
30+
if proc_node not in path:
31+
for dep in proc_node.deps:
32+
# the root cannot be reached through ignored nodes
33+
if dep['parent'] not in parents_to_ignore:
34+
# process the parent recursively
35+
stack.append((dep['parent'], path + [proc_node]))
36+
return False
4137

4238
def _deps_ignore_nodes(self, node, parents_to_ignore):
4339
""" Retrieve deps from the node, recursively ignoring specified parents.
@@ -46,18 +42,16 @@ def _deps_ignore_nodes(self, node, parents_to_ignore):
4642
stack = [(node, [])]
4743
while stack:
4844
proc_node, skipped_nodes = stack.pop()
49-
# if there is a cycle of skipped nodes, ground the subtree to the root
50-
if proc_node in skipped_nodes:
51-
newdeps.append({'parent': node.root, 'deprel': 'root'})
52-
continue
53-
for dep in proc_node.deps:
54-
# keep deps with a parent that shouldn't be ignored
55-
if not dep['parent'] in parents_to_ignore:
56-
newdeps.append(dep)
57-
continue
58-
# process the ignored parent recursively
59-
stack.append((dep['parent'], skipped_nodes + [proc_node]))
60-
return newdeps
45+
if proc_node not in skipped_nodes:
46+
for dep in proc_node.deps:
47+
if dep['parent'] in parents_to_ignore:
48+
# process the ignored parent recursively
49+
stack.append((dep['parent'], skipped_nodes + [proc_node]))
50+
else:
51+
# keep deps with a parent that shouldn't be ignored
52+
newdeps.append(dep)
53+
# If no newdeps were found (because of a cycle), return the root.
54+
return newdeps if newdeps else [{'parent': node.root, 'deprel': 'root'}]
6155

6256
def process_document(self, doc):
6357
# This block should work both with coreference loaded (deserialized) and not.
@@ -67,17 +61,14 @@ def process_document(self, doc):
6761
if self.empty:
6862
for node in root.descendants:
6963
# process only the nodes dependent on empty nodes
70-
if not '.' in node.raw_deps:
71-
continue
72-
# just remove empty parents if the root remains reachable
73-
if self.is_root_reachable_by_deps(node, root.empty_nodes):
74-
node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes]
75-
# otherwise propagate to non-empty ancestors
76-
else:
77-
newdeps = self._deps_ignore_nodes(node, root.empty_nodes)
78-
newdeps_sorted = sorted(set((dep['parent'].ord, dep['deprel']) for dep in newdeps))
79-
node.raw_deps = '|'.join(f"{p}:{r}" for p, r in newdeps_sorted)
80-
64+
if '.' in node.raw_deps:
65+
# just remove empty parents if the root remains reachable
66+
if self.is_root_reachable_by_deps(node, root.empty_nodes):
67+
node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes]
68+
# otherwise propagate to non-empty ancestors
69+
else:
70+
node.deps = self._deps_ignore_nodes(node, root.empty_nodes)
71+
# This needs to be done even if '.' not in node.raw_deps.
8172
if '.' in node.misc['Functor'].split(':')[0]:
8273
del node.misc['Functor']
8374
root.empty_nodes = []

udapi/core/node.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ def raw_deps(self):
252252
#if self._raw_deps is not None:
253253
# return self._raw_deps
254254
if self._deps:
255-
self._raw_deps = '|'.join(f"{dep['parent']._ord}:{dep['deprel']}" for dep in self._deps)
255+
self._raw_deps = '|'.join(f"{p}:{r}" for p, r in sorted(set((d['parent'].ord, d['deprel']) for d in self._deps)))
256256
return self._raw_deps
257257

258258
@raw_deps.setter

udapi/core/tests/test_enhdeps.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def test_create_deps2empty(self):
5757
e.deps.append({'parent': h, 'deprel':'dep:e2h'})
5858
d.deps.append({'parent': e, 'deprel': 'dep:d2e'})
5959
self.assertEqual("2:dep:e2h", e.raw_deps, )
60-
self.assertEqual("5:conj|3.1:dep:d2e", d.raw_deps)
60+
self.assertEqual("3.1:dep:d2e|5:conj", d.raw_deps)
6161
self.assertEqual(self.tree.descendants_and_empty, self.nodes[:3] + [e] + self.nodes[3:])
6262

6363

udapi/core/tests/test_node.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,5 +245,25 @@ def test_empty_nodes(self):
245245
self.assertEqual(root.descendants_and_empty, [e1, e2, e3, e4, e6, e7])
246246
self.assertEqual([n.ord for n in root.descendants_and_empty], [0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
247247

248+
def test_enh_deps_and_reordering(self):
249+
"""Test reordering of node ord in enhanced deps when reorderin/removing nodes."""
250+
root = Root()
251+
for i in range(3):
252+
root.create_child(form=f'node{i+1}')
253+
254+
n1, n2, n3 = root.descendants()
255+
n1.raw_deps = '2:nsubj|3:obj'
256+
self.assertEqual(n1.raw_deps, '2:nsubj|3:obj')
257+
self.assertEqual(n1.deps, [{'parent': n2, 'deprel': 'nsubj'}, {'parent': n3, 'deprel': 'obj'}])
258+
n2.shift_after_node(n3)
259+
self.assertEqual(n1.raw_deps, '2:obj|3:nsubj')
260+
# TODO only node.raw_deps are currently guaranteed to return the deps sorted, not node.deps
261+
#self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}, {'parent': n2, 'deprel': 'nsubj'}])
262+
# TODO: after removing a node, all deps should be updated
263+
#n2.remove()
264+
#self.assertEqual(n1.raw_deps, '2:nsubj')
265+
#self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}])
266+
267+
248268
if __name__ == "__main__":
249269
unittest.main()

0 commit comments

Comments
 (0)