77for person names. This block can be used to fix them.
88"""
99from udapi .core .block import Block
10+ import regex as re
1011import logging
1112
1213
@@ -19,18 +20,26 @@ class FixCompoundName(Block):
1920
def process_node(self, node):
    """Re-attach a PROPN 'compound' name group as a head-first 'flat:name' group.

    The transformation fires only when ``node`` is a PROPN attached as
    ``compound`` to a PROPN parent. The name group is the parent plus all
    of its PROPN ``compound`` children, ordered by ``ord``. The first word
    of the group takes over the parent's attachment (its parent and its
    deprel); every other word is attached to that first word as
    ``flat:name``.

    Groups in which any word form contains a digit are left untouched.

    Args:
        node: the node being visited; modified in place together with its
            parent and siblings when the transformation applies.

    Returns:
        None.
    """
    is_name_part = (node.upos == 'PROPN'
                    and node.udeprel == 'compound'
                    and node.parent.upos == 'PROPN')
    if not is_name_part:
        return

    # Remember the outside attachment before any re-attachment changes it.
    origparent = node.parent
    grandparent = origparent.parent
    outdeprel = origparent.deprel

    # The original parent plus its PROPN compound children, in surface order.
    namewords = sorted(
        (x for x in origparent.children(add_self=True)
         if x.upos == 'PROPN' and (x.udeprel == 'compound' or x == origparent)),
        key=lambda x: x.ord)

    # The Hindi treebank tags dates (['30', 'navaṁbara'], ['disaṁbara', '1993'])
    # as PROPN compounds. This is wrong but it is also different from the
    # personal names targeted here, so "names" that contain numbers are skipped.
    if any(re.search(r"\d", x.form) for x in namewords):
        return

    # Enhanced dependencies cannot be transformed here; if we proceed, the
    # basic tree diverges from the enhanced graph.
    # NOTE(review): only the visited node's deps are inspected, and nothing
    # in this method stops processing after the message is logged -- confirm
    # whether the logging setup aborts on fatal.
    if node.deps:
        logging.fatal('There are enhanced dependencies but ud.FixCompoundName has been implemented only for basic dependencies.')

    # The first name word becomes the technical head. If it already is the
    # original parent, its outside attachment stays as it is.
    head, *rest = namewords
    if head != origparent:
        head.parent = grandparent
        head.deprel = outdeprel
    for word in rest:
        word.parent = head
        word.deprel = 'flat:name'
0 commit comments