@@ -22,19 +22,30 @@ def process_node(self, node):
2222 if node .multiword_token :
2323 return
2424 mwt_nodes = [node ]
25- while (node .no_space_after and node . next_node and not node .next_node .multiword_token
26- and node . form [ - 1 ]. isalpha () and node .next_node . form [ 0 ]. isalpha ( )):
25+ while (node .next_node and not node .next_node .multiword_token
26+ and self . should_join ( node , node .next_node )):
2727 node = node .next_node
2828 mwt_nodes .append (node )
2929 if len (mwt_nodes ) > 1 :
30- mwt_form = '' .join ([n .form for n in mwt_nodes ])
31- mwt = node .root .create_multiword_token (mwt_nodes , mwt_form )
32- if node .misc ['SpaceAfter' ] == 'No' :
33- mwt .misc ['SpaceAfter' ] = 'No'
30+ self .create_mwt (mwt_nodes )
31+
def should_join(self, node, next_node):
    """Tell whether ``node`` and ``next_node`` should share a multiword token.

    The two nodes are joined only when all of the following hold, checked
    in this order (later checks are skipped once one fails):

    * ``node.no_space_after`` is truthy,
    * the last character of ``node.form`` is alphabetic,
    * the first character of ``next_node.form`` is alphabetic.

    Subclasses may override this to use a different joining criterion.

    Args:
        node: the current node (left-hand side of the candidate pair).
        next_node: the node immediately following ``node``.

    Returns:
        The first falsy value among the three checks, otherwise the
        result of the last check.
    """
    glued = node.no_space_after
    if not glued:
        return glued
    ends_with_letter = node.form[-1].isalpha()
    if not ends_with_letter:
        return ends_with_letter
    return next_node.form[0].isalpha()
34+
def create_mwt(self, mwt_nodes):
    """Create a multiword token spanning ``mwt_nodes`` and tidy up the nodes.

    The surface form of the new token is the concatenation of the forms of
    all member nodes (taken before any form is reverted). If the last
    member node has ``misc['SpaceAfter'] == 'No'``, that flag is copied to
    the multiword token. ``misc['SpaceAfter']`` is then deleted from every
    member node. When ``self.revert_orig_form`` is truthy, each member node
    with a truthy ``misc['OrigForm']`` gets that value as its form and the
    ``OrigForm`` entry is deleted. Finally ``self.postprocess_mwt`` is
    called with the newly created multiword token.

    Args:
        mwt_nodes: non-empty sequence of adjacent nodes, in order, to be
            covered by the new multiword token.
    """
    # The last member node plays the role the loop variable ``node`` had
    # before this code was extracted into its own method; referring to a
    # bare ``node`` here would raise NameError.
    last_node = mwt_nodes[-1]
    mwt_form = ''.join(n.form for n in mwt_nodes)
    mwt = last_node.root.create_multiword_token(mwt_nodes, mwt_form)
    if last_node.misc['SpaceAfter'] == 'No':
        mwt.misc['SpaceAfter'] = 'No'
    for mwt_node in mwt_nodes:
        # NOTE(review): assumes ``misc`` tolerates deleting a key that is
        # not set -- confirm against the misc container implementation.
        del mwt_node.misc['SpaceAfter']
    if self.revert_orig_form:
        for mwt_node in mwt_nodes:
            orig_form = mwt_node.misc['OrigForm']
            if orig_form:
                mwt_node.form = orig_form
                del mwt_node.misc['OrigForm']
    # The hook is declared as ``postprocess_mwt(self, mwt)``, so the new
    # token must be passed along.
    self.postprocess_mwt(mwt)
48+
def postprocess_mwt(self, mwt):
    """Hook meant to be overridden; this base version does nothing.

    Args:
        mwt: a multiword token (unused here).
    """
0 commit comments