Skip to content

Commit 35e138a

Browse files
committed
Make ud.JoinAsMwt more general so that other blocks can inherit from it
1 parent e66067d commit 35e138a

File tree

1 file changed

+23
-12
lines changed

1 file changed

+23
-12
lines changed

udapi/block/ud/joinasmwt.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,30 @@ def process_node(self, node):
2222
if node.multiword_token:
2323
return
2424
mwt_nodes = [node]
25-
while (node.no_space_after and node.next_node and not node.next_node.multiword_token
26-
and node.form[-1].isalpha() and node.next_node.form[0].isalpha()):
25+
while (node.next_node and not node.next_node.multiword_token
26+
and self.should_join(node, node.next_node)):
2727
node = node.next_node
2828
mwt_nodes.append(node)
2929
if len(mwt_nodes) > 1:
30-
mwt_form = ''.join([n.form for n in mwt_nodes])
31-
mwt = node.root.create_multiword_token(mwt_nodes, mwt_form)
32-
if node.misc['SpaceAfter'] == 'No':
33-
mwt.misc['SpaceAfter'] = 'No'
30+
self.create_mwt(mwt_nodes)
31+
32+
def should_join(self, node, next_node):
    """Decide whether `next_node` should be joined with `node` into one MWT.

    Two adjacent nodes are joined when `node.no_space_after` is truthy,
    the last character of `node.form` is alphabetic and the first
    character of `next_node.form` is alphabetic.

    Args:
        node: the current (left) node.
        next_node: the node immediately following `node`.

    Returns:
        A truthy value if the two nodes should be joined, falsy otherwise.
    """
    # Slicing instead of indexing: an empty form yields '' (and
    # ''.isalpha() is False) rather than raising IndexError.
    return (node.no_space_after
            and node.form[-1:].isalpha()
            and next_node.form[:1].isalpha())
34+
35+
def create_mwt(self, mwt_nodes):
    """Wrap the given nodes into a newly created multiword token (MWT).

    The MWT form is the concatenation of the member nodes' forms.
    If the last member has ``SpaceAfter=No`` in MISC, that value is copied
    to the MWT; ``SpaceAfter`` is then deleted from every member node.
    When ``self.revert_orig_form`` is truthy, every member with a non-empty
    ``OrigForm`` in MISC gets that value as its form and the ``OrigForm``
    entry is deleted. Finally ``self.postprocess_mwt(mwt)`` is called.

    Args:
        mwt_nodes: non-empty sequence of nodes to be joined, in word order.
    """
    # This method has no `node` variable in scope; the node whose
    # SpaceAfter applies to the whole token is the last member.
    last_node = mwt_nodes[-1]
    mwt_form = ''.join(n.form for n in mwt_nodes)
    mwt = last_node.root.create_multiword_token(mwt_nodes, mwt_form)
    if last_node.misc['SpaceAfter'] == 'No':
        mwt.misc['SpaceAfter'] = 'No'
    # NOTE(review): the unconditional delete assumes the MISC container
    # tolerates removing an absent key - confirm against its implementation.
    for mwt_node in mwt_nodes:
        del mwt_node.misc['SpaceAfter']
    if self.revert_orig_form:
        for mwt_node in mwt_nodes:
            if mwt_node.misc['OrigForm']:
                mwt_node.form = mwt_node.misc['OrigForm']
                del mwt_node.misc['OrigForm']
    # The hook is declared as postprocess_mwt(self, mwt), so pass the MWT.
    self.postprocess_mwt(mwt)
48+
49+
# a helper method to be overridden
50+
def postprocess_mwt(self, mwt):
    """Hook for subclasses: receives a multiword token; does nothing here."""

0 commit comments

Comments
 (0)