2323"""
2424
2525import sys
26- import uncompyle6 .scanners .scanner2 as scan
2726
2827# bytecode verification, verify(), uses JUMP_OPs from here
2928from xdis import iscode
30- from xdis .opcodes import opcode_26
3129from xdis .bytecode import _get_const_info
30+ from xdis .opcodes import opcode_26
3231
32+ import uncompyle6 .scanners .scanner2 as scan
3333from uncompyle6 .scanner import Token
3434
3535intern = sys .intern
3636
3737JUMP_OPS = opcode_26 .JUMP_OPS
3838
39+
3940class Scanner26 (scan .Scanner2 ):
4041 def __init__ (self , show_asm = False ):
4142 super (Scanner26 , self ).__init__ ((2 , 6 ), show_asm )
4243
4344 # "setup" opcodes
44- self .setup_ops = frozenset ([
45- self .opc .SETUP_EXCEPT , self .opc .SETUP_FINALLY ,
46- ])
45+ self .setup_ops = frozenset (
46+ [
47+ self .opc .SETUP_EXCEPT ,
48+ self .opc .SETUP_FINALLY ,
49+ ]
50+ )
4751
4852 return
4953
@@ -76,8 +80,9 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
7680
7781 # show_asm = 'after'
7882 if show_asm in ("both" , "before" ):
83+ print ("\n # ---- before tokenization:" )
7984 for instr in bytecode .get_instructions (co ):
80- print (instr .disassemble ())
85+ print (instr .disassemble (self . opc ))
8186
8287 # Container for tokens
8388 tokens = []
@@ -96,17 +101,18 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
96101 # 'LOAD_ASSERT' is used in assert statements.
97102 self .load_asserts = set ()
98103 for i in self .op_range (0 , codelen ):
99-
100104 # We need to detect the difference between:
101105 # raise AssertionError
102106 # and
103107 # assert ...
104- if (self .code [i ] == self .opc .JUMP_IF_TRUE and
105- i + 4 < codelen and
106- self .code [i + 3 ] == self .opc .POP_TOP and
107- self .code [i + 4 ] == self .opc .LOAD_GLOBAL ):
108- if names [self .get_argument (i + 4 )] == 'AssertionError' :
109- self .load_asserts .add (i + 4 )
108+ if (
109+ self .code [i ] == self .opc .JUMP_IF_TRUE
110+ and i + 4 < codelen
111+ and self .code [i + 3 ] == self .opc .POP_TOP
112+ and self .code [i + 4 ] == self .opc .LOAD_GLOBAL
113+ ):
114+ if names [self .get_argument (i + 4 )] == "AssertionError" :
115+ self .load_asserts .add (i + 4 )
110116
111117 jump_targets = self .find_jump_targets (show_asm )
112118 # contains (code, [addrRefToCode])
@@ -131,7 +137,8 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
131137 i += 1
132138 op = self .code [offset ]
133139 op_name = self .opname [op ]
134- oparg = None ; pattr = None
140+ oparg = None
141+ pattr = None
135142
136143 if offset in jump_targets :
137144 jump_idx = 0
@@ -142,28 +149,37 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
142149 # properly. For example, a "loop" with an "if" nested in it should have the
143150 # "loop" tag last so the grammar rule matches that properly.
144151 last_jump_offset = - 1
145- for jump_offset in sorted (jump_targets [offset ], reverse = True ):
152+ for jump_offset in sorted (jump_targets [offset ], reverse = True ):
146153 if jump_offset != last_jump_offset :
147- tokens .append (Token (
148- 'COME_FROM' , jump_offset , repr (jump_offset ),
149- offset = "%s_%d" % (offset , jump_idx ),
150- has_arg = True ))
154+ tokens .append (
155+ Token (
156+ "COME_FROM" ,
157+ jump_offset ,
158+ repr (jump_offset ),
159+ offset = "%s_%d" % (offset , jump_idx ),
160+ has_arg = True ,
161+ )
162+ )
151163 jump_idx += 1
152164 last_jump_offset = jump_offset
153165 elif offset in self .thens :
154- tokens .append (Token (
155- 'THEN' , None , self .thens [offset ],
156- offset = "%s_0" % offset ,
157- has_arg = True ))
166+ tokens .append (
167+ Token (
168+ "THEN" ,
169+ None ,
170+ self .thens [offset ],
171+ offset = "%s_0" % offset ,
172+ has_arg = True ,
173+ )
174+ )
158175
159- has_arg = ( op >= self .opc .HAVE_ARGUMENT )
176+ has_arg = op >= self .opc .HAVE_ARGUMENT
160177 if has_arg :
161178 oparg = self .get_argument (offset ) + extended_arg
162179 extended_arg = 0
163180 if op == self .opc .EXTENDED_ARG :
164- extended_arg += self .extended_arg_val (oparg )
165- continue
166-
181+ extended_arg += self .extended_arg_val (oparg )
182+ continue
167183
168184 # Note: we match on the opcode name rather than op since
169185 # BUILD_SET isn't in earlier Pythons.
@@ -172,7 +188,14 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
172188 "BUILD_SET" ,
173189 ):
174190 t = Token (
175- op_name , oparg , pattr , offset , self .linestarts .get (offset , None ), op , has_arg , self .opc
191+ op_name ,
192+ oparg ,
193+ pattr ,
194+ offset ,
195+ self .linestarts .get (offset , None ),
196+ op ,
197+ has_arg ,
198+ self .opc ,
176199 )
177200
178201 collection_type = op_name .split ("_" )[1 ]
@@ -221,8 +244,8 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
221244 # FIXME: this is a hack to catch stuff like:
222245 # if x: continue
223246 # the "continue" is not on a new line.
224- if len (tokens ) and tokens [- 1 ].kind == ' JUMP_BACK' :
225- tokens [- 1 ].kind = intern (' CONTINUE' )
247+ if len (tokens ) and tokens [- 1 ].kind == " JUMP_BACK" :
248+ tokens [- 1 ].kind = intern (" CONTINUE" )
226249
227250 elif op in self .opc .JABS_OPS :
228251 pattr = repr (oparg )
@@ -240,17 +263,23 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
240263 # CE - Hack for >= 2.5
241264 # Now all values loaded via LOAD_CLOSURE are packed into
242265 # a tuple before calling MAKE_CLOSURE.
243- if (self .version >= (2 , 5 ) and op == self .opc .BUILD_TUPLE and
244- self .code [self .prev [offset ]] == self .opc .LOAD_CLOSURE ):
266+ if (
267+ self .version >= (2 , 5 )
268+ and op == self .opc .BUILD_TUPLE
269+ and self .code [self .prev [offset ]] == self .opc .LOAD_CLOSURE
270+ ):
245271 continue
246272 else :
247- op_name = ' %s_%d' % (op_name , oparg )
273+ op_name = " %s_%d" % (op_name , oparg )
248274 customize [op_name ] = oparg
249275 elif self .version > (2 , 0 ) and op == self .opc .CONTINUE_LOOP :
250276 customize [op_name ] = 0
251- elif op_name in """
277+ elif (
278+ op_name
279+ in """
252280 CONTINUE_LOOP EXEC_STMT LOAD_LISTCOMP LOAD_SETCOMP
253- """ .split ():
281+ """ .split ()
282+ ):
254283 customize [op_name ] = 0
255284 elif op == self .opc .JUMP_ABSOLUTE :
256285 # Further classify JUMP_ABSOLUTE into backward jumps
@@ -266,23 +295,24 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
266295 # rule for that.
267296 target = self .get_target (offset )
268297 if target <= offset :
269- op_name = 'JUMP_BACK'
270- if (offset in self .stmts
271- and self .code [offset + 3 ] not in (self .opc .END_FINALLY ,
272- self .opc .POP_BLOCK )):
273- if ((offset in self .linestarts and
274- tokens [- 1 ].kind == 'JUMP_BACK' )
275- or offset not in self .not_continue ):
276- op_name = 'CONTINUE'
298+ op_name = "JUMP_BACK"
299+ if offset in self .stmts and self .code [offset + 3 ] not in (
300+ self .opc .END_FINALLY ,
301+ self .opc .POP_BLOCK ,
302+ ):
303+ if (
304+ offset in self .linestarts and tokens [- 1 ].kind == "JUMP_BACK"
305+ ) or offset not in self .not_continue :
306+ op_name = "CONTINUE"
277307 else :
278308 # FIXME: this is a hack to catch stuff like:
279309 # if x: continue
280310 # the "continue" is not on a new line.
281- if tokens [- 1 ].kind == ' JUMP_BACK' :
311+ if tokens [- 1 ].kind == " JUMP_BACK" :
282312 # We need 'intern' since we have
283313 # already processed the previous
284314 # token.
285- tokens [- 1 ].kind = intern (' CONTINUE' )
315+ tokens [- 1 ].kind = intern (" CONTINUE" )
286316
287317 elif op == self .opc .LOAD_GLOBAL :
288318 if offset in self .load_asserts :
@@ -316,6 +346,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
316346 pass
317347
318348 if show_asm in ("both" , "after" ):
349+ print ("\n # ---- after tokenization:" )
319350 for t in tokens :
320351 print (t .format (line_prefix = "" ))
321352 print ()
0 commit comments