@@ -216,7 +216,7 @@ def bound_collection_from_inst(
216216 collection_type : str ,
217217 ) -> Optional [list ]:
218218 """
219- Try to a replace sequence of instruction that ends with a
219+ Try to replace a sequence of instruction that ends with a
220220 BUILD_xxx with a sequence that can be parsed much faster, but
221221 inserting the token boundary at the beginning of the sequence.
222222 """
@@ -298,8 +298,8 @@ def bound_collection_from_inst(
298298 )
299299 return new_tokens
300300
301- def bound_map_from_inst (
302- self , insts : list , next_tokens : list , inst : Instruction , t : Token , i : int
301+ def bound_map_from_inst_35 (
302+ self , insts : list , next_tokens : list , t : Token , i : int
303303 ) -> Optional [list ]:
304304 """
305305 Try to replace a sequence of instructions that ends with a BUILD_MAP into
@@ -315,25 +315,19 @@ def bound_map_from_inst(
315315 if count < 5 :
316316 return None
317317
318- if self .version >= (3 , 5 ):
319- # Newer Python BUILD_MAP argument's count is a
320- # key and value pair so it is multiplied by two.
321- collection_start = i - (count * 2 )
322- assert (count * 2 ) <= i
323-
324- for j in range (collection_start , i , 2 ):
325- if insts [j ].opname not in ("LOAD_CONST" ,):
326- return None
327- if insts [j + 1 ].opname not in ("LOAD_CONST" ,):
328- return None
329-
330- collection_start = i - (2 * count )
331- collection_enum = CONST_COLLECTIONS .index ("CONST_MAP" )
332- # else: Older Python count is sum of all key and value pairs
333- # Each pair is added individually like:
334- # LOAD_CONST ("Max-Age")
335- # LOAD_CONST ("max-age")
336- # STORE_MAP
318+ # Newer Python BUILD_MAP argument's count is a
319+ # key and value pair so it is multiplied by two.
320+ collection_start = i - (count * 2 )
321+ assert (count * 2 ) <= i
322+
323+ for j in range (collection_start , i , 2 ):
324+ if insts [j ].opname not in ("LOAD_CONST" ,):
325+ return None
326+ if insts [j + 1 ].opname not in ("LOAD_CONST" ,):
327+ return None
328+
329+ collection_start = i - (2 * count )
330+ collection_enum = CONST_COLLECTIONS .index ("CONST_MAP" )
337331
338332 # If we get here, all instructions before tokens[i] are LOAD_CONST and
339333 # we can add a boundary marker and change LOAD_CONST to
@@ -346,7 +340,7 @@ def bound_map_from_inst(
346340 attr = collection_enum ,
347341 pattr = "CONST_MAP" ,
348342 offset = f"{ start_offset } _0" ,
349- linestart = False ,
343+ linestart = insts [ collection_start ]. starts_line ,
350344 has_arg = True ,
351345 has_extended_arg = False ,
352346 opc = self .opc ,
@@ -364,6 +358,7 @@ def bound_map_from_inst(
364358 has_arg = True ,
365359 has_extended_arg = False ,
366360 opc = self .opc ,
361+ optype = "pseudo" ,
367362 )
368363 )
369364 new_tokens .append (
@@ -376,7 +371,7 @@ def bound_map_from_inst(
376371 has_arg = True ,
377372 has_extended_arg = False ,
378373 opc = self .opc ,
379- optype = insts [ j + 1 ]. optype ,
374+ optype = "pseudo" ,
380375 )
381376 )
382377 new_tokens .append (
@@ -389,7 +384,93 @@ def bound_map_from_inst(
389384 has_arg = t .has_arg ,
390385 has_extended_arg = False ,
391386 opc = t .opc ,
392- optype = t .optype ,
387+ optype = "pseudo" ,
388+ )
389+ )
390+ return new_tokens
391+
def bound_map_from_inst_pre35(
    self, insts: list, next_tokens: list, t: Token, i: int
) -> Optional[list]:
    """
    Try to replace a sequence of instructions that starts with a
    BUILD_MAP (bytecode layout used before Python 3.5) with a sequence
    that can be parsed much faster, by inserting a token boundary at
    the beginning of the sequence.

    Here the BUILD_MAP sits at ``insts[i]`` and every entry follows it as
    a ``LOAD_CONST`` (value), ``LOAD_CONST`` (key), ``STORE_MAP`` triple,
    so the scan runs *forward* from ``i + 1``.

    :param insts: the full instruction list being scanned.
    :param next_tokens: tokens produced so far; only ``next_tokens[:i]``
        is carried over into the result.
    :param t: the token for the BUILD_MAP instruction; ``t.attr`` is the
        number of key/value entries.
    :param i: index of the BUILD_MAP instruction in ``insts``.
    :return: a new token list ending in ``BUILD_DICT_OLDER``, or ``None``
        when the map is too small, the instruction stream is too short,
        or any entry is not a constant key/value pair.
    """
    count = t.attr
    assert isinstance(count, int)

    # For small maps don't bother
    if count < 10:
        return None

    # Each entry is a value LOAD_CONST, a key LOAD_CONST and a STORE_MAP,
    # so the count is multiplied by three.
    collection_end = i + 1 + count * 3

    # Not enough instructions left to hold ``count`` full triples: bail out
    # instead of indexing past the end of ``insts``.
    if collection_end > len(insts):
        return None

    for j in range(i + 1, collection_end, 3):
        if insts[j].opname != "LOAD_CONST":
            return None
        if insts[j + 1].opname != "LOAD_CONST":
            return None
        if insts[j + 2].opname != "STORE_MAP":
            return None

    collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

    new_tokens = next_tokens[:i]
    start_offset = insts[i].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr="CONST_MAP",
            offset=f"{start_offset}_0",
            linestart=insts[i].starts_line,
            has_arg=True,
            has_extended_arg=False,
            opc=self.opc,
            optype="pseudo",
        )
    )
    for j in range(i + 1, collection_end, 3):
        # The value is loaded first and the key second, but the key token
        # is emitted first.
        value_inst = insts[j]
        key_inst = insts[j + 1]
        for pseudo_name, entry in (
            ("ADD_KEY", key_inst),
            ("ADD_VALUE", value_inst),
        ):
            new_tokens.append(
                Token(
                    opname=pseudo_name,
                    attr=entry.argval,
                    pattr=entry.argrepr,
                    offset=entry.offset,
                    linestart=entry.starts_line,
                    has_arg=True,
                    has_extended_arg=False,
                    opc=self.opc,
                    optype="pseudo",
                )
            )
    # Close the collection with a renamed copy of the original BUILD_MAP.
    new_tokens.append(
        Token(
            opname="BUILD_DICT_OLDER",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            linestart=t.linestart,
            has_arg=t.has_arg,
            has_extended_arg=False,
            opc=t.opc,
            optype="pseudo",
        )
    )
    return new_tokens
@@ -497,8 +578,16 @@ def ingest(
497578
498579 last_op_was_break = False
499580 new_tokens = []
581+ skip_end_offset = None
500582
501583 for i , inst in enumerate (self .insts ):
584+ # BUILD_MAP for < 3.5 can skip *forward* in instructions and
585+ # replace them. So we use the below to get up to the position
586+ # scanned and replaced forward
587+ if skip_end_offset and inst .offset <= skip_end_offset :
588+ continue
589+ skip_end_offset = None
590+
502591 opname = inst .opname
503592 argval = inst .argval
504593 pattr = inst .argrepr
@@ -532,17 +621,38 @@ def ingest(
532621 if try_tokens is not None :
533622 new_tokens = try_tokens
534623 continue
535- elif opname in ("BUILD_MAP" ,) and self .version >= (3 , 5 ):
536- try_tokens = self .bound_map_from_inst (
624+
625+ elif opname in ("BUILD_MAP" ,):
626+ bound_map_from_insts_fn = (
627+ self .bound_map_from_inst_35
628+ if self .version >= (3 , 5 )
629+ else self .bound_map_from_inst_pre35
630+ )
631+ try_tokens = bound_map_from_insts_fn (
537632 self .insts ,
538633 new_tokens ,
539- inst ,
540634 t ,
541635 i ,
542636 )
543637 if try_tokens is not None :
544- new_tokens = try_tokens
545- continue
638+ if self .version < (3 , 5 ):
639+ assert try_tokens [- 1 ] == "BUILD_DICT_OLDER"
640+ prev_offset = inst .offset
641+ for j in range (i , len (self .insts )):
642+ if self .insts [j ].opname == "STORE_NAME" :
643+ new_tokens = try_tokens
644+ skip_end_offset = prev_offset
645+ # Set a hacky sentinel to indicate skipping to the
646+ # next instruction
647+ opname = "EXTENDED_ARG"
648+ break
649+ prev_offset = self .insts [j ].offset
650+ pass
651+ pass
652+ else :
653+ new_tokens = try_tokens
654+ continue
655+ pass
546656
547657 argval = inst .argval
548658 op = inst .opcode
0 commit comments