11from __future__ import annotations
22
3+ import weakref
4+ from asyncio import CancelledError , Event , Task , create_task , sleep
35from contextlib import contextmanager
4- from typing import ContextManager
6+ from functools import partial
7+ from typing import Callable , ContextManager , NamedTuple
58
69try :
710 from tree_sitter import Language , Node , Parser , Query , Tree
@@ -43,6 +46,17 @@ def temporary_query_point_range(
4346 query .set_point_range (default_point_range )
4447
4548
class SyntaxTreeEdit(NamedTuple):
    """Details of a tree-sitter syntax tree edit operation.

    The field names match the keyword arguments accepted by the syntax tree's
    ``edit`` method, so a recorded edit can be replayed later with
    ``tree.edit(**edit._asdict())``.
    """

    # Offsets counted in (utf-8) bytes.
    start_byte: int
    old_end_byte: int
    new_end_byte: int
    # Points are (row index, column *byte* offset) pairs, not plain integers.
    start_point: tuple[int, int]
    old_end_point: tuple[int, int]
    new_end_point: tuple[int, int]
58+
59+
4660class SyntaxAwareDocumentError (Exception ):
4761 """General error raised when SyntaxAwareDocument is used incorrectly."""
4862
@@ -76,9 +90,37 @@ def __init__(
7690 self ._parser = Parser (self .language )
7791 """The tree-sitter Parser or None if tree-sitter is unavailable."""
7892
79- self ._syntax_tree : Tree = self ._parser .parse (self ._read_callable ) # type: ignore
93+ self ._syntax_tree : Tree = self ._parser .parse (
94+ partial (self ._read_callable , lines = self .lines )
95+ ) # type: ignore
8096 """The tree-sitter Tree (syntax tree) built from the document."""
8197
98+ self ._syntax_tree_update_callback : Callable [[], None ] | None = None
99+ self ._background_parser = BackgroundSyntaxParser (self )
100+ self ._pending_syntax_edits : list [SyntaxTreeEdit ] = []
101+
def clean_up(self) -> None:
    """Release background resources before the document is discarded.

    This stops the background syntax parser owned by this document.
    """
    background_parser = self._background_parser
    background_parser.stop()
105+
def copy_of_lines(self) -> list[str]:
    """Return a new list holding the document's current lines.

    The result is a shallow copy, so later changes to the document's own
    line list do not affect it.
    """
    return [*self._lines]
109+
def apply_pending_syntax_edits(self) -> bool:
    """Replay every queued edit onto the syntax tree, then empty the queue.

    Returns:
        True if at least one edit was applied, False if the queue was empty.
    """
    pending = self._pending_syntax_edits
    if not pending:
        return False

    apply_edit = self._syntax_tree.edit
    for edit in pending:
        # Each edit's fields double as the keyword arguments of Tree.edit.
        apply_edit(**edit._asdict())

    # Empty the list in place so the same list object keeps being used.
    del pending[:]
    return True
123+
82124 def prepare_query (self , query : str ) -> Query | None :
83125 """Prepare a tree-sitter tree query.
84126
@@ -117,6 +159,26 @@ def query_syntax_tree(
117159 with temporary_query_point_range (query , start_point , end_point ):
118160 return query .captures (self ._syntax_tree .root_node )
119161
def set_syntax_tree_update_callback(
    self, callback: Callable[[], None]
) -> None:
    """Register the function to call once the syntax tree has been rebuilt.

    Any previously registered callback is replaced.

    Args:
        callback: A callable taking no arguments; its return value is ignored.
    """
    self._syntax_tree_update_callback = callback
172+
def trigger_syntax_tree_update(self, force_update: bool = False) -> None:
    """Ask the background parser to bring the syntax tree up to date.

    Args:
        force_update: When set, ensure that the syntax tree is regenerated
            unconditionally.
    """
    request_update = self._background_parser.trigger_syntax_tree_update
    request_update(force_update)
181+
120182 def replace_range (self , start : Location , end : Location , text : str ) -> EditResult :
121183 """Replace text at the given range.
122184
@@ -143,22 +205,47 @@ def replace_range(self, start: Location, end: Location, text: str) -> EditResult
143205 end_location = replace_result .end_location
144206 assert self ._syntax_tree is not None
145207 assert self ._parser is not None
146- self ._syntax_tree .edit (
147- start_byte = start_byte ,
148- old_end_byte = old_end_byte ,
149- new_end_byte = start_byte + text_byte_length ,
150- start_point = start_point ,
151- old_end_point = old_end_point ,
152- new_end_point = self ._location_to_point (end_location ),
153- )
154- # Incrementally parse the document.
155- self ._syntax_tree = self ._parser .parse (
156- self ._read_callable ,
157- self ._syntax_tree , # type: ignore[arg-type]
208+ self ._pending_syntax_edits .append (
209+ SyntaxTreeEdit (
210+ start_byte = start_byte ,
211+ old_end_byte = old_end_byte ,
212+ new_end_byte = start_byte + text_byte_length ,
213+ start_point = start_point ,
214+ old_end_point = old_end_point ,
215+ new_end_point = self ._location_to_point (end_location ),
216+ )
158217 )
159-
160218 return replace_result
161219
def reparse(self, timeout_us: int, lines: list[str], syntax_tree=None) -> bool:
    """Reparse the document, giving up if the parser exceeds a time limit.

    On success the new tree replaces the document's syntax tree and the
    registered syntax tree update callback, if any, is invoked.

    Args:
        timeout_us: The parser timeout in microseconds; must be positive.
        lines: A list of the lines being parsed.
        syntax_tree: Currently unused; the document's existing syntax tree is
            always used as the starting point for the parse. The parameter is
            retained so that existing callers keep working.

    Returns:
        True if parsing succeeded and False if a timeout occurred.
    """
    assert timeout_us > 0
    read_source = partial(self._read_callable, lines=lines)
    parser = self._parser
    saved_timeout = parser.timeout_micros
    parser.timeout_micros = timeout_us
    try:
        tree = parser.parse(read_source, self._syntax_tree)  # type: ignore[arg-type]
    except ValueError:
        # The only known cause is a timeout.
        return False
    finally:
        # Restore the caller's timeout before any other code runs, so the
        # update callback never observes the temporary time limit.
        parser.timeout_micros = saved_timeout

    self._syntax_tree = tree
    if self._syntax_tree_update_callback is not None:
        self._syntax_tree_update_callback()
    return True
248+
162249 def get_line (self , index : int ) -> str :
163250 """Return the string representing the line, not including new line characters.
164251
@@ -214,7 +301,12 @@ def _location_to_point(self, location: Location) -> tuple[int, int]:
214301 bytes_on_left = 0
215302 return row , bytes_on_left
216303
217- def _read_callable (self , byte_offset : int , point : tuple [int , int ]) -> bytes :
304+ def _read_callable (
305+ self ,
306+ byte_offset : int ,
307+ point : tuple [int , int ],
308+ lines : list [str ],
309+ ) -> bytes :
218310 """A callable which informs tree-sitter about the document content.
219311
220312 This is passed to tree-sitter which will call it frequently to retrieve
@@ -224,14 +316,14 @@ def _read_callable(self, byte_offset: int, point: tuple[int, int]) -> bytes:
224316 byte_offset: The number of (utf-8) bytes from the start of the document.
225317 point: A tuple (row index, column *byte* offset). Note that this differs
226318 from our Location tuple which is (row_index, column codepoint offset).
319+ lines: The lines of the document being parsed.
227320
228321 Returns:
229322 All the utf-8 bytes between the byte_offset/point and the end of the current
230323 line _including_ the line separator character(s). Returns None if the
231324 offset/point requested by tree-sitter doesn't correspond to a byte.
232325 """
233326 row , column = point
234- lines = self ._lines
235327 newline = self .newline
236328
237329 row_out_of_bounds = row >= len (lines )
@@ -252,3 +344,75 @@ def _read_callable(self, byte_offset: int, point: tuple[int, int]) -> bytes:
252344 return b"\n "
253345
254346 return b""
347+
348+
class BackgroundSyntaxParser:
    """A provider of incremental background parsing for syntax highlighting.

    This runs tree-sitter parsing as a background asyncio task that shares the
    event loop with the rest of the application. Long parses are broken into
    short time slices, which prevents occasional, relatively long parsing
    times from making `TextArea` editing become unresponsive.
    """

    # The longest a single parse step may run, in seconds.
    PARSE_TIME_SLICE = 0.005
    # The same limit in the microsecond units passed to the document's reparse.
    PARSE_TIMEOUT_MICROSECONDS = int(PARSE_TIME_SLICE * 1_000_000)

    def __init__(self, document: "SyntaxAwareDocument"):
        """Create the parser and start its background task.

        Args:
            document: The document whose syntax tree is kept up to date. Only
                a weak reference to it is held.

        NOTE: `create_task` needs a running event loop, so this must be called
        from within one.
        """
        self._document_ref = weakref.ref(document)
        self._event = Event()
        self._force_update = False
        self._task: Task = create_task(self._execute_reparsing())

    def stop(self) -> None:
        """Stop running as a background task."""
        self._task.cancel()

    def trigger_syntax_tree_update(self, force_update: bool) -> None:
        """Trigger a new syntax tree update to run in the background.

        Args:
            force_update: When set, ensure that the syntax tree is regenerated
                unconditionally.
        """
        if force_update:
            # Only ever raise the flag here; the background task lowers it
            # when it picks up the request.
            self._force_update = True
        self._event.set()

    async def _execute_reparsing(self) -> None:
        """Run, as a task, tree-sitter reparse operations on demand.

        Each time the trigger event is set, one reparse is performed. The loop
        ends quietly when the task is cancelled; any other exception is left to
        propagate so that the failure is recorded on the task rather than
        being silently discarded.
        """
        try:
            while True:
                await self._event.wait()
                self._event.clear()
                force_update, self._force_update = self._force_update, False
                await self._perform_a_single_reparse(force_update)
        except CancelledError:
            # Cancellation is how stop() ends this task.
            return

    async def _perform_a_single_reparse(self, force_update: bool) -> None:
        """Bring the document's syntax tree up to date, if there is work to do.

        Nothing happens when the document no longer exists, or when there are
        no pending edits and `force_update` is not set.

        Args:
            force_update: When set, reparse even if no edits were pending.
        """
        document = self._document_ref()
        if document is None:
            return
        if not (document.apply_pending_syntax_edits() or force_update):
            return

        # In order to allow the user to continue editing without interruption,
        # we reparse a snapshot of the TextArea's document.
        copy_of_text_for_parsing = document.copy_of_lines()

        # Use tree-sitter's parser timeout mechanism to break, when necessary,
        # the full reparse into multiple steps. Most of the time, tree-sitter
        # is so fast that no looping occurs.
        parsed_ok = False
        while not parsed_ok:
            parsed_ok = document.reparse(
                self.PARSE_TIMEOUT_MICROSECONDS, lines=copy_of_text_for_parsing
            )
            if not parsed_ok:
                # Sleeping for zero seconds allows other tasks, I/O, *etc.* to
                # execute, keeping the TextArea and other widgets responsive.
                await sleep(0.0)
0 commit comments