jupyter-server · krassowski · Nov 29, 2025 · Nov 29, 2025 · Nov 30, 2025 · krassowski
diff --git a/jupyter_ydoc/yunicode.py b/jupyter_ydoc/yunicode.py
@@ -2,6 +2,7 @@
 # Distributed under the terms of the Modified BSD License.
 
 from collections.abc import Callable
+from difflib import SequenceMatcher
 from functools import partial
 from typing import Any
 
@@ -64,17 +65,40 @@ def set(self, value: str) -> None:
         :param value: The content of the document.
         :type value: str
         """
-        if self.get() == value:
+        old_value = self.get()
+        if old_value == value:
             # no-op if the values are already the same,
             # to avoid side-effects such as cursor jumping to the top
             return
 
         with self._ydoc.transaction():
-            # clear document
-            self._ysource.clear()
-            # initialize document
-            if value:
-                self._ysource += value
+            matcher = SequenceMatcher(a=old_value, b=value)
+
+            # for very different strings, just replace the whole content;
+            # this avoids generating a huge number of operations
+            if matcher.ratio() < 0.6:
+                # clear document
+                self._ysource.clear()
+                # initialize document
+                if value:
+                    self._ysource += value
+            else:
+                operations = matcher.get_opcodes()
+                offset = 0
+                for tag, i1, i2, j1, j2 in operations:
+                    if tag == "replace":
+                        self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
+                        offset += (j2 - j1) - (i2 - i1)
+                    elif tag == "delete":
+                        del self._ysource[i1 + offset : i2 + offset]
+                        offset -= i2 - i1
+                    elif tag == "insert":
+                        self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
+                        offset += j2 - j1
+                    elif tag == "equal":
+                        pass
+                    else:
+                        raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
 
     def observe(self, callback: Callable[[str, Any], None]) -> None:
         """

diff --git a/tests/test_ynotebook.py b/tests/test_ynotebook.py
@@ -1,10 +1,9 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 
-from dataclasses import dataclass
-
 from pycrdt import ArrayEvent, Map, MapEvent, TextEvent
 from pytest import mark
+from utils import ExpectedEvent
 
 from jupyter_ydoc import YNotebook
 
@@ -119,24 +118,6 @@ def record_changes(topic, event):
     ]
 
 
-@dataclass
-class ExpectedEvent:
-    kind: type
-    path: str | None = None
-
-    def __eq__(self, other):
-        if not isinstance(other, self.kind):
-            return False
-        if self.path is not None and self.path != other.path:
-            return False
-        return True
-
-    def __repr__(self):
-        if self.path is not None:
-            return f"ExpectedEvent({self.kind.__name__}, path={self.path!r})"
-        return f"ExpectedEvent({self.kind.__name__})"
-
-
 @mark.parametrize(
     "modifications, expected_events",
     [

diff --git a/tests/test_yunicode.py b/tests/test_yunicode.py
@@ -1,6 +1,9 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 
+from pycrdt import TextEvent
+from utils import ExpectedEvent
+
 from jupyter_ydoc import YUnicode
 
 
@@ -25,3 +28,163 @@ def record_changes(topic, event):
 
     # No changes should be observed at all
     assert changes == []
+
+
+def test_set_granular_changes():
+    text = YUnicode()
+
+    text.set(
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+                "And everywhere that Mary went,",
+                "The lamb was sure to go.",
+            ]
+        )
+    )
+
+    changes = []
+
+    def record_changes(topic, event):
+        changes.append((topic, event))  # pragma: nocover
+
+    text.observe(record_changes)
+
+    # Call set with the bunny version
+    text.set(
+        "\n".join(
+            [
+                "Mary had a little bunny,",
+                "Its fur was white as snow.",
+                "And everywhere that Mary went,",
+                "The bunny was sure to hop.",
+            ]
+        )
+    )
+
+    assert len(changes) == 1
+    source_events = [e for t, e in changes if t == "source"]
+    assert source_events == [
+        ExpectedEvent(
+            TextEvent,
+            delta=[
+                # "Mary had a little <delete:lam>b"
+                {"retain": 18},
+                {"delete": 3},
+                {"retain": 1},
+                # "Mary had a little b<insert:unny>"
+                {"insert": "unny"},
+                # ",↵ Its f<delete:leece>"
+                {"retain": 7},
+                {"delete": 5},
+                # ",↵ Its f<insert:ur>"
+                {"insert": "ur"},
+                # " was white as snow.↵"
+                # "And everywhere that Mary went,↵"
+                # "The <delete:lam>b"
+                {"retain": 55},
+                {"delete": 3},
+                {"retain": 1},
+                # "The b<insert:unny> was sure to"
+                {"insert": "unny"},
+                {"retain": 13},
+                # "<delete:g><insert:h>o<insert:p>"
+                {"delete": 1},
+                {"insert": "h"},
+                {"retain": 1},
+                {"insert": "p"},
+            ],
+        )
+    ]
+
+
+def test_set_granular_append():
+    text = YUnicode()
+
+    text.set(
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+            ]
+        )
+    )
+
+    changes = []
+
+    def record_changes(topic, event):
+        changes.append((topic, event))  # pragma: nocover
+
+    text.observe(record_changes)
+
+    # append a line
+    text.set(
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+                "And everywhere that Mary went,",
+            ]
+        )
+    )
+
+    # append one more line
+    text.set(
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+                "And everywhere that Mary went,",
+                "The lamb was sure to go.",
+            ]
+        )
+    )
+
+    assert len(changes) == 2
+    source_events = [e for t, e in changes if t == "source"]
+    assert source_events == [
+        ExpectedEvent(
+            TextEvent, delta=[{"retain": 53}, {"insert": "\nAnd everywhere that Mary went,"}]
+        ),
+        ExpectedEvent(TextEvent, delta=[{"retain": 84}, {"insert": "\nThe lamb was sure to go."}]),
+    ]
+
+
+def test_set_hard_reload_if_very_different():
+    text = YUnicode()
+
+    text.set(
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+                "And everywhere that Mary went,",
+                "The lamb was sure to go.",
+            ]
+        )
+    )
+
+    changes = []
+
+    def record_changes(topic, event):
+        changes.append((topic, event))  # pragma: nocover
+
+    text.observe(record_changes)
+
+    # Call set with a very different nursery rhyme
+    twinkle_lyrics = "\n".join(
+        [
+            "Twinkle, twinkle, little star,",
+            "How I wonder what you are!",
+            "Up above the world so high,",
+            "Like a diamond in the sky.",
+        ]
+    )
+    text.set(twinkle_lyrics)
+
+    assert len(changes) == 1
+    source_events = [e for t, e in changes if t == "source"]
+    assert source_events == [
+        ExpectedEvent(TextEvent, delta=[{"delete": 109}, {"insert": twinkle_lyrics}])
+    ]
diff --git a/tests/utils.py b/tests/utils.py
@@ -1,6 +1,8 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 
+from dataclasses import dataclass
+
 from anyio import Lock, connect_tcp
 
 
@@ -41,3 +43,27 @@ async def ensure_server_running(host: str, port: int) -> None:
             pass
         else:
             break
+
+
+@dataclass
+class ExpectedEvent:
+    kind: type
+    path: str | None = None
+    delta: list[dict] | None = None
+
+    def __eq__(self, other):
+        if not isinstance(other, self.kind):
+            return False
+        if self.path is not None and self.path != other.path:
+            return False
+        if self.delta is not None and self.delta != other.delta:
+            return False
+        return True
+
+    def __repr__(self):
+        fragments = [self.kind.__name__]
+        if self.path is not None:
+            fragments.append(f"path={self.path!r}")
+        if self.delta is not None:
+            fragments.append(f"delta={self.delta!r}")
+        return f"ExpectedEvent({', '.join(fragments)})"