Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions jupyter_ydoc/yunicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Distributed under the terms of the Modified BSD License.

from collections.abc import Callable
from difflib import SequenceMatcher
from functools import partial
from typing import Any

Expand Down Expand Up @@ -64,17 +65,40 @@ def set(self, value: str) -> None:
:param value: The content of the document.
:type value: str
"""
if self.get() == value:
old_value = self.get()
if old_value == value:
# no-op if the values are already the same,
# to avoid side-effects such as cursor jumping to the top
return

with self._ydoc.transaction():
# clear document
self._ysource.clear()
# initialize document
if value:
self._ysource += value
matcher = SequenceMatcher(a=old_value, b=value)

# for very different strings, just replace the whole content;
# this avoids generating a huge number of operations
if matcher.ratio() < 0.6:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using 0.6 because:

As a rule of thumb, a ratio() value over 0.6 means the sequences are close matches

As per https://docs.python.org/3/library/difflib.html#sequencematcher-examples

Instead of ratio() we could use quick_ratio() or real_quick_ratio(). In case if the ratio is above the threshold this does not matter because we would end up computing matching_blocks anyways (this is computed and cached by a call to either raito() or get_opcodes(); in that case calling quick_ratio heuristic could actually end up taking more time as we end up doing more work. It all boils down to whether we think that we are more likely to see smaller updates or larger updates. One very cheap heuristic could be using len of both strings which is what real_quick_ratio does.

# clear document
self._ysource.clear()
# initialize document
if value:
self._ysource += value
else:
operations = matcher.get_opcodes()
offset = 0
for tag, i1, i2, j1, j2 in operations:
if tag == "replace":
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we require python 3.10+, maybe use match?

self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
offset += (j2 - j1) - (i2 - i1)
elif tag == "delete":
del self._ysource[i1 + offset : i2 + offset]
offset -= i2 - i1
elif tag == "insert":
self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this not using Text.insert?

offset += j2 - j1
elif tag == "equal":
pass
else:
raise ValueError(f"Unknown tag '{tag}' in sequence matcher")

def observe(self, callback: Callable[[str, Any], None]) -> None:
"""
Expand Down
21 changes: 1 addition & 20 deletions tests/test_ynotebook.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from dataclasses import dataclass

from pycrdt import ArrayEvent, Map, MapEvent, TextEvent
from pytest import mark
from utils import ExpectedEvent

from jupyter_ydoc import YNotebook

Expand Down Expand Up @@ -119,24 +118,6 @@ def record_changes(topic, event):
]


@dataclass
class ExpectedEvent:
kind: type
path: str | None = None

def __eq__(self, other):
if not isinstance(other, self.kind):
return False
if self.path is not None and self.path != other.path:
return False
return True

def __repr__(self):
if self.path is not None:
return f"ExpectedEvent({self.kind.__name__}, path={self.path!r})"
return f"ExpectedEvent({self.kind.__name__})"


@mark.parametrize(
"modifications, expected_events",
[
Expand Down
163 changes: 163 additions & 0 deletions tests/test_yunicode.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from pycrdt import TextEvent
from utils import ExpectedEvent

from jupyter_ydoc import YUnicode


Expand All @@ -25,3 +28,163 @@ def record_changes(topic, event):

# No changes should be observed at all
assert changes == []


def test_set_granular_changes():
text = YUnicode()

text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
"And everywhere that Mary went,",
"The lamb was sure to go.",
]
)
)

changes = []

def record_changes(topic, event):
changes.append((topic, event)) # pragma: nocover

text.observe(record_changes)

# Call set with the bunny version
text.set(
"\n".join(
[
"Mary had a little bunny,",
"Its fur was white as snow.",
"And everywhere that Mary went,",
"The bunny was sure to hop.",
]
)
)

assert len(changes) == 1
source_events = [e for t, e in changes if t == "source"]
assert source_events == [
ExpectedEvent(
TextEvent,
delta=[
# "Mary had a little <delete:lam>b"
{"retain": 18},
{"delete": 3},
{"retain": 1},
# "Mary had a little b<insert:unny>"
{"insert": "unny"},
# ",↵ Its f<delete:leece>"
{"retain": 7},
{"delete": 5},
# ",↵ Its f<insert:ur>"
{"insert": "ur"},
# " was white as snow.↵"
# "And everywhere that Mary went,↵"
# "The <delete:lam>b"
{"retain": 55},
{"delete": 3},
{"retain": 1},
# "The b<insert:unny> was sure to"
{"insert": "unny"},
{"retain": 13},
# "<delete:g><insert:h>o<insert:p>"
{"delete": 1},
{"insert": "h"},
{"retain": 1},
{"insert": "p"},
],
)
]


def test_set_granular_append():
text = YUnicode()

text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
]
)
)

changes = []

def record_changes(topic, event):
changes.append((topic, event)) # pragma: nocover

text.observe(record_changes)

# append a line
text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
"And everywhere that Mary went,",
]
)
)

# append one more line
text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
"And everywhere that Mary went,",
"The lamb was sure to go.",
]
)
)

assert len(changes) == 2
source_events = [e for t, e in changes if t == "source"]
assert source_events == [
ExpectedEvent(
TextEvent, delta=[{"retain": 53}, {"insert": "\nAnd everywhere that Mary went,"}]
),
ExpectedEvent(TextEvent, delta=[{"retain": 84}, {"insert": "\nThe lamb was sure to go."}]),
]


def test_set_hard_reload_if_very_different():
text = YUnicode()

text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
"And everywhere that Mary went,",
"The lamb was sure to go.",
]
)
)

changes = []

def record_changes(topic, event):
changes.append((topic, event)) # pragma: nocover

text.observe(record_changes)

# Call set with a very different nursery rhyme
twinkle_lyrics = "\n".join(
[
"Twinkle, twinkle, little star,",
"How I wonder what you are!",
"Up above the world so high,",
"Like a diamond in the sky.",
]
)
text.set(twinkle_lyrics)

assert len(changes) == 1
source_events = [e for t, e in changes if t == "source"]
assert source_events == [
ExpectedEvent(TextEvent, delta=[{"delete": 109}, {"insert": twinkle_lyrics}])
]
26 changes: 26 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from dataclasses import dataclass

from anyio import Lock, connect_tcp


Expand Down Expand Up @@ -41,3 +43,27 @@ async def ensure_server_running(host: str, port: int) -> None:
pass
else:
break


@dataclass
class ExpectedEvent:
kind: type
path: str | None = None
delta: list[dict] | None = None

def __eq__(self, other):
if not isinstance(other, self.kind):
return False
if self.path is not None and self.path != other.path:
return False
if self.delta is not None and self.delta != other.delta:
return False
return True

def __repr__(self):
fragments = [self.kind.__name__]
if self.path is not None:
fragments.append(f"path={self.path!r}")
if self.delta is not None:
fragments.append(f"delta={self.delta!r}")
return f"ExpectedEvent({', '.join(fragments)})"
Loading