Skip to content

Commit d75e570

Browse files
committed
ENH: Add snapshot functionality to DataFrame for versioning support
1 parent 021ea8d commit d75e570

File tree

1 file changed

+99
-0
lines changed

1 file changed

+99
-0
lines changed

pandas/core/frame.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
Self,
3535
cast,
3636
overload,
37+
Optional,
3738
)
3839
import warnings
3940

@@ -269,6 +270,8 @@
269270

270271
from pandas.io.formats.style import Styler
271272

273+
from pandas.core.frame_versioning import DataFrameSnapshotStore
274+
272275
# ---------------------------------------------------------------------
273276
# Docstring templates
274277

@@ -14353,6 +14356,93 @@ def values(self) -> np.ndarray:
1435314356
['monkey', nan, None]], dtype=object)
1435414357
"""
1435514358
return self._mgr.as_array()
14359+
14360+
def snapshot(self, name: Optional[str] = None) -> str:
    """
    Record the current state of this DataFrame in its snapshot store.

    The per-instance store is obtained through ``_ensure_snapshot_store``
    (which attaches one on first use) and the actual work is delegated to
    the store's ``snapshot`` method.

    Parameters
    ----------
    name : str, optional
        Label for the snapshot, forwarded to the store as ``name``. When
        omitted the store chooses the id itself (presumably timestamp
        based; confirm against ``DataFrameSnapshotStore``).

    Returns
    -------
    str
        Snapshot id as returned by the store.
    """
    return _ensure_snapshot_store(self).snapshot(self, name=name)
14376+
14377+
def restore(self, name: str, inplace: bool = False):
    """
    Restore a previously created snapshot.

    Parameters
    ----------
    name : str
        Snapshot id returned by :meth:`DataFrame.snapshot`.
    inplace : bool, default False
        If True, replace this DataFrame's data with the snapshot's data.
        Otherwise return the restored DataFrame and leave this one
        untouched.

    Returns
    -------
    DataFrame or None
        Restored DataFrame when ``inplace=False``, otherwise None.

    Raises
    ------
    KeyError
        If no snapshot store has been attached to this DataFrame. Any
        error raised by the store's ``restore`` for ``name`` propagates
        unchanged.
    """
    store = getattr(self, "_version_snapshots", None)
    if store is None:
        raise KeyError(
            f"No snapshots present for this DataFrame (requested: {name})"
        )

    restored = store.restore(name)
    if not inplace:
        return restored

    # Swap the data in through the standard in-place update hook instead of
    # assigning private attributes by hand. ``axes`` is a read-only property,
    # so assigning to it always failed and the old code silently fell back to
    # ``__dict__.update``, which also copied per-object state (such as
    # ``_flags``) that belongs to ``restored`` rather than to this frame.
    # NOTE(review): assumes ``store.restore`` hands back a frame that does not
    # share data with the stored snapshot -- confirm in DataFrameSnapshotStore.
    self._update_inplace(restored)
    return None
14412+
14413+
def list_snapshots(self) -> list[str]:
    """
    Return the snapshot ids recorded for this DataFrame.

    Returns
    -------
    list of str
        Whatever the attached store's ``list`` method returns, or an empty
        list when no store has been attached to this DataFrame.
    """
    store = getattr(self, "_version_snapshots", None)
    if store is None:
        return []
    return store.list()
14419+
14420+
def drop_snapshot(self, name: str) -> None:
    """
    Remove a single snapshot from this DataFrame's store.

    Parameters
    ----------
    name : str
        Snapshot id, forwarded to the store's ``drop`` method.

    Raises
    ------
    KeyError
        If no snapshot store has been attached to this DataFrame.
    """
    store = getattr(self, "_version_snapshots", None)
    if store is not None:
        store.drop(name)
        return
    raise KeyError(f"No snapshots present for this DataFrame (requested drop: {name})")
14428+
14429+
def clear_snapshots(self) -> None:
    """
    Remove every snapshot held for this DataFrame.

    Delegates to the attached store's ``clear`` method. Does nothing when
    no store has been attached to this DataFrame.
    """
    store = getattr(self, "_version_snapshots", None)
    if store is None:
        return
    store.clear()
14436+
14437+
def snapshot_info(self, name: Optional[str] = None) -> dict:
    """
    Return snapshot metadata for this DataFrame.

    Parameters
    ----------
    name : str, optional
        Forwarded unchanged to the store's ``info`` method; per the
        original description, ``None`` requests metadata for all
        snapshots and a value requests a single snapshot.

    Returns
    -------
    dict
        The store's ``info(name)`` result, or
        ``{"count": 0, "snapshots": []}`` when no store has been attached
        to this DataFrame.
    """
    store = getattr(self, "_version_snapshots", None)
    if store is not None:
        return store.info(name)
    return {"count": 0, "snapshots": []}
14445+
1435614446

1435714447

1435814448
def _from_nested_dict(
@@ -14390,3 +14480,12 @@ def _reindex_for_setitem(
1439014480
"incompatible index of inserted column with frame index"
1439114481
) from err
1439214482
return reindexed_value, None
14483+
14484+
def _ensure_snapshot_store(self) -> DataFrameSnapshotStore:
    """
    Return the snapshot store attached to ``self``, creating it if needed.

    The store lives on the instance under the ``_version_snapshots``
    attribute. When that attribute is missing or None, a new
    ``DataFrameSnapshotStore`` is created, attached, and returned.
    """
    existing = getattr(self, "_version_snapshots", None)
    if existing is not None:
        return existing

    created = DataFrameSnapshotStore()
    # object.__setattr__ writes the attribute directly, bypassing any
    # __setattr__ override defined on the instance's class.
    object.__setattr__(self, "_version_snapshots", created)
    return created

0 commit comments

Comments
 (0)