Skip to content
96 changes: 96 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
Self,
cast,
overload,
Optional,
)
import warnings

Expand Down Expand Up @@ -269,6 +270,8 @@

from pandas.io.formats.style import Styler

from pandas.core.frame_versioning import DataFrameSnapshotStore

# ---------------------------------------------------------------------
# Docstring templates

Expand Down Expand Up @@ -14353,6 +14356,90 @@ def values(self) -> np.ndarray:
['monkey', nan, None]], dtype=object)
"""
return self._mgr.as_array()

def snapshot(self, name: Optional[str] = None) -> str:
    """
    Save the current state of this DataFrame and return the snapshot id.

    Parameters
    ----------
    name : str, optional
        Id to store the snapshot under. When omitted, the snapshot store
        generates a timestamp-based id.

    Returns
    -------
    str
        Id under which the snapshot was stored.
    """
    # The per-instance store is created on first use and reused afterwards.
    return _ensure_snapshot_store(self).snapshot(self, name=name)

def restore(self, name: str, inplace: bool = False) -> "DataFrame | None":
    """
    Restore a previously created snapshot.

    Parameters
    ----------
    name : str
        Snapshot id returned by :meth:`DataFrame.snapshot`.
    inplace : bool, default False
        If True, replace the data held by this DataFrame with the snapshot's
        data. Otherwise return a restored copy and leave this DataFrame
        untouched.

    Returns
    -------
    DataFrame or None
        Restored DataFrame when ``inplace=False``, otherwise None.

    Raises
    ------
    KeyError
        If no snapshot has been taken from this DataFrame, or if the
        snapshot store does not know ``name``.
    """
    store = getattr(self, "_version_snapshots", None)
    if store is None:
        raise KeyError(f"No snapshots present for this DataFrame(requested:{name})")

    restored = store.restore(name)
    if not inplace:
        return restored

    # Swap in the restored block manager only. ``axes`` is deliberately not
    # assigned: on DataFrame it is a property computed from the manager, so
    # assigning to it raises. No blanket ``__dict__`` update either: that
    # would overwrite every instance attribute with the temporary copy's
    # state instead of just the data.
    object.__setattr__(self, "_mgr", restored._mgr)
    # NOTE(review): ``_item_cache`` is presumably only present on pandas
    # versions that keep a per-frame column cache; carry it over when it
    # exists so stale cached columns are not served after the swap.
    if hasattr(restored, "_item_cache"):
        object.__setattr__(self, "_item_cache", restored._item_cache)
    return None

def list_snapshots(self) -> list[str]:
    """
    Return the ids of the snapshots taken from this DataFrame.

    Returns
    -------
    list of str
        Snapshot ids as reported by the snapshot store, or an empty list
        when this DataFrame has no snapshot store.
    """
    snapshots = getattr(self, "_version_snapshots", None)
    if snapshots is None:
        return []
    return snapshots.list()

def drop_snapshot(self, name: str) -> None:
    """
    Remove a single snapshot from this DataFrame.

    Parameters
    ----------
    name : str
        Id of the snapshot to remove.

    Raises
    ------
    KeyError
        If this DataFrame has no snapshot store. The store itself decides
        what happens for an id it does not know.
    """
    snapshots = getattr(self, "_version_snapshots", None)
    if snapshots is not None:
        snapshots.drop(name)
        return
    raise KeyError(f"No snapshots present for this DataFrame(requested drop:{name})")

def clear_snapshots(self) -> None:
    """
    Remove every snapshot held for this DataFrame.

    Does nothing when this DataFrame has no snapshot store.
    """
    snapshots = getattr(self, "_version_snapshots", None)
    if snapshots is None:
        return
    snapshots.clear()

def snapshot_info(self, name: Optional[str] = None) -> dict:
    """
    Return metadata for all snapshots or a single snapshot.

    Parameters
    ----------
    name : str, optional
        Id of the snapshot to describe. When omitted, a summary of all
        snapshots is returned.

    Returns
    -------
    dict
        The result of the snapshot store's ``info``. For a DataFrame without
        a snapshot store and no ``name``, the empty summary
        ``{"count": 0, "snapshots": []}``.

    Raises
    ------
    KeyError
        If ``name`` is given but this DataFrame has no snapshot store.
    """
    store = getattr(self, "_version_snapshots", None)
    if store is not None:
        return store.info(name)
    if name:
        # A specific snapshot was requested but none can exist. Raise, as
        # restore() and drop_snapshot() do, instead of answering with a
        # summary dict that has a different shape than the caller expects.
        raise KeyError(
            f"No snapshots present for this DataFrame (requested info: {name})"
        )
    return {"count": 0, "snapshots": []}



def _from_nested_dict(
Expand Down Expand Up @@ -14390,3 +14477,12 @@ def _reindex_for_setitem(
"incompatible index of inserted column with frame index"
) from err
return reindexed_value, None

def _ensure_snapshot_store(self) -> DataFrameSnapshotStore:
    """
    Return the snapshot store attached to ``self``, creating it on first use.

    The store is kept on the instance as ``_version_snapshots``.
    """
    existing = getattr(self, "_version_snapshots", None)
    if existing is not None:
        return existing

    created = DataFrameSnapshotStore()
    # object.__setattr__ bypasses the instance's own __setattr__ --
    # presumably so the assignment is stored as a plain attribute; confirm.
    object.__setattr__(self, "_version_snapshots", created)
    return created
70 changes: 70 additions & 0 deletions pandas/core/frame_versioning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from __future__ import annotations

from dataclasses import dataclass
from datetime import (
    datetime,
    timezone,
)
from typing import (
    TYPE_CHECKING,
    Dict,
    Optional,
)
import uuid

if TYPE_CHECKING:
import pandas as pd # only used for type hints


def _generate_snapshot_id(name: Optional[str] = None) -> str:
    """
    Return the id a snapshot should be stored under.

    A non-empty ``name`` is used unchanged. Otherwise an id of the form
    ``<UTC timestamp>-<8 hex characters>`` is generated; the random suffix
    comes from a fresh UUID4.
    """
    if name:
        return name
    # datetime.utcnow() is deprecated since Python 3.12. An aware UTC "now"
    # renders to exactly the same text with this format string.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ")
    suffix = uuid.uuid4().hex[:8]
    return f"{stamp}-{suffix}"


@dataclass
class SnapshotMeta:
    """Descriptive record kept alongside each stored snapshot."""

    # Id the snapshot is stored under.
    name: str
    # Moment the snapshot was taken (timezone-aware UTC).
    created_at: datetime


class DataFrameSnapshotStore:
    """
    Per-DataFrame snapshot store.

    Keeps a deep copy of the DataFrame for every snapshot, plus a
    :class:`SnapshotMeta` record, both keyed by snapshot id.
    """

    def __init__(self) -> None:
        # snapshot id -> deep copy of the DataFrame
        self._snapshots: Dict[str, "pd.DataFrame"] = {}
        # snapshot id -> metadata record
        self._meta: Dict[str, SnapshotMeta] = {}

    def snapshot(self, df: "pd.DataFrame", name: Optional[str] = None) -> str:
        """
        Store a deep copy of ``df`` and return the id it was stored under.

        An existing snapshot with the same id is replaced.
        """
        sid = _generate_snapshot_id(name)
        # Deep copy so later edits to ``df`` cannot reach the stored state.
        self._snapshots[sid] = df.copy(deep=True)
        # Aware UTC timestamp; replaces the deprecated, naive utcnow().
        self._meta[sid] = SnapshotMeta(
            name=sid, created_at=datetime.now(timezone.utc)
        )
        return sid

    def restore(self, name: str) -> "pd.DataFrame":
        """
        Return a deep copy of the snapshot stored under ``name``.

        Raises
        ------
        KeyError
            If no snapshot is stored under ``name``.
        """
        try:
            stored = self._snapshots[name]
        except KeyError:
            raise KeyError(f"Snapshot not found: {name}") from None
        # Copy again so edits to the result never alter the stored snapshot.
        return stored.copy(deep=True)

    def list(self) -> list[str]:
        """Return all snapshot ids in insertion order."""
        return list(self._snapshots)

    def drop(self, name: str) -> None:
        """
        Remove the snapshot stored under ``name`` and its metadata.

        Raises
        ------
        KeyError
            If no snapshot is stored under ``name``.
        """
        if name not in self._snapshots:
            raise KeyError(f"Snapshot not found: {name}")
        del self._snapshots[name]
        del self._meta[name]

    def clear(self) -> None:
        """Remove every snapshot and its metadata."""
        self._snapshots.clear()
        self._meta.clear()

    def info(self, name: Optional[str] = None) -> dict:
        """
        Describe one snapshot, or summarise all of them.

        With a non-empty ``name``, return ``{"name", "created_at"}`` for that
        snapshot, ``created_at`` being an ISO 8601 string. Otherwise return
        ``{"count", "snapshots"}`` covering every stored snapshot.

        Raises
        ------
        KeyError
            If ``name`` is given but no snapshot is stored under it.
        """
        if name:
            try:
                meta = self._meta[name]
            except KeyError:
                raise KeyError(f"Snapshot not found: {name}") from None
            return {"name": meta.name, "created_at": meta.created_at.isoformat()}
        return {
            "count": len(self._snapshots),
            "snapshots": [meta.name for meta in self._meta.values()],
        }
54 changes: 54 additions & 0 deletions pandas/tests/frame/test_versioning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# pandas/tests/frame/test_versioning.py
import pandas as pd
import pytest


def test_snapshot_and_restore_returns_dataframe():
    # restore() without inplace returns the data as it was at snapshot time.
    frame = pd.DataFrame({"x": [1, 2, 3]})
    snap_id = frame.snapshot("t1")
    assert snap_id in frame.list_snapshots()

    frame.loc[0, "x"] = 99
    recovered = frame.restore(snap_id)
    assert recovered["x"].tolist() == [1, 2, 3]


def test_restore_inplace_mutates_dataframe():
    # restore(inplace=True) rolls the frame itself back to the snapshot.
    frame = pd.DataFrame({"x": [1, 2, 3]})
    snap_id = frame.snapshot("t2")

    frame.loc[1, "x"] = 999
    frame.restore(snap_id, inplace=True)
    assert frame["x"].tolist() == [1, 2, 3]


def test_drop_and_clear_behaviour():
    # Dropping removes one snapshot; clearing removes whatever is left.
    frame = pd.DataFrame({"a": [1, 2]})
    first = frame.snapshot("s1")
    second = frame.snapshot("s2")
    assert {first, second} == set(frame.list_snapshots())

    frame.drop_snapshot(first)
    assert first not in frame.list_snapshots()

    frame.clear_snapshots()
    assert frame.list_snapshots() == []


def test_snapshot_on_empty_dataframe():
    # An unnamed snapshot of an empty frame stays empty after the frame grows.
    frame = pd.DataFrame()
    snap_id = frame.snapshot()

    frame.loc[0, "a"] = 1
    recovered = frame.restore(snap_id)
    assert recovered.empty


def test_copy_does_not_inherit_snapshots():
    # Design decision: snapshots belong to the instance and do not follow
    # copies.
    original = pd.DataFrame({"a": [1, 2, 3]})
    snap_id = original.snapshot("orig")

    duplicate = original.copy()
    assert duplicate.list_snapshots() == []
    assert snap_id in original.list_snapshots()


def test_missing_snapshot_raises():
    df = pd.DataFrame({"x": [1]})

    # Branch 1: the frame never had a snapshot taken, so no store exists.
    with pytest.raises(KeyError, match="No snapshots present"):
        df.restore("no-such-snapshot")

    # Branch 2: a store exists, but the requested id is unknown to it.
    df.snapshot("known")
    with pytest.raises(KeyError, match="Snapshot not found"):
        df.restore("no-such-snapshot")
Loading