Skip to content

Commit 021ea8d

Browse files
committed
ENH: Implement DataFrame snapshot store for versioning support
1 parent df383c7 commit 021ea8d

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

pandas/core/frame_versioning.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# pandas/core/frame_versioning.py
2+
from __future__ import annotations
3+
4+
from dataclasses import dataclass
5+
from datetime import datetime
6+
import uuid
7+
from typing import Dict, Optional
8+
9+
import pandas as pd
10+
11+
12+
def _generate_snapshot_id(name: Optional[str] = None) -> str:
13+
if name:
14+
return name
15+
ts = datetime.utcnow().strftime("%Y%m%dT%H%M%S%fZ")
16+
uid = uuid.uuid4().hex[:8]
17+
return f"{ts}-{uid}"
18+
19+
20+
@dataclass
class SnapshotMeta:
    """Bookkeeping record describing a single stored snapshot."""

    # Identifier of the snapshot this record belongs to.
    name: str
    # Moment the snapshot was recorded.
    created_at: datetime
24+
25+
26+
class DataFrameSnapshotStore:
    """
    Per-DataFrame snapshot store.

    Stores deep copies of DataFrames (safe, simple). Snapshots live in memory,
    keyed by snapshot id, each with a small metadata record.
    """

    def __init__(self) -> None:
        # snapshot_id -> DataFrame
        self._snapshots: Dict[str, pd.DataFrame] = {}
        # snapshot_id -> SnapshotMeta; kept in step with ``_snapshots`` by
        # ``snapshot``, ``drop`` and ``clear``.
        self._meta: Dict[str, SnapshotMeta] = {}

    def snapshot(self, df: pd.DataFrame, name: Optional[str] = None) -> str:
        """
        Store a deep copy of ``df`` and return the id it was stored under.

        Parameters
        ----------
        df : DataFrame
            Frame to capture. It is copied, so later changes to ``df`` do not
            affect the stored snapshot.
        name : str, optional
            Identifier to store the snapshot under. When omitted (or empty) an
            id is generated. Reusing an existing name replaces that snapshot
            and its metadata.

        Returns
        -------
        str
            The snapshot id.
        """
        # Imported locally because the module header only imports ``datetime``.
        from datetime import timezone

        sid = _generate_snapshot_id(name)
        # deep copy for safety
        self._snapshots[sid] = df.copy(deep=True)
        # ``datetime.utcnow()`` is deprecated since Python 3.12. Dropping the
        # tzinfo keeps ``created_at`` a naive UTC datetime, exactly as before,
        # so ``info()`` keeps producing the same ISO format.
        created_at = datetime.now(timezone.utc).replace(tzinfo=None)
        self._meta[sid] = SnapshotMeta(name=sid, created_at=created_at)
        return sid

    def restore(self, name: str) -> pd.DataFrame:
        """
        Return a deep copy of the snapshot stored under ``name``.

        Raises
        ------
        KeyError
            If no snapshot with that name exists.
        """
        if name not in self._snapshots:
            raise KeyError(f"Snapshot not found: {name}")
        # return a deep copy so modifications don't change stored snapshot
        return self._snapshots[name].copy(deep=True)

    def list(self) -> list[str]:
        """Return the ids of all stored snapshots, in insertion order."""
        return list(self._snapshots.keys())

    def drop(self, name: str) -> None:
        """
        Remove the snapshot stored under ``name`` together with its metadata.

        Raises
        ------
        KeyError
            If no snapshot with that name exists.
        """
        if name not in self._snapshots:
            raise KeyError(f"Snapshot not found: {name}")
        del self._snapshots[name]
        del self._meta[name]

    def clear(self) -> None:
        """Remove every snapshot and all metadata."""
        self._snapshots.clear()
        self._meta.clear()

    def info(self, name: Optional[str] = None) -> dict:
        """
        Describe one snapshot, or summarise the whole store.

        Parameters
        ----------
        name : str, optional
            Snapshot to describe. When omitted (or empty) a summary of the
            store is returned instead.

        Returns
        -------
        dict
            For a single snapshot: ``{"name": ..., "created_at": ...}`` with
            ``created_at`` as an ISO 8601 string. For the summary:
            ``{"count": ..., "snapshots": [...]}``.

        Raises
        ------
        KeyError
            If ``name`` is given but no such snapshot exists.
        """
        if name:
            if name not in self._meta:
                raise KeyError(f"Snapshot not found: {name}")
            meta = self._meta[name]
            return {"name": meta.name, "created_at": meta.created_at.isoformat()}
        return {
            "count": len(self._snapshots),
            "snapshots": [m.name for m in self._meta.values()],
        }

0 commit comments

Comments
 (0)