|
| 1 | +# pandas/core/frame_versioning.py |
| 2 | +from __future__ import annotations |
| 3 | + |
| 4 | +from dataclasses import dataclass |
| 5 | +from datetime import datetime |
| 6 | +import uuid |
| 7 | +from typing import Dict, Optional |
| 8 | + |
| 9 | +import pandas as pd |
| 10 | + |
| 11 | + |
def _generate_snapshot_id(name: Optional[str] = None) -> str:
    """
    Return the identifier under which a snapshot is stored.

    Parameters
    ----------
    name : str, optional
        Caller-chosen identifier. Any truthy value is returned unchanged.

    Returns
    -------
    str
        ``name`` when it is truthy; otherwise a generated identifier made of
        a UTC timestamp with microseconds, a literal ``Z``, a dash and eight
        hexadecimal characters, e.g. ``20240102T030405123456Z-1a2b3c4d``.

    Notes
    -----
    Both ``None`` and the empty string trigger generation, because the
    check is a plain truthiness test.
    """
    if name:
        return name

    # Imported locally so this function does not rely on a change to the
    # module-level import block, which only brings in ``datetime``.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12. An aware UTC
    # "now" renders to exactly the same text with this format string.
    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ")
    # Random suffix keeps identifiers distinct when two snapshots are taken
    # within the same microsecond.
    uid = uuid.uuid4().hex[:8]
    return f"{ts}-{uid}"
| 18 | + |
| 19 | + |
@dataclass
class SnapshotMeta:
    """
    Descriptive record kept alongside a stored snapshot.

    Attributes
    ----------
    name : str
        Identifier of the snapshot this record describes.
    created_at : datetime
        Moment at which the snapshot was taken.
    """

    # Identifier of the snapshot.
    name: str
    # Creation time of the snapshot.
    created_at: datetime
| 24 | + |
| 25 | + |
class DataFrameSnapshotStore:
    """
    In-memory store of named DataFrame snapshots.

    Each snapshot is an independent copy made with ``DataFrame.copy(deep=True)``
    and is handed back as a fresh copy on restore, so neither the caller's
    original frame nor a restored frame shares data with what is stored.

    Notes
    -----
    Snapshots live in plain dictionaries keyed by snapshot identifier; one
    dictionary holds the frames and a parallel one holds their metadata.
    NOTE(review): per the pandas documentation a deep copy does not
    recursively copy Python objects held in object-dtype columns -- confirm
    whether that matters for intended use.
    """

    def __init__(self) -> None:
        # snapshot_id -> stored DataFrame copy
        self._snapshots: Dict[str, pd.DataFrame] = {}
        # snapshot_id -> metadata for that snapshot
        self._meta: Dict[str, SnapshotMeta] = {}

    def _require(self, name: str) -> None:
        """Raise ``KeyError`` unless a snapshot called ``name`` is stored."""
        if name not in self._snapshots:
            raise KeyError(f"Snapshot not found: {name}")

    def snapshot(self, df: pd.DataFrame, name: Optional[str] = None) -> str:
        """
        Store a deep copy of ``df`` and return its identifier.

        Parameters
        ----------
        df : DataFrame
            Frame to capture.
        name : str, optional
            Identifier to store the snapshot under. When omitted (or empty)
            an identifier is generated.

        Returns
        -------
        str
            Identifier under which the snapshot was stored.

        Notes
        -----
        Re-using an existing identifier replaces the earlier snapshot and
        its metadata.
        """
        # Imported locally so this method does not rely on a change to the
        # module-level import block, which only brings in ``datetime``.
        from datetime import timezone

        sid = _generate_snapshot_id(name)
        # Copy first: if copying fails, nothing is recorded for ``sid``.
        self._snapshots[sid] = df.copy(deep=True)
        # ``datetime.utcnow()`` is deprecated since Python 3.12. Dropping the
        # tzinfo keeps the stored value naive UTC, so ``info()`` output keeps
        # the same ISO format as before.
        created_at = datetime.now(timezone.utc).replace(tzinfo=None)
        self._meta[sid] = SnapshotMeta(name=sid, created_at=created_at)
        return sid

    def restore(self, name: str) -> pd.DataFrame:
        """
        Return a deep copy of the snapshot stored under ``name``.

        Raises
        ------
        KeyError
            If no snapshot with that identifier exists.
        """
        self._require(name)
        # Hand out a copy so edits to the result never touch the stored frame.
        return self._snapshots[name].copy(deep=True)

    def list(self) -> list[str]:
        """Return the identifiers of all stored snapshots, in insertion order."""
        return [*self._snapshots]

    def drop(self, name: str) -> None:
        """
        Remove the snapshot stored under ``name`` together with its metadata.

        Raises
        ------
        KeyError
            If no snapshot with that identifier exists.
        """
        self._require(name)
        del self._snapshots[name]
        # ``pop`` rather than ``del``: a missing metadata entry must not abort
        # the removal half-way and leave the two dictionaries out of step.
        self._meta.pop(name, None)

    def clear(self) -> None:
        """Remove every snapshot and all metadata."""
        self._snapshots.clear()
        self._meta.clear()

    def info(self, name: Optional[str] = None) -> dict:
        """
        Describe one snapshot, or summarise the whole store.

        Parameters
        ----------
        name : str, optional
            Identifier of the snapshot to describe. When omitted (or empty)
            a summary of the store is returned instead.

        Returns
        -------
        dict
            With ``name``: ``{"name": ..., "created_at": <ISO-8601 string>}``.
            Without: ``{"count": <number of snapshots>, "snapshots": [names]}``.

        Raises
        ------
        KeyError
            If ``name`` is given and no such snapshot exists.
        """
        if not name:
            return {
                "count": len(self._snapshots),
                "snapshots": [m.name for m in self._meta.values()],
            }
        if name not in self._meta:
            raise KeyError(f"Snapshot not found: {name}")
        meta = self._meta[name]
        return {"name": meta.name, "created_at": meta.created_at.isoformat()}
0 commit comments