
Commit 382d85e
Fix memory issues when installing models on Windows (#8652)
* Wrap GGUF loader for context-managed close()

  Wrap gguf.GGUFReader and use it through a context manager when loading memory-mapped GGUF files, so the maps are closed automatically once they are no longer needed. This should prevent the "file in use in another process" errors on Windows.

* Additional check for cached state_dict

  Check the state_dict cache before loading, since path is now optional; this should stop the model manager from missing the cache and triggering the resulting memory errors.

* Appease ruff

* Further ruff appeasement

* ruff

* loaders.py fix for linux

  No longer attempting to delete an internal object.

* loaders.py - one more _mmap ref removed

Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com>
1 parent abcc987 commit 382d85e
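The underlying failure mode is easy to reproduce outside of InvokeAI: on Windows, a file that still backs a live memory map cannot be deleted or replaced. Below is a minimal sketch of the problem and of the release pattern this commit adopts, using numpy.memmap as a stand-in for the mmap that gguf.GGUFReader holds internally.

import gc
import os
import tempfile

import numpy as np

# Create a small file and memory-map it, much as gguf.GGUFReader does.
fd, path = tempfile.mkstemp(suffix=".bin")
os.close(fd)
np.zeros(16, dtype=np.uint8).tofile(path)

data = np.memmap(path, dtype=np.uint8, mode="r")

# On Windows, deleting the file at this point fails with PermissionError
# ("file in use in another process"), because the mapping still holds an
# open handle to the file.

# Release the mapping the same way WrappedGGUFReader.close() does below:
del data
gc.collect()

os.remove(path)  # succeeds once the mapping is gone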

2 files changed (+46, -12 lines)

invokeai/backend/model_manager/model_on_disk.py

Lines changed: 3 additions & 0 deletions
@@ -84,6 +84,9 @@ def load_state_dict(self, path: Optional[Path] = None) -> StateDict:
 
         path = self.resolve_weight_file(path)
 
+        if path in self._state_dict_cache:
+            return self._state_dict_cache[path]
+
         with SilenceWarnings():
             if path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
                 scan_result = scan_file_path(path)
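The cache is keyed on the resolved path, so repeated loads of the same weight file return the same state dict instead of re-reading (and re-mapping) it. A minimal sketch of the pattern, with the surrounding class stubbed out and the real file load replaced by a placeholder dict:

from pathlib import Path
from typing import Optional


class ModelOnDiskSketch:
    """Stub illustrating the path-keyed state_dict cache."""

    def __init__(self, path: Path):
        self.path = path
        self._state_dict_cache: dict[Path, dict] = {}

    def resolve_weight_file(self, path: Optional[Path]) -> Path:
        # Fall back to the model's own path when none is given.
        return path if path is not None else self.path

    def load_state_dict(self, path: Optional[Path] = None) -> dict:
        path = self.resolve_weight_file(path)
        if path in self._state_dict_cache:
            return self._state_dict_cache[path]  # skip the expensive re-read
        state_dict = {"placeholder": 0}  # stands in for the real file load
        self._state_dict_cache[path] = state_dict
        return state_dict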
invokeai/backend/quantization/gguf/loaders.py

Lines changed: 43 additions & 12 deletions
@@ -1,22 +1,53 @@
+import gc
 from pathlib import Path
 
 import gguf
 import torch
 
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 from invokeai.backend.quantization.gguf.utils import TORCH_COMPATIBLE_QTYPES
+from invokeai.backend.util.logging import InvokeAILogger
+
+logger = InvokeAILogger.get_logger()
+
+
+class WrappedGGUFReader:
+    """Wrapper around GGUFReader that adds a close() method."""
+
+    def __init__(self, path: Path):
+        self.reader = gguf.GGUFReader(path)
+
+    def __enter__(self):
+        return self.reader
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+        return False
+
+    def close(self):
+        """Explicitly close the memory-mapped file."""
+        if hasattr(self.reader, "data"):
+            try:
+                self.reader.data.flush()
+                del self.reader.data
+            except (AttributeError, OSError, ValueError) as e:
+                logger.warning(f"Wasn't able to close GGUF memory map: {e}")
+        del self.reader
+        gc.collect()
 
 
 def gguf_sd_loader(path: Path, compute_dtype: torch.dtype) -> dict[str, GGMLTensor]:
-    reader = gguf.GGUFReader(path)
-
-    sd: dict[str, GGMLTensor] = {}
-    for tensor in reader.tensors:
-        torch_tensor = torch.from_numpy(tensor.data)
-        shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
-        if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
-            torch_tensor = torch_tensor.view(*shape)
-        sd[tensor.name] = GGMLTensor(
-            torch_tensor, ggml_quantization_type=tensor.tensor_type, tensor_shape=shape, compute_dtype=compute_dtype
-        )
-    return sd
+    with WrappedGGUFReader(path) as reader:
+        sd: dict[str, GGMLTensor] = {}
+        for tensor in reader.tensors:
+            torch_tensor = torch.from_numpy(tensor.data)
+            shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
+            if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
+                torch_tensor = torch_tensor.view(*shape)
+            sd[tensor.name] = GGMLTensor(
+                torch_tensor,
+                ggml_quantization_type=tensor.tensor_type,
+                tensor_shape=shape,
+                compute_dtype=compute_dtype,
+            )
+        return sd
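With the wrapper in place, the reader's reference to the memory map is dropped as soon as gguf_sd_loader returns. A minimal usage sketch, assuming the loaders.py module path inferred above and an illustrative checkpoint path:

from pathlib import Path

import torch

from invokeai.backend.quantization.gguf.loaders import gguf_sd_loader

# Illustrative path; any quantized GGUF checkpoint behaves the same way.
sd = gguf_sd_loader(Path("models/example-Q4_0.gguf"), compute_dtype=torch.bfloat16)
print(f"loaded {len(sd)} tensors")

# By this point WrappedGGUFReader.__exit__ has already run, so the reader no
# longer pins the file handle that previously blocked model installs on Windows.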
