
Commit f7315f0

RyanJDick authored and hipsterusername committed
Make the default max RAM cache size more conservative.
1 parent 285313b commit f7315f0


invokeai/backend/model_manager/load/model_cache/model_cache.py

Lines changed: 13 additions & 25 deletions
@@ -400,23 +400,19 @@ def _calc_ram_available_to_model_cache(self) -> int:
         # Heuristics for dynamically calculating the RAM cache size, **in order of increasing priority**:
         # 1. As an initial default, use 50% of the total RAM for InvokeAI.
         #    - Assume a 2GB baseline for InvokeAI's non-model RAM usage, and use the rest of the RAM for the model cache.
-        # 2. On a system with a lot of RAM (e.g. 64GB+), users probably don't want InvokeAI to eat up too much RAM.
-        #    There are diminishing returns to storing more and more models. So, we apply an upper bound.
+        # 2. On a system with a lot of RAM, users probably don't want InvokeAI to eat up too much RAM.
+        #    There are diminishing returns to storing more and more models. So, we apply an upper bound. (Keep in mind
+        #    that most OSes have some amount of disk caching, which we still benefit from if there is excess memory,
+        #    even if we drop models from the cache.)
         #    - On systems without a CUDA device, the upper bound is 32GB.
-        #    - On systems with a CUDA device, the upper bound is 2x the amount of VRAM.
-        # 3. On systems with a CUDA device, the minimum should be the VRAM size (less the working memory).
-        #    - Setting lower than this would mean that we sometimes kick models out of the cache when there is room for
-        #      all models in VRAM.
-        #    - Consider an extreme case of a system with 8GB RAM / 24GB VRAM. I haven't tested this, but I think
-        #      you'd still want the RAM cache size to be ~24GB (less the working memory). (Though you'd probably want to
-        #      set `keep_ram_copy_of_weights: false` in this case.)
-        # 4. Absolute minimum of 4GB.
+        #    - On systems with a CUDA device, the upper bound is 1x the amount of VRAM (less the working memory).
+        # 3. Absolute minimum of 4GB.

         # NOTE(ryand): We explored dynamically adjusting the RAM cache size based on memory pressure (using psutil), but
         # decided against it for now, for the following reasons:
         # - It was surprisingly difficult to get memory metrics with consistent definitions across OSes. (If you go
-        #    down this path again, don't underestimate the amount of complexity here and be sure to test rigorously on all
-        #    OSes.)
+        #   down this path again, don't underestimate the amount of complexity here and be sure to test rigorously on all
+        #   OSes.)
         # - Making the RAM cache size dynamic opens the door for performance regressions that are hard to diagnose and
         #   hard for users to understand. It is better for users to see that their RAM is maxed out, and then override
         #   the default value if desired.
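
For readers skimming the diff, here is a minimal, self-contained sketch of the three heuristics as the updated comment describes them. This is not the repository's implementation: the function name, signature, and the 3.0 working-memory default are assumptions for illustration (the real values come from InvokeAI's configuration), and GB is assumed to be 2**30 bytes.

GB = 2**30  # bytes per GiB; assumed to match the module's GB constant

def sketch_ram_cache_size(
    total_ram_bytes: int,
    total_cuda_vram_bytes: int | None = None,
    max_vram_cache_size_gb: float | None = None,
    working_mem_gb: float = 3.0,  # hypothetical default, for illustration only
) -> int:
    # Heuristic 1: default to 50% of total RAM, less a ~2GB non-model baseline.
    ram_available = total_ram_bytes // 2 - 2 * GB

    # Heuristic 2: apply an upper bound. Without a CUDA device it is 32GB;
    # with one, it is the configured VRAM cache size if set, else 1x VRAM
    # less the working memory (this commit's change).
    upper_bound = 32 * GB
    if total_cuda_vram_bytes is not None:
        if max_vram_cache_size_gb is not None:
            upper_bound = int(max_vram_cache_size_gb * GB)
        else:
            upper_bound = total_cuda_vram_bytes - int(working_mem_gb * GB)
    ram_available = min(ram_available, upper_bound)

    # Heuristic 3: enforce an absolute minimum of 4GB.
    return max(ram_available, 4 * GB)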
@@ -438,26 +434,18 @@ def _calc_ram_available_to_model_cache(self) -> int:
         # ------------------
         max_ram_cache_size_bytes = 32 * GB
         if total_cuda_vram_bytes is not None:
-            max_ram_cache_size_bytes = 2 * total_cuda_vram_bytes
+            if self._max_vram_cache_size_gb is not None:
+                max_ram_cache_size_bytes = int(self._max_vram_cache_size_gb * GB)
+            else:
+                max_ram_cache_size_bytes = total_cuda_vram_bytes - int(self._execution_device_working_mem_gb * GB)
         if ram_available_to_model_cache > max_ram_cache_size_bytes:
             heuristics_applied.append(2)
             ram_available_to_model_cache = max_ram_cache_size_bytes

         # Apply heuristic 3.
         # ------------------
-        if total_cuda_vram_bytes is not None:
-            if self._max_vram_cache_size_gb is not None:
-                min_ram_cache_size_bytes = int(self._max_vram_cache_size_gb * GB)
-            else:
-                min_ram_cache_size_bytes = total_cuda_vram_bytes - int(self._execution_device_working_mem_gb * GB)
-            if ram_available_to_model_cache < min_ram_cache_size_bytes:
-                heuristics_applied.append(3)
-                ram_available_to_model_cache = min_ram_cache_size_bytes
-
-        # Apply heuristic 4.
-        # ------------------
         if ram_available_to_model_cache < 4 * GB:
-            heuristics_applied.append(4)
+            heuristics_applied.append(3)
             ram_available_to_model_cache = 4 * GB

         self._logger.info(
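
To make the effect concrete, here is a small worked example under the same assumptions (the 64GB RAM / 24GB VRAM machine and the 3GB working-memory figure are hypothetical). Note that the removed heuristic, a VRAM-based floor, could only ever raise the cache size, so dropping it in favor of the flat 4GB minimum also points in the conservative direction.

GB = 2**30

total_ram = 64 * GB
vram = 24 * GB
working_mem = 3 * GB  # hypothetical figure, for the arithmetic only

initial = total_ram // 2 - 2 * GB  # heuristic 1: 30GB

old_cap = 2 * vram                 # before this commit: 48GB cap, no clamping
new_cap = vram - working_mem       # after this commit: 21GB cap

print(min(initial, old_cap) // GB)  # 30 -> old default cache size, in GB
print(min(initial, new_cap) // GB)  # 21 -> new, more conservative default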
