@@ -400,23 +400,19 @@ def _calc_ram_available_to_model_cache(self) -> int:
         # Heuristics for dynamically calculating the RAM cache size, **in order of increasing priority**:
         # 1. As an initial default, use 50% of the total RAM for InvokeAI.
         #    - Assume a 2GB baseline for InvokeAI's non-model RAM usage, and use the rest of the RAM for the model cache.
-        # 2. On a system with a lot of RAM (e.g. 64GB+), users probably don't want InvokeAI to eat up too much RAM.
-        #    There are diminishing returns to storing more and more models. So, we apply an upper bound.
+        # 2. On a system with a lot of RAM, users probably don't want InvokeAI to eat up too much RAM.
+        #    There are diminishing returns to storing more and more models. So, we apply an upper bound. (Keep in mind
+        #    that most OSes have some amount of disk caching, which we still benefit from if there is excess memory,
+        #    even if we drop models from the cache.)
         #    - On systems without a CUDA device, the upper bound is 32GB.
-        #    - On systems with a CUDA device, the upper bound is 2x the amount of VRAM.
-        # 3. On systems with a CUDA device, the minimum should be the VRAM size (less the working memory).
-        #    - Setting lower than this would mean that we sometimes kick models out of the cache when there is room for
-        #      all models in VRAM.
-        #    - Consider an extreme case of a system with 8GB RAM / 24GB VRAM. I haven't tested this, but I think
-        #      you'd still want the RAM cache size to be ~24GB (less the working memory). (Though you'd probably want to
-        #      set `keep_ram_copy_of_weights: false` in this case.)
-        # 4. Absolute minimum of 4GB.
+        #    - On systems with a CUDA device, the upper bound is 1x the amount of VRAM (less the working memory).
+        # 3. Absolute minimum of 4GB.

         # NOTE(ryand): We explored dynamically adjusting the RAM cache size based on memory pressure (using psutil), but
         # decided against it for now, for the following reasons:
         # - It was surprisingly difficult to get memory metrics with consistent definitions across OSes. (If you go
-        #   down this path again, don't underestimate the amount of complexity here and be sure to test rigorously on all
-        #   OSes.)
+        #   down this path again, don't underestimate the amount of complexity here and be sure to test rigorously on all
+        #   OSes.)
         # - Making the RAM cache size dynamic opens the door for performance regressions that are hard to diagnose and
         #   hard for users to understand. It is better for users to see that their RAM is maxed out, and then override
         #   the default value if desired.
@@ -438,26 +434,18 @@ def _calc_ram_available_to_model_cache(self) -> int:
         # ------------------
         max_ram_cache_size_bytes = 32 * GB
         if total_cuda_vram_bytes is not None:
-            max_ram_cache_size_bytes = 2 * total_cuda_vram_bytes
+            if self._max_vram_cache_size_gb is not None:
+                max_ram_cache_size_bytes = int(self._max_vram_cache_size_gb * GB)
+            else:
+                max_ram_cache_size_bytes = total_cuda_vram_bytes - int(self._execution_device_working_mem_gb * GB)
         if ram_available_to_model_cache > max_ram_cache_size_bytes:
             heuristics_applied.append(2)
             ram_available_to_model_cache = max_ram_cache_size_bytes

         # Apply heuristic 3.
         # ------------------
-        if total_cuda_vram_bytes is not None:
-            if self._max_vram_cache_size_gb is not None:
-                min_ram_cache_size_bytes = int(self._max_vram_cache_size_gb * GB)
-            else:
-                min_ram_cache_size_bytes = total_cuda_vram_bytes - int(self._execution_device_working_mem_gb * GB)
-            if ram_available_to_model_cache < min_ram_cache_size_bytes:
-                heuristics_applied.append(3)
-                ram_available_to_model_cache = min_ram_cache_size_bytes
-
-        # Apply heuristic 4.
-        # ------------------
         if ram_available_to_model_cache < 4 * GB:
-            heuristics_applied.append(4)
+            heuristics_applied.append(3)
             ram_available_to_model_cache = 4 * GB

         self._logger.info(
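For readers skimming the commit, here is a minimal, self-contained sketch of how the three heuristics combine after this change. The function name and the plain parameters (`total_ram_bytes`, `total_cuda_vram_bytes`, `max_vram_cache_size_gb`, `execution_device_working_mem_gb`) are illustrative stand-ins for the values the real method obtains from the system and from the InvokeAI config; this is not the project's actual implementation.

```python
GB = 2**30


def calc_ram_available_to_model_cache(
    total_ram_bytes: int,
    total_cuda_vram_bytes: int | None,
    max_vram_cache_size_gb: float | None,
    execution_device_working_mem_gb: float,
) -> int:
    # Heuristic 1: default to 50% of total RAM, less a ~2GB baseline for
    # InvokeAI's non-model RAM usage.
    ram_available = int(total_ram_bytes * 0.5) - 2 * GB

    # Heuristic 2: apply an upper bound. 32GB without a CUDA device; with a CUDA
    # device, 1x the VRAM less the working memory (or the user's explicit VRAM
    # cache size, if set).
    max_cache = 32 * GB
    if total_cuda_vram_bytes is not None:
        if max_vram_cache_size_gb is not None:
            max_cache = int(max_vram_cache_size_gb * GB)
        else:
            max_cache = total_cuda_vram_bytes - int(execution_device_working_mem_gb * GB)
    ram_available = min(ram_available, max_cache)

    # Heuristic 3: never go below an absolute minimum of 4GB.
    return max(ram_available, 4 * GB)


# Example: 64GB RAM, 24GB VRAM, 3GB working memory -> capped at 21GB.
print(calc_ram_available_to_model_cache(64 * GB, 24 * GB, None, 3.0) / GB)
```

Note that the quantity the removed heuristic 3 used as a floor (VRAM less the working memory, or the user-configured VRAM cache size) is the same quantity the new code uses as the cap, so this change effectively converts that value from a lower bound into an upper bound.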