Commit 3613ee3

Add an endpoint for emptying the model cache. Also add a threading lock to the ModelCache to make it thread-safe.
1 parent debcbd6 · commit 3613ee3

File tree (3 files changed: +79 −1 lines changed)

- invokeai/app/api/routers/model_manager.py
- invokeai/backend/model_manager/load/model_cache/model_cache.py
- invokeai/frontend/web/src/services/api/schema.ts


invokeai/app/api/routers/model_manager.py

Lines changed: 12 additions & 0 deletions
@@ -858,6 +858,18 @@ async def get_stats() -> Optional[CacheStats]:
     return ApiDependencies.invoker.services.model_manager.load.ram_cache.stats


+@model_manager_router.post(
+    "/empty_model_cache",
+    operation_id="empty_model_cache",
+    status_code=200,
+)
+async def empty_model_cache() -> None:
+    """Drop all models from the model cache to free RAM/VRAM. 'Locked' models that are in active use will not be dropped."""
+    # Request 1000GB of room in order to force the cache to drop all models.
+    ApiDependencies.invoker.services.logger.info("Emptying model cache.")
+    ApiDependencies.invoker.services.model_manager.load.ram_cache.make_room(1000 * 2**30)
+
+
 class HFTokenStatus(str, Enum):
     VALID = "valid"
     INVALID = "invalid"
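The handler requests 1000 * 2**30 bytes (1000 GiB) of room, far more than any realistic cache size, so make_room ends up evicting every model that is not currently locked. A minimal sketch of calling the new route, assuming a locally running InvokeAI instance and the requests package (the host and port are assumptions, not part of this commit; the path matches the generated schema further down):

import requests

# Hypothetical client call; the host/port are assumed defaults, adjust for your setup.
BASE_URL = "http://127.0.0.1:9090"

response = requests.post(f"{BASE_URL}/api/v2/models/empty_model_cache", timeout=60)
response.raise_for_status()  # the endpoint returns HTTP 200 with an empty body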

invokeai/backend/model_manager/load/model_cache/model_cache.py

Lines changed: 27 additions & 1 deletion
@@ -1,8 +1,10 @@
 import gc
 import logging
+import threading
 import time
+from functools import wraps
 from logging import Logger
-from typing import Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional

 import psutil
 import torch
@@ -41,6 +43,17 @@ def get_model_cache_key(model_key: str, submodel_type: Optional[SubModelType] =
     return model_key


+def synchronized(method: Callable[..., Any]) -> Callable[..., Any]:
+    """A decorator that applies the class's self._lock to the method."""
+
+    @wraps(method)
+    def wrapper(self, *args, **kwargs):
+        with self._lock:  # Automatically acquire and release the lock
+            return method(self, *args, **kwargs)
+
+    return wrapper
+
+
 class ModelCache:
     """A cache for managing models in memory.

@@ -125,16 +138,25 @@ def __init__(

         self._ram_cache_size_bytes = self._calc_ram_available_to_model_cache()

+        # A lock applied to all public method calls to make the ModelCache thread-safe.
+        # At the time of writing, the ModelCache should only be accessed from two threads:
+        # - The graph execution thread
+        # - Requests to empty the cache from a separate thread
+        self._lock = threading.RLock()
+
     @property
+    @synchronized
     def stats(self) -> Optional[CacheStats]:
         """Return collected CacheStats object."""
         return self._stats

     @stats.setter
+    @synchronized
     def stats(self, stats: CacheStats) -> None:
         """Set the CacheStats object for collecting cache statistics."""
         self._stats = stats

+    @synchronized
     def put(self, key: str, model: AnyModel) -> None:
         """Add a model to the cache."""
         if key in self._cached_models:
@@ -173,6 +195,7 @@ def put(self, key: str, model: AnyModel) -> None:
             f"Added model {key} (Type: {model.__class__.__name__}, Wrap mode: {wrapped_model.__class__.__name__}, Model size: {size/MB:.2f}MB)"
         )

+    @synchronized
     def get(self, key: str, stats_name: Optional[str] = None) -> CacheRecord:
         """Retrieve a model from the cache.

@@ -208,6 +231,7 @@ def get(self, key: str, stats_name: Optional[str] = None) -> CacheRecord:
         self._logger.debug(f"Cache hit: {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
         return cache_entry

+    @synchronized
     def lock(self, cache_entry: CacheRecord, working_mem_bytes: Optional[int]) -> None:
         """Lock a model for use and move it into VRAM."""
         if cache_entry.key not in self._cached_models:
@@ -243,6 +267,7 @@ def lock(self, cache_entry: CacheRecord, working_mem_bytes: Optional[int]) -> No

         self._log_cache_state()

+    @synchronized
     def unlock(self, cache_entry: CacheRecord) -> None:
         """Unlock a model."""
         if cache_entry.key not in self._cached_models:
@@ -588,6 +613,7 @@ def _log_cache_state(self, title: str = "Model cache state:", include_entry_deta

         self._logger.debug(log)

+    @synchronized
     def make_room(self, bytes_needed: int) -> None:
         """Make enough room in the cache to accommodate a new model of indicated size.
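The synchronized decorator above simply runs each decorated method under self._lock. Because the lock is a threading.RLock (re-entrant), a synchronized public method can call another synchronized method on the same instance without deadlocking, which a plain threading.Lock would not allow. A minimal standalone sketch of the same pattern, using an illustrative Counter class that is not part of this commit:

import threading
from functools import wraps
from typing import Any, Callable


def synchronized(method: Callable[..., Any]) -> Callable[..., Any]:
    """Acquire self._lock for the duration of the wrapped method (same pattern as in model_cache.py)."""

    @wraps(method)
    def wrapper(self, *args, **kwargs):
        with self._lock:
            return method(self, *args, **kwargs)

    return wrapper


class Counter:
    """Illustrative only: shows why the re-entrant lock matters."""

    def __init__(self) -> None:
        self._lock = threading.RLock()  # RLock lets the owning thread re-acquire it
        self._value = 0

    @synchronized
    def increment(self) -> None:
        self._value += 1

    @synchronized
    def add_two(self) -> None:
        # Calls another synchronized method while already holding the lock.
        # With a non-re-entrant threading.Lock this would deadlock.
        self.increment()
        self.increment()


if __name__ == "__main__":
    counter = Counter()
    counter.add_two()
    print(counter._value)  # 2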

invokeai/frontend/web/src/services/api/schema.ts

Lines changed: 40 additions & 0 deletions
@@ -320,6 +320,26 @@ export type paths = {
         patch?: never;
         trace?: never;
     };
+    "/api/v2/models/empty_model_cache": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        get?: never;
+        put?: never;
+        /**
+         * Empty Model Cache
+         * @description Drop all models from the model cache to free RAM/VRAM. 'Locked' models that are in active use will not be dropped.
+         */
+        post: operations["empty_model_cache"];
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
     "/api/v2/models/hf_login": {
         parameters: {
             query?: never;
@@ -20327,6 +20347,26 @@ export interface operations {
             };
         };
     };
+    empty_model_cache: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": unknown;
+                };
+            };
+        };
+    };
     get_hf_login_status: {
         parameters: {
             query?: never;
