Skip to content

Commit ca9cb1c

Browse files
authored
Flux VAE broke for float16; force bfloat16 or float32 where compatible (#7213)
## Summary The Flux VAE, like many VAEs, is broken if run using float16 inputs, returning black images due to NaNs. This fixes the issue by forcing the VAE to run in bfloat16 or float32 where compatible. ## Related Issues / Discussions Fix for issue #7208 ## QA Instructions Tested on macOS: the VAE works with float16 set in invoke.yaml and when left at the default. I also briefly forced it down the float32 route to check that too. Needs testing on CUDA / ROCm. ## Merge Plan It should be a straightforward merge.
2 parents fb19621 + b89caa0 commit ca9cb1c

File tree

4 files changed

+14
-5
lines changed

4 files changed

+14
-5
lines changed

invokeai/app/invocations/flux_vae_decode.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class FluxVaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
4141
def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Image:
4242
with vae_info as vae:
4343
assert isinstance(vae, AutoEncoder)
44-
latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype())
44+
vae_dtype = next(iter(vae.parameters())).dtype
45+
latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
4546
img = vae.decode(latents)
4647

4748
img = img.clamp(-1, 1)

invokeai/app/invocations/flux_vae_encode.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,8 @@ def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tenso
4444
generator = torch.Generator(device=TorchDevice.choose_torch_device()).manual_seed(0)
4545
with vae_info as vae:
4646
assert isinstance(vae, AutoEncoder)
47-
image_tensor = image_tensor.to(
48-
device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype()
49-
)
47+
vae_dtype = next(iter(vae.parameters())).dtype
48+
image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
5049
latents = vae.encode(image_tensor, sample=True, generator=generator)
5150
return latents
5251

invokeai/backend/model_manager/load/load_default.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def __init__(
3535
self._logger = logger
3636
self._ram_cache = ram_cache
3737
self._torch_dtype = TorchDevice.choose_torch_dtype()
38+
self._torch_device = TorchDevice.choose_torch_device()
3839

3940
def load_model(self, model_config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> LoadedModel:
4041
"""

invokeai/backend/model_manager/load/model_loaders/flux.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,15 @@ def _load_model(
8484
model = AutoEncoder(ae_params[config.config_path])
8585
sd = load_file(model_path)
8686
model.load_state_dict(sd, assign=True)
87-
model.to(dtype=self._torch_dtype)
87+
# VAE is broken in float16, which mps defaults to
88+
if self._torch_dtype == torch.float16:
89+
try:
90+
vae_dtype = torch.tensor([1.0], dtype=torch.bfloat16, device=self._torch_device).dtype
91+
except TypeError:
92+
vae_dtype = torch.float32
93+
else:
94+
vae_dtype = self._torch_dtype
95+
model.to(vae_dtype)
8896

8997
return model
9098

0 commit comments

Comments
 (0)