Commit 4301812

feat(backend): prefer xformers based on cuda compute capability
1 parent: 09f97b6

1 file changed: +13 -2 lines changed

invokeai/backend/stable_diffusion/diffusers_pipeline.py

Lines changed: 13 additions & 2 deletions
```diff
@@ -171,8 +171,19 @@ def _adjust_memory_efficient_attention(self, latents: torch.Tensor):
         """
         if xformers is available, use it, otherwise use sliced attention.
         """
+
+        # On 30xx and 40xx series GPUs, `torch-sdp` is faster than `xformers`. This corresponds to a CUDA major
+        # version of 8 or higher. So, for major version 7 or below, we prefer `xformers`.
+        # See:
+        # - https://developer.nvidia.com/cuda-gpus
+        # - https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
+        try:
+            prefer_xformers = torch.cuda.is_available() and torch.cuda.get_device_properties("cuda").major <= 7  # type: ignore # Type of "get_device_properties" is partially unknown
+        except Exception:
+            prefer_xformers = False
+
         config = get_config()
-        if config.attention_type == "xformers":
+        if config.attention_type == "xformers" and is_xformers_available() and prefer_xformers:
             self.enable_xformers_memory_efficient_attention()
             return
         elif config.attention_type == "sliced":
@@ -195,7 +206,7 @@ def _adjust_memory_efficient_attention(self, latents: torch.Tensor):
 
         # the remainder if this code is called when attention_type=='auto'
         if self.unet.device.type == "cuda":
-            if is_xformers_available():
+            if is_xformers_available() and prefer_xformers:
                 self.enable_xformers_memory_efficient_attention()
                 return
             elif hasattr(torch.nn.functional, "scaled_dot_product_attention"):
```
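
For reference, the compute-capability check this commit introduces can be read in isolation. The sketch below mirrors that logic as a standalone function; the helper name `pick_attention_backend` is illustrative and not part of the codebase:

```python
import torch


def pick_attention_backend() -> str:
    """Return "xformers" on pre-Ampere GPUs, else "torch-sdp".

    Ampere (30xx) is compute capability 8.x and Ada (40xx) is 8.9, so a CUDA
    major version of 7 or below identifies the older GPUs where xformers is
    still the faster choice.
    """
    try:
        # "cuda" resolves to the current CUDA device.
        prefer_xformers = torch.cuda.is_available() and torch.cuda.get_device_properties("cuda").major <= 7
    except Exception:
        # No CUDA device, or the property query failed: don't prefer xformers.
        prefer_xformers = False
    return "xformers" if prefer_xformers else "torch-sdp"


print(pick_attention_backend())  # e.g. "torch-sdp" on an RTX 3090 (compute capability 8.6)
```

Wrapping the query in `try`/`except` means machines without a working CUDA runtime simply get `prefer_xformers = False`, leaving the existing `torch-sdp` and sliced-attention paths unaffected.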
