@@ -187,29 +187,24 @@ def _adjust_memory_efficient_attention(self, latents: torch.Tensor):
             self.disable_attention_slicing()
             return
         elif config.attention_type == "torch-sdp":
-            if hasattr(torch.nn.functional, "scaled_dot_product_attention"):
-                # diffusers enables sdp automatically
-                return
-            else:
-                raise Exception("torch-sdp attention slicing not available")
+            # torch-sdp is the default in diffusers.
+            return
 
         # See https://github.com/invoke-ai/InvokeAI/issues/7049 for context.
         # Bumping torch from 2.2.2 to 2.4.1 caused the sliced attention implementation to produce incorrect results.
         # For now, if a user is on an MPS device and has not explicitly set the attention_type, then we select the
         # non-sliced torch-sdp implementation. This keeps things working on MPS at the cost of increased peak memory
         # utilization.
         if torch.backends.mps.is_available():
-            assert hasattr(torch.nn.functional, "scaled_dot_product_attention")
             return
 
-        # the remainder if this code is called when attention_type=='auto'
+        # The remainder of this code is called when attention_type=='auto'.
         if self.unet.device.type == "cuda":
             if is_xformers_available():
                 self.enable_xformers_memory_efficient_attention()
                 return
-            elif hasattr(torch.nn.functional, "scaled_dot_product_attention"):
-                # diffusers enables sdp automatically
-                return
+            # torch-sdp is the default in diffusers.
+            return
 
         if self.unet.device.type == "cpu" or self.unet.device.type == "mps":
             mem_free = psutil.virtual_memory().free
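For context on the removed `hasattr()` guards and the `torch-sdp` branches above: `torch.nn.functional.scaled_dot_product_attention` has shipped with every torch release since 2.0, and the comment in this hunk already references a torch 2.4.1 floor, so the fallback/assert paths can never trigger. The sketch below (not part of this PR; shapes and values are illustrative assumptions) shows the SDPA kernel that diffusers' default attention processor dispatches to when it is available.

```python
# Minimal sketch, assuming torch >= 2.0 is installed (as implied by the
# torch 2.4.1 reference in the comment above). Illustrates that the
# hasattr() checks removed in this hunk are always true on supported
# torch versions, and what the torch-sdp path ultimately calls.
import torch
import torch.nn.functional as F

# Always present on torch >= 2.0, so the removed fallback branches are dead code.
assert hasattr(F, "scaled_dot_product_attention")

# Illustrative tensors with shape (batch, heads, seq_len, head_dim).
q = torch.randn(1, 8, 16, 64)
k = torch.randn(1, 8, 16, 64)
v = torch.randn(1, 8, 16, 64)

# The non-sliced SDPA kernel used when attention_type == "torch-sdp".
out = F.scaled_dot_product_attention(q, k, v)
print(out.shape)  # torch.Size([1, 8, 16, 64])
```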