@@ -206,19 +206,22 @@ def LoadModel(self, request, context):
                                                        torch_dtype=compute)
             if request.ContextSize > 0:
                 self.max_tokens = request.ContextSize
-            else:
+            elif request.Type != "MusicgenForConditionalGeneration":
                 self.max_tokens = self.model.config.max_position_embeddings
+            else:
+                self.max_tokens = 512
 
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
-            self.XPU = False
-
-            if XPU and self.OV == False:
-                self.XPU = True
-                try:
-                    print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
-                    self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
-                except Exception as err:
-                    print("Not using XPU:", err, file=sys.stderr)
+            if request.Type != "MusicgenForConditionalGeneration":
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
+                self.XPU = False
+
+                if XPU and self.OV == False:
+                    self.XPU = True
+                    try:
+                        print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
+                        self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
+                    except Exception as err:
+                        print("Not using XPU:", err, file=sys.stderr)
 
         except Exception as err:
             print("Error:", err, file=sys.stderr)
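For context, here is a minimal sketch (not part of this diff, with facebook/musicgen-small used purely as an illustrative checkpoint) of how a Musicgen model is typically driven in upstream transformers. It shows why the hunk special-cases request.Type == "MusicgenForConditionalGeneration": Musicgen pairs with an AutoProcessor rather than an AutoTokenizer, and its composite config exposes no top-level max_position_embeddings, which is presumably why max_tokens falls back to a fixed 512.

import scipy.io.wavfile
from transformers import AutoProcessor, MusicgenForConditionalGeneration

# Musicgen ships a processor (text tokenizer + audio feature extractor),
# not a bare tokenizer, so AutoTokenizer.from_pretrained is skipped above.
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

inputs = processor(text=["80s pop track with bassy drums and synth"],
                   padding=True, return_tensors="pt")

# max_new_tokens is an audio-token budget here: Musicgen emits roughly
# 50 audio tokens per second, so 256 tokens is about five seconds of audio.
audio_values = model.generate(**inputs, max_new_tokens=256)

# The config is composite (text_encoder / audio_encoder / decoder); the
# sampling rate lives on the audio_encoder sub-config, and there is no
# top-level max_position_embeddings to read.
sampling_rate = model.config.audio_encoder.sampling_rate
scipy.io.wavfile.write("musicgen_out.wav", rate=sampling_rate,
                       data=audio_values[0, 0].numpy())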