Skip to content

Commit e38610e

Browse files
authored
feat: OpenVINO acceleration for embeddings in transformer backend (#2190)
OpenVINO acceleration for embeddings. New argument type: OVModelForFeatureExtraction.
1 parent 3754f15 commit e38610e

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

backend/python/transformers/transformers_server.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,21 @@ def LoadModel(self, request, context):
153153
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
154154
device=device_map)
155155
self.OV = True
156+
elif request.Type == "OVModelForFeatureExtraction":
157+
from optimum.intel.openvino import OVModelForFeatureExtraction
158+
from openvino.runtime import Core
159+
160+
if "GPU" in Core().available_devices:
161+
device_map="GPU"
162+
else:
163+
device_map="CPU"
164+
self.model = OVModelForFeatureExtraction.from_pretrained(model_name,
165+
compile=True,
166+
trust_remote_code=request.TrustRemoteCode,
167+
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
168+
export=True,
169+
device=device_map)
170+
self.OV = True
156171
else:
157172
self.model = AutoModel.from_pretrained(model_name,
158173
trust_remote_code=request.TrustRemoteCode,

0 commit comments

Comments
 (0)