Skip to content

Commit 5177837

Browse files
authored
chore: detect and enable avx512 builds (#4675)
chore(avx512): add AVX512 build support. Fixes #4662. Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent f9e368b commit 5177837

File tree

3 files changed

+17
-1
lines changed

3 files changed

+17
-1
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,7 @@ RUN make prepare
303303
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
304304
## (both will use CUDA or hipblas for the actual computation)
305305
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
306-
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
306+
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
307307
else \
308308
make build; \
309309
fi

Makefile

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -186,6 +186,7 @@ endif
186186
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
187187
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
188188
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
189+
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
189190
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
190191
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
191192
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
@@ -699,6 +700,13 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.
699700
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
700701
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
701702

703+
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
704+
cp -rf backend/cpp/llama backend/cpp/llama-avx512
705+
$(MAKE) -C backend/cpp/llama-avx512 purge
706+
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
707+
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
708+
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
709+
702710
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
703711
cp -rf backend/cpp/llama backend/cpp/llama-avx
704712
$(MAKE) -C backend/cpp/llama-avx purge

pkg/model/initializers.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ const (
4848
LLamaCPP = "llama-cpp"
4949

5050
LLamaCPPAVX2 = "llama-cpp-avx2"
51+
LLamaCPPAVX512 = "llama-cpp-avx512"
5152
LLamaCPPAVX = "llama-cpp-avx"
5253
LLamaCPPFallback = "llama-cpp-fallback"
5354
LLamaCPPCUDA = "llama-cpp-cuda"
@@ -68,6 +69,7 @@ const (
6869

6970
var llamaCPPVariants = []string{
7071
LLamaCPPAVX2,
72+
LLamaCPPAVX512,
7173
LLamaCPPAVX,
7274
LLamaCPPFallback,
7375
LLamaCPPCUDA,
@@ -268,6 +270,12 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
268270
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
269271
selectedProcess = p
270272
}
273+
} else if xsysinfo.HasCPUCaps(cpuid.AVX512F) {
274+
p := backendPath(assetDir, LLamaCPPAVX512)
275+
if _, err := os.Stat(p); err == nil {
276+
log.Info().Msgf("[%s] attempting to load with AVX512 variant", backend)
277+
selectedProcess = p
278+
}
271279
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
272280
p := backendPath(assetDir, LLamaCPPAVX)
273281
if _, err := os.Stat(p); err == nil {

0 commit comments

Comments
 (0)