Commit 077ba78

refactor
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 1f23930 commit 077ba78

2 files changed: +69 -39 lines

Makefile

Lines changed: 7 additions & 7 deletions
@@ -691,6 +691,13 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
 	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
 
+backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-grpc
+	$(MAKE) -C backend/cpp/llama-grpc purge
+	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
+
 backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
@@ -764,10 +771,3 @@ docker-image-intel-xpu:
 .PHONY: swagger
 swagger:
 	swag init -g core/http/app.go --output swagger
-
-backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
-	cp -rf backend/cpp/llama backend/cpp/llama-grpc
-	$(MAKE) -C backend/cpp/llama-grpc purge
-	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc

pkg/model/initializers.go

Lines changed: 62 additions & 32 deletions
@@ -29,13 +29,14 @@ var Aliases map[string]string = map[string]string{
 const (
 	LlamaGGML = "llama-ggml"
 
-	LLamaCPP = "llama-cpp"
+	LLamaCPP = "llama-cpp"
 
 	LLamaCPPCUDA12 = "llama-cpp-cuda12"
 	LLamaCPPAVX2 = "llama-cpp-avx2"
 	LLamaCPPAVX = "llama-cpp-avx"
 	LLamaCPPFallback = "llama-cpp-fallback"
 	LLamaCPPCUDA = "llama-cpp-cuda"
+	LLamaCPPGRPC = "llama-cpp-grpc"
 
 	Gpt4AllLlamaBackend = "gpt4all-llama"
 	Gpt4AllMptBackend = "gpt4all-mpt"
@@ -81,7 +82,8 @@ ENTRY:
 		}
 	}
 
-	foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback := false, false, false
+	// if we find the llama.cpp variants, show them off as a single backend (llama-cpp)
+	foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC := false, false, false, false
 	if _, ok := backends[LLamaCPP]; !ok {
 		for _, e := range entry {
 			if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
@@ -96,16 +98,23 @@ ENTRY:
 				backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback)
 				foundLCPPFallback = true
 			}
+			if strings.Contains(e.Name(), LLamaCPPGRPC) && !foundLCPPGRPC {
+				backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC)
+				foundLCPPGRPC = true
+			}
 		}
 	}
 
 	// order backends from the asset directory.
 	// as we scan for backends, we want to keep some order in which backends are tried.
 	// for example, llama.cpp should be tried first, and we want to keep the huggingface backend last.
-	// sets a priority list
-	// First has more priority
+
+	// sets a priority list - first has more priority
 	priorityList := []string{
-		// First llama.cpp and llama-ggml
+
+		// First llama.cpp (variants), with llama-ggml to follow.
+		// We keep the fallback so that if the llama.cpp variants
+		// that depend on shared libs break, there is still a safety net.
 		LLamaCPP, LlamaGGML, Gpt4All, LLamaCPPFallback,
 	}
 
@@ -142,6 +151,50 @@ ENTRY:
 	return orderedBackends, nil
 }
 
+// selectGRPCProcess selects the GRPC process to start based on system capabilities
+func selectGRPCProcess(backend, assetDir string) string {
+	foundCUDA := false
+	var grpcProcess string
+
+	// Select backend now just for llama.cpp
+	if backend != LLamaCPP {
+		return ""
+	}
+
+	// Note: This environment variable is read by the LocalAI's llama.cpp grpc-server
+	if os.Getenv("LLAMACPP_GRPC_SERVERS") != "" {
+		return backendPath(assetDir, LLamaCPPGRPC)
+	}
+
+	gpus, err := xsysinfo.GPUs()
+	if err == nil {
+		for _, gpu := range gpus {
+			if strings.Contains(gpu.String(), "nvidia") {
+				log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
+				grpcProcess = backendPath(assetDir, LLamaCPPCUDA)
+				if _, err := os.Stat(grpcProcess); err == nil {
+					foundCUDA = true
+				}
+			}
+		}
+	}
+
+	if !foundCUDA {
+		if cpu.X86.HasAVX2 {
+			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
+			grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
+		} else if cpu.X86.HasAVX {
+			log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
+			grpcProcess = backendPath(assetDir, LLamaCPPAVX)
+		} else {
+			log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
+			grpcProcess = backendPath(assetDir, LLamaCPPFallback)
+		}
+	}
+
+	return grpcProcess
+}
+
 // starts the grpcModelProcess for the backend, and returns a grpc client
 // It also loads the model
 func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (ModelAddress, error) {
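The precedence encoded by selectGRPCProcess can be restated in isolation. The sketch below is illustrative only: it replaces the real GPU/CPU probing and os.Stat check with plain booleans, and the function and parameter names (pickVariant, grpcServersSet, and so on) are made up for the example; only the variant names and the decision order come from the diff.

// Illustrative restatement of the selection order, not the real function.
package main

import "fmt"

// pickVariant mirrors the precedence: explicit RPC servers, then CUDA (if the
// CUDA binary is present), then AVX2, then AVX, then the fallback build.
func pickVariant(grpcServersSet, hasNvidiaGPU, cudaBinaryPresent, hasAVX2, hasAVX bool) string {
	if grpcServersSet { // LLAMACPP_GRPC_SERVERS was provided
		return "llama-cpp-grpc"
	}
	if hasNvidiaGPU && cudaBinaryPresent {
		return "llama-cpp-cuda"
	}
	if hasAVX2 {
		return "llama-cpp-avx2"
	}
	if hasAVX {
		return "llama-cpp-avx"
	}
	return "llama-cpp-fallback"
}

func main() {
	fmt.Println(pickVariant(false, true, true, true, true))    // llama-cpp-cuda
	fmt.Println(pickVariant(true, true, true, true, true))     // llama-cpp-grpc
	fmt.Println(pickVariant(false, false, false, false, true)) // llama-cpp-avx
}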
@@ -192,33 +245,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (ModelAddress, error) {
 	} else {
 		grpcProcess := backendPath(o.assetDir, backend)
 
-		foundCUDA := false
-		// for llama-cpp, check CPU capabilities and load the appropriate variant
-		if backend == LLamaCPP {
-			gpus, err := xsysinfo.GPUs()
-			if err == nil {
-				for _, gpu := range gpus {
-					if strings.Contains(gpu.String(), "nvidia") {
-						log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
-						grpcProcess = backendPath(o.assetDir, LLamaCPPCUDA)
-						if _, err := os.Stat(grpcProcess); err == nil {
-							foundCUDA = true
-						}
-					}
-				}
-			}
-
-			if !foundCUDA {
-				if cpu.X86.HasAVX2 {
-					log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
-					grpcProcess = backendPath(o.assetDir, LLamaCPPAVX2)
-				} else if cpu.X86.HasAVX {
-					log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
-					grpcProcess = backendPath(o.assetDir, LLamaCPPAVX)
-				} else {
-					log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
-					grpcProcess = backendPath(o.assetDir, LLamaCPPFallback)
-				}
+		if os.Getenv("DISABLE_AUTODETECT") != "true" {
+			// autoDetect GRPC process to start based on system capabilities
+			if selectedProcess := selectGRPCProcess(backend, o.assetDir); selectedProcess != "" {
+				grpcProcess = selectedProcess
 			}
 		}
 