Skip to content

Commit af9e5a2

Browse files
authored
Revert #1963 (#2056)
* Revert "fix(fncall): fix regression introduced in #1963 (#2048)"

  This reverts commit 6b06d4e.

* Revert "fix: action-tmate back to upstream, dead code removal (#2038)"

  This reverts commit fdec8a9.

* Revert "feat(grpc): return consumed token count and update response accordingly (#2035)"

  This reverts commit e843d7d.

* Revert "refactor: backend/service split, channel-based llm flow (#1963)"

  This reverts commit eed5706.

* feat(grpc): return consumed token count and update response accordingly

  Fixes: #1920

  Signed-off-by: Ettore Di Giacinto <[email protected]>

---------

Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent af8c705 commit af9e5a2

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+2272
-3042
lines changed

.github/workflows/test.yml

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,8 @@ jobs:
121121
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
122122
- name: Setup tmate session if tests fail
123123
if: ${{ failure() }}
124-
uses: mxschmitt/[email protected]
125-
with:
126-
connect-timeout-seconds: 180
124+
uses: mxschmitt/action-tmate@v3
125+
timeout-minutes: 5
127126

128127
tests-aio-container:
129128
runs-on: ubuntu-latest
@@ -174,9 +173,8 @@ jobs:
174173
make run-e2e-aio
175174
- name: Setup tmate session if tests fail
176175
if: ${{ failure() }}
177-
uses: mxschmitt/[email protected]
178-
with:
179-
connect-timeout-seconds: 180
176+
uses: mxschmitt/action-tmate@v3
177+
timeout-minutes: 5
180178

181179
tests-apple:
182180
runs-on: macOS-14
@@ -209,6 +207,5 @@ jobs:
209207
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
210208
- name: Setup tmate session if tests fail
211209
if: ${{ failure() }}
212-
uses: mxschmitt/[email protected]
213-
with:
214-
connect-timeout-seconds: 180
210+
uses: mxschmitt/action-tmate@v3
211+
timeout-minutes: 5

Makefile

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -301,9 +301,6 @@ clean-tests:
301301
rm -rf test-dir
302302
rm -rf core/http/backend-assets
303303

304-
halt-backends: ## Used to clean up stray backends sometimes left running when debugging manually
305-
ps | grep 'backend-assets/grpc/' | awk '{print $$1}' | xargs -I {} kill -9 {}
306-
307304
## Build:
308305
build: prepare backend-assets grpcs ## Build the project
309306
$(info ${GREEN}I local-ai build info:${RESET})
@@ -368,29 +365,29 @@ run-e2e-image:
368365

369366
run-e2e-aio:
370367
@echo 'Running e2e AIO tests'
371-
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
368+
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
372369

373370
test-e2e:
374371
@echo 'Running e2e tests'
375372
BUILD_TYPE=$(BUILD_TYPE) \
376373
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
377-
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
374+
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
378375

379376
teardown-e2e:
380377
rm -rf $(TEST_DIR) || true
381378
docker stop $$(docker ps -q --filter ancestor=localai-tests)
382379

383380
test-gpt4all: prepare-test
384381
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
385-
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
382+
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
386383

387384
test-llama: prepare-test
388385
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
389-
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
386+
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
390387

391388
test-llama-gguf: prepare-test
392389
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
393-
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
390+
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
394391

395392
test-tts: prepare-test
396393
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
@@ -648,10 +645,7 @@ backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libb
648645
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
649646
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
650647
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
651-
# EXPERIMENTAL:
652-
ifeq ($(BUILD_TYPE),metal)
653-
cp $(CURDIR)/sources/go-llama-ggml/llama.cpp/ggml-metal.metal backend-assets/grpc/
654-
endif
648+
655649
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
656650
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
657651
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/

backend/go/transcribe/transcript.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,16 @@ func runCommand(command []string) (string, error) {
2121
// AudioToWav converts audio to wav for transcribe.
2222
// TODO: use https://github.com/mccoyst/ogg?
2323
func audioToWav(src, dst string) error {
24-
command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
24+
command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
2525
out, err := runCommand(command)
2626
if err != nil {
2727
return fmt.Errorf("error: %w out: %s", err, out)
2828
}
2929
return nil
3030
}
3131

32-
func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
33-
res := schema.TranscriptionResult{}
32+
func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
33+
res := schema.Result{}
3434

3535
dir, err := os.MkdirTemp("", "whisper")
3636
if err != nil {

backend/go/transcribe/whisper.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
2121
return err
2222
}
2323

24-
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
24+
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
2525
return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
2626
}

core/backend/embeddings.go

Lines changed: 2 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -2,100 +2,14 @@ package backend
22

33
import (
44
"fmt"
5-
"time"
65

76
"github.com/go-skynet/LocalAI/core/config"
8-
"github.com/go-skynet/LocalAI/core/schema"
9-
"github.com/google/uuid"
107

11-
"github.com/go-skynet/LocalAI/pkg/concurrency"
128
"github.com/go-skynet/LocalAI/pkg/grpc"
13-
"github.com/go-skynet/LocalAI/pkg/model"
9+
model "github.com/go-skynet/LocalAI/pkg/model"
1410
)
1511

16-
type EmbeddingsBackendService struct {
17-
ml *model.ModelLoader
18-
bcl *config.BackendConfigLoader
19-
appConfig *config.ApplicationConfig
20-
}
21-
22-
func NewEmbeddingsBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *EmbeddingsBackendService {
23-
return &EmbeddingsBackendService{
24-
ml: ml,
25-
bcl: bcl,
26-
appConfig: appConfig,
27-
}
28-
}
29-
30-
func (ebs *EmbeddingsBackendService) Embeddings(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] {
31-
32-
resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse])
33-
go func(request *schema.OpenAIRequest) {
34-
if request.Model == "" {
35-
request.Model = model.StableDiffusionBackend
36-
}
37-
38-
bc, request, err := ebs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, ebs.appConfig)
39-
if err != nil {
40-
resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
41-
close(resultChannel)
42-
return
43-
}
44-
45-
items := []schema.Item{}
46-
47-
for i, s := range bc.InputToken {
48-
// get the model function to call for the result
49-
embedFn, err := modelEmbedding("", s, ebs.ml, bc, ebs.appConfig)
50-
if err != nil {
51-
resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
52-
close(resultChannel)
53-
return
54-
}
55-
56-
embeddings, err := embedFn()
57-
if err != nil {
58-
resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
59-
close(resultChannel)
60-
return
61-
}
62-
items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
63-
}
64-
65-
for i, s := range bc.InputStrings {
66-
// get the model function to call for the result
67-
embedFn, err := modelEmbedding(s, []int{}, ebs.ml, bc, ebs.appConfig)
68-
if err != nil {
69-
resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
70-
close(resultChannel)
71-
return
72-
}
73-
74-
embeddings, err := embedFn()
75-
if err != nil {
76-
resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
77-
close(resultChannel)
78-
return
79-
}
80-
items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
81-
}
82-
83-
id := uuid.New().String()
84-
created := int(time.Now().Unix())
85-
resp := &schema.OpenAIResponse{
86-
ID: id,
87-
Created: created,
88-
Model: request.Model, // we have to return what the user sent here, due to OpenAI spec.
89-
Data: items,
90-
Object: "list",
91-
}
92-
resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp}
93-
close(resultChannel)
94-
}(request)
95-
return resultChannel
96-
}
97-
98-
func modelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
12+
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
9913
modelFile := backendConfig.Model
10014

10115
grpcOpts := gRPCModelOpts(backendConfig)

0 commit comments

Comments
 (0)