Skip to content

Commit 628502f

Browse files
committed
TTS endpoint: add optional language parameter
Signed-off-by: blob42 <[email protected]>
1 parent 9c9ead0 commit 628502f

File tree

8 files changed

+83
-70
lines changed

8 files changed

+83
-70
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ protogen-clean: protogen-go-clean protogen-python-clean
428428
.PHONY: protogen-go
429429
protogen-go:
430430
mkdir -p pkg/grpc/proto
431-
protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
431+
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
432432
backend/backend.proto
433433

434434
.PHONY: protogen-go-clean

backend/backend.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ message TTSRequest {
263263
string model = 2;
264264
string dst = 3;
265265
string voice = 4;
266+
optional string language = 5;
266267
}
267268

268269
message TokenizationResponse {

backend/python/coqui/coqui_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def LoadModel(self, request, context):
6767
def TTS(self, request, context):
6868
try:
6969
# if model is multilingual, add language from request or env as fallback
70-
lang = request.Lang or COQUI_LANGUAGE
70+
lang = request.language or COQUI_LANGUAGE
7171
if self.tts.is_multi_lingual and lang is None:
7272
return backend_pb2.Result(success=False, message=f"Model is multi-lingual, but no language was provided")
7373

core/backend/tts.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func generateUniqueFileName(dir, baseName, ext string) string {
2929
}
3030
}
3131

32-
func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
32+
func ModelTTS(backend, text, modelFile, voice string, language string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
3333
bb := backend
3434
if bb == "" {
3535
bb = model.PiperBackend
@@ -83,6 +83,7 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader,
8383
Model: modelPath,
8484
Voice: voice,
8585
Dst: filePath,
86+
Language: &language,
8687
})
8788

8889
// return RPC error if any
@@ -92,3 +93,9 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader,
9293

9394
return filePath, res, err
9495
}
96+
97+
func ModelTTSInfo(backend, text, modelFile, voice string, language string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
98+
99+
100+
return "", nil, nil
101+
}

core/cli/tts.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ type TTSCMD struct {
1818
Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"`
1919
Model string `short:"m" required:"" help:"Model name to run the TTS"`
2020
Voice string `short:"v" help:"Voice name to run the TTS"`
21+
Language string `short:"l" help:"Language to use with the TTS"`
2122
OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
2223
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
2324
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
@@ -45,7 +46,7 @@ func (t *TTSCMD) Run(ctx *Context) error {
4546
options := config.BackendConfig{}
4647
options.SetDefaults()
4748

48-
filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options)
49+
filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
4950
if err != nil {
5051
return err
5152
}

core/http/endpoints/elevenlabs/tts.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
5252
}
5353
log.Debug().Msgf("Request for model: %s", modelFile)
5454

55-
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, appConfig, *cfg)
55+
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, "", ml, appConfig, *cfg) // ModelTTS takes voice before language: voiceID is the voice, language is left empty
5656
if err != nil {
5757
return err
5858
}

core/http/endpoints/localai/tts.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ import (
1212
)
1313

1414
// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
15-
// @Summary Generates audio from the input text.
16-
// @Param request body schema.TTSRequest true "query params"
17-
// @Success 200 {string} binary "Response"
18-
// @Router /v1/audio/speech [post]
19-
// @Router /tts [post]
15+
// @Summary Generates audio from the input text.
16+
// @Accept json
17+
// @Produce audio/x-wav
18+
// @Param request body schema.TTSRequest true "query params"
19+
// @Success 200 {string} binary "generated audio/wav file"
20+
// @Router /v1/audio/speech [post]
21+
// @Router /tts [post]
2022
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
2123
return func(c *fiber.Ctx) error {
2224

@@ -52,7 +54,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
5254
cfg.Backend = input.Backend
5355
}
5456

55-
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg)
57+
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, input.Language, ml, appConfig, *cfg)
5658
if err != nil {
5759
return err
5860
}

core/schema/localai.go

Lines changed: 61 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,61 @@
1-
package schema
2-
3-
import (
4-
gopsutil "github.com/shirou/gopsutil/v3/process"
5-
)
6-
7-
type BackendMonitorRequest struct {
8-
Model string `json:"model" yaml:"model"`
9-
}
10-
11-
type BackendMonitorResponse struct {
12-
MemoryInfo *gopsutil.MemoryInfoStat
13-
MemoryPercent float32
14-
CPUPercent float64
15-
}
16-
17-
type TTSRequest struct {
18-
Model string `json:"model" yaml:"model"`
19-
Input string `json:"input" yaml:"input"`
20-
Voice string `json:"voice" yaml:"voice"`
21-
Backend string `json:"backend" yaml:"backend"`
22-
}
23-
24-
type StoresSet struct {
25-
Store string `json:"store,omitempty" yaml:"store,omitempty"`
26-
27-
Keys [][]float32 `json:"keys" yaml:"keys"`
28-
Values []string `json:"values" yaml:"values"`
29-
}
30-
31-
type StoresDelete struct {
32-
Store string `json:"store,omitempty" yaml:"store,omitempty"`
33-
34-
Keys [][]float32 `json:"keys"`
35-
}
36-
37-
type StoresGet struct {
38-
Store string `json:"store,omitempty" yaml:"store,omitempty"`
39-
40-
Keys [][]float32 `json:"keys" yaml:"keys"`
41-
}
42-
43-
type StoresGetResponse struct {
44-
Keys [][]float32 `json:"keys" yaml:"keys"`
45-
Values []string `json:"values" yaml:"values"`
46-
}
47-
48-
type StoresFind struct {
49-
Store string `json:"store,omitempty" yaml:"store,omitempty"`
50-
51-
Key []float32 `json:"key" yaml:"key"`
52-
Topk int `json:"topk" yaml:"topk"`
53-
}
54-
55-
type StoresFindResponse struct {
56-
Keys [][]float32 `json:"keys" yaml:"keys"`
57-
Values []string `json:"values" yaml:"values"`
58-
Similarities []float32 `json:"similarities" yaml:"similarities"`
59-
}
1+
package schema
2+
3+
import (
4+
gopsutil "github.com/shirou/gopsutil/v3/process"
5+
)
6+
7+
type BackendMonitorRequest struct {
8+
Model string `json:"model" yaml:"model"`
9+
}
10+
11+
type BackendMonitorResponse struct {
12+
MemoryInfo *gopsutil.MemoryInfoStat
13+
MemoryPercent float32
14+
CPUPercent float64
15+
}
16+
17+
// @Description TTS request body
18+
type TTSRequest struct {
19+
Model string `json:"model" yaml:"model"` // model name or full path
20+
Input string `json:"input" yaml:"input"` // text input
21+
Voice string `json:"voice" yaml:"voice"` // voice audio file or speaker id
22+
Backend string `json:"backend" yaml:"backend"`
23+
Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model
24+
}
25+
26+
type StoresSet struct {
27+
Store string `json:"store,omitempty" yaml:"store,omitempty"`
28+
29+
Keys [][]float32 `json:"keys" yaml:"keys"`
30+
Values []string `json:"values" yaml:"values"`
31+
}
32+
33+
type StoresDelete struct {
34+
Store string `json:"store,omitempty" yaml:"store,omitempty"`
35+
36+
Keys [][]float32 `json:"keys"`
37+
}
38+
39+
type StoresGet struct {
40+
Store string `json:"store,omitempty" yaml:"store,omitempty"`
41+
42+
Keys [][]float32 `json:"keys" yaml:"keys"`
43+
}
44+
45+
type StoresGetResponse struct {
46+
Keys [][]float32 `json:"keys" yaml:"keys"`
47+
Values []string `json:"values" yaml:"values"`
48+
}
49+
50+
type StoresFind struct {
51+
Store string `json:"store,omitempty" yaml:"store,omitempty"`
52+
53+
Key []float32 `json:"key" yaml:"key"`
54+
Topk int `json:"topk" yaml:"topk"`
55+
}
56+
57+
type StoresFindResponse struct {
58+
Keys [][]float32 `json:"keys" yaml:"keys"`
59+
Values []string `json:"values" yaml:"values"`
60+
Similarities []float32 `json:"similarities" yaml:"similarities"`
61+
}

0 commit comments

Comments
 (0)