Commit 180cd4c

fix(llama.cpp-ggml): fixup max_tokens for old backend (#2094)
fix(llama.cpp-ggml): set 0 as default for `max_tokens`

Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 284ad02 commit 180cd4c

File tree

1 file changed: +2 −2 lines changed

core/config/backend_config.go

Lines changed: 2 additions & 2 deletions
@@ -210,7 +210,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	defaultMirostatETA := 0.1
 	defaultTypicalP := 1.0
 	defaultTFZ := 1.0
-	defaultInfinity := -1
+	defaultZero := 0

 	// Try to offload all GPU layers (if GPU is found)
 	defaultHigh := 99999999
@@ -254,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	}

 	if cfg.Maxtokens == nil {
-		cfg.Maxtokens = &defaultInfinity
+		cfg.Maxtokens = &defaultZero
 	}

 	if cfg.Mirostat == nil {
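
For context, here is a minimal sketch of the nil-check defaulting pattern this diff touches. It is not the full LocalAI type: only the field changed by this commit is shown, and the surrounding struct is abbreviated for illustration.

package config

// BackendConfig is abbreviated here; the real struct in
// core/config/backend_config.go has many more fields.
type BackendConfig struct {
	// Maxtokens is a pointer so that "unset" (nil) can be told
	// apart from an explicit user value of 0.
	Maxtokens *int
}

// SetDefaults fills in defaults for fields the user left unset.
// After this commit, an unset Maxtokens defaults to 0 rather than
// -1 (the old "infinity" sentinel), since the older llama.cpp-ggml
// backend did not handle -1 as intended.
func (cfg *BackendConfig) SetDefaults() {
	defaultZero := 0
	if cfg.Maxtokens == nil {
		cfg.Maxtokens = &defaultZero
	}
}

With this in place, a configuration that never sets max_tokens ends up with *cfg.Maxtokens == 0, a value the old backend can interpret as its own default, instead of the -1 sentinel it tripped over.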
