mudler · mudler · May 20, 2024 · May 16, 2024 · May 18, 2024 · May 18, 2024
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -61,7 +61,7 @@ jobs:
           go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
           export PATH=$PATH:$GOPATH/bin
           export PATH=/usr/local/cuda/bin:$PATH
-          make dist
+          GO_TAGS=p2p make dist
       - uses: actions/upload-artifact@v4
         with:
           name: LocalAI-linux
@@ -121,7 +121,7 @@ jobs:
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
           export PATH=$PATH:$GOPATH/bin
-          make dist
+          GO_TAGS=p2p make dist
       - uses: actions/upload-artifact@v4
         with:
           name: LocalAI-MacOS-arm64

diff --git a/Dockerfile b/Dockerfile
@@ -14,7 +14,7 @@ ARG TARGETVARIANT
 ENV DEBIAN_FRONTEND=noninteractive
 ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
-ARG GO_TAGS="stablediffusion tinydream tts"
+ARG GO_TAGS="stablediffusion tinydream tts p2p"
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \

diff --git a/core/cli/cli.go b/core/cli/cli.go
@@ -1,21 +1,16 @@
 package cli
 
-import "embed"
-
-type Context struct {
-	Debug    bool    `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
-	LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
-
-	// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
-	BackendAssets embed.FS `kong:"-"`
-}
+import (
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	"github.com/go-skynet/LocalAI/core/cli/worker"
+)
 
+// CLI is the root command-line definition. It embeds the shared
+// cliContext.Context flags and declares one field per top-level command
+// (run, models, tts, transcript and worker); the struct tags carry the help
+// text, and "run" is tagged as the default command.
 var CLI struct {
-	Context `embed:""`
+	cliContext.Context `embed:""`
 
-	Run            RunCMD            `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
-	Models         ModelsCMD         `cmd:"" help:"Manage LocalAI models and definitions"`
-	TTS            TTSCMD            `cmd:"" help:"Convert text to speech"`
-	Transcript     TranscriptCMD     `cmd:"" help:"Convert audio to text"`
-	LLAMACPPWorker LLAMACPPWorkerCMD `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
+	Run        RunCMD        `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
+	Models     ModelsCMD     `cmd:"" help:"Manage LocalAI models and definitions"`
+	TTS        TTSCMD        `cmd:"" help:"Convert text to speech"`
+	Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
+	Worker     worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
 }
diff --git a/core/cli/context/context.go b/core/cli/context/context.go
@@ -0,0 +1,11 @@
+package cliContext
+
+import "embed"
+
+// Context holds the global flags shared by every CLI command (debug switch
+// and log level) together with the embedded backend assets. A pointer to it
+// is passed to each command's Run method.
+type Context struct {
+	Debug    bool    `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
+	LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
+
+	// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
+	BackendAssets embed.FS `kong:"-"`
+}
diff --git a/core/cli/models.go b/core/cli/models.go
@@ -4,6 +4,8 @@ import (
 	"encoding/json"
 	"fmt"
 
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/rs/zerolog/log"
 	"github.com/schollz/progressbar/v3"
@@ -29,7 +31,7 @@ type ModelsCMD struct {
 	Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
 }
 
-func (ml *ModelsList) Run(ctx *Context) error {
+func (ml *ModelsList) Run(ctx *cliContext.Context) error {
 	var galleries []gallery.Gallery
 	if err := json.Unmarshal([]byte(ml.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")
@@ -49,7 +51,7 @@ func (ml *ModelsList) Run(ctx *Context) error {
 	return nil
 }
 
-func (mi *ModelsInstall) Run(ctx *Context) error {
+func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 	modelName := mi.ModelArgs[0]
 
 	var galleries []gallery.Gallery

diff --git a/core/cli/run.go b/core/cli/run.go
@@ -1,12 +1,15 @@
 package cli
 
 import (
+	"context"
 	"fmt"
 	"strings"
 	"time"
 
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/http"
+	"github.com/go-skynet/LocalAI/core/p2p"
 	"github.com/go-skynet/LocalAI/core/startup"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
@@ -37,13 +40,14 @@ type RunCMD struct {
 	Threads     int  `env:"LOCALAI_THREADS,THREADS" short:"t" default:"4" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
 	ContextSize int  `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
 
-	Address          string   `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
-	CORS             bool     `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
-	CORSAllowOrigins string   `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
-	UploadLimit      int      `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
-	APIKeys          []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
-	DisableWebUI     bool     `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
-
+	Address              string   `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
+	CORS                 bool     `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
+	CORSAllowOrigins     string   `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
+	UploadLimit          int      `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
+	APIKeys              []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
+	DisableWebUI         bool     `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
+	Peer2Peer            bool     `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
+	Peer2PeerToken       string   `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
 	ParallelRequests     bool     `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
 	SingleActiveBackend  bool     `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
 	PreloadBackendOnly   bool     `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
@@ -54,7 +58,7 @@ type RunCMD struct {
 	WatchdogBusyTimeout  string   `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
 }
 
-func (r *RunCMD) Run(ctx *Context) error {
+func (r *RunCMD) Run(ctx *cliContext.Context) error {
 	opts := []config.AppOption{
 		config.WithConfigFile(r.ModelsConfigFile),
 		config.WithJSONStringPreload(r.PreloadModels),
@@ -81,6 +85,31 @@ func (r *RunCMD) Run(ctx *Context) error {
 		config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
 	}
 
+	if r.Peer2Peer || r.Peer2PeerToken != "" {
+		log.Info().Msg("P2P mode enabled")
+		token := r.Peer2PeerToken
+		if token == "" {
+			// IF no token is provided, and p2p is enabled,
+			// we generate one and wait for the user to pick up the token (this is for interactive)
+			log.Info().Msg("No token provided, generating one")
+			token = p2p.GenerateToken()
+			log.Info().Msg("Generated Token:")
+			fmt.Println(token)
+
+			log.Info().Msg("To use the token, you can run the following command in another node or terminal:")
+			fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
+
+			// Ask for user confirmation
+			log.Info().Msg("Press a button to proceed")
+			var input string
+			fmt.Scanln(&input)
+		}
+		log.Info().Msg("Starting P2P server discovery...")
+		if err := p2p.LLamaCPPRPCServerDiscoverer(context.Background(), token); err != nil {
+			return err
+		}
+	}
+
 	idleWatchDog := r.EnableWatchdogIdle
 	busyWatchDog := r.EnableWatchdogBusy
 

diff --git a/core/cli/transcript.go b/core/cli/transcript.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 
 	"github.com/go-skynet/LocalAI/core/backend"
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
@@ -22,7 +23,7 @@ type TranscriptCMD struct {
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
 }
 
-func (t *TranscriptCMD) Run(ctx *Context) error {
+func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
 	opts := &config.ApplicationConfig{
 		ModelPath:         t.ModelsPath,
 		Context:           context.Background(),

diff --git a/core/cli/tts.go b/core/cli/tts.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 
 	"github.com/go-skynet/LocalAI/core/backend"
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
@@ -24,7 +25,7 @@ type TTSCMD struct {
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
 }
 
-func (t *TTSCMD) Run(ctx *Context) error {
+func (t *TTSCMD) Run(ctx *cliContext.Context) error {
 	outputFile := t.OutputFile
 	outputDir := t.BackendAssetsPath
 	if outputFile != "" {

diff --git a/core/cli/worker/worker.go b/core/cli/worker/worker.go
@@ -0,0 +1,10 @@
+package worker
+
+// WorkerFlags groups the flags common to the worker sub-commands; it is
+// embedded by the individual worker command structs.
+type WorkerFlags struct {
+	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+}
+
+// Worker is the parent "worker" command. It exposes two sub-commands:
+// "p2p-llama-cpp-rpc" (P2P) and "llama-cpp-rpc" (LLamaCPP).
+type Worker struct {
+	P2P      P2P      `cmd:"" name:"p2p-llama-cpp-rpc" help:"Starts a LocalAI llama.cpp worker in P2P mode (requires a token)"`
+	LLamaCPP LLamaCPP `cmd:"" name:"llama-cpp-rpc" help:"Starts a llama.cpp worker in standalone mode"`
+}
diff --git a/core/cli/llamacppworker.go → core/cli/worker/worker_llamacpp.go b/core/cli/llamacppworker.go → core/cli/worker/worker_llamacpp.go
@@ -1,26 +1,32 @@
-package cli
+package worker
 
 import (
+	"fmt"
 	"os"
 	"syscall"
 
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
 	"github.com/go-skynet/LocalAI/pkg/assets"
 	"github.com/rs/zerolog/log"
 )
 
-type LLAMACPPWorkerCMD struct {
-	Args              []string `arg:"" optional:"" name:"models" help:"Worker arguments: host port"`
-	BackendAssetsPath string   `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+// LLamaCPP is the standalone "llama-cpp-rpc" worker command.
+//
+// NOTE(review): Run forwards os.Args[4:] to llama-cpp-rpc-server rather than
+// Args, and its usage message describes the positionals as server arguments,
+// so the "models" name and help text on Args look stale — confirm.
+type LLamaCPP struct {
+	Args        []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
+	WorkerFlags `embed:""`
 }
 
-func (r *LLAMACPPWorkerCMD) Run(ctx *Context) error {
+func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
 	// Extract files from the embedded FS
 	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
 	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
 	if err != nil {
 		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
 	}
 
+	if len(os.Args) < 4 {
+		return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- <llama-rpc-server-args>")
+	}
+
 	return syscall.Exec(
 		assets.ResolvePath(
 			r.BackendAssetsPath,
@@ -32,6 +38,6 @@ func (r *LLAMACPPWorkerCMD) Run(ctx *Context) error {
 				r.BackendAssetsPath,
 				"util",
 				"llama-cpp-rpc-server",
-			)}, r.Args...),
+			)}, os.Args[4:]...),
 		os.Environ())
 }
diff --git a/core/cli/worker/worker_nop2p.go b/core/cli/worker/worker_nop2p.go
@@ -0,0 +1,16 @@
+//go:build !p2p
+// +build !p2p
+
+package worker
+
+import (
+	"fmt"
+
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+)
+
+// P2P is the stand-in for the P2P worker command in builds compiled without
+// the "p2p" build tag; it declares no flags.
+type P2P struct{}
+
+// Run always fails: this variant is only compiled when the "p2p" build tag
+// is absent, so the P2P worker cannot be started.
+func (r *P2P) Run(ctx *cliContext.Context) error {
+	err := fmt.Errorf("p2p mode is not enabled in this build")
+	return err
+}
diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go
@@ -0,0 +1,104 @@
+//go:build p2p
+// +build p2p
+
+package worker
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"time"
+
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	"github.com/go-skynet/LocalAI/core/p2p"
+	"github.com/go-skynet/LocalAI/pkg/assets"
+	"github.com/phayes/freeport"
+	"github.com/rs/zerolog/log"
+)
+
+// P2P is the "p2p-llama-cpp-rpc" worker command, available in builds
+// compiled with the "p2p" build tag.
+//
+// Token is mandatory (Run rejects an empty value). NoRunner, RunnerAddress
+// and RunnerPort let the user point the worker at a llama-cpp-rpc-server
+// they manage themselves; ExtraLLamaCPPArgs is appended to the command line
+// of the server started by Run.
+type P2P struct {
+	WorkerFlags       `embed:""`
+	Token             string   `env:"LOCALAI_TOKEN,TOKEN" help:"Token for P2P mode (required)"`
+	NoRunner          bool     `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
+	RunnerAddress     string   `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
+	RunnerPort        string   `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
+	ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
+}
+
+// Run starts the P2P llama.cpp worker and then blocks for the lifetime of
+// the process.
+//
+// It extracts the embedded backend assets (a failure is only logged), checks
+// that a token was provided and picks a free local port. With NoRunner set it
+// only hands the address/port (optionally overridden by RunnerAddress and
+// RunnerPort) and the token to p2p.BindLLamaCPPWorker and expects the user to
+// start llama-cpp-rpc-server there. Otherwise it supervises the pre-packaged
+// llama-cpp-rpc-server in a goroutine, restarting it whenever it exits, and
+// then calls p2p.BindLLamaCPPWorker for it.
+//
+// It returns an error when the token is missing, when no free port can be
+// obtained, or when p2p.BindLLamaCPPWorker fails.
+func (r *P2P) Run(ctx *cliContext.Context) error {
+	// Pause between (re)start attempts of llama-cpp-rpc-server, so that a
+	// binary which cannot be started, or which exits immediately, does not
+	// spin the CPU and flood the log.
+	const restartDelay = 1 * time.Second
+
+	// Extract files from the embedded FS
+	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
+	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
+	if err != nil {
+		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
+	}
+
+	// Check if the token is set
+	// as we always need it.
+	if r.Token == "" {
+		return fmt.Errorf("Token is required")
+	}
+
+	port, err := freeport.GetFreePort()
+	if err != nil {
+		return err
+	}
+
+	address := "127.0.0.1"
+
+	if r.NoRunner {
+		// Let the user override which address and port to bind when the
+		// llama-cpp service is configured on their own.
+		p := fmt.Sprint(port)
+		if r.RunnerAddress != "" {
+			address = r.RunnerAddress
+		}
+		if r.RunnerPort != "" {
+			p = r.RunnerPort
+		}
+
+		if err := p2p.BindLLamaCPPWorker(context.Background(), address, p, r.Token); err != nil {
+			return err
+		}
+		log.Info().Msgf("You need to start llama-cpp-rpc-server on '%s:%s'", address, p)
+
+		// NOTE(review): the runner branch parks forever after the same
+		// BindLLamaCPPWorker call, which suggests the call does not block.
+		// Returning here would then end the command right after binding, so
+		// this branch now falls through to the keep-alive loop as well —
+		// confirm against the p2p package.
+	} else {
+		// Start llama.cpp directly from the version we have pre-packaged
+		go func() {
+			for {
+				log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)
+				cmd := exec.Command(
+					assets.ResolvePath(
+						r.BackendAssetsPath,
+						"util",
+						"llama-cpp-rpc-server",
+					),
+					append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)...,
+				)
+
+				cmd.Env = os.Environ()
+
+				cmd.Stderr = os.Stdout
+				cmd.Stdout = os.Stdout
+
+				if err := cmd.Start(); err != nil {
+					log.Error().Err(err).Msg("Failed to start llama-cpp-rpc-server")
+					// Nothing is running, so there is nothing to wait for;
+					// back off before the next attempt.
+					time.Sleep(restartDelay)
+					continue
+				}
+
+				if err := cmd.Wait(); err != nil {
+					log.Error().Err(err).Msg("llama-cpp-rpc-server exited")
+				}
+				time.Sleep(restartDelay)
+			}
+		}()
+
+		if err := p2p.BindLLamaCPPWorker(context.Background(), address, fmt.Sprint(port), r.Token); err != nil {
+			return err
+		}
+	}
+
+	// Keep the command (and with it the goroutines started above) alive.
+	for {
+		time.Sleep(1 * time.Second)
+	}
+}