
Commit ba27d9c

chore: v0.3.4
1 parent: 546aaf4

13 files changed: +65 −20 lines

.scripts/seed.ts (+1 −1)

@@ -6,7 +6,7 @@ import * as toml from 'jsr:@std/toml';
 import * as path from 'jsr:@std/path';
 import * as collections from "jsr:@std/collections/deep-merge";

-const VERSION = "0.3.3";
+const VERSION = "0.3.4";

 type ValueSeed = {
   // Path relative to the project root
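
`.scripts/seed.ts` appears to be the script that propagates values such as `VERSION` into the manifests bumped in this commit. A sketch of invoking it, assuming a Deno runtime (implied by the `jsr:` import specifiers); the `-A` permissions flag is an assumption, not something documented in this diff:

```bash
# jsr: specifiers imply Deno; -A grants the file access a seeding
# script typically needs (exact flags are an assumption)
deno run -A .scripts/seed.ts
```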

app/package.json (+1 −1)

@@ -1,7 +1,7 @@
 {
   "name": "@avcodes/harbor-app",
   "private": true,
-  "version": "0.3.3",
+  "version": "0.3.4",
   "type": "module",
   "scripts": {
     "dev": "vite",

app/src-tauri/Cargo.toml (+1 −1)

@@ -1,7 +1,7 @@

 [package]
 name = "harbor-app"
-version = "0.3.3"
+version = "0.3.4"
 description = "A companion app for Harbor LLM toolkit"
 authors = ["av"]
 edition = "2021"

app/src-tauri/tauri.conf.json (+1 −1)

@@ -1,7 +1,7 @@
 {
   "$schema": "https://schema.tauri.app/config/2.0.0-rc",
   "productName": "Harbor",
-  "version": "0.3.3",
+  "version": "0.3.4",
   "identifier": "com.harbor.app",
   "build": {
     "beforeDevCommand": "bun run dev",

app/src/serviceMetadata.ts (+2 −1)

@@ -477,6 +477,7 @@ export const serviceMetadata: Record<string, Partial<HarborService>> = {
   llamaswap: {
     name: 'llama-swap',
     tags: [HST.satellite, HST.api],
-    wikiUrl: '',
+    wikiUrl: 'https://github.com/av/harbor/wiki/2.3.40-Satellite-llamaswap',
+    tooltip: 'Runs multiple llama.cpp servers on demand for seamless switching between them.',
   }
 };

boost/README.md (+24 −5)

@@ -1,7 +1,7 @@
-> Handle: `boost`
+> Handle: `boost`<br/>
 > URL: <http://localhost:34131/>

-![Screenshot of boost bench](../docs/boost.png) <small>`g1` and `rcn` optimizer modules compared to original LLMs. [BBH256](https://gist.github.com/av/18cc8138a0acbe1b30f51e8bb19add90) task, run with [Harbor Bench](../docs/5.1.-Harbor-Bench)</small>
+![splash](../docs/harbor-boost.png)

 `boost` is a service that acts as an optimizing LLM proxy. It takes your inputs, and pre-processes them before sending them to the downstream API.

@@ -12,14 +12,21 @@ Features that make Harbor's `boost` special:
 * 🎭 `boost` can serve as a plain LLM proxy (multiple downstream APIs behind a single endpoint)
 * ✍️ `boost` is scriptable, you can write your own modules

+![Short overview of boost behavior](../docs/boost-behavior.png)
+
 The main focus, of course are the workflows that can help improve the LLM output in specific scenarios. Here are some examples of what's possible with `boost`:

-* When "random" is mentioned in the message, `klmbr` will rewrite 35% of message characters to increase the entropy and produce more diverse completion
-* Launch self-reflection reasoning chain when the message ends with a question mark
+* Add R1-like reasoning to [any LLM](https://www.reddit.com/r/LocalLLaMA/comments/1ixckba/making_older_llms_llama_2_and_gemma_1_reason/)
+* When "random" is mentioned in the message, [`klmbr`](#klmbr---boost-llm-creativity) will rewrite 35% of message characters to increase the entropy and produce more diverse completion
+* Launch [self-reflection reasoning](#rcn---recursive-certainty-validation) chain when the message ends with a question mark
 * Expand the conversation context with the "inner monologue" of the model, where it can iterate over your question a few times before giving the final answer
 * Apply a specific LLM personality if the message contains a specific keyword
+* Add external memory to your interactions with a specific model
+* Make your LLM [pass a skill check](https://www.reddit.com/r/LocalLLaMA/comments/1jaqylp/llm_must_pass_a_skill_check_to_talk_to_me/) before replying to you
+
+Boost is scriptable, you can provision your own modules with the workflows suitable for your needs. See [Custom Modules](#custom-modules) section for more information.

-Moreover, boost is scriptable, you can provision your own modules with the workflows suitable for your needs. See [Custom Modules](#custom-modules) section for more information.
+![Screenshot of boost bench](../docs/boost.png) <small>`g1` and `rcn` optimizer modules compared to original LLMs. [BBH256](https://gist.github.com/av/18cc8138a0acbe1b30f51e8bb19add90) task, run with [Harbor Bench](../docs/5.1.-Harbor-Bench)</small>

 `boost` operates at the OpenAI-compatible API level, so can be used with any LLM backend that accepts OpenAI API requests. You can also plug `boost` into the UIs that are compatible with OpenAI API.

@@ -43,6 +50,7 @@ Moreover, boost is scriptable, you can provision your own modules with the workf
 * [`supersummer` - Super Summarization](#supersummer---super-summarization)
 * [`r0` - R1-like reasoning chains](#r0---r1-like-reasoning-chains)
 * [`markov` - token completion graph](#markov---token-completion-graph)
+* [`dnd` - skill check](#dnd---skill-check)
 * Custom Modules (not configurable, mostly examples, but can still be enabled)
   * [discussurl](https://github.com/av/harbor/blob/main/boost/src/custom_modules/discussurl.py) - parse mentioned URLs and add them to the context
   * [meow](https://github.com/av/harbor/blob/main/boost/src/custom_modules/meow.py) - the model ignores all previous instructions and just meows

@@ -510,6 +518,17 @@

 There's no configuration for this module yet.

+#### `dnd` - skill check
+
+⚠️ This module is experimental and only compatible with Open WebUI as a client due to its support of custom artifacts.
+
+When serving the completion, LLM will first invent a skill check it must pass to address your message. Then, the workflow will roll a dice determining if the model passes the check or not and will guide the model to respond accordingly.
+
+```bash
+# Enable the module
+harbor boost modules add dnd
+```
+
 ### API

 `boost` works as an OpenAI-compatible API proxy. It'll query configured downstream services for which models they serve and provide "boosted" wrappers in its own API.
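
Since `boost` speaks the OpenAI-compatible API, the behavior described above can be exercised with plain `curl`. A minimal sketch, assuming boost runs on its default port 34131 (from the README header) and exposes the standard `/v1` paths; the `dnd-llama3.1:8b` model ID is hypothetical, as boosted wrapper names depend on your downstream models:

```bash
# List the "boosted" model wrappers that boost exposes
curl http://localhost:34131/v1/models

# Request a completion through a boosted model; the model ID below is
# hypothetical - pick a real one from the /v1/models response
curl http://localhost:34131/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "dnd-llama3.1:8b",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```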

compose.x.llamaswap.cdi.yml (+12)

@@ -0,0 +1,12 @@
+# This file is generated by seed-cdi.ts script,
+# any updates will be overwritten.
+services:
+  llamaswap:
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: cdi
+              capabilities: [gpu]
+              device_ids:
+                - nvidia.com/gpu=all
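
This new override requests GPUs via the Container Device Interface rather than the legacy `nvidia` runtime, so the host must have CDI specs registered for a name like `nvidia.com/gpu=all` to resolve. A sketch for verifying that, assuming the NVIDIA Container Toolkit is installed on the host:

```bash
# List CDI devices registered with the NVIDIA Container Toolkit
nvidia-ctk cdi list

# If the list is empty, (re)generate the CDI spec on the host
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
```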

compose.x.traefik.llamaswap.yml (+13)

@@ -0,0 +1,13 @@
+# This file is generated by seed-traefik.ts script,
+# any updates will be overwritten.
+services:
+  llamaswap:
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.llamaswap.rule=Host(`llamaswap.${HARBOR_TRAEFIK_DOMAIN}`)"
+      - "traefik.http.services.llamaswap.loadbalancer.server.port=${HARBOR_LLAMASWAP_HOST_PORT}"
+      - "traefik.http.routers.llamaswap.entrypoints=web"
+      - "traefik.http.routers.llamaswap.service=llamaswap"
+
+    networks:
+      - traefik-public
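
These labels route any request whose Host header matches `llamaswap.<HARBOR_TRAEFIK_DOMAIN>` through Traefik's `web` entrypoint to the port named in `HARBOR_LLAMASWAP_HOST_PORT`. A sketch for testing the rule without DNS, assuming the hypothetical domain `harbor.local` and the common default of the `web` entrypoint listening on port 80; the `/v1/models` path assumes llama-swap's OpenAI-compatible surface:

```bash
# Override the Host header so the llamaswap router rule matches;
# harbor.local is a placeholder for your HARBOR_TRAEFIK_DOMAIN
curl -H "Host: llamaswap.harbor.local" http://localhost:80/v1/models
```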

docs/2.-Services.md (+5 −5)

@@ -47,15 +47,12 @@ Visual programming for AI language models
 - [Open WebUI](https://github.com/av/harbor/wiki/2.1.1-Frontend:-Open-WebUI) <span style="opacity: 0.5;">`Frontend`</span><br/>
 widely adopted and feature rich web interface for interacting with LLMs. Supports OpenAI-compatible and Ollama backends, multi-users, multi-model chats, custom prompts, TTS, Web RAG, RAG, and much much more.

-- [oterm](https://github.com/av/harbor/wiki/2.1.12-Frontend-oterm) <span style="opacity: 0.5;">`CLI`, `Frontend`</span><br/>
+- [oterm](https://github.com/av/harbor/wiki/2.1.12-Frontend-oterm) <span style="opacity: 0.5;">`Frontend`, `CLI`</span><br/>
 The text-based terminal client for Ollama.

 - [Parllama](https://github.com/av/harbor/wiki/2.1.7-Frontend:-parllama) <span style="opacity: 0.5;">`Frontend`</span><br/>
 TUI for Ollama

-- [RAGLite](https://github.com/av/harbor/wiki/2.3.39-Satellite-RAGLite) <span style="opacity: 0.5;">`Satellite`, `Frontend`</span><br/>
-Python toolkit for Retrieval-Augmented Generation (RAG)
-
 # Backends

 This section covers services that provide the LLM inference capabilities.

@@ -172,6 +169,9 @@ LLM proxy that can aggregate multiple inference APIs together into a single endp
 - [LitLytics](https://github.com/av/harbor/wiki/2.3.21-Satellite:-LitLytics) <span style="opacity: 0.5;">`Satellite`, `Partial Support`, `Workflows`</span><br/>
 Simple analytics platform that leverages LLMs to automate data analysis.

+- [llama-swap](https://github.com/av/harbor/wiki/2.3.40-Satellite-llamaswap) <span style="opacity: 0.5;">`Satellite`, `API`</span><br/>
+Runs multiple llama.cpp servers on demand for seamless switching between them.
+
 - [lm-evaluation-harness](https://github.com/av/harbor/wiki/2.3.17-Satellite:-lm-evaluation-harness) <span style="opacity: 0.5;">`Satellite`, `CLI`, `Eval`</span><br/>
 A de-facto standard framework for the few-shot evaluation of language models.

@@ -208,7 +208,7 @@ Test your prompts, agents, and RAGs. A developer-friendly local tool for testing
 - [Qdrant](https://github.com/av/harbor/wiki/2.3.26-Satellite:-Qdrant) <span style="opacity: 0.5;">`Satellite`</span><br/>
 Qdrant - High-performance, massive-scale Vector Database and Vector Search Engine.

-- [RAGLite](https://github.com/av/harbor/wiki/2.3.39-Satellite-RAGLite) <span style="opacity: 0.5;">`Satellite`, `Frontend`</span><br/>
+- [RAGLite](https://github.com/av/harbor/wiki/2.3.39-Satellite-RAGLite) <span style="opacity: 0.5;">`Satellite`</span><br/>
 Python toolkit for Retrieval-Augmented Generation (RAG)

 - [Repopack](https://github.com/av/harbor/wiki/2.3.22-Satellite:-Repopack) <span style="opacity: 0.5;">`Satellite`, `CLI`</span><br/>

docs/2.3.40-Satellite-llamaswap.md (+1 −1)

@@ -3,7 +3,7 @@
 > Handle: `llamaswap`<br/>
 > URL: [http://localhost:34401](http://localhost:34401)

-llama-swap is a light weight, transparent proxy server that provides automatic model swapping to llama.cpp's server.
+llama-swap is a lightweight, transparent proxy server that provides automatic model swapping to llama.cpp's server.

 ### Starting

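Per the description above, llama-swap presents a single OpenAI-compatible endpoint and starts or swaps the underlying llama.cpp server on demand. A sketch of trying it out, assuming Harbor's usual `harbor up <service>` convention and the port 34401 from the doc header:

```bash
# Start the llamaswap service (assumes the usual `harbor up` convention)
harbor up llamaswap

# List available models; requesting a completion for one of them makes
# llama-swap launch (or swap to) the matching llama.cpp server
curl http://localhost:34401/v1/models
```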

harbor.sh (+1 −1)

@@ -4162,7 +4162,7 @@ run_gptme_command() {
 # ========================================================================

 # Globals
-version="0.3.3"
+version="0.3.4"
 harbor_repo_url="https://github.com/av/harbor.git"
 harbor_release_url="https://api.github.com/repos/av/harbor/releases/latest"
 delimiter="|"

package.json (+1 −1)

@@ -1,6 +1,6 @@
 {
   "name": "@avcodes/harbor",
-  "version": "0.3.3",
+  "version": "0.3.4",
   "description": "Effortlessly run LLM backends, APIs, frontends, and services with one command.",
   "private": false,
   "author": "av <[email protected]> (https://av.codes)",

pyproject.toml (+2 −2)

(Large diffs are not rendered by default.)
