Commit cb104a2: Merge branch 'ollama:main' into main
2 parents: 5d967d5 + 27da2cd

219 files changed (+11,334 / -9,675 lines)

.github/workflows/release.yaml  (22 additions)

@@ -103,6 +103,11 @@ jobs:
         arch: [amd64]
         preset: ['CPU']
         include:
+          - os: windows
+            arch: amd64
+            preset: 'CUDA 11'
+            install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
+            cuda-version: '11.3'
           - os: windows
             arch: amd64
             preset: 'CUDA 12'

@@ -319,6 +324,7 @@ jobs:
           case "$COMPONENT" in
             bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+            lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
             lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;

@@ -426,6 +432,22 @@ jobs:
          docker buildx imagetools inspect ollama/ollama:${{ steps.metadata.outputs.version }}
        working-directory: ${{ runner.temp }}

+  # Trigger downstream release process
+  trigger:
+    runs-on: ubuntu-latest
+    environment: release
+    needs: [darwin-build, windows-build, windows-depends]
+    steps:
+      - name: Trigger downstream release process
+        run: |
+          curl -L \
+            -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${{ secrets.RELEASE_TOKEN }}" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            https://api.github.com/repos/ollama/${{ vars.RELEASE_REPO }}/dispatches \
+            -d "{\"event_type\": \"trigger-workflow\", \"client_payload\": {\"run_id\": \"${GITHUB_RUN_ID}\", \"version\": \"${GITHUB_REF_NAME#v}\"}}"
+
   # Aggregate all the assets and ship a release
   release:
     needs: [darwin-sign, windows-sign, linux-build]
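
The new `trigger` job is a plain `repository_dispatch` call against the GitHub API; the downstream repo (read from `vars.RELEASE_REPO`) can listen for the `trigger-workflow` event type and pick the run id and version out of the client payload. Purely as a rough illustration, the same request could be made from Go as below; the `TOKEN` env var and the literal `RELEASE_REPO` path segment are placeholders, since the workflow itself uses `secrets.RELEASE_TOKEN` and `vars.RELEASE_REPO`.

```go
// Sketch of the repository_dispatch request issued by the new "trigger" job.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

func main() {
	// Same payload shape as the curl call in the workflow.
	body, _ := json.Marshal(map[string]any{
		"event_type": "trigger-workflow",
		"client_payload": map[string]string{
			"run_id":  os.Getenv("GITHUB_RUN_ID"),
			"version": os.Getenv("VERSION"), // GITHUB_REF_NAME with the leading "v" stripped
		},
	})

	// "RELEASE_REPO" is a placeholder for the repo name held in vars.RELEASE_REPO.
	req, err := http.NewRequest(http.MethodPost,
		"https://api.github.com/repos/ollama/RELEASE_REPO/dispatches", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Accept", "application/vnd.github+json")
	req.Header.Set("Authorization", "Bearer "+os.Getenv("TOKEN"))
	req.Header.Set("X-GitHub-Api-Version", "2022-11-28")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // the dispatches endpoint returns 204 No Content on success
}
```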

.github/workflows/test.yaml  (3 additions, 3 deletions)

@@ -46,7 +46,7 @@ jobs:
         include:
           - preset: CPU
           - preset: CUDA
-            container: nvidia/cuda:12.8.1-devel-ubuntu22.04
+            container: nvidia/cuda:11.8.0-devel-ubuntu22.04
             flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
           - preset: ROCm
             container: rocm/dev-ubuntu-22.04:6.1.2

@@ -78,7 +78,7 @@ jobs:
         include:
           - preset: CPU
           - preset: CUDA
-            install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
+            install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
             flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
           - preset: ROCm
             install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe

@@ -102,7 +102,7 @@ jobs:
           $ErrorActionPreference = "Stop"
           if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
             Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
-            Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait
+            Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait
           }

           $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path

.golangci.yaml  (1 addition, 1 deletion)

@@ -19,8 +19,8 @@ linters:
     - nolintlint
     - nosprintfhostport
     - staticcheck
-    - tenv
     - unconvert
+    - usetesting
     - wastedassign
     - whitespace
  disable:
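
`tenv` is deprecated in recent golangci-lint releases in favor of `usetesting`, which covers the same environment-variable checks and additionally flags `context.Background()`/`context.TODO()` inside tests; that is what motivates the `t.Context()`/`b.Context()` changes further down in this commit. A rough before/after sketch of the rewrites the linter pushes toward, using only standard `testing` helpers:

```go
package example

import (
	"os"
	"testing"
)

// Patterns usetesting flags: process-wide state that leaks across tests.
func TestBefore(t *testing.T) {
	os.Setenv("OLLAMA_DEBUG", "1")       // not restored when the test finishes
	dir, _ := os.MkdirTemp("", "models") // caller must remember to clean up
	defer os.RemoveAll(dir)
}

// Preferred: the testing package scopes both to this test automatically.
func TestAfter(t *testing.T) {
	t.Setenv("OLLAMA_DEBUG", "1") // restored when the test ends
	dir := t.TempDir()            // removed when the test ends
	_ = dir
}
```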

CMakePresets.json  (13 additions)

@@ -17,6 +17,14 @@
       "name": "CUDA",
       "inherits": [ "Default" ]
     },
+    {
+      "name": "CUDA 11",
+      "inherits": [ "CUDA" ],
+      "cacheVariables": {
+        "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86",
+        "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets"
+      }
+    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],

@@ -70,6 +78,11 @@
       "configurePreset": "CUDA",
       "targets": [ "ggml-cuda" ]
     },
+    {
+      "name": "CUDA 11",
+      "inherits": [ "CUDA" ],
+      "configurePreset": "CUDA 11"
+    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],

Dockerfile  (16 additions, 1 deletion)

@@ -7,10 +7,14 @@ ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2

+# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
 RUN yum install -y yum-utils \
-    && dnf install -y ccache \
+    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
+    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
+    && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
+ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH

 FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache

@@ -34,6 +38,15 @@ RUN --mount=type=cache,target=/root/.ccache \
     && cmake --build --parallel --preset 'CPU' \
     && cmake --install build --component CPU --strip --parallel 8

+FROM base AS cuda-11
+ARG CUDA11VERSION=11.3
+RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
+ENV PATH=/usr/local/cuda-11/bin:$PATH
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'CUDA 11' \
+    && cmake --build --parallel --preset 'CUDA 11' \
+    && cmake --install build --component CUDA --strip --parallel 8
+
 FROM base AS cuda-12
 ARG CUDA12VERSION=12.8
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}

@@ -85,9 +98,11 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
     go build -trimpath -buildmode=pie -o /bin/ollama .

 FROM --platform=linux/amd64 scratch AS amd64
+COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12

 FROM --platform=linux/arm64 scratch AS arm64
+COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
 COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5
 COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6

Makefile.sync  (11 additions, 8 deletions)

@@ -1,6 +1,6 @@
 UPSTREAM=https://github.com/ggerganov/llama.cpp.git
 WORKDIR=llama/vendor
-FETCH_HEAD=e1e8e0991ffd9e99a445c6812bb519d5bac9f4b5
+FETCH_HEAD=de4c07f93783a1a96456a44dc16b9db538ee1618

 .PHONY: help
 help:

@@ -15,11 +15,13 @@ help:
 	@echo "  make -f $(lastword $(MAKEFILE_LIST)) clean sync"

 .PHONY: sync
-sync: llama/build-info.cpp llama/llama.cpp ml/backend/ggml/ggml
+sync: llama/build-info.cpp ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal

-.PHONY: llama/build-info.cpp
-llama/build-info.cpp: llama/build-info.cpp.in
-	sed -e 's|@FETCH_HEAD@|$(FETCH_HEAD)|' $< > $@
+llama/build-info.cpp: llama/build-info.cpp.in llama/llama.cpp
+	sed -e 's|@FETCH_HEAD@|$(FETCH_HEAD)|' <$< >$@
+
+ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal: ml/backend/ggml/ggml
+	go generate ./$(@D)

 .PHONY: llama/llama.cpp
 llama/llama.cpp: llama/vendor/

@@ -30,12 +32,13 @@ ml/backend/ggml/ggml: llama/vendor/ggml/
 	rsync -arvzc -f "merge $@/.rsync-filter" $< $@

 PATCHES=$(wildcard llama/patches/*.patch)
+PATCHED=$(join $(dir $(PATCHES)), $(addsuffix ed, $(addprefix ., $(notdir $(PATCHES)))))

 .PHONY: apply-patches
 .NOTPARALLEL:
-apply-patches: $(addsuffix ed, $(PATCHES))
+apply-patches: $(PATCHED)

-%.patched: %.patch
+llama/patches/.%.patched: llama/patches/%.patch
 	@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi

 .PHONY: checkout

@@ -57,4 +60,4 @@ format-patches: llama/patches

 .PHONE: clean
 clean: checkout
-	$(RM) $(addsuffix ed, $(PATCHES))
+	$(RM) llama/patches/.*.patched
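
The stamp files that mark applied patches move from `llama/patches/<name>.patched` to hidden `llama/patches/.<name>.patched` files; that is all the new `PATCHED` expression computes with `$(dir ...)`, `$(notdir ...)`, `$(addprefix ...)`, `$(addsuffix ...)` and `$(join ...)`. Purely to illustrate that mapping (the patch name below is made up), the equivalent transformation in Go:

```go
package main

import (
	"fmt"
	"path/filepath"
)

// stampPath mirrors the Makefile expression
// $(join $(dir $(PATCHES)), $(addsuffix ed, $(addprefix ., $(notdir $(PATCHES))))):
// keep the directory, dot-prefix the file name, and append "ed" to ".patch".
func stampPath(patch string) string {
	dir, name := filepath.Split(patch)
	return filepath.Join(dir, "."+name+"ed")
}

func main() {
	fmt.Println(stampPath("llama/patches/0001-example.patch"))
	// Output: llama/patches/.0001-example.patched
}
```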

README.md  (3 additions)

@@ -337,6 +337,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
 - [IntelliBar](https://intellibar.app/) (AI-powered assistant for macOS)
 - [Jirapt](https://github.com/AliAhmedNada/jirapt) (Jira Integration to generate issues, tasks, epics)
+- [ojira](https://github.com/AliAhmedNada/ojira) (Jira chrome plugin to easily generate descriptions for tasks)
 - [QA-Pilot](https://github.com/reid41/QA-Pilot) (Interactive chat tool that can leverage Ollama models for rapid understanding and navigation of GitHub code repositories)
 - [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
 - [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)

@@ -548,6 +549,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Abso](https://github.com/lunary-ai/abso) (OpenAI-compatible TypeScript SDK for any LLM provider)
 - [Nichey](https://github.com/goodreasonai/nichey) is a Python package for generating custom wikis for your research topic
 - [Ollama for D](https://github.com/kassane/ollama-d)
+- [OllamaPlusPlus](https://github.com/HardCodeDev777/OllamaPlusPlus) (Very simple C++ library for Ollama)

 ### Mobile

@@ -604,6 +606,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
 - [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (telegram bot, primary for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration e.t.c)
 - [mcp-llm](https://github.com/sammcj/mcp-llm) (MCP Server to allow LLMs to call other LLMs)
+- [UnityCodeLama](https://github.com/HardCodeDev777/UnityCodeLama) (Unity Edtior tool to analyze scripts via Ollama)

 ### Supported backends

api/client_test.go  (2 additions, 3 deletions)

@@ -1,7 +1,6 @@
 package api

 import (
-	"context"
 	"encoding/json"
 	"fmt"
 	"net/http"

@@ -137,7 +136,7 @@ func TestClientStream(t *testing.T) {
 	client := NewClient(&url.URL{Scheme: "http", Host: ts.Listener.Addr().String()}, http.DefaultClient)

 	var receivedChunks []ChatResponse
-	err := client.stream(context.Background(), http.MethodPost, "/v1/chat", nil, func(chunk []byte) error {
+	err := client.stream(t.Context(), http.MethodPost, "/v1/chat", nil, func(chunk []byte) error {
 		var resp ChatResponse
 		if err := json.Unmarshal(chunk, &resp); err != nil {
 			return fmt.Errorf("failed to unmarshal chunk: %w", err)

@@ -223,7 +222,7 @@ func TestClientDo(t *testing.T) {
 			ID      string `json:"id"`
 			Success bool   `json:"success"`
 		}
-		err := client.do(context.Background(), http.MethodPost, "/v1/messages", nil, &resp)
+		err := client.do(t.Context(), http.MethodPost, "/v1/messages", nil, &resp)

 		if tc.wantErr != "" {
 			if err == nil {
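
`t.Context()` and `b.Context()` were added to the `testing` package in Go 1.24: they return a context scoped to the test or benchmark that is canceled just before its Cleanup functions run, so there is no need to build one from `context.Background()`. A minimal sketch of that lifetime, independent of the client code above:

```go
package example

import "testing"

func TestContextLifetime(t *testing.T) {
	ctx := t.Context() // live for the duration of the test body

	t.Cleanup(func() {
		// By the time cleanups run, the test context has already been canceled.
		if ctx.Err() == nil {
			t.Error("expected the test context to be canceled before cleanup")
		}
	})

	select {
	case <-ctx.Done():
		t.Fatal("context should not be canceled while the test is still running")
	default:
		// use ctx for requests scoped to this test, e.g. client.do(ctx, ...)
	}
}
```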

api/types.go  (13 deletions)

@@ -271,9 +271,6 @@ type Options struct {
 	RepeatPenalty    float32  `json:"repeat_penalty,omitempty"`
 	PresencePenalty  float32  `json:"presence_penalty,omitempty"`
 	FrequencyPenalty float32  `json:"frequency_penalty,omitempty"`
-	Mirostat         int      `json:"mirostat,omitempty"`
-	MirostatTau      float32  `json:"mirostat_tau,omitempty"`
-	MirostatEta      float32  `json:"mirostat_eta,omitempty"`
 	Stop             []string `json:"stop,omitempty"`
 }

@@ -466,13 +463,6 @@ type ProcessModelResponse struct {
 	SizeVRAM int64 `json:"size_vram"`
 }

-type RetrieveModelResponse struct {
-	Id      string `json:"id"`
-	Object  string `json:"object"`
-	Created int64  `json:"created"`
-	OwnedBy string `json:"owned_by"`
-}
-
 type TokenResponse struct {
 	Token string `json:"token"`
 }

@@ -655,9 +645,6 @@ func DefaultOptions() Options {
 		RepeatPenalty:    1.1,
 		PresencePenalty:  0.0,
 		FrequencyPenalty: 0.0,
-		Mirostat:         0,
-		MirostatTau:      5.0,
-		MirostatEta:      0.1,
 		Seed:             -1,

 		Runner: Runner{

app/lifecycle/logging.go  (2 additions, 20 deletions)

@@ -4,20 +4,14 @@ import (
 	"fmt"
 	"log/slog"
 	"os"
-	"path/filepath"
 	"strconv"
 	"strings"

 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/logutil"
 )

 func InitLogging() {
-	level := slog.LevelInfo
-
-	if envconfig.Debug() {
-		level = slog.LevelDebug
-	}
-
 	var logFile *os.File
 	var err error
 	// Detect if we're a GUI app on windows, and if not, send logs to console

@@ -33,20 +27,8 @@
 			return
 		}
 	}
-	handler := slog.NewTextHandler(logFile, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-			return attr
-		},
-	})
-
-	slog.SetDefault(slog.New(handler))

+	slog.SetDefault(logutil.NewLogger(logFile, envconfig.LogLevel()))
 	slog.Info("ollama app started")
 }
benchmark/server_benchmark_test.go  (3 additions, 3 deletions)

@@ -78,7 +78,7 @@ func BenchmarkColdStart(b *testing.B) {

 	for _, tt := range tests {
 		b.Run(fmt.Sprintf("%s/cold/%s", m, tt.name), func(b *testing.B) {
-			ctx := context.Background()
+			ctx := b.Context()

 			// Set number of tokens as our throughput metric
 			b.SetBytes(int64(tt.maxTokens))

@@ -113,7 +113,7 @@ func BenchmarkWarmStart(b *testing.B) {

 	for _, tt := range tests {
 		b.Run(fmt.Sprintf("%s/warm/%s", m, tt.name), func(b *testing.B) {
-			ctx := context.Background()
+			ctx := b.Context()

 			// Pre-warm the model
 			warmup(client, m, tt.prompt, b)

@@ -140,7 +140,7 @@ func setup(b *testing.B) *api.Client {
 	if err != nil {
 		b.Fatal(err)
 	}
-	if _, err := client.Show(context.Background(), &api.ShowRequest{Model: modelName(b)}); err != nil {
+	if _, err := client.Show(b.Context(), &api.ShowRequest{Model: modelName(b)}); err != nil {
 		b.Fatalf("Model unavailable: %v", err)
 	}
