
Commit 5d967d5

Merge branch 'ollama:main' into main
2 parents 5478571 + af31cce

File tree

137 files changed: +5880 -2929 lines


.github/workflows/release.yaml (-6)

@@ -103,11 +103,6 @@ jobs:
       arch: [amd64]
       preset: ['CPU']
       include:
-        - os: windows
-          arch: amd64
-          preset: 'CUDA 11'
-          install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
-          cuda-version: '11.3'
         - os: windows
           arch: amd64
           preset: 'CUDA 12'
@@ -324,7 +319,6 @@ jobs:
       case "$COMPONENT" in
         bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
         lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-        lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
         lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
         lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
         lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;

.github/workflows/test.yaml (+3 -3)

@@ -46,7 +46,7 @@ jobs:
       include:
         - preset: CPU
         - preset: CUDA
-          container: nvidia/cuda:11.8.0-devel-ubuntu22.04
+          container: nvidia/cuda:12.8.1-devel-ubuntu22.04
           flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
         - preset: ROCm
           container: rocm/dev-ubuntu-22.04:6.1.2
@@ -78,7 +78,7 @@ jobs:
       include:
         - preset: CPU
         - preset: CUDA
-          install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
+          install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
           flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
         - preset: ROCm
           install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
@@ -102,7 +102,7 @@ jobs:
         $ErrorActionPreference = "Stop"
         if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
           Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
-          Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait
+          Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait
         }

         $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path

CMakePresets.json (-13)

@@ -17,14 +17,6 @@
       "name": "CUDA",
       "inherits": [ "Default" ]
     },
-    {
-      "name": "CUDA 11",
-      "inherits": [ "CUDA" ],
-      "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86",
-        "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets"
-      }
-    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],
@@ -78,11 +70,6 @@
       "configurePreset": "CUDA",
       "targets": [ "ggml-cuda" ]
     },
-    {
-      "name": "CUDA 11",
-      "inherits": [ "CUDA" ],
-      "configurePreset": "CUDA 11"
-    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],

Dockerfile (+1 -16)

@@ -7,14 +7,10 @@ ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2

-# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
 RUN yum install -y yum-utils \
-    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
-    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
-    && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
+    && dnf install -y ccache \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
-ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH

 FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache
@@ -38,15 +34,6 @@ RUN --mount=type=cache,target=/root/.ccache \
     && cmake --build --parallel --preset 'CPU' \
     && cmake --install build --component CPU --strip --parallel 8

-FROM base AS cuda-11
-ARG CUDA11VERSION=11.3
-RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
-ENV PATH=/usr/local/cuda-11/bin:$PATH
-RUN --mount=type=cache,target=/root/.ccache \
-    cmake --preset 'CUDA 11' \
-    && cmake --build --parallel --preset 'CUDA 11' \
-    && cmake --install build --component CUDA --strip --parallel 8
-
 FROM base AS cuda-12
 ARG CUDA12VERSION=12.8
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
@@ -98,11 +85,9 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
     go build -trimpath -buildmode=pie -o /bin/ollama .

 FROM --platform=linux/amd64 scratch AS amd64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12

 FROM --platform=linux/arm64 scratch AS arm64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
 COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5
 COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6

Makefile.sync (+1 -1)

@@ -1,6 +1,6 @@
 UPSTREAM=https://github.com/ggerganov/llama.cpp.git
 WORKDIR=llama/vendor
-FETCH_HEAD=2016f07bd106c73699ecbaace80f55db5ed95dac
+FETCH_HEAD=e1e8e0991ffd9e99a445c6812bb519d5bac9f4b5

 .PHONY: help
 help:

README.md (+6 -2)

@@ -83,6 +83,8 @@ Here are some example models that can be downloaded:
 | QwQ | 32B | 20GB | `ollama run qwq` |
 | DeepSeek-R1 | 7B | 4.7GB | `ollama run deepseek-r1` |
 | DeepSeek-R1 | 671B | 404GB | `ollama run deepseek-r1:671b` |
+| Llama 4 | 109B | 67GB | `ollama run llama4:scout` |
+| Llama 4 | 400B | 245GB | `ollama run llama4:maverick` |
 | Llama 3.3 | 70B | 43GB | `ollama run llama3.3` |
 | Llama 3.2 | 3B | 2.0GB | `ollama run llama3.2` |
 | Llama 3.2 | 1B | 1.3GB | `ollama run llama3.2:1b` |
@@ -99,7 +101,7 @@ Here are some example models that can be downloaded:
 | Code Llama | 7B | 3.8GB | `ollama run codellama` |
 | Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
 | LLaVA | 7B | 4.5GB | `ollama run llava` |
-| Granite-3.2 | 8B | 4.9GB | `ollama run granite3.2` |
+| Granite-3.3 | 8B | 4.9GB | `ollama run granite3.3` |

 > [!NOTE]
 > You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -334,6 +336,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
 - [IntelliBar](https://intellibar.app/) (AI-powered assistant for macOS)
+- [Jirapt](https://github.com/AliAhmedNada/jirapt) (Jira Integration to generate issues, tasks, epics)
 - [QA-Pilot](https://github.com/reid41/QA-Pilot) (Interactive chat tool that can leverage Ollama models for rapid understanding and navigation of GitHub code repositories)
 - [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
 - [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
@@ -416,6 +419,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [1Panel](https://github.com/1Panel-dev/1Panel/) (Web-based Linux Server Management Tool)
 - [AstrBot](https://github.com/Soulter/AstrBot/) (User-friendly LLM-based multi-platform chatbot with a WebUI, supporting RAG, LLM agents, and plugins integration)
 - [Reins](https://github.com/ibrahimcetin/reins) (Easily tweak parameters, customize system prompts per chat, and enhance your AI experiments with reasoning model support.)
+- [Flufy](https://github.com/Aharon-Bensadoun/Flufy) (A beautiful chat interface for interacting with Ollama's API. Built with React, TypeScript, and Material-UI.)
 - [Ellama](https://github.com/zeozeozeo/ellama) (Friendly native app to chat with an Ollama instance)
 - [screenpipe](https://github.com/mediar-ai/screenpipe) Build agents powered by your screen history
 - [Ollamb](https://github.com/hengkysteen/ollamb) (Simple yet rich in features, cross-platform built with Flutter and designed for Ollama. Try the [web demo](https://hengkysteen.github.io/demo/ollamb/).)
@@ -491,7 +495,7 @@ See the [API documentation](./docs/api.md) for all endpoints.

 ### Libraries

-- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/integrations/chat/ollama/) with [example](https://js.langchain.com/docs/tutorials/local_rag/)
+- [LangChain](https://python.langchain.com/docs/integrations/chat/ollama/) and [LangChain.js](https://js.langchain.com/docs/integrations/chat/ollama/) with [example](https://js.langchain.com/docs/tutorials/local_rag/)
 - [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
 - [crewAI](https://github.com/crewAIInc/crewAI)
 - [Yacana](https://remembersoftwares.github.io/yacana/) (User-friendly multi-agent framework for brainstorming and executing predetermined flows with built-in tool integration)

api/types.go (-7)

@@ -283,12 +283,7 @@ type Runner struct {
     NumBatch int `json:"num_batch,omitempty"`
     NumGPU int `json:"num_gpu,omitempty"`
     MainGPU int `json:"main_gpu,omitempty"`
-    LowVRAM bool `json:"low_vram,omitempty"`
-    F16KV bool `json:"f16_kv,omitempty"` // Deprecated: This option is ignored
-    LogitsAll bool `json:"logits_all,omitempty"`
-    VocabOnly bool `json:"vocab_only,omitempty"`
     UseMMap *bool `json:"use_mmap,omitempty"`
-    UseMLock bool `json:"use_mlock,omitempty"`
     NumThread int `json:"num_thread,omitempty"`
 }

@@ -671,8 +666,6 @@ func DefaultOptions() Options {
         NumBatch: 512,
         NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
         NumThread: 0, // let the runtime decide
-        LowVRAM: false,
-        UseMLock: false,
         UseMMap: nil,
     },
 }

cmd/cmd.go (+37 -15)

@@ -31,6 +31,7 @@ import (
     "github.com/olekukonko/tablewriter"
     "github.com/spf13/cobra"
     "golang.org/x/crypto/ssh"
+    "golang.org/x/sync/errgroup"
     "golang.org/x/term"

     "github.com/ollama/ollama/api"
@@ -41,6 +42,7 @@ import (
     "github.com/ollama/ollama/runner"
     "github.com/ollama/ollama/server"
     "github.com/ollama/ollama/types/model"
+    "github.com/ollama/ollama/types/syncmap"
     "github.com/ollama/ollama/version"
 )

@@ -106,7 +108,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
     }
     spinner.Stop()

-    req.Name = args[0]
+    req.Model = args[0]
     quantize, _ := cmd.Flags().GetString("quantize")
     if quantize != "" {
         req.Quantize = quantize
@@ -117,34 +119,54 @@
         return err
     }

-    if len(req.Files) > 0 {
-        fileMap := map[string]string{}
-        for f, digest := range req.Files {
+    var g errgroup.Group
+    g.SetLimit(max(runtime.GOMAXPROCS(0)-1, 1))
+
+    files := syncmap.NewSyncMap[string, string]()
+    for f, digest := range req.Files {
+        g.Go(func() error {
             if _, err := createBlob(cmd, client, f, digest, p); err != nil {
                 return err
             }
-            fileMap[filepath.Base(f)] = digest
-        }
-        req.Files = fileMap
+
+            // TODO: this is incorrect since the file might be in a subdirectory
+            // instead this should take the path relative to the model directory
+            // but the current implementation does not allow this
+            files.Store(filepath.Base(f), digest)
+            return nil
+        })
     }

-    if len(req.Adapters) > 0 {
-        fileMap := map[string]string{}
-        for f, digest := range req.Adapters {
+    adapters := syncmap.NewSyncMap[string, string]()
+    for f, digest := range req.Adapters {
+        g.Go(func() error {
             if _, err := createBlob(cmd, client, f, digest, p); err != nil {
                 return err
             }
-            fileMap[filepath.Base(f)] = digest
-        }
-        req.Adapters = fileMap
+
+            // TODO: same here
+            adapters.Store(filepath.Base(f), digest)
+            return nil
+        })
     }

+    if err := g.Wait(); err != nil {
+        return err
+    }
+
+    req.Files = files.Items()
+    req.Adapters = adapters.Items()
+
     bars := make(map[string]*progress.Bar)
     fn := func(resp api.ProgressResponse) error {
         if resp.Digest != "" {
             bar, ok := bars[resp.Digest]
             if !ok {
-                bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
+                msg := resp.Status
+                if msg == "" {
+                    msg = fmt.Sprintf("pulling %s...", resp.Digest[7:19])
+                }
+                bar = progress.NewBar(msg, resp.Total, resp.Completed)
                 bars[resp.Digest] = bar
                 p.Add(resp.Digest, bar)
             }
@@ -213,7 +235,7 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, digest stri
         }
     }()

-    if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
+    if err := client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
        return "", err
     }
     return digest, nil
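
Context for the cmd/cmd.go hunk above: blob creation now runs concurrently instead of in a sequential loop. An errgroup caps the number of in-flight createBlob calls at GOMAXPROCS-1 (always at least one worker), each worker records its digest in a concurrency-safe map, and the results are copied back into req.Files and req.Adapters only after g.Wait() returns without error. The sketch below shows the same pattern in isolation; the syncMap type and the upload function are stand-ins for ollama's internal types/syncmap package and for createBlob, not the actual implementations.

package main

import (
	"fmt"
	"path/filepath"
	"runtime"
	"sync"

	"golang.org/x/sync/errgroup"
)

// syncMap is a stand-in for ollama's internal types/syncmap helper:
// a plain map guarded by a mutex so concurrent workers can Store safely.
type syncMap[K comparable, V any] struct {
	mu sync.Mutex
	m  map[K]V
}

func newSyncMap[K comparable, V any]() *syncMap[K, V] {
	return &syncMap[K, V]{m: make(map[K]V)}
}

func (s *syncMap[K, V]) Store(k K, v V) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.m[k] = v
}

func (s *syncMap[K, V]) Items() map[K]V {
	s.mu.Lock()
	defer s.mu.Unlock()
	out := make(map[K]V, len(s.m))
	for k, v := range s.m {
		out[k] = v
	}
	return out
}

// upload is a hypothetical stand-in for createBlob: push one file by digest.
func upload(path, digest string) error {
	fmt.Println("uploading", path, "as", digest)
	return nil
}

func main() {
	reqFiles := map[string]string{
		"a.safetensors": "sha256:1111",
		"b.safetensors": "sha256:2222",
	}

	var g errgroup.Group
	// Bound concurrency: leave one CPU free, but always allow one worker.
	g.SetLimit(max(runtime.GOMAXPROCS(0)-1, 1))

	files := newSyncMap[string, string]()
	for f, digest := range reqFiles {
		g.Go(func() error {
			if err := upload(f, digest); err != nil {
				return err
			}
			files.Store(filepath.Base(f), digest)
			return nil
		})
	}

	// Only replace the request map once every upload has finished.
	if err := g.Wait(); err != nil {
		panic(err)
	}
	reqFiles = files.Items()
	fmt.Println(reqFiles)
}

The design keeps error handling simple: the first failing upload is surfaced by g.Wait(), and the request maps are only replaced once every transfer has completed successfully.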

cmd/cmd_test.go (+1 -1)

@@ -690,7 +690,7 @@ func TestCreateHandler(t *testing.T) {
             return
         }

-        if req.Name != "test-model" {
+        if req.Model != "test-model" {
             t.Errorf("expected model name 'test-model', got %s", req.Name)
         }

convert/convert.go (+9 -9)

@@ -4,9 +4,9 @@ import (
     "encoding/json"
     "errors"
     "fmt"
-    "io"
     "io/fs"
     "log/slog"
+    "os"
     "slices"
     "strings"

@@ -89,7 +89,7 @@ type ModelConverter interface {
     // KV maps parameters to LLM key-values
     KV(*Tokenizer) ggml.KV
     // Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
-    Tensors([]Tensor) []ggml.Tensor
+    Tensors([]Tensor) []*ggml.Tensor
     // Replacements returns a list of string pairs to replace in tensor names.
     // See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
     Replacements() []string
@@ -106,13 +106,13 @@ type AdapterConverter interface {
     // KV maps parameters to LLM key-values
     KV(ggml.KV) ggml.KV
     // Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
-    Tensors([]Tensor) []ggml.Tensor
+    Tensors([]Tensor) []*ggml.Tensor
     // Replacements returns a list of string pairs to replace in tensor names.
     // See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
     Replacements() []string
 }

-func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
+func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error {
     bts, err := fs.ReadFile(fsys, "adapter_config.json")
     if err != nil {
         return err
@@ -147,14 +147,14 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
         return err
     }

-    return writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
+    return writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
 }

 // Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
 // and files it finds in the input path.
 // Supported input model formats include safetensors.
 // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
-func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
+func ConvertModel(fsys fs.FS, f *os.File) error {
     bts, err := fs.ReadFile(fsys, "config.json")
     if err != nil {
         return err
@@ -239,13 +239,13 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
         return err
     }

-    return writeFile(ws, conv.KV(t), conv.Tensors(ts))
+    return writeFile(f, conv.KV(t), conv.Tensors(ts))
 }

-func writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
+func writeFile(f *os.File, kv ggml.KV, ts []*ggml.Tensor) error {
     for i := range ts {
         ts[i].Shape = slices.Clone(ts[i].Shape)
         slices.Reverse(ts[i].Shape)
     }
-    return ggml.WriteGGUF(ws, kv, ts)
+    return ggml.WriteGGUF(f, kv, ts)
 }
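
For context on the convert.go hunks above: ConvertModel and ConvertAdapter now accept a concrete *os.File rather than an io.WriteSeeker, and converters return []*ggml.Tensor so tensor metadata (for example Shape, which writeFile clones and reverses) can be adjusted in place before ggml.WriteGGUF runs. A minimal caller sketch under the new signature; the "model" input directory and "out.gguf" output path are illustrative, not taken from this commit.

package main

import (
	"log"
	"os"

	"github.com/ollama/ollama/convert"
)

func main() {
	// Illustrative output path; ConvertModel writes the GGUF result to it.
	f, err := os.Create("out.gguf")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// os.DirFS satisfies the fs.FS parameter; the *os.File is passed directly
	// under the new signature (previously this argument was an io.WriteSeeker).
	if err := convert.ConvertModel(os.DirFS("model"), f); err != nil {
		log.Fatal(err)
	}
}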

convert/convert_bert.go (+3 -3)

@@ -132,8 +132,8 @@ func (p *bertModel) KV(t *Tokenizer) ggml.KV {
     return kv
 }

-func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
-    var out []ggml.Tensor
+func (p *bertModel) Tensors(ts []Tensor) []*ggml.Tensor {
+    var out []*ggml.Tensor
     for _, t := range ts {
         if slices.Contains([]string{
             "embeddings.position_ids",
@@ -143,7 +143,7 @@ func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
             continue
         }

-        out = append(out, ggml.Tensor{
+        out = append(out, &ggml.Tensor{
             Name: t.Name(),
             Kind: t.Kind(),
             Shape: t.Shape(),
