Skip to content

Commit bb2f3ad

Browse files
authored
Merge pull request #874 from martindevans/llama_3.1_update_binaries
Llama 3.1 update binaries
2 parents 2816c58 + d1dbb21 commit bb2f3ad

17 files changed

+163
-157
lines changed

LLama.Unittest/BasicTest.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
using System.Text;
21
using LLama.Common;
3-
using LLama.Native;
42
using Xunit.Abstractions;
53

64
namespace LLama.Unittest

LLama.Web/Common/ModelOptions.cs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,6 @@ public class ModelOptions
4747
/// <inheritdoc />
4848
public string ModelPath { get; set; }
4949

50-
/// <inheritdoc />
51-
public AdapterCollection LoraAdapters { get; set; } = new();
52-
53-
/// <inheritdoc />
54-
public string LoraBase { get; set; } = string.Empty;
55-
5650
/// <inheritdoc />
5751
public uint? Threads { get; set; }
5852

LLama/Abstractions/IModelParams.cs

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
using System.Collections;
33
using System.Collections.Generic;
44
using System.ComponentModel;
5-
using System.Linq;
65
using System.Text;
76
using System.Text.Json;
87
using System.Text.Json.Serialization;
@@ -69,67 +68,12 @@ public interface IModelParams
6968
/// </summary>
7069
bool VocabOnly { get; }
7170

72-
/// <summary>
73-
/// List of LoRA adapters to apply
74-
/// </summary>
75-
AdapterCollection LoraAdapters { get; }
76-
77-
/// <summary>
78-
/// base model path for the lora adapter (lora_base)
79-
/// </summary>
80-
string LoraBase { get; }
81-
8271
/// <summary>
8372
/// Override specific metadata items in the model
8473
/// </summary>
8574
List<MetadataOverride> MetadataOverrides { get; }
8675
}
8776

88-
/// <summary>
89-
/// A LoRA adapter to apply to a model
90-
/// </summary>
91-
/// <param name="Path">Path to the LoRA file</param>
92-
/// <param name="Scale">Strength of this LoRA</param>
93-
public readonly record struct LoraAdapter(string Path, float Scale);
94-
95-
/// <summary>
96-
/// A list of LoraAdapter objects
97-
/// </summary>
98-
public sealed class AdapterCollection
99-
: List<LoraAdapter>, IEquatable<AdapterCollection>
100-
{
101-
/// <inheritdoc />
102-
public bool Equals(AdapterCollection? other)
103-
{
104-
if (other == null)
105-
return false;
106-
107-
return this.SequenceEqual(other);
108-
}
109-
110-
/// <inheritdoc/>
111-
public override bool Equals(object? obj)
112-
{
113-
return Equals(obj as AdapterCollection);
114-
}
115-
116-
/// <inheritdoc/>
117-
public override int GetHashCode()
118-
{
119-
unchecked
120-
{
121-
var hash = 17;
122-
for (var i = 0; i < Count; i++)
123-
{
124-
hash += this[i].GetHashCode();
125-
hash *= 7823;
126-
}
127-
return hash;
128-
}
129-
}
130-
}
131-
132-
13377
/// <summary>
13478
/// A fixed size array to set the tensor splits across multiple GPUs
13579
/// </summary>

LLama/Common/ModelParams.cs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,6 @@ public record ModelParams
3939
/// <inheritdoc />
4040
public string ModelPath { get; set; }
4141

42-
/// <inheritdoc />
43-
public AdapterCollection LoraAdapters { get; set; } = new();
44-
45-
/// <inheritdoc />
46-
public string LoraBase { get; set; } = string.Empty;
47-
4842
/// <inheritdoc />
4943
public uint? Threads { get; set; }
5044

LLama/LLamaQuantizer.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public static bool Quantize(string srcFileName, string dstFilename, string ftype
6262
private static bool ValidateFtype(LLamaFtype ftype)
6363
{
6464
// Validation copies from here:
65-
// https://github.com/ggerganov/llama.cpp/blob/f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7/llama.cpp#L13450
65+
// https://github.com/ggerganov/llama.cpp/blob/345c8c0c87a97c1595f9c8b14833d531c8c7d8df/src/llama.cpp#L15624
6666

6767
switch (ftype)
6868
{
@@ -105,9 +105,12 @@ private static bool ValidateFtype(LLamaFtype ftype)
105105

106106
case LLamaFtype.MOSTLY_IQ3_S:
107107
case LLamaFtype.MOSTLY_IQ3_M:
108+
109+
case LLamaFtype.MOSTLY_Q4_0_4_4:
110+
case LLamaFtype.MOSTLY_Q4_0_4_8:
111+
case LLamaFtype.MOSTLY_Q4_0_8_8:
108112
return true;
109113

110-
case LLamaFtype.MOSTLY_Q4_1_SOME_F16:
111114
case LLamaFtype.GUESSED:
112115
default:
113116
return false;

LLama/LLamaSharp.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
</ItemGroup>
5454

5555
<PropertyGroup>
56-
<BinaryReleaseId>368645698ab648e390dc</BinaryReleaseId>
56+
<BinaryReleaseId>345c8c0c87a97c1595f9c8b</BinaryReleaseId>
5757
</PropertyGroup>
5858

5959
<PropertyGroup>

LLama/LLamaWeights.cs

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using System;
1+
using System;
22
using System.Collections.Generic;
33
using System.Text;
44
using System.Threading;
@@ -72,17 +72,6 @@ public static LLamaWeights LoadFromFile(IModelParams @params)
7272
{
7373
using var pin = @params.ToLlamaModelParams(out var lparams);
7474
var weights = SafeLlamaModelHandle.LoadFromFile(@params.ModelPath, lparams);
75-
76-
foreach (var adapter in @params.LoraAdapters)
77-
{
78-
if (string.IsNullOrEmpty(adapter.Path))
79-
continue;
80-
if (adapter.Scale <= 0)
81-
continue;
82-
83-
weights.ApplyLoraFromFile(adapter.Path, adapter.Scale, @params.LoraBase);
84-
}
85-
8675
return new LLamaWeights(weights);
8776
}
8877

@@ -100,14 +89,6 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
10089
// don't touch the @params object inside the task, it might be changed
10190
// externally! Save a copy of everything that we need later.
10291
var modelPath = @params.ModelPath;
103-
var loraBase = @params.LoraBase;
104-
var loraAdapters = @params.LoraAdapters.ToArray();
105-
106-
// Determine the range to report for model loading. llama.cpp reports 0-1, but we'll remap that into a
107-
// slightly smaller range to allow some space for reporting LoRA loading too.
108-
var modelLoadProgressRange = 1f;
109-
if (loraAdapters.Length > 0)
110-
modelLoadProgressRange = 0.9f;
11192

11293
using (@params.ToLlamaModelParams(out var lparams))
11394
{
@@ -119,7 +100,7 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
119100
lparams.progress_callback = (progress, ctx) =>
120101
{
121102
// Update the progress reporter (remapping the value into the smaller range).
122-
progressReporter?.Report(Math.Clamp(progress, 0, 1) * modelLoadProgressRange);
103+
progressReporter?.Report(Math.Clamp(progress, 0, 1));
123104

124105
// If the user set a callback in the model params, call that and see if we should cancel
125106
if (internalCallback != null && !internalCallback(progress, ctx))
@@ -141,30 +122,6 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
141122
// Load the model
142123
var weights = SafeLlamaModelHandle.LoadFromFile(modelPath, lparams);
143124

144-
// Apply the LoRA adapters
145-
for (var i = 0; i < loraAdapters.Length; i++)
146-
{
147-
// Interrupt applying LoRAs if the token is cancelled
148-
if (token.IsCancellationRequested)
149-
{
150-
weights.Dispose();
151-
token.ThrowIfCancellationRequested();
152-
}
153-
154-
// Don't apply invalid adapters
155-
var adapter = loraAdapters[i];
156-
if (string.IsNullOrEmpty(adapter.Path))
157-
continue;
158-
if (adapter.Scale <= 0)
159-
continue;
160-
161-
weights.ApplyLoraFromFile(adapter.Path, adapter.Scale, loraBase);
162-
163-
// Report progress. Model loading reported progress from 0 -> 0.9, use
164-
// the last 0.1 to represent all of the LoRA adapters being applied.
165-
progressReporter?.Report(0.9f + (0.1f / loraAdapters.Length) * (i + 1));
166-
}
167-
168125
// Update progress reporter to indicate completion
169126
progressReporter?.Report(1);
170127

LLama/Native/LLamaAttentionType.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
namespace LLama.Native;
22

3+
/// <summary>
4+
///
5+
/// </summary>
6+
/// <remarks>llama_attention_type</remarks>
37
public enum LLamaAttentionType
48
{
59
Unspecified = -1,

LLama/Native/LLamaFtype.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ namespace LLama.Native
33
/// <summary>
44
/// Supported model file types
55
/// </summary>
6+
/// <remarks>C# representation of llama_ftype</remarks>
67
public enum LLamaFtype
78
{
89
/// <summary>
@@ -35,10 +36,10 @@ public enum LLamaFtype
3536
/// <remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
3637
MOSTLY_Q4_1 = 3,
3738

38-
/// <summary>
39-
/// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
40-
/// </summary>
41-
MOSTLY_Q4_1_SOME_F16 = 4,
39+
///// <summary>
40+
///// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
41+
///// </summary>
42+
//MOSTLY_Q4_1_SOME_F16 = 4,
4243

4344
/// <summary>
4445
/// Mostly 5 bit

LLama/Native/LLamaTokenDataArray.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ public void ApplyGrammar(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle? gra
9797

9898
using (LLamaTokenDataArrayNative.Create(this, out var st))
9999
{
100-
NativeApi.llama_sample_grammar(ctx, ref st, grammar);
100+
NativeApi.llama_grammar_sample(grammar, ctx, ref st);
101101
Sorted = st.sorted;
102102
}
103103
}

LLama/Native/LLamaVocabPreType.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,7 @@ internal enum LLamaVocabPreType
2727
CHATGLM4 = 17,
2828
VIKING = 18,
2929
JAIS = 19,
30+
TEKKEN = 20,
31+
SMOLLM = 21,
32+
CODESHELL = 22,
3033
}

LLama/Native/LoraAdapter.cs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
using System;
2+
3+
namespace LLama.Native;
4+
5+
/// <summary>
6+
/// A LoRA adapter which can be applied to a context for a specific model
7+
/// </summary>
8+
public class LoraAdapter
9+
{
10+
/// <summary>
11+
/// The model which this LoRA adapter was loaded with.
12+
/// </summary>
13+
public SafeLlamaModelHandle Model { get; }
14+
15+
/// <summary>
16+
/// The full path of the file this adapter was loaded from
17+
/// </summary>
18+
public string Path { get; }
19+
20+
/// <summary>
21+
/// Native pointer of the loaded adapter, will be automatically freed when the model is unloaded
22+
/// </summary>
23+
internal IntPtr Pointer { get; }
24+
25+
/// <summary>
26+
/// Indicates whether this adapter is still loaded (set to false by Unload)
27+
/// </summary>
28+
internal bool Loaded { get; private set; }
29+
30+
internal LoraAdapter(SafeLlamaModelHandle model, string path, IntPtr nativePtr)
31+
{
32+
Model = model;
33+
Path = path;
34+
Pointer = nativePtr;
35+
Loaded = true;
36+
}
37+
38+
/// <summary>
39+
/// Unload this adapter
40+
/// </summary>
41+
public void Unload()
42+
{
43+
Loaded = false;
44+
NativeApi.llama_lora_adapter_free(Pointer);
45+
}
46+
}

LLama/Native/NativeApi.Grammar.cs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,16 @@ public static partial class NativeApi
3535
/// <param name="ctx"></param>
3636
/// <param name="candidates"></param>
3737
/// <param name="grammar"></param>
38-
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
39-
public static extern void llama_sample_grammar(SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates, SafeLLamaGrammarHandle grammar);
38+
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
39+
public static extern void llama_grammar_sample(SafeLLamaGrammarHandle grammar, SafeLLamaContextHandle ctx, ref LLamaTokenDataArrayNative candidates);
4040

41-
/// <summary>
42-
/// Accepts the sampled token into the grammar
43-
/// </summary>
44-
/// <param name="ctx"></param>
45-
/// <param name="grammar"></param>
46-
/// <param name="token"></param>
47-
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
48-
public static extern void llama_grammar_accept_token(SafeLLamaContextHandle ctx, SafeLLamaGrammarHandle grammar, LLamaToken token);
41+
/// <summary>
42+
/// Accepts the sampled token into the grammar
43+
/// </summary>
44+
/// <param name="ctx"></param>
45+
/// <param name="grammar"></param>
46+
/// <param name="token"></param>
47+
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
48+
public static extern void llama_grammar_accept_token(SafeLLamaGrammarHandle grammar, SafeLLamaContextHandle ctx, LLamaToken token);
4949
}
5050
}

LLama/Native/NativeApi.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,5 +445,12 @@ public static void llama_log_set(NativeLogConfig.LLamaLogCallback logCallback)
445445
/// <returns>Returns the split_prefix length.</returns>
446446
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
447447
public static extern int llama_split_prefix(string split_prefix, nuint maxlen, string split_path, int split_no, int split_count);
448+
449+
/// <summary>
450+
/// Manually free a LoRA adapter. Loaded adapters will be freed when the associated model is deleted.
451+
/// </summary>
452+
/// <param name="adapter"></param>
453+
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
454+
public static extern void llama_lora_adapter_free(IntPtr adapter);
448455
}
449456
}

0 commit comments

Comments
 (0)