Commit 3e6b9c8

Update to llama.cpp 368645698ab648e390dcd7c00a2bf60efa654f57.

1 parent: c0efbf0

14 files changed: +168 -71 lines

LLama.Web/Common/ModelOptions.cs

Lines changed: 3 additions & 0 deletions
@@ -118,5 +118,8 @@ public class ModelOptions
 
         /// <inheritdoc />
         public LLamaPoolingType PoolingType { get; set; }
+
+        /// <inheritdoc />
+        public LLamaAttentionType AttentionType { get; set; } = LLamaAttentionType.Unspecified;
     }
 }

LLama/Abstractions/IContextParams.cs

Lines changed: 5 additions & 0 deletions
@@ -123,4 +123,9 @@ public interface IContextParams
     /// How to pool (sum) embedding results by sequence id (ignored if no pooling layer)
     /// </summary>
     LLamaPoolingType PoolingType { get; }
+
+    /// <summary>
+    /// Attention type to use for embeddings
+    /// </summary>
+    LLamaAttentionType AttentionType { get; }
 }

LLama/Common/ModelParams.cs

Lines changed: 3 additions & 0 deletions
@@ -109,6 +109,9 @@ public record ModelParams
     /// <inheritdoc />
     public LLamaPoolingType PoolingType { get; set; } = LLamaPoolingType.Unspecified;
 
+    /// <inheritdoc />
+    public LLamaAttentionType AttentionType { get; set; } = LLamaAttentionType.Unspecified;
+
     /// <inheritdoc />
     public bool VocabOnly { get; set; }
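For reference, a minimal sketch of how the new option is set from user code. This is an illustration rather than part of the commit: the model path is hypothetical, and it assumes the existing ModelParams / LLamaWeights / LLamaEmbedder APIs.

// Sketch: configure the attention type for an embedding context.
// "models/embedding-model.gguf" is a hypothetical path.
using LLama;
using LLama.Common;
using LLama.Native;

var parameters = new ModelParams("models/embedding-model.gguf")
{
    Embeddings = true,
    PoolingType = LLamaPoolingType.Mean,
    AttentionType = LLamaAttentionType.NonCausal // new in this commit
};

using var weights = LLamaWeights.LoadFromFile(parameters);
using var embedder = new LLamaEmbedder(weights, parameters);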

LLama/Extensions/IContextParamsExtensions.cs

Lines changed: 1 addition & 0 deletions
@@ -52,6 +52,7 @@ public static void ToLlamaContextParams(this IContextParams @params, out LLamaCo
         result.offload_kqv = !@params.NoKqvOffload;
         result.flash_attention = @params.FlashAttention;
         result.llama_pooling_type = @params.PoolingType;
+        result.attention_type = @params.AttentionType;
 
         result.n_threads = Threads(@params.Threads);
         result.n_threads_batch = Threads(@params.BatchThreads);

LLama/LLamaQuantizer.cs

Lines changed: 32 additions & 32 deletions
@@ -1,4 +1,4 @@
-using LLama.Native;
+using LLama.Native;
 using System;
 using System.Collections.Generic;
@@ -66,49 +66,49 @@ private static bool ValidateFtype(LLamaFtype ftype)
 
         switch (ftype)
         {
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_F16:
-            case LLamaFtype.LLAMA_FTYPE_ALL_F32:
+            case LLamaFtype.MOSTLY_Q4_0:
+            case LLamaFtype.MOSTLY_Q4_1:
+            case LLamaFtype.MOSTLY_Q5_0:
+            case LLamaFtype.MOSTLY_Q5_1:
+            case LLamaFtype.MOSTLY_Q8_0:
+            case LLamaFtype.MOSTLY_F16:
+            case LLamaFtype.ALL_F32:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K_S:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K:
+            case LLamaFtype.MOSTLY_Q2_K_S:
+            case LLamaFtype.MOSTLY_Q2_K:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_K_XS:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_S:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_M:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_L:
+            case LLamaFtype.MOSTLY_IQ3_K_XS:
+            case LLamaFtype.MOSTLY_Q3_K_S:
+            case LLamaFtype.MOSTLY_Q3_K_M:
+            case LLamaFtype.MOSTLY_Q3_K_L:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_S:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_M:
+            case LLamaFtype.MOSTLY_Q4_K_S:
+            case LLamaFtype.MOSTLY_Q4_K_M:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_S:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_M:
+            case LLamaFtype.MOSTLY_Q5_K_S:
+            case LLamaFtype.MOSTLY_Q5_K_M:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q6_K:
+            case LLamaFtype.MOSTLY_Q6_K:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XXS:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XS:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_S:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_M:
+            case LLamaFtype.MOSTLY_IQ2_XXS:
+            case LLamaFtype.MOSTLY_IQ2_XS:
+            case LLamaFtype.MOSTLY_IQ2_S:
+            case LLamaFtype.MOSTLY_IQ2_M:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_XXS:
+            case LLamaFtype.MOSTLY_IQ3_XXS:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ1_S:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ1_M:
+            case LLamaFtype.MOSTLY_IQ1_S:
+            case LLamaFtype.MOSTLY_IQ1_M:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ4_NL:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ4_XS:
+            case LLamaFtype.MOSTLY_IQ4_NL:
+            case LLamaFtype.MOSTLY_IQ4_XS:
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_S:
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_M:
+            case LLamaFtype.MOSTLY_IQ3_S:
+            case LLamaFtype.MOSTLY_IQ3_M:
                 return true;
 
-            case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
-            case LLamaFtype.LLAMA_FTYPE_GUESSED:
+            case LLamaFtype.MOSTLY_Q4_1_SOME_F16:
+            case LLamaFtype.GUESSED:
             default:
                 return false;
         }
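The drop of the LLAMA_FTYPE_ prefix is source-breaking for callers that used the old names. A hedged sketch of a call site after this change, assuming the existing LLamaQuantizer.Quantize(src, dst, ftype) overload and hypothetical file paths:

// Sketch: quantize an f16 GGUF to Q4_K_M using the renamed enum members.
using System;
using LLama;
using LLama.Native;

bool ok = LLamaQuantizer.Quantize("model-f16.gguf", "model-q4_k_m.gguf", LLamaFtype.MOSTLY_Q4_K_M);
if (!ok)
    Console.WriteLine("ftype rejected by ValidateFtype or quantization failed.");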

LLama/LLamaSharp.csproj

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@
   </ItemGroup>
 
   <PropertyGroup>
-    <BinaryReleaseId>1c5eba6f8e62</BinaryReleaseId>
+    <BinaryReleaseId>368645698ab648e390dc</BinaryReleaseId>
   </PropertyGroup>
 
   <PropertyGroup>
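The new BinaryReleaseId is the first 20 characters of the llama.cpp commit hash from the commit message, pinning the managed code to the matching set of compiled native binaries.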

LLama/Native/LLamaAttentionType.cs

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+namespace LLama.Native;
+
+public enum LLamaAttentionType
+{
+    Unspecified = -1,
+    Causal = 0,
+    NonCausal = 1,
+}
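The new enum mirrors llama.cpp's llama_attention_type (LLAMA_ATTENTION_TYPE_UNSPECIFIED = -1, LLAMA_ATTENTION_TYPE_CAUSAL = 0, LLAMA_ATTENTION_TYPE_NON_CAUSAL = 1). Unspecified defers the decision to llama.cpp, which falls back to the model's own causal-attention flag; non-causal attention is what BERT-style embedding models expect.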

LLama/Native/LLamaContextParams.cs

Lines changed: 5 additions & 0 deletions
@@ -65,6 +65,11 @@ public struct LLamaContextParams
         /// whether to pool (sum) embedding results by sequence id
         /// </summary>
         public LLamaPoolingType llama_pooling_type;
+
+        /// <summary>
+        /// Attention type to use for embeddings
+        /// </summary>
+        public LLamaAttentionType attention_type;
 
         /// <summary>
         /// RoPE base frequency, 0 = from model
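Since LLamaContextParams is marshalled directly to the native llama_context_params struct, attention_type must sit at the same offset as in the updated llama.h (immediately after the pooling type); this is why the struct changes in lockstep with the llama.cpp version bump.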

LLama/Native/LLamaFtype.cs

Lines changed: 48 additions & 33 deletions
@@ -1,4 +1,4 @@
-namespace LLama.Native
+namespace LLama.Native
 {
     /// <summary>
     /// Supported model file types
@@ -9,176 +9,191 @@ public enum LLamaFtype
         /// All f32
         /// </summary>
         /// <remarks>Benchmark@7B: 26GB</remarks>
-        LLAMA_FTYPE_ALL_F32 = 0,
+        ALL_F32 = 0,
 
         /// <summary>
         /// Mostly f16
         /// </summary>
         /// <remarks>Benchmark@7B: 13GB</remarks>
-        LLAMA_FTYPE_MOSTLY_F16 = 1,
+        MOSTLY_F16 = 1,
 
         /// <summary>
         /// Mostly 8 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 6.7GB, +0.0004ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q8_0 = 7,
+        MOSTLY_Q8_0 = 7,
 
         /// <summary>
         /// Mostly 4 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 3.50GB, +0.2499 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q4_0 = 2,
+        MOSTLY_Q4_0 = 2,
 
         /// <summary>
         /// Mostly 4 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q4_1 = 3,
+        MOSTLY_Q4_1 = 3,
 
         /// <summary>
         /// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,
+        MOSTLY_Q4_1_SOME_F16 = 4,
 
         /// <summary>
         /// Mostly 5 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 4.30GB @ 7B tokens, +0.0796 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q5_0 = 8,
+        MOSTLY_Q5_0 = 8,
 
         /// <summary>
         /// Mostly 5 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 4.70GB, +0.0415 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q5_1 = 9,
+        MOSTLY_Q5_1 = 9,
 
         /// <summary>
         /// K-Quant 2 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 2.67GB @ 7N parameters, +0.8698 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q2_K = 10,
+        MOSTLY_Q2_K = 10,
 
         /// <summary>
         /// K-Quant 3 bit (Small)
         /// </summary>
         /// <remarks>Benchmark@7B: 2.75GB, +0.5505 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q3_K_S = 11,
+        MOSTLY_Q3_K_S = 11,
 
         /// <summary>
         /// K-Quant 3 bit (Medium)
         /// </summary>
         /// <remarks>Benchmark@7B: 3.06GB, +0.2437 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q3_K_M = 12,
+        MOSTLY_Q3_K_M = 12,
 
         /// <summary>
         /// K-Quant 3 bit (Large)
         /// </summary>
         /// <remarks>Benchmark@7B: 3.35GB, +0.1803 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q3_K_L = 13,
+        MOSTLY_Q3_K_L = 13,
 
         /// <summary>
         /// K-Quant 4 bit (Small)
         /// </summary>
         /// <remarks>Benchmark@7B: 3.56GB, +0.1149 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q4_K_S = 14,
+        MOSTLY_Q4_K_S = 14,
 
         /// <summary>
         /// K-Quant 4 bit (Medium)
         /// </summary>
         /// <remarks>Benchmark@7B: 3.80GB, +0.0535 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q4_K_M = 15,
+        MOSTLY_Q4_K_M = 15,
 
         /// <summary>
         /// K-Quant 5 bit (Small)
         /// </summary>
         /// <remarks>Benchmark@7B: 4.33GB, +0.0353 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q5_K_S = 16,
+        MOSTLY_Q5_K_S = 16,
 
         /// <summary>
         /// K-Quant 5 bit (Medium)
         /// </summary>
         /// <remarks>Benchmark@7B: 4.45GB, +0.0142 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q5_K_M = 17,
+        MOSTLY_Q5_K_M = 17,
 
         /// <summary>
         /// K-Quant 6 bit
         /// </summary>
         /// <remarks>Benchmark@7B: 5.15GB, +0.0044 ppl</remarks>
-        LLAMA_FTYPE_MOSTLY_Q6_K = 18,
+        MOSTLY_Q6_K = 18,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19,
+        MOSTLY_IQ2_XXS = 19,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ2_XS = 20,
+        MOSTLY_IQ2_XS = 20,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_Q2_K_S = 21,
+        MOSTLY_Q2_K_S = 21,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ3_K_XS = 22,
+        MOSTLY_IQ3_K_XS = 22,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23,
+        MOSTLY_IQ3_XXS = 23,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ1_S = 24,
+        MOSTLY_IQ1_S = 24,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ4_NL = 25,
+        MOSTLY_IQ4_NL = 25,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ3_S = 26,
+        MOSTLY_IQ3_S = 26,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ3_M = 27,
+        MOSTLY_IQ3_M = 27,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ2_S = 28,
+        MOSTLY_IQ2_S = 28,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ2_M = 29,
+        MOSTLY_IQ2_M = 29,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ4_XS = 30,
+        MOSTLY_IQ4_XS = 30,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_IQ1_M = 31,
+        MOSTLY_IQ1_M = 31,
 
         /// <summary>
         /// except 1d tensors
         /// </summary>
-        LLAMA_FTYPE_MOSTLY_BF16 = 32,
+        MOSTLY_BF16 = 32,
+
+        /// <summary>
+        /// except 1d tensors
+        /// </summary>
+        MOSTLY_Q4_0_4_4 = 33,
+
+        /// <summary>
+        /// except 1d tensors
+        /// </summary>
+        MOSTLY_Q4_0_4_8 = 34,
+
+        /// <summary>
+        /// except 1d tensors
+        /// </summary>
+        MOSTLY_Q4_0_8_8 = 35,
 
         /// <summary>
         /// File type was not specified
         /// </summary>
-        LLAMA_FTYPE_GUESSED = 1024
+        GUESSED = 1024
     }
 }

LLama/Native/LLamaVocabPreType.cs

Lines changed: 4 additions & 1 deletion
@@ -23,5 +23,8 @@ internal enum LLamaVocabPreType
     DBRX = 13,
     SMAUG = 14,
     PORO = 15,
-    VIKING = 16,
+    CHATGLM3 = 16,
+    CHATGLM4 = 17,
+    VIKING = 18,
+    JAIS = 19,
 }
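Note that inserting CHATGLM3 and CHATGLM4 renumbers VIKING from 16 to 18: these values must stay numerically identical to llama.cpp's LLAMA_VOCAB_PRE_TYPE_* constants, so the enum tracks the upstream order rather than appending new members at the end.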
