
Commit fc4a714

Authored by Ettore Di Giacinto
feat(llama.cpp): bump and adapt to upstream changes (#4378)
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 0429e00 commit fc4a714

File tree: Makefile · backend/cpp/llama/grpc-server.cpp

2 files changed: +32 -3 lines changed


Makefile

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=274ec65af6e54039eb95cb44904af5c945dca1fa
+CPPLLAMA_VERSION?=c27ac678dd393af0da9b8acf10266e760c8a0912

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

backend/cpp/llama/grpc-server.cpp

Lines changed: 31 additions & 2 deletions
@@ -2228,6 +2228,35 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
 // }
 // }

+const std::vector<ggml_type> kv_cache_types = {
+    GGML_TYPE_F32,
+    GGML_TYPE_F16,
+    GGML_TYPE_BF16,
+    GGML_TYPE_Q8_0,
+    GGML_TYPE_Q4_0,
+    GGML_TYPE_Q4_1,
+    GGML_TYPE_IQ4_NL,
+    GGML_TYPE_Q5_0,
+    GGML_TYPE_Q5_1,
+};
+
+static ggml_type kv_cache_type_from_str(const std::string & s) {
+    for (const auto & type : kv_cache_types) {
+        if (ggml_type_name(type) == s) {
+            return type;
+        }
+    }
+    throw std::runtime_error("Unsupported cache type: " + s);
+}
+
+static std::string get_all_kv_cache_types() {
+    std::ostringstream msg;
+    for (const auto & type : kv_cache_types) {
+        msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", ");
+    }
+    return msg.str();
+}
+
 static void params_parse(const backend::ModelOptions* request,
                          common_params & params) {

@@ -2242,10 +2271,10 @@ static void params_parse(const backend::ModelOptions* request,
     // params.model_alias ??
     params.model_alias = request->modelfile();
     if (!request->cachetypekey().empty()) {
-        params.cache_type_k = request->cachetypekey();
+        params.cache_type_k = kv_cache_type_from_str(request->cachetypekey());
     }
     if (!request->cachetypevalue().empty()) {
-        params.cache_type_v = request->cachetypevalue();
+        params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue());
     }
     params.n_ctx = request->contextsize();
     //params.memory_f16 = request->f16memory();
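
The new helpers convert the cache type strings arriving over gRPC (cachetypekey / cachetypevalue) into the ggml_type values that the upstream common_params now expects, instead of passing the raw strings through. Below is a minimal, self-contained sketch of the same lookup pattern; it uses a stand-in cache_type enum and a cache_type_name() helper (both hypothetical, purely for illustration) rather than the real ggml_type / ggml_type_name() from ggml.h:

// Sketch of the string -> enum lookup used by the commit, with stand-in types.
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

enum class cache_type { F16, Q8_0, Q4_0 };

// Stand-in for ggml_type_name(): textual name of each supported type.
static const char * cache_type_name(cache_type t) {
    switch (t) {
        case cache_type::F16:  return "f16";
        case cache_type::Q8_0: return "q8_0";
        case cache_type::Q4_0: return "q4_0";
    }
    return "unknown";
}

static const std::vector<cache_type> cache_types = {
    cache_type::F16, cache_type::Q8_0, cache_type::Q4_0,
};

// Same pattern as kv_cache_type_from_str(): linear scan over the allowed
// types, comparing the requested string against each type's name.
static cache_type cache_type_from_str(const std::string & s) {
    for (const auto & type : cache_types) {
        if (cache_type_name(type) == s) {
            return type;
        }
    }
    throw std::runtime_error("Unsupported cache type: " + s);
}

// Same pattern as get_all_kv_cache_types(): comma-separated list of names.
static std::string all_cache_types() {
    std::ostringstream msg;
    for (const auto & type : cache_types) {
        msg << cache_type_name(type) << (&type == &cache_types.back() ? "" : ", ");
    }
    return msg.str();
}

int main() {
    std::cout << "supported: " << all_cache_types() << "\n";
    try {
        cache_type t = cache_type_from_str("q8_0");
        std::cout << "parsed: " << cache_type_name(t) << "\n";
    } catch (const std::runtime_error & e) {
        std::cout << e.what() << "\n"; // raised for any name not in the table
    }
    return 0;
}

The real kv_cache_type_from_str() works the same way against the ggml enum, so an unsupported cache type now fails fast at model load time; get_all_kv_cache_types() builds the comma-separated list of accepted names, presumably useful for reporting the valid options back to the caller.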

0 commit comments