
Commit b67cc50: Merge 'origin/master' into hipblas
2 parents: fcbc262 + e216aa0

16 files changed: +350, -179 lines

CMakeLists.txt (+14, -13)

@@ -77,21 +77,19 @@ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
 # Build info header
 #
 
-# Write header template to binary dir to keep source directory clean
-file(WRITE "${CMAKE_BINARY_DIR}/BUILD_INFO.h.in" "\
-#ifndef BUILD_INFO_H\n\
-#define BUILD_INFO_H\n\
-\n\
-#define BUILD_NUMBER @BUILD_NUMBER@\n\
-#define BUILD_COMMIT \"@BUILD_COMMIT@\"\n\
-\n\
-#endif // BUILD_INFO_H\n\
-")
-
 # Generate initial build-info.h
 include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
 
 if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
+    set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/.git")
+
+    # Is git submodule
+    if(NOT IS_DIRECTORY "${GIT_DIR}")
+        file(READ ${GIT_DIR} REAL_GIT_DIR_LINK)
+        string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK})
+        set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${REAL_GIT_DIR}")
+    endif()
+
     # Add a custom target for build-info.h
     add_custom_target(BUILD_INFO ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h")
 
@@ -101,7 +99,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
         COMMENT "Generating build details from Git"
         COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake"
         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-        DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/.git/index"
+        DEPENDS "${GIT_DIR}/index"
         VERBATIM
     )
 else()
@@ -389,8 +387,11 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
             add_compile_options(-mavx512vnni)
         endif()
     endif()
+elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
+    message(STATUS "PowerPC detected")
+    add_compile_options(-mcpu=native -mtune=native)
+    #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
 else()
-    # TODO: support PowerPC
     message(STATUS "Unknown architecture")
 endif()
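
For context on the CMakeLists.txt change: in a git submodule or worktree checkout, ".git" is a plain file containing a "gitdir: <path>" link rather than a directory, so the old DEPENDS on ".git/index" pointed at a file that never exists there. Below is a rough, illustrative C++ sketch of the same resolution logic the new CMake lines perform; the helper name resolve_git_dir is invented for this sketch and is not part of the repository.

    // Hypothetical C++ equivalent of the gitdir resolution added above:
    // follow the "gitdir: ..." link before depending on "<git-dir>/index".
    #include <fstream>
    #include <iostream>
    #include <regex>
    #include <string>

    std::string resolve_git_dir(const std::string & source_dir) {
        std::string git_path = source_dir + "/.git";
        std::ifstream f(git_path);
        std::string line;
        std::smatch m;
        if (f && std::getline(f, line) &&
            std::regex_match(line, m, std::regex("gitdir: (.*)"))) {
            return source_dir + "/" + m[1].str();  // submodule/worktree link
        }
        return git_path;  // ordinary checkout: .git is already a directory
    }

    int main() {
        std::cout << resolve_git_dir(".") << "\n";
        return 0;
    }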

Makefile (+1, -1)

@@ -226,7 +226,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 build-info.h: $(wildcard .git/index) scripts/build-info.sh
-	@scripts/build-info.sh > $@.tmp
+	@sh scripts/build-info.sh > $@.tmp
 	@if ! cmp -s $@.tmp $@; then \
 		mv $@.tmp $@; \
 	else \

examples/benchmark/benchmark-matmult.cpp (+3, -3)

@@ -38,9 +38,9 @@ float tensor_sum_elements(struct ggml_tensor * tensor) {
 
 #define TENSOR_TYPE_AS_STR(TYPE) TYPE == GGML_TYPE_F32 ? "FP32" : TYPE == GGML_TYPE_F16 ? "FP16" : TYPE == GGML_TYPE_Q4_0 ? "Q4_0" : TYPE == GGML_TYPE_Q4_1 ? "Q4_1" : "UNKNOWN"
 
-#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5ld x %5ld x %5ld, nb = (%5li, %5li, %5li) - ", #TENSOR, \
+#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", #TENSOR, \
         TENSOR->type,TENSOR_TYPE_AS_STR(TENSOR->type),\
-        TENSOR->ne[0], TENSOR->ne[1], TENSOR->ne[2], TENSOR->nb[0], TENSOR->nb[1], TENSOR->nb[2]); \
+        (int) TENSOR->ne[0], (int) TENSOR->ne[1], (int) TENSOR->ne[2], TENSOR->nb[0], TENSOR->nb[1], TENSOR->nb[2]); \
         { float sum = tensor_sum_elements(TENSOR); printf("Sum of tensor %s is %6.2f\n",#TENSOR, sum); }
 
 struct benchmark_params_struct {
@@ -138,7 +138,7 @@ int main(int argc, char ** argv) {
     ctx = ggml_init(params);
     if (!ctx) {
         fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-        return false;
+        return 1;
     }
 
 
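
Background on the format-string fix: ggml stores tensor extents ne[] as 64-bit integers, so "%5ld" only matches where long is 64 bits, and is wrong on platforms such as Windows where long is 32 bits. The diff casts to int for "%d", which is safe for these benchmark shapes. An illustrative stand-alone sketch (not from the commit) showing the chosen fix next to the fully portable PRId64 form:

    #include <cinttypes>
    #include <cstdio>

    int main() {
        const int64_t ne0 = 11008;            // a typical tensor dimension
        printf("ne = %5d\n", (int) ne0);      // the approach taken in the diff
        printf("ne = %5" PRId64 "\n", ne0);   // portable alternative
        return 0;
    }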

examples/common.cpp (+62, -2)

@@ -66,6 +66,33 @@ int32_t get_num_physical_cores() {
     return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 }
 
+std::string process_escapes(const char* input) {
+    std::string output;
+
+    if (input != nullptr) {
+        std::size_t input_len = std::strlen(input);
+        output.reserve(input_len);
+
+        for (std::size_t i = 0; i < input_len; ++i) {
+            if (input[i] == '\\' && i + 1 < input_len) {
+                switch (input[++i]) {
+                    case 'n':  output.push_back('\n'); break;
+                    case 't':  output.push_back('\t'); break;
+                    case '\'': output.push_back('\''); break;
+                    case '\"': output.push_back('\"'); break;
+                    case '\\': output.push_back('\\'); break;
+                    default:   output.push_back('\\');
+                               output.push_back(input[i]); break;
+                }
+            } else {
+                output.push_back(input[i]);
+            }
+        }
+    }
+
+    return output;
+}
+
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     bool invalid_param = false;
     std::string arg;
@@ -91,7 +118,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
-            params.prompt = argv[i];
+            params.prompt = process_escapes(argv[i]);
         } else if (arg == "--session") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -324,7 +351,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "                        run in interactive mode and poll user input upon seeing PROMPT (can be\n");
     fprintf(stderr, "                        specified more than once for multiple prompts).\n");
     fprintf(stderr, "  --color               colorise output to distinguish prompt and user input from generations\n");
-    fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for <= 0)\n");
+    fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n");
     fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
     fprintf(stderr, "  -p PROMPT, --prompt PROMPT\n");
     fprintf(stderr, "                        prompt to start generation with (default: empty)\n");
@@ -405,6 +432,39 @@ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
     return res;
 }
 
+struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
+    auto lparams = llama_context_default_params();
+
+    lparams.n_ctx      = params.n_ctx;
+    lparams.n_parts    = params.n_parts;
+    lparams.seed       = params.seed;
+    lparams.f16_kv     = params.memory_f16;
+    lparams.use_mmap   = params.use_mmap;
+    lparams.use_mlock  = params.use_mlock;
+    lparams.logits_all = params.perplexity;
+    lparams.embedding  = params.embedding;
+
+    llama_context * lctx = llama_init_from_file(params.model.c_str(), lparams);
+
+    if (lctx == NULL) {
+        fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
+        return NULL;
+    }
+
+    if (!params.lora_adapter.empty()) {
+        int err = llama_apply_lora_from_file(lctx,
+                                             params.lora_adapter.c_str(),
+                                             params.lora_base.empty() ? NULL : params.lora_base.c_str(),
+                                             params.n_threads);
+        if (err != 0) {
+            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
+            return NULL;
+        }
+    }
+
+    return lctx;
+}
+
 /* Keep track of current color of output, and emit ANSI code if it changes. */
 void set_console_color(console_state & con_st, console_color_t color) {
     if (con_st.use_color && con_st.color != color) {
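
With process_escapes in place, a prompt passed via -p may contain \n, \t, \', \" and \\ escapes that are decoded before use; unrecognized escapes pass through unchanged. A small illustrative usage sketch, assuming it is linked against examples/common.cpp; the forward declaration below stands in for a header entry this commit does not add:

    #include <cstdio>
    #include <string>

    std::string process_escapes(const char * input);  // defined in examples/common.cpp

    int main() {
        // the C++ literal "One\\nTwo" is the 8 characters One\nTwo, as a shell
        // argument -p "One\nTwo" would deliver them
        const std::string prompt = process_escapes("One\\nTwo\\tthree \\\"quoted\\\"");
        printf("%s\n", prompt.c_str());  // prints "One", then "Two<TAB>three "quoted""
        return 0;
    }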

examples/common.h (+6)

@@ -77,6 +77,12 @@ std::string gpt_random_prompt(std::mt19937 & rng);
 
 std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);
 
+//
+// Model utils
+//
+
+struct llama_context * llama_init_from_gpt_params(const gpt_params & params);
+
 //
 // Console utils
 //
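
The new llama_init_from_gpt_params helper centralizes the context-parameter setup, model load, and optional LoRA application that embedding.cpp and main.cpp previously duplicated. A hedged caller sketch, assuming compilation against this tree's examples/common.h and llama.h:

    #include "common.h"
    #include "llama.h"
    #include <cstdio>

    int main(int argc, char ** argv) {
        gpt_params params;
        if (!gpt_params_parse(argc, argv, params)) {
            return 1;
        }

        // one call replaces llama_context_default_params() +
        // llama_init_from_file() + llama_apply_lora_from_file()
        llama_context * ctx = llama_init_from_gpt_params(params);
        if (ctx == NULL) {
            return 1;  // the helper already printed the reason to stderr
        }

        // ... tokenize, evaluate, sample ...

        llama_free(ctx);
        return 0;
    }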

examples/embedding/embedding.cpp (+5, -19)

@@ -21,7 +21,7 @@ int main(int argc, char ** argv) {
 
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
-    if (params.seed <= 0) {
+    if (params.seed < 0) {
         params.seed = time(NULL);
     }
 
@@ -35,24 +35,10 @@ int main(int argc, char ** argv) {
     llama_context * ctx;
 
     // load the model
-    {
-        auto lparams = llama_context_default_params();
-
-        lparams.n_ctx      = params.n_ctx;
-        lparams.n_parts    = params.n_parts;
-        lparams.seed       = params.seed;
-        lparams.f16_kv     = params.memory_f16;
-        lparams.logits_all = params.perplexity;
-        lparams.use_mmap   = params.use_mmap;
-        lparams.use_mlock  = params.use_mlock;
-        lparams.embedding  = params.embedding;
-
-        ctx = llama_init_from_file(params.model.c_str(), lparams);
-
-        if (ctx == NULL) {
-            fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
-            return 1;
-        }
+    ctx = llama_init_from_gpt_params(params);
+    if (ctx == NULL) {
+        fprintf(stderr, "%s: error: unable to load model\n", __func__);
+        return 1;
     }
 
     // print system information

examples/main/README.md (+1, -1)

@@ -130,7 +130,7 @@ It is important to note that the generated text may be shorter than the specified
 
 - `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1).
 
-The RNG seed is used to initialize the random number generator that influences the text generation process. By setting a specific seed value, you can obtain consistent and reproducible results across multiple runs with the same input and settings. This can be helpful for testing, debugging, or comparing the effects of different options on the generated text to see when they diverge. If the seed is set to a value less than or equal to 0, a random seed will be used, which will result in different outputs on each run.
+The RNG seed is used to initialize the random number generator that influences the text generation process. By setting a specific seed value, you can obtain consistent and reproducible results across multiple runs with the same input and settings. This can be helpful for testing, debugging, or comparing the effects of different options on the generated text to see when they diverge. If the seed is set to a value less than 0, a random seed will be used, which will result in different outputs on each run.
 
 ### Temperature
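
A restatement of the new seed rule as an illustrative sketch (not code from the commit, which mirrors this check in main.cpp and embedding.cpp): only negative seeds are replaced with a time-based value, so --seed 0 now reproduces the same output on every run.

    #include <cstdint>
    #include <cstdio>
    #include <ctime>

    // mirrors the check used in main.cpp/embedding.cpp after this commit
    static int32_t normalize_seed(int32_t seed) {
        return seed < 0 ? (int32_t) time(NULL) : seed;  // previously: seed <= 0
    }

    int main() {
        printf("seed 0  -> %d (now fixed, reproducible)\n", normalize_seed(0));
        printf("seed -1 -> %d (time-based, varies per run)\n", normalize_seed(-1));
        return 0;
    }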

examples/main/main.cpp (+22, -48)

@@ -22,6 +22,9 @@
 #include <signal.h>
 #include <unistd.h>
 #elif defined (_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#define NOMINMAX
+#include <windows.h>
 #include <signal.h>
 #endif
 
@@ -84,7 +87,7 @@ int main(int argc, char ** argv) {
 
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
-    if (params.seed <= 0) {
+    if (params.seed < 0) {
         params.seed = time(NULL);
     }
 
@@ -101,34 +104,11 @@ int main(int argc, char ** argv) {
     llama_context * ctx;
     g_ctx = &ctx;
 
-    // load the model
-    {
-        auto lparams = llama_context_default_params();
-
-        lparams.n_ctx     = params.n_ctx;
-        lparams.n_parts   = params.n_parts;
-        lparams.seed      = params.seed;
-        lparams.f16_kv    = params.memory_f16;
-        lparams.use_mmap  = params.use_mmap;
-        lparams.use_mlock = params.use_mlock;
-
-        ctx = llama_init_from_file(params.model.c_str(), lparams);
-
-        if (ctx == NULL) {
-            fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
-            return 1;
-        }
-    }
-
-    if (!params.lora_adapter.empty()) {
-        int err = llama_apply_lora_from_file(ctx,
-                                             params.lora_adapter.c_str(),
-                                             params.lora_base.empty() ? NULL : params.lora_base.c_str(),
-                                             params.n_threads);
-        if (err != 0) {
-            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
-            return 1;
-        }
+    // load the model and apply lora adapter, if any
+    ctx = llama_init_from_gpt_params(params);
+    if (ctx == NULL) {
+        fprintf(stderr, "%s: error: unable to load model\n", __func__);
+        return 1;
     }
 
     // print system information
@@ -263,7 +243,10 @@ int main(int argc, char ** argv) {
         sigint_action.sa_flags = 0;
         sigaction(SIGINT, &sigint_action, NULL);
 #elif defined (_WIN32)
-        signal(SIGINT, sigint_handler);
+        auto console_ctrl_handler = [](DWORD ctrl_type) -> BOOL {
+            return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false;
+        };
+        SetConsoleCtrlHandler(static_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
 #endif
 
         fprintf(stderr, "%s: interactive mode on.\n", __func__);
@@ -298,17 +281,17 @@ int main(int argc, char ** argv) {
     }
 
     bool is_antiprompt = false;
-    bool input_noecho  = false;
+    bool input_echo    = true;
 
     // HACK - because session saving incurs a non-negligible delay, for now skip re-saving session
    // if we loaded a session with at least 75% similarity. It's currently just used to speed up the
     // initial prompt so it doesn't need to be an exact match.
     bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < (embd_inp.size() * 3 / 4);
 
 
-    int n_past     = 0;
-    int n_remain   = params.n_predict;
-    int n_consumed = 0;
+    int n_past             = 0;
+    int n_remain           = params.n_predict;
+    int n_consumed         = 0;
     int n_session_consumed = 0;
 
     // the first thing we will do is to output the prompt, so set color accordingly
@@ -413,7 +396,7 @@ int main(int argc, char ** argv) {
         llama_token id = 0;
 
         {
-            auto logits = llama_get_logits(ctx);
+            auto logits  = llama_get_logits(ctx);
             auto n_vocab = llama_n_vocab(ctx);
 
             // Apply params.logit_bias map
@@ -485,7 +468,7 @@ int main(int argc, char ** argv) {
             embd.push_back(id);
 
             // echo this to console
-            input_noecho = false;
+            input_echo = true;
 
             // decrement remaining sampling budget
             --n_remain;
@@ -503,14 +486,14 @@ int main(int argc, char ** argv) {
         }
 
         // display text
-        if (!input_noecho) {
+        if (input_echo) {
             for (auto id : embd) {
                 printf("%s", llama_token_to_str(ctx, id));
             }
             fflush(stdout);
         }
         // reset color to default if we there is no pending user input
-        if (!input_noecho && (int)embd_inp.size() == n_consumed) {
+        if (input_echo && (int)embd_inp.size() == n_consumed) {
             set_console_color(con_st, CONSOLE_COLOR_DEFAULT);
         }
 
@@ -542,11 +525,6 @@ int main(int argc, char ** argv) {
             // potentially set color to indicate we are taking user input
             set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
 
-#if defined (_WIN32)
-            // Windows: must reactivate sigint handler after each signal
-            signal(SIGINT, sigint_handler);
-#endif
-
             if (params.instruct) {
                 printf("\n> ");
             }
@@ -605,7 +583,7 @@ int main(int argc, char ** argv) {
                 n_remain -= line_inp.size();
             }
 
-            input_noecho = true; // do not echo this again
+            input_echo = false; // do not echo this again
         }
 
         if (n_past > 0) {
@@ -630,10 +608,6 @@ int main(int argc, char ** argv) {
             }
         }
 
-#if defined (_WIN32)
-        signal(SIGINT, SIG_DFL);
-#endif
-
     llama_print_timings(ctx);
    llama_free(ctx);
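
On the Windows signal handling in this file: the removed comment notes that a signal()-installed SIGINT handler must be re-armed after each delivery on Windows, which is why both re-arming blocks could be deleted once SetConsoleCtrlHandler is used, and a capture-less lambda converts to the PHANDLER_ROUTINE function pointer that API expects. An isolated, illustrative sketch of the pattern (Windows-only, not code from the commit):

    #define WIN32_LEAN_AND_MEAN
    #define NOMINMAX
    #include <windows.h>
    #include <cstdio>

    int main() {
        auto console_ctrl_handler = [](DWORD ctrl_type) -> BOOL {
            if (ctrl_type == CTRL_C_EVENT) {
                fprintf(stderr, "\ncaught Ctrl-C\n");
                return TRUE;   // handled; keep the process alive
            }
            return FALSE;      // defer other events to the default handler
        };
        // stays installed across repeated Ctrl-C presses, unlike signal()
        SetConsoleCtrlHandler(static_cast<PHANDLER_ROUTINE>(console_ctrl_handler), TRUE);

        getchar();  // wait so there is time to press Ctrl-C
        return 0;
    }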
