Skip to content

Commit 949dfc1

Browse files
committed
mulmat-tune-tool: add --n_pass; document; fix trailing spaces
1 parent 8aebab9 commit 949dfc1

17 files changed

+324
-134
lines changed

examples/mulmat-tune/README.md

Lines changed: 288 additions & 112 deletions
Large diffs are not rendered by default.
Loading
319 KB
Binary file not shown.

examples/mulmat-tune/bench-out/13b.q4_0.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 163 0 0 94 0
8888
256 17 759 0 0 171 0
8989
512 39 2837 0 0 321 0
90-

examples/mulmat-tune/bench-out/13b.q5_0.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 167 0 0 60 0
8888
256 20 733 0 0 129 0
8989
512 43 3462 0 0 262 0
90-

examples/mulmat-tune/bench-out/7b.f16.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,3 @@
8383
128 10 167 0 0 79 0
8484
256 19 835 0 0 136 0
8585
512 39 2856 0 0 283 0
86-

examples/mulmat-tune/bench-out/7b.f32.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,3 @@
8383
128 9 198 0 0 64 0
8484
256 20 766 0 0 166 0
8585
512 40 3464 0 0 276 0
86-

examples/mulmat-tune/bench-out/7b.q4_0.openblas.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 9 201 0 0 128 0
8888
256 19 886 0 0 172 0
8989
512 39 3227 0 0 405 0
90-

examples/mulmat-tune/bench-out/7b.q4_0.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 203 0 0 111 0
8888
256 19 705 0 0 165 0
8989
512 33 2832 0 0 313 0
90-

examples/mulmat-tune/bench-out/7b.q4_1.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 210 0 0 70 0
8888
256 20 856 0 0 128 0
8989
512 40 2949 0 0 334 0
90-

examples/mulmat-tune/bench-out/7b.q5_1.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 202 0 0 76 0
8888
256 20 850 0 0 123 0
8989
512 39 2944 0 0 301 0
90-

examples/mulmat-tune/bench-out/7b.q8_0.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 9 206 0 0 79 0
8888
256 19 784 0 0 132 0
8989
512 38 2780 0 0 310 0
90-

examples/mulmat-tune/mulmat-tune-tool.c

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ static void print_blas_build_tips(void);
2424
static void progress(int i, int max);
2525
static bool prompt_yes_no(const char *prompt);
2626

27-
static void cmd_tune(struct ggml_mulmat_tune *b, bool verbose);
27+
static void cmd_tune(struct ggml_mulmat_tune *b, int n_pass, bool verbose);
2828
static void cmd_analyze(struct ggml_mulmat_tune *b);
2929

3030
static void usage(char *prog) {
@@ -41,6 +41,9 @@ static void usage(char *prog) {
4141
" default 10\n",
4242
"--backend BACKEND blas backend: CUDA | CL | CBLAS\n",
4343
" default: auto detect\n",
44+
"--n_pass number of passes to run\n",
45+
" default 3\n",
46+
" requires: in range [1, 5]\n",
4447
"--file FILE data file to write\n",
4548
" default stdout\n",
4649
"-y always answer \"yes\" to all prompts\n",
@@ -94,6 +97,7 @@ int main(int argc, char **argv) {
9497
const char *arg_model = NULL;
9598
const char *arg_type = NULL;
9699
const char *arg_m_num = NULL;
100+
const char *arg_n_pass = NULL;
97101
const char *arg_backend = NULL;
98102
const char *arg_file = NULL;
99103
bool always_yes = false;
@@ -114,6 +118,11 @@ int main(int argc, char **argv) {
114118
arg_m_num = argv[i + 1];
115119
++i;
116120
}
121+
} else if (strcmp(argv[i], "--n_pass") == 0) {
122+
if (i + 1 < argc) {
123+
arg_n_pass = argv[i + 1];
124+
++i;
125+
}
117126
} else if (strcmp(argv[i], "--backend") == 0) {
118127
if (i + 1 < argc) {
119128
arg_backend = argv[i + 1];
@@ -203,6 +212,20 @@ int main(int argc, char **argv) {
203212
}
204213
}
205214

215+
int n_pass = 3;
216+
{
217+
if (arg_n_pass != NULL) {
218+
int v = atoi(arg_n_pass);
219+
n_pass = v;
220+
}
221+
if (n_pass < 1 || n_pass > MAX_NUM_PASS) {
222+
fprintf(stderr, "invalid n_pass: %d, expect in range [1, 5]\n",
223+
n_pass);
224+
usage(argv[0]);
225+
exit(1);
226+
}
227+
}
228+
206229
{
207230
enum ggml_backend backend = GGML_BACKEND_UNKNOWN;
208231
if (arg_backend == NULL) {
@@ -265,7 +288,7 @@ int main(int argc, char **argv) {
265288
tune.model, tune.type_name, ggml_get_backend_name(tune.backend),
266289
tune.blas_vendor);
267290

268-
cmd_tune(&tune, true /* verbose */);
291+
cmd_tune(&tune, n_pass, true /* verbose */);
269292

270293
FILE *fp = NULL;
271294
if (arg_file != NULL) {
@@ -334,7 +357,7 @@ int main(int argc, char **argv) {
334357
return 0;
335358
}
336359

337-
void cmd_tune(struct ggml_mulmat_tune *tune, bool verbose) {
360+
void cmd_tune(struct ggml_mulmat_tune *tune, int n_pass, bool verbose) {
338361
size_t wsize = 0;
339362
void *q_buf = NULL;
340363
void *wdata = NULL;
@@ -477,21 +500,21 @@ void cmd_tune(struct ggml_mulmat_tune *tune, bool verbose) {
477500
// without memset, the first run may be significantly slower.
478501
memset(wdata, 0, wsize);
479502

480-
int stage_time[NUM_BENCH];
481-
for (int i_bench = 0; i_bench < NUM_BENCH; i_bench++) {
503+
int stage_time[MAX_NUM_PASS];
504+
for (int i_bench = 0; i_bench < n_pass; i_bench++) {
482505
int t0 = (int)ggml_time_us();
483506

484507
ggml_internal_compute_forward_mul_mat(
485508
profile, stage, wsize, wdata, src0, src1, dst);
486509

487510
stage_time[i_bench] = (int)ggml_time_us() - t0;
488511
if (verbose) {
489-
progress(i_bench, NUM_BENCH);
512+
progress(i_bench, n_pass);
490513
}
491514
}
492515

493516
item->stages_time[stage] =
494-
tune_time_min(stage_time, NUM_BENCH);
517+
tune_time_min(stage_time, n_pass);
495518

496519
if (verbose) {
497520
line_len++;

examples/mulmat-tune/mulmat-tune.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
extern "C" {
99
#endif
1010

11-
#define NUM_BENCH 4
11+
#define MAX_NUM_PASS 5
1212

1313
#define GGML_MULMAT_N_SHAPES 6
1414
#define GGML_MULMAT_MAX_PROFILES 8

ggml.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14374,7 +14374,7 @@ void ggml_graph_compute_mul_mat_set_task_profile(struct ggml_cgraph *cgraph) {
1437414374
if (shape != NULL) {
1437514375
memset(profile_time, 0, sizeof(profile_time));
1437614376
ggml_mulmat_tune_shape_estimate_time(shape, M, cgraph->n_threads, profile_time);
14377-
14377+
1437814378
int min = INT32_MAX;
1437914379
for (int j = 0; j < shape->n_profiles; j++) {
1438014380
int total = profile_time[j].total_time;
@@ -14413,7 +14413,7 @@ void ggml_graph_compute_mul_mat_set_task_profile(struct ggml_cgraph *cgraph) {
1441314413
}
1441414414
}
1441514415
}
14416-
14416+
1441714417
if (profile == NULL) {
1441814418
for (int j = 0; j < n_profiles; j++) {
1441914419
if (profiles[j].stages[0].backend == GGML_BACKEND_CPU &&

tests/test-mulmat-tune

362 KB
Binary file not shown.

tests/test-mulmat-tune.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ void test_ggml_mulmat_tune_estimate_time_non_zero_NK(void) {
3535
const int m_start = m_step;
3636
const int m_num = 2;
3737

38-
ggml_mulmat_tune_setup_model(&tune, "7B", m_start, m_step, m_num);
38+
ggml_mulmat_tune_setup_model(&tune, "7B", m_num);
3939

4040
struct ggml_mulmat_tune_shape *shape = NULL;
4141
for (int i = 0; i < tune.n_shapes; i++) {
@@ -149,7 +149,7 @@ void test_ggml_mulmat_tune_estimate_time_non_zero_NK(void) {
149149
ggml_mulmat_tune_get_shape(&tune, shape->N, shape->K,
150150
shape->src0_type, shape->src1_type);
151151
GGML_ASSERT(matched_shape);
152-
152+
153153
ggml_mulmat_tune_shape_estimate_time(matched_shape, e->M, e->nth,
154154
profile_time);
155155

@@ -193,7 +193,7 @@ void test_ggml_mulmat_tune_estimate_time_zero_NK(void) {
193193
const int m_start = m_step;
194194
const int m_num = 2;
195195

196-
ggml_mulmat_tune_setup_model(&tune, "7B", m_start, m_step, m_num);
196+
ggml_mulmat_tune_setup_model(&tune, "7B", m_num);
197197

198198
struct ggml_mulmat_tune_shape *shape = NULL;
199199
for (int i = 0; i < tune.n_shapes; i++) {

0 commit comments

Comments
 (0)