Skip to content

Commit 913011d

Browse files
committed
Add cmd/qoirview --multithreaded option
Wall clock time taken to decode test/data/bricks-color.qoir (160 x 120):
  Single-threaded: 160 microseconds
  Multi-threaded: 220 microseconds (0.73x)

Wall clock time taken to decode test/data/harvesters.qoir (1165 x 859):
  Single-threaded: 6521 microseconds
  Multi-threaded: 2543 microseconds (2.56x)

Wall clock time taken to decode NASA's "Cosmic Cliffs" (3600 x 2085),
https://www.nasa.gov/image-feature/goddard/2022/nasa-s-webb-reveals-cosmic-cliffs-glittering-landscape-of-star-birth:
  Single-threaded: 59880 microseconds
  Multi-threaded: 16835 microseconds (3.56x)

Times are the median of 5 runs.

Also update doc/historical_benchmarks.txt for recent commits.
1 parent af456ac commit 913011d

File tree

2 files changed

+179
-5
lines changed

2 files changed

+179
-5
lines changed

cmd/qoirview.c

Lines changed: 171 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,144 @@
2222

2323
// ----
2424

25+
// g_multithreaded makes load decode with multithreaded_decode instead of
// qoir_decode. main sets it when given the -multithreaded flag.
bool g_multithreaded = false;
26+
// g_print_decode_time makes load print how many microseconds the decode took.
// main sets it when given the -print-decode-time flag.
bool g_print_decode_time = false;
27+
28+
// ----
29+
30+
typedef struct worker_data_struct {
31+
// Request.
32+
const uint8_t* src_ptr;
33+
size_t src_len;
34+
qoir_decode_options* options;
35+
int32_t y0;
36+
int32_t y1;
37+
38+
// Response.
39+
const char* status_message;
40+
} worker_data;
41+
42+
int //
43+
work(void* data) {
44+
worker_data* wd = (worker_data*)data;
45+
qoir_decode_options opts = {0};
46+
memcpy(&opts, wd->options, sizeof(*wd->options));
47+
if (!opts.pixbuf.data) {
48+
wd->status_message =
49+
"main: inconsistent qoir_decode_options.pixbuf.data field\n";
50+
return 0;
51+
}
52+
if (opts.src_clip_rectangle.y0 < wd->y0) {
53+
opts.src_clip_rectangle.y0 = wd->y0;
54+
}
55+
if (opts.src_clip_rectangle.y1 > wd->y1) {
56+
opts.src_clip_rectangle.y1 = wd->y1;
57+
}
58+
59+
qoir_decode_result res = qoir_decode(wd->src_ptr, wd->src_len, &opts);
60+
if (res.owned_memory) {
61+
free(res.owned_memory);
62+
wd->status_message =
63+
"main: inconsistent qoir_decode_result.owned_memory field\n";
64+
} else {
65+
wd->status_message = res.status_message;
66+
}
67+
return 0;
68+
}
69+
70+
qoir_decode_result //
71+
multithreaded_decode( //
72+
const uint8_t* src_ptr, //
73+
const size_t src_len, //
74+
const qoir_decode_options* options) {
75+
qoir_decode_pixel_configuration_result config =
76+
qoir_decode_pixel_configuration(src_ptr, src_len);
77+
if (config.status_message) {
78+
qoir_decode_result res = {0};
79+
res.status_message = config.status_message;
80+
return res;
81+
}
82+
83+
uint32_t height_in_tiles =
84+
qoir_calculate_number_of_tiles_1d(config.dst_pixcfg.height_in_pixels);
85+
if (height_in_tiles <= 1) {
86+
return qoir_decode(src_ptr, src_len, options);
87+
}
88+
89+
uint64_t pixbuf_len = 4 * (uint64_t)config.dst_pixcfg.width_in_pixels *
90+
(uint64_t)config.dst_pixcfg.height_in_pixels;
91+
if (pixbuf_len > SIZE_MAX) {
92+
qoir_decode_result res = {0};
93+
res.status_message =
94+
qoir_status_message__error_unsupported_pixbuf_dimensions;
95+
return res;
96+
}
97+
98+
qoir_decode_options opts = {0};
99+
if (options) {
100+
memcpy(&opts, options, sizeof(*options));
101+
}
102+
103+
opts.pixbuf.pixcfg.pixfmt = QOIR_PIXEL_FORMAT__BGRA_PREMUL;
104+
opts.pixbuf.pixcfg.width_in_pixels = config.dst_pixcfg.width_in_pixels;
105+
opts.pixbuf.pixcfg.height_in_pixels = config.dst_pixcfg.height_in_pixels;
106+
opts.pixbuf.data = malloc(pixbuf_len);
107+
opts.pixbuf.stride_in_bytes = 4 * opts.pixbuf.pixcfg.width_in_pixels;
108+
if (!opts.pixbuf.data) {
109+
qoir_decode_result res = {0};
110+
res.status_message = qoir_status_message__error_out_of_memory;
111+
return res;
112+
}
113+
114+
if (!opts.use_src_clip_rectangle) {
115+
opts.use_src_clip_rectangle = true;
116+
opts.src_clip_rectangle =
117+
qoir_make_rectangle(0, 0, (int32_t)config.dst_pixcfg.width_in_pixels,
118+
(int32_t)config.dst_pixcfg.height_in_pixels);
119+
}
120+
121+
const char* status_message = NULL;
122+
uint32_t num_threads = height_in_tiles;
123+
if (num_threads > 16) {
124+
num_threads = 16;
125+
}
126+
worker_data data[16] = {0};
127+
SDL_Thread* threads[16] = {0};
128+
129+
for (uint32_t i = 0; i < num_threads; i++) {
130+
data[i].src_ptr = src_ptr;
131+
data[i].src_len = src_len;
132+
data[i].options = &opts;
133+
data[i].y0 = (((i + 0) * height_in_tiles) / num_threads) * QOIR_TILE_SIZE;
134+
data[i].y1 = (((i + 1) * height_in_tiles) / num_threads) * QOIR_TILE_SIZE;
135+
threads[i] = SDL_CreateThread(&work, "worker", &data[i]);
136+
if (!threads[i]) {
137+
status_message = "main: could not create thread";
138+
}
139+
}
140+
141+
for (uint32_t i = 0; i < num_threads; i++) {
142+
if (threads[i]) {
143+
SDL_WaitThread(threads[i], NULL);
144+
if (!status_message) {
145+
status_message = data[i].status_message;
146+
}
147+
}
148+
}
149+
if (status_message) {
150+
free(opts.pixbuf.data);
151+
qoir_decode_result res = {0};
152+
res.status_message = status_message;
153+
return res;
154+
}
155+
qoir_decode_result res = {0};
156+
res.owned_memory = opts.pixbuf.data;
157+
memcpy(&res.dst_pixbuf, &opts.pixbuf, sizeof(opts.pixbuf));
158+
return res;
159+
}
160+
161+
// ----
162+
25163
SDL_Surface* //
26164
load(const char* filename, void** owned_memory) {
27165
SDL_RWops* rw = SDL_RWFromFile(filename, "rb");
@@ -60,9 +198,12 @@ load(const char* filename, void** owned_memory) {
60198
return NULL;
61199
}
62200

201+
uint64_t now = SDL_GetPerformanceCounter();
63202
qoir_decode_options opts = {0};
64203
opts.pixfmt = QOIR_PIXEL_FORMAT__BGRA_PREMUL;
65-
qoir_decode_result decode = qoir_decode(ptr, len, &opts);
204+
qoir_decode_result decode = g_multithreaded
205+
? multithreaded_decode(ptr, len, &opts)
206+
: qoir_decode(ptr, len, &opts);
66207
free(ptr);
67208
ptr = NULL;
68209
len = 0;
@@ -72,6 +213,11 @@ load(const char* filename, void** owned_memory) {
72213
decode.status_message);
73214
return NULL;
74215
}
216+
if (g_print_decode_time) {
217+
uint64_t elapsed = SDL_GetPerformanceCounter() - now;
218+
printf("%" PRIu64 " microseconds to decode.\n",
219+
(elapsed * 1000000) / SDL_GetPerformanceFrequency());
220+
}
75221

76222
*owned_memory = decode.owned_memory;
77223
return SDL_CreateRGBSurfaceFrom(
@@ -116,8 +262,29 @@ draw(SDL_Window* window, SDL_Surface* surface) {
116262

117263
int //
118264
main(int argc, char** argv) {
119-
if (argc != 2) {
120-
fprintf(stderr, "usage: %s filename\n", argv[0]);
265+
const char* filename = NULL;
266+
bool too_many_args = false;
267+
for (int i = 1; i < argc; i++) {
268+
const char* arg = argv[i];
269+
if ((arg[0] == '-') && (arg[1] == '-')) {
270+
arg++;
271+
}
272+
if (strcmp(arg, "-multithreaded") == 0) {
273+
g_multithreaded = true;
274+
continue;
275+
} else if (strcmp(arg, "-print-decode-time") == 0) {
276+
g_print_decode_time = true;
277+
continue;
278+
} else if (filename == NULL) {
279+
filename = argv[i];
280+
continue;
281+
}
282+
too_many_args = true;
283+
}
284+
285+
if (too_many_args || (filename == NULL)) {
286+
fprintf(stderr, "usage: %s -print-decode-time -multithreaded filename\n",
287+
argv[0]);
121288
return 1;
122289
}
123290
if (SDL_Init(SDL_INIT_VIDEO) < 0) {
@@ -132,7 +299,7 @@ main(int argc, char** argv) {
132299
return 1;
133300
}
134301
void* surface_owned_memory = NULL;
135-
SDL_Surface* surface = load(argv[1], &surface_owned_memory);
302+
SDL_Surface* surface = load(filename, &surface_owned_memory);
136303
if (!surface) {
137304
return 1;
138305
}

doc/historical_benchmarks.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,14 @@ My desktop machine:
2727
----
2828

2929
Commit CmpRatio -------------gcc------------ ------------clang-----------
30-
PENDING 0.2526 CR 161.40 gccEnc 271.96 gccDec 142.45 claEnc 171.02 claDec Add metadata (ICCP, EXIF, XMP) support
30+
PENDING 0.2526 CR 167.52 gccEnc 272.21 gccDec 143.01 claEnc 175.73 claDec Add cmd/qoirview --multithreaded option
31+
af456ac 0.2526 CR 168.15 gccEnc 272.06 gccDec 142.85 claEnc 175.81 claDec Add more test/data images
32+
36876fb 0.2526 CR 167.36 gccEnc 272.01 gccDec 142.47 claEnc 175.58 claDec Add qoir_decode_options clip and offset fields
33+
33d06eb 0.2526 CR 161.37 gccEnc 271.87 gccDec 146.15 claEnc 165.56 claDec Crop to qoir_decode_options.pixbuf dimensions
34+
d273370 0.2526 CR 161.39 gccEnc 271.79 gccDec 142.67 claEnc 171.27 claDec Add a qoir_decode_options.pixbuf field
35+
48bcc50 0.2526 CR 161.04 gccEnc 272.52 gccDec 142.89 claEnc 171.21 claDec Apply unlossify before (not after) swizzle
36+
9defe8f 0.2526 CR 160.64 gccEnc 271.52 gccDec 142.24 claEnc 175.26 claDec Fix unlossify for 3 bytes per pixel formats
37+
224cd54 0.2526 CR 161.40 gccEnc 271.96 gccDec 142.45 claEnc 171.02 claDec Add metadata (ICCP, EXIF, XMP) support
3138
2ba31e2 0.2526 CR 161.04 gccEnc 271.99 gccDec 146.03 claEnc 164.93 claDec Have gammaawaredither use multiply not divide
3239
1d31c28 0.2526 CR 160.66 gccEnc 271.56 gccDec 146.06 claEnc 164.79 claDec Add dither encoding option
3340
72a8d47 0.2526 CR 161.37 gccEnc 271.94 gccDec 145.81 claEnc 165.64 claDec Have gammaawaredither preserve ancillary chunks

0 commit comments

Comments
 (0)