Skip to content

Commit 009405a

Browse files
authored
wasm: prefer pmin/pmax
According to [emscripten](https://emscripten.org/docs/porting/simd.html) and [v8](https://github.com/v8/v8/blob/b6520eda5eafc3b007a5641b37136dfc9d92f63d/src/compiler/backend/x64/code-generator-x64.cc#L2661-L2699), `[f32x4|f64x2].[min|max]` compiles to many more instructions than `[f32x4|f64x2].[pmin|pmax]`. The [spec](https://github.com/WebAssembly/spec/blob/main/proposals/simd/SIMD.md#floating-point-min-and-max) defines the difference between pmin/pmax and min/max as their NaN-propagating behavior, and in [v8](https://github.com/v8/v8/blob/b6520eda5eafc3b007a5641b37136dfc9d92f63d/src/compiler/backend/x64/code-generator-x64.cc#L2740-L2747) pmin/pmax is the equivalent of the x86 `_mm_min_ps`/`_mm_max_ps`. This should make functions that use min/max faster on WebAssembly and align them with the existing x86 SSE behavior.
1 parent 1fdc1c8 commit 009405a

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

include/cglm/vec4.h

+6-6
Original file line number | Diff line number | Diff line change
@@ -649,7 +649,7 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
649649
#if defined(__wasm__) && defined(__wasm_simd128__)
650650
glmm_store(dest, wasm_f32x4_add(
651651
glmm_load(dest),
652-
wasm_f32x4_max(glmm_load(a), glmm_load(b))));
652+
wasm_f32x4_pmax(glmm_load(a), glmm_load(b))));
653653
#elif defined( __SSE__ ) || defined( __SSE2__ )
654654
glmm_store(dest, _mm_add_ps(glmm_load(dest),
655655
_mm_max_ps(glmm_load(a),
@@ -681,7 +681,7 @@ glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
681681
#if defined(__wasm__) && defined(__wasm_simd128__)
682682
glmm_store(dest, wasm_f32x4_add(
683683
glmm_load(dest),
684-
wasm_f32x4_min(glmm_load(a), glmm_load(b))));
684+
wasm_f32x4_pmin(glmm_load(a), glmm_load(b))));
685685
#elif defined( __SSE__ ) || defined( __SSE2__ )
686686
glmm_store(dest, _mm_add_ps(glmm_load(dest),
687687
_mm_min_ps(glmm_load(a),
@@ -854,7 +854,7 @@ CGLM_INLINE
854854
void
855855
glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
856856
#if defined(__wasm__) && defined(__wasm_simd128__)
857-
glmm_store(dest, wasm_f32x4_max(glmm_load(a), glmm_load(b)));
857+
glmm_store(dest, wasm_f32x4_pmax(glmm_load(a), glmm_load(b)));
858858
#elif defined( __SSE__ ) || defined( __SSE2__ )
859859
glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b)));
860860
#elif defined(CGLM_NEON_FP)
@@ -878,7 +878,7 @@ CGLM_INLINE
878878
void
879879
glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
880880
#if defined(__wasm__) && defined(__wasm_simd128__)
881-
glmm_store(dest, wasm_f32x4_min(glmm_load(a), glmm_load(b)));
881+
glmm_store(dest, wasm_f32x4_pmin(glmm_load(a), glmm_load(b)));
882882
#elif defined( __SSE__ ) || defined( __SSE2__ )
883883
glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b)));
884884
#elif defined(CGLM_NEON_FP)
@@ -902,8 +902,8 @@ CGLM_INLINE
902902
void
903903
glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
904904
#if defined(__wasm__) && defined(__wasm_simd128__)
905-
glmm_store(v, wasm_f32x4_min(
906-
wasm_f32x4_max(glmm_load(v), wasm_f32x4_splat(minVal)),
905+
glmm_store(v, wasm_f32x4_pmin(
906+
wasm_f32x4_pmax(glmm_load(v), wasm_f32x4_splat(minVal)),
907907
wasm_f32x4_splat(maxVal)));
908908
#elif defined( __SSE__ ) || defined( __SSE2__ )
909909
glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),

0 commit comments

Comments
 (0)