Remove global -ffast-math flag, but apply fast math to just color_helpers.h/color_helpers.cpp

sharkautarch · sharkautarch · commit f8051ffecee2 · 2024-09-01T10:27:22.000-04:00
Turn on FTZ/DAZ inside create_color_mgmt_luts()
diff --git a/meson.build b/meson.build
@@ -37,10 +37,6 @@ add_project_arguments(cppc.get_supported_arguments([
   '-Wno-missing-braces',
 ]), language: 'cpp')
 
-add_project_arguments(cppc.get_supported_arguments([
-  '-ffast-math',
-]), language: 'cpp')
-
 pipewire_dep = dependency('libpipewire-0.3', required: get_option('pipewire'))
 librt_dep = cppc.find_library('rt', required : get_option('pipewire'))
 hwdata_dep = dependency('hwdata', required : false)
diff --git a/src/Utils/Directives.h b/src/Utils/Directives.h
@@ -0,0 +1,77 @@
+#pragma once
+
+// Helpers that confine relaxed floating-point behaviour to selected code:
+//  - SET_FAST_MATH_FLAGS declares a scoped FlagSwitcher in the current block.
+//  - FAST_MATH_ON / FAST_MATH_OFF bracket a file-scope region with compiler
+//    pragmas that relax floating-point semantics for the code in between.
+
+namespace gamescope::Directives {
+  // Scope guard for the floating-point control register: the constructor saves
+  // the current value and enables flush-to-zero handling, the destructor writes
+  // the saved value back. Both are defined per architecture below.
+  struct FlagSwitcher {
+    unsigned long long m_csr = 0ull; // register value captured on construction
+    inline FlagSwitcher();
+    inline ~FlagSwitcher();
+    // Non-copyable: a copy would write the saved value back a second time.
+    FlagSwitcher(const FlagSwitcher&) = delete;
+    FlagSwitcher& operator=(const FlagSwitcher&) = delete;
+  };
+}
+
+// GCC and clang predefine __i386__ (not __x86__) on 32-bit x86; the MXCSR
+// intrinsics additionally need SSE to be enabled on that target.
+#if defined(__x86_64__) || ( defined(__i386__) && defined(__SSE__) )
+# include <xmmintrin.h>
+# include <pmmintrin.h>
+# define SET_FAST_MATH_FLAGS gamescope::Directives::FlagSwitcher switcher{};
+
+ // Save MXCSR, then set the denormals-are-zero and flush-to-zero bits.
+ inline gamescope::Directives::FlagSwitcher::FlagSwitcher() : m_csr{_mm_getcsr()} {
+   _mm_setcsr( (unsigned int)m_csr | _MM_DENORMALS_ZERO_ON | _MM_FLUSH_ZERO_ON );
+ }
+
+ // Write back the MXCSR value captured by the constructor.
+ inline gamescope::Directives::FlagSwitcher::~FlagSwitcher() {
+   _mm_setcsr( (unsigned int)m_csr );
+ }
+
+#elif defined(__aarch64__) && __has_builtin(__builtin_aarch64_get_fpcr64) && __has_builtin(__builtin_aarch64_set_fpcr64)
+# define SET_FAST_MATH_FLAGS gamescope::Directives::FlagSwitcher switcher{};
+
+ static constexpr unsigned long long fz_bit = 0x1'00'00'00;
+ //based on this stuff: https://github.com/DLTcollab/sse2neon/blob/706d3b58025364c2371cafcf9b16e32ff7e630ed/sse2neon.h#L2433
+ //and this: https://stackoverflow.com/a/59001820
+ static constexpr unsigned long long fz16_bit =  0x8'00'00;
+
+ // Save FPCR, then set fz_bit (bit 24) and fz16_bit (bit 19).
+ inline gamescope::Directives::FlagSwitcher::FlagSwitcher() : m_csr{__builtin_aarch64_get_fpcr64()} {
+   __builtin_aarch64_set_fpcr64(m_csr | fz_bit | fz16_bit);
+ }
+
+ // Write back the FPCR value captured by the constructor.
+ inline gamescope::Directives::FlagSwitcher::~FlagSwitcher() {
+   __builtin_aarch64_set_fpcr64(m_csr);
+ }
+
+#else
+// No supported control-register access here: the macro expands to nothing.
+# define SET_FAST_MATH_FLAGS
+
+#endif
+
+#ifdef __clang__
+# define FAST_MATH_ON _Pragma("float_control(push)");        \
+                     _Pragma("float_control(precise, off)") //https://clang.llvm.org/docs/LanguageExtensions.html#extensions-to-specify-floating-point-flags
+# define FAST_MATH_OFF _Pragma("float_control(pop)")
+
+#elif defined(__GNUC__)
+# define FAST_MATH_ON  _Pragma("GCC push_options");             \
+                      _Pragma("GCC optimize(\"-ffast-math\")")
+# define FAST_MATH_OFF _Pragma("GCC pop_options")
+
+#else
+// Unknown compiler: the region markers expand to nothing.
+# define FAST_MATH_ON
+# define FAST_MATH_OFF
+#endif
diff --git a/src/color_bench.cpp b/src/color_bench.cpp
@@ -17,6 +17,7 @@ lut3d_t lut3d_float;
 
 static void BenchmarkCalcColorTransform(EOTF inputEOTF, benchmark::State &state)
 {
+    SET_FAST_MATH_FLAGS
     const primaries_t primaries = { { 0.602f, 0.355f }, { 0.340f, 0.574f }, { 0.164f, 0.121f } };
     const glm::vec2 white = { 0.3070f, 0.3220f };
     const glm::vec2 destVirtualWhite = { 0.f, 0.f };
diff --git a/src/color_helpers.cpp b/src/color_helpers.cpp
@@ -1,6 +1,8 @@
 #define COLOR_HELPERS_CPP
 #include "color_helpers_impl.h"
 
+FAST_MATH_ON
+
 #include <algorithm>
 #include <cstdint>
 #include <cmath>
@@ -214,7 +216,7 @@ inline void lerp_rgb(float* out, const float* a, const float* b, const float* c,
 
 inline float ClampAndSanitize( float a, float min, float max )
 {
-#ifndef __FAST_MATH__
+#if !defined(__FAST_MATH__) && !__FINITE_MATH_ONLY__ // __FINITE_MATH_ONLY__ is always defined by GCC/clang (0 or 1), so test its value
     return std::isfinite( a ) ? std::min(std::max(min, a), max) : min;
 #else
     return std::min(std::max(min, a), max);
@@ -910,3 +912,5 @@ const glm::mat3 k_xyz_from_2020 = normalised_primary_matrix( displaycolorimetry_
 const glm::mat3 k_2020_from_xyz = glm::inverse( k_xyz_from_2020 );
 
 const glm::mat3 k_2020_from_709 = k_2020_from_xyz * k_xyz_from_709;
+
+FAST_MATH_OFF
diff --git a/src/color_helpers_impl.h b/src/color_helpers_impl.h
@@ -1,4 +1,8 @@
 #pragma once
+#include "Utils/Directives.h"
+
+FAST_MATH_ON
+
 #include "color_helpers.h"
 
 namespace rendervulkan {
@@ -17,4 +21,6 @@ namespace ns_color_tests {
 
 #ifdef COLOR_HELPERS_CPP
 REGISTER_LUT_EDGE_SIZE(rendervulkan::s_nLutEdgeSize3d);
-#endif
+#endif
+
+FAST_MATH_OFF
diff --git a/src/color_tests.cpp b/src/color_tests.cpp
@@ -1,4 +1,4 @@
-#include "color_helpers.h"
+#include "color_helpers_impl.h"
 #include <cstdio>
 
 //#include <glm/ext.hpp>
@@ -16,6 +16,7 @@ lut3d_t lut3d_float;
 
 static void BenchmarkCalcColorTransform(EOTF inputEOTF, benchmark::State &state)
 {
+    SET_FAST_MATH_FLAGS
     const primaries_t primaries = { { 0.602f, 0.355f }, { 0.340f, 0.574f }, { 0.164f, 0.121f } };
     const glm::vec2 white = { 0.3070f, 0.3220f };
 
@@ -232,6 +233,7 @@ void test_eetf2390_mono()
 
 int main(int argc, char* argv[])
 {
+    SET_FAST_MATH_FLAGS
     printf("color_tests\n");
     // test_eetf2390_mono();
     color_tests();
diff --git a/src/steamcompmgr.cpp b/src/steamcompmgr.cpp
@@ -224,6 +224,7 @@ static const gamescope_color_mgmt_t k_ScreenshotColorMgmtHDR =
 static void
 create_color_mgmt_luts(const gamescope_color_mgmt_t& newColorMgmt, gamescope_color_mgmt_luts outColorMgmtLuts[ EOTF_Count ])
 {
+	SET_FAST_MATH_FLAGS
 	const displaycolorimetry_t& displayColorimetry = newColorMgmt.displayColorimetry;
 	const displaycolorimetry_t& outputEncodingColorimetry = newColorMgmt.outputEncodingColorimetry;
 
@@ -5069,7 +5070,7 @@ steamcompmgr_latch_frame_done( steamcompmgr_win_t *w, uint64_t vblank_idx )
 
 static inline float santitize_float( float f )
 {
-#ifndef __FAST_MATH__
+#if !defined(__FAST_MATH__) && !__FINITE_MATH_ONLY__ // __FINITE_MATH_ONLY__ is always defined by GCC/clang (0 or 1), so test its value
 	return ( std::isfinite( f ) ? f : 0.f );
 #else
 	return f;

Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,7 @@ lut3d_t lut3d_float;`
`17`	`17`
`18`	`18`	`static void BenchmarkCalcColorTransform(EOTF inputEOTF, benchmark::State &state)`
`19`	`19`	`{`
	`20`	`+ SET_FAST_MATH_FLAGS`
`20`	`21`	`const primaries_t primaries = { { 0.602f, 0.355f }, { 0.340f, 0.574f }, { 0.164f, 0.121f } };`
`21`	`22`	`const glm::vec2 white = { 0.3070f, 0.3220f };`
`22`	`23`	`const glm::vec2 destVirtualWhite = { 0.f, 0.f };`
Original file line number	Diff line number	Diff line change
`@@ -224,6 +224,7 @@ static const gamescope_color_mgmt_t k_ScreenshotColorMgmtHDR =`
`224`	`224`	`static void`
`225`	`225`	`create_color_mgmt_luts(const gamescope_color_mgmt_t& newColorMgmt, gamescope_color_mgmt_luts outColorMgmtLuts[ EOTF_Count ])`
`226`	`226`	`{`
	`227`	`+ SET_FAST_MATH_FLAGS`
`227`	`228`	`const displaycolorimetry_t& displayColorimetry = newColorMgmt.displayColorimetry;`
`228`	`229`	`const displaycolorimetry_t& outputEncodingColorimetry = newColorMgmt.outputEncodingColorimetry;`
`229`	`230`
`@@ -5069,7 +5070,7 @@ steamcompmgr_latch_frame_done( steamcompmgr_win_t *w, uint64_t vblank_idx )`
`5069`	`5070`
`5070`	`5071`	`static inline float santitize_float( float f )`
`5071`	`5072`	`{`
`5072`		`-#ifndef __FAST_MATH__`
	`5073`	`+#if !( defined(__FAST_MATH__) \|\| defined(__FINITE_MATH_ONLY__) )`
`5073`	`5074`	`return ( std::isfinite( f ) ? f : 0.f );`
`5074`	`5075`	`#else`
`5075`	`5076`	`return f;`