Skip to content

Commit f8051ff

Browse files
committed
Remove global -ffast-math flag, but apply fast math to just color_helpers.h/color_helpers.cpp
Turn on FTZ/DAZ inside create_color_mgmt_luts()
1 parent 467e12c commit f8051ff

7 files changed

+76
-8
lines changed

meson.build

-4
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,6 @@ add_project_arguments(cppc.get_supported_arguments([
3737
'-Wno-missing-braces',
3838
]), language: 'cpp')
3939

40-
add_project_arguments(cppc.get_supported_arguments([
41-
'-ffast-math',
42-
]), language: 'cpp')
43-
4440
pipewire_dep = dependency('libpipewire-0.3', required: get_option('pipewire'))
4541
librt_dep = cppc.find_library('rt', required : get_option('pipewire'))
4642
hwdata_dep = dependency('hwdata', required : false)

src/Utils/Directives.h

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#pragma once
2+
3+
namespace gamescope::Directives {
    /// Scoped guard for the floating-point control register.
    ///
    /// The constructor saves the current register value in m_csr and then
    /// enables flush-to-zero handling; the destructor writes the saved value
    /// back. Which register is touched depends on the target:
    ///   - x86 / x86-64 with SSE: MXCSR, setting _MM_FLUSH_ZERO_ON and
    ///     _MM_DENORMALS_ZERO_ON.
    ///   - AArch64 (when the FPCR builtins are available): FPCR, setting
    ///     fz_bit and fz16_bit.
    ///   - anything else: nothing is read or written and m_csr stays 0.
    ///
    /// Normally instantiated through the SET_FAST_MATH_FLAGS macro.
    struct FlagSwitcher {
        /// Register value captured by the constructor, restored by the destructor.
        unsigned long long m_csr = 0ull;

        inline FlagSwitcher();
        inline ~FlagSwitcher();

        // A copy would write the saved register value back a second time when
        // it is destroyed, so the guard is deliberately non-copyable.
        FlagSwitcher( const FlagSwitcher& ) = delete;
        FlagSwitcher& operator=( const FlagSwitcher& ) = delete;
    };
}

// __has_builtin is not provided by every compiler; evaluating it
// unconditionally inside #elif is a preprocessing error where it is missing.
#if defined(__has_builtin)
#  define GAMESCOPE_HAS_BUILTIN(x) __has_builtin(x)
#else
#  define GAMESCOPE_HAS_BUILTIN(x) 0
#endif

// 32-bit x86 is identified by __i386__ (GCC and Clang do not predefine
// __x86__). __SSE__ is required because _mm_getcsr/_mm_setcsr are SSE
// intrinsics.
#if ( defined(__i386__) || defined(__x86_64__) ) && defined(__SSE__)
#  include <xmmintrin.h>
#  include <pmmintrin.h>
// Expands to a complete declaration (including the ';'), so it is written
// without a trailing semicolon at the call site.
#  define SET_FAST_MATH_FLAGS gamescope::Directives::FlagSwitcher switcher{};

inline gamescope::Directives::FlagSwitcher::FlagSwitcher() : m_csr{ _mm_getcsr() } {
    _mm_setcsr( static_cast<unsigned int>( m_csr ) | _MM_DENORMALS_ZERO_ON | _MM_FLUSH_ZERO_ON );
}

inline gamescope::Directives::FlagSwitcher::~FlagSwitcher() {
    _mm_setcsr( static_cast<unsigned int>( m_csr ) );
}

#elif defined(__aarch64__) && GAMESCOPE_HAS_BUILTIN(__builtin_aarch64_get_fpcr64) && GAMESCOPE_HAS_BUILTIN(__builtin_aarch64_set_fpcr64)
#  define SET_FAST_MATH_FLAGS gamescope::Directives::FlagSwitcher switcher{};

// FPCR bits OR-ed in by the constructor.
// Based on: https://github.com/DLTcollab/sse2neon/blob/706d3b58025364c2371cafcf9b16e32ff7e630ed/sse2neon.h#L2433
// and: https://stackoverflow.com/a/59001820
static constexpr unsigned long long fz_bit = 0x1'00'00'00;
static constexpr unsigned long long fz16_bit = 0x8'00'00;

inline gamescope::Directives::FlagSwitcher::FlagSwitcher() : m_csr{ __builtin_aarch64_get_fpcr64() } {
    __builtin_aarch64_set_fpcr64( m_csr | fz_bit | fz16_bit );
}

inline gamescope::Directives::FlagSwitcher::~FlagSwitcher() {
    __builtin_aarch64_set_fpcr64( m_csr );
}

#else
// Unsupported target: the macro expands to nothing. The constructor and
// destructor are still defined (as no-ops) so that a directly instantiated
// FlagSwitcher compiles and links everywhere.
#  define SET_FAST_MATH_FLAGS

inline gamescope::Directives::FlagSwitcher::FlagSwitcher() {}

inline gamescope::Directives::FlagSwitcher::~FlagSwitcher() {}

#endif
44+
45+
// FAST_MATH_ON / FAST_MATH_OFF bracket a region of a source file in which
// relaxed floating-point code generation is requested. They are used as a
// matched pair at file scope: ON pushes the compiler's current state and
// changes it, OFF pops it again.
//
// NOTE(review): in both non-empty FAST_MATH_ON definitions the ';' after the
// first _Pragma is part of the expansion, so every use also emits an empty
// declaration at the point of use.
//
// clang: save the float_control state, then switch "precise" off.
#ifdef __clang__
46+
# define FAST_MATH_ON _Pragma("float_control(push)"); \
47+
_Pragma("float_control(precise, off)") //https://clang.llvm.org/docs/LanguageExtensions.html#extensions-to-specify-floating-point-flags
48+
# define FAST_MATH_OFF _Pragma("float_control(pop)")
49+
50+
// GCC: save the current option set, then apply -ffast-math via the optimize pragma.
#elif defined(__GNUC__)
51+
# define FAST_MATH_ON _Pragma("GCC push_options"); \
52+
_Pragma("GCC optimize(\"-ffast-math\")")
53+
# define FAST_MATH_OFF _Pragma("GCC pop_options")
54+
55+
// Any other compiler: both macros expand to nothing.
#else
56+
# define FAST_MATH_ON
57+
# define FAST_MATH_OFF
58+
#endif

src/color_bench.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ lut3d_t lut3d_float;
1717

1818
static void BenchmarkCalcColorTransform(EOTF inputEOTF, benchmark::State &state)
1919
{
20+
SET_FAST_MATH_FLAGS
2021
const primaries_t primaries = { { 0.602f, 0.355f }, { 0.340f, 0.574f }, { 0.164f, 0.121f } };
2122
const glm::vec2 white = { 0.3070f, 0.3220f };
2223
const glm::vec2 destVirtualWhite = { 0.f, 0.f };

src/color_helpers.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#define COLOR_HELPERS_CPP
22
#include "color_helpers_impl.h"
33

4+
FAST_MATH_ON
5+
46
#include <algorithm>
57
#include <cstdint>
68
#include <cmath>
@@ -214,7 +216,7 @@ inline void lerp_rgb(float* out, const float* a, const float* b, const float* c,
214216

215217
// Clamps `a` to the range [min, max].
//
// When the code is compiled without finite-math-only assumptions, a
// non-finite `a` (NaN or +/-infinity) is replaced by `min`. Under fast-math /
// finite-math-only the std::isfinite test is omitted and only the clamp
// remains.
//
// __FINITE_MATH_ONLY__ is predefined by GCC and Clang in every build, with
// the value 0 or 1, so its value has to be tested: a bare defined() check is
// always true and would remove the sanitising branch unconditionally.
inline float ClampAndSanitize( float a, float min, float max )
{
#if !( defined(__FAST_MATH__) || ( defined(__FINITE_MATH_ONLY__) && __FINITE_MATH_ONLY__ ) )
    return std::isfinite( a ) ? std::min(std::max(min, a), max) : min;
#else
    return std::min(std::max(min, a), max);
#endif
}
@@ -910,3 +912,5 @@ const glm::mat3 k_xyz_from_2020 = normalised_primary_matrix( displaycolorimetry_
910912
const glm::mat3 k_2020_from_xyz = glm::inverse( k_xyz_from_2020 );
911913

912914
const glm::mat3 k_2020_from_709 = k_2020_from_xyz * k_xyz_from_709;
915+
916+
FAST_MATH_OFF

src/color_helpers_impl.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
#pragma once
2+
#include "Utils/Directives.h"
3+
4+
FAST_MATH_ON
5+
26
#include "color_helpers.h"
37

48
namespace rendervulkan {
@@ -17,4 +21,6 @@ namespace ns_color_tests {
1721

1822
#ifdef COLOR_HELPERS_CPP
1923
REGISTER_LUT_EDGE_SIZE(rendervulkan::s_nLutEdgeSize3d);
20-
#endif
24+
#endif
25+
26+
FAST_MATH_OFF

src/color_tests.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "color_helpers.h"
1+
#include "color_helpers_impl.h"
22
#include <cstdio>
33

44
//#include <glm/ext.hpp>
@@ -16,6 +16,7 @@ lut3d_t lut3d_float;
1616
1717
static void BenchmarkCalcColorTransform(EOTF inputEOTF, benchmark::State &state)
1818
{
19+
SET_FAST_MATH_FLAGS
1920
const primaries_t primaries = { { 0.602f, 0.355f }, { 0.340f, 0.574f }, { 0.164f, 0.121f } };
2021
const glm::vec2 white = { 0.3070f, 0.3220f };
2122
@@ -232,6 +233,7 @@ void test_eetf2390_mono()
232233

233234
int main(int argc, char* argv[])
234235
{
236+
SET_FAST_MATH_FLAGS
235237
printf("color_tests\n");
236238
// test_eetf2390_mono();
237239
color_tests();

src/steamcompmgr.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ static const gamescope_color_mgmt_t k_ScreenshotColorMgmtHDR =
224224
static void
225225
create_color_mgmt_luts(const gamescope_color_mgmt_t& newColorMgmt, gamescope_color_mgmt_luts outColorMgmtLuts[ EOTF_Count ])
226226
{
227+
SET_FAST_MATH_FLAGS
227228
const displaycolorimetry_t& displayColorimetry = newColorMgmt.displayColorimetry;
228229
const displaycolorimetry_t& outputEncodingColorimetry = newColorMgmt.outputEncodingColorimetry;
229230

@@ -5069,7 +5070,7 @@ steamcompmgr_latch_frame_done( steamcompmgr_win_t *w, uint64_t vblank_idx )
50695070

50705071
// Returns `f` unchanged when it is finite, and 0 when it is NaN or
// +/-infinity. Under fast-math / finite-math-only the check is omitted and
// `f` is returned as-is.
//
// __FINITE_MATH_ONLY__ is predefined by GCC and Clang in every build, with
// the value 0 or 1, so its value has to be tested: a bare defined() check is
// always true and would disable the sanitising branch unconditionally.
static inline float santitize_float( float f )
{
#if !( defined(__FAST_MATH__) || ( defined(__FINITE_MATH_ONLY__) && __FINITE_MATH_ONLY__ ) )
    return ( std::isfinite( f ) ? f : 0.f );
#else
    return f;
#endif
}

0 commit comments

Comments
 (0)