Skip to content

feat: support sse and neon #51

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,26 @@ cc_library(
includes = ["include"],
)

# Benchmark binary built with -march=armv8-a. Links sonic-cpp together with
# google_benchmark and the other JSON libraries listed in deps.
cc_binary(
    name = "benchmark-arm",
    srcs = glob([
        "benchmark/*.cpp",
        "benchmark/*.h",
        "benchmark/*.hpp",
    ]),
    # JSON inputs made available to the binary at run time.
    data = glob(["testdata/*.json"]),
    deps = [
        ":sonic-cpp",
        "@google_benchmark//:benchmark",
        "@rapidjson",
        "@cJSON",
        "@yyjson",
        "@simdjson",
    ],
    # Each flag is written as a single token with no surrounding whitespace,
    # so the list reads the same before and after Bazel's option tokenization.
    copts = ["-O3", "-march=armv8-a", "-DNDEBUG", "-std=c++17"],
    # libstdc++fs supplies std::filesystem on toolchains that ship it separately.
    linkopts = ["-lstdc++fs"],
)

cc_binary(
name = "benchmark",
srcs = glob([
Expand Down Expand Up @@ -87,6 +107,56 @@ cc_test(
],
)

# Unit tests compiled with -march=armv8-a. The library headers are listed in
# srcs so they are compiled with this target's own copts. Sanitizer flags come
# from sanitize_copts, which is defined elsewhere in this BUILD file.
cc_test(
    name = "unittest-arm",
    srcs = glob([
        "tests/*.h",
        "tests/*.cpp",
        "include/sonic/*",
        "include/sonic/**/*",
    ]),
    copts = sanitize_copts + [
        "-O3",
        "-g",
        "-UNDEBUG",  # keep assert() active even though -O3 is used
        "-std=c++14",
        "-march=armv8-a",
        "-fstack-protector-all",
        "-Iinclude",
        "-Wall",
        "-Wextra",
        "-Werror",
    ],
    data = glob(["testdata/*.json"]),
    linkopts = sanitize_copts + [
        "-lstdc++fs",
        "-fstack-protector-all",
        "-fsanitize-link-c++-runtime",
    ],
    deps = [
        ":string_view",
        "@gtest//:gtest_main",
    ],
)

# Unit tests compiled with -march=westmere (presumably chosen to exercise the
# SSE code path on a target without AVX2 -- confirm against the arch dispatch
# headers). Mirrors unittest-arm apart from the -march value.
cc_test(
    name = "unittest-sse",
    srcs = glob([
        "tests/*.h",
        "tests/*.cpp",
        "include/sonic/*",
        "include/sonic/**/*",
    ]),
    copts = sanitize_copts + [
        "-O3",
        "-g",
        "-UNDEBUG",  # keep assert() active even though -O3 is used
        "-std=c++14",
        "-march=westmere",
        "-fstack-protector-all",
        "-Iinclude",
        "-Wall",
        "-Wextra",
        "-Werror",
    ],
    data = glob(["testdata/*.json"]),
    linkopts = sanitize_copts + [
        "-lstdc++fs",
        "-fstack-protector-all",
        "-fsanitize-link-c++-runtime",
    ],
    deps = [
        ":string_view",
        "@gtest//:gtest_main",
    ],
)

cc_test(
name = "unittest-gcc-coverage",
srcs = glob([
Expand Down
3 changes: 2 additions & 1 deletion bazel/cJSON.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ cc_library(
name = "cJSON",
srcs = ["cJSON.c"],
hdrs = ["cJSON.h"],
copts = ['-O3' ,'-DNDEBUG', '-march=haswell'],
copts = ['-O3' ,'-DNDEBUG',],
)

3 changes: 2 additions & 1 deletion bazel/yyjson.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ cc_library(
srcs = ["src/yyjson.c"],
hdrs = ["src/yyjson.h"],
includes = ["src"],
copts = ['-O3', '-DNDEBUG', '-march=haswell', '-g'],
copts = ['-O3', '-DNDEBUG', '-g'],
)

5 changes: 5 additions & 0 deletions include/sonic/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,12 @@ class SpinLock {
break;
}
while (lock_.load(std::memory_order_relaxed)) {
#if defined(__x86_64__) || defined(_M_AMD64)
__builtin_ia32_pause();
#elif defined(__aarch64__) || defined(_M_ARM64)
asm volatile("yield");
#else
#endif
}
}
}
Expand Down
2 changes: 0 additions & 2 deletions include/sonic/dom/dynamicnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
#include "sonic/dom/type.h"
#include "sonic/error.h"
#include "sonic/internal/ftoa.h"
#include "sonic/internal/itoa.h"
#include "sonic/internal/quote.h"
#include "sonic/writebuffer.h"

namespace sonic_json {
Expand Down
10 changes: 5 additions & 5 deletions include/sonic/dom/handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <string>

#include "sonic/dom/type.h"
#include "sonic/internal/haswell.h"
#include "sonic/internal/arch/simd_base.h"
#include "sonic/string_view.h"
#include "sonic/writebuffer.h"

Expand Down Expand Up @@ -153,7 +153,7 @@ class SAXHandler {
if (pairs) {
void *mem = obj.template containerMalloc<MemberType>(pairs, *alloc_);
obj.setChildren(mem);
internal::haswell::xmemcpy<sizeof(MemberType)>(
internal::Xmemcpy<sizeof(MemberType)>(
(void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs);
} else {
obj.setChildren(nullptr);
Expand All @@ -169,7 +169,7 @@ class SAXHandler {
arr.setLength(count, kArray);
if (count) {
arr.setChildren(arr.template containerMalloc<NodeType>(count, *alloc_));
internal::haswell::xmemcpy<sizeof(NodeType)>(
internal::Xmemcpy<sizeof(NodeType)>(
(void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count);
} else {
arr.setChildren(nullptr);
Expand Down Expand Up @@ -239,7 +239,7 @@ class LazySAXHandler {
arr.setLength(count, kArray);
if (count) {
arr.setChildren(arr.template containerMalloc<NodeType>(count, *alloc_));
internal::haswell::xmemcpy<sizeof(NodeType)>(
internal::Xmemcpy<sizeof(NodeType)>(
(void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count);
stack_.Pop<NodeType>(count);
} else {
Expand All @@ -254,7 +254,7 @@ class LazySAXHandler {
if (pairs) {
void *mem = obj.template containerMalloc<MemberType>(pairs, *alloc_);
obj.setChildren(mem);
internal::haswell::xmemcpy<sizeof(MemberType)>(
internal::Xmemcpy<sizeof(MemberType)>(
(void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs);
stack_.Pop<MemberType>(pairs);
} else {
Expand Down
47 changes: 25 additions & 22 deletions include/sonic/dom/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
#include "sonic/dom/handler.h"
#include "sonic/dom/json_pointer.h"
#include "sonic/error.h"
#include "sonic/internal/arch/simd_quote.h"
#include "sonic/internal/arch/simd_skip.h"
#include "sonic/internal/arch/simd_str2int.h"
#include "sonic/internal/atof_native.h"
#include "sonic/internal/haswell.h"
#include "sonic/internal/parse_number_normal_fast.h"
#include "sonic/internal/simd_str2int.h"
#include "sonic/internal/skip.h"
#include "sonic/internal/unicode.h"
#include "sonic/internal/utils.h"
#include "sonic/writebuffer.h"

namespace sonic_json {
Expand Down Expand Up @@ -352,24 +352,27 @@ class Parser {
double_fract : {
int fract_len = FLOATING_LONGEST_DIGITS - man_nd;
if (fract_len > 0) {
uint64_t sum = internal::simd_str2int_sse(s + i, fract_len);
const uint64_t pow10[17] = {1,
10,
100,
1000,
10000,
100000,
1000000,
10000000,
100000000,
1000000000,
10000000000,
100000000000,
1000000000000,
10000000000000,
100000000000000,
1000000000000000,
10000000000000000};
uint64_t sum = internal::simd_str2int(s + i, fract_len);
const uint64_t pow10[18] = {
1,
10,
100,
1000,
10000,
100000,
1000000,
10000000,
100000000,
1000000000,
10000000000,
100000000000,
1000000000000,
10000000000000,
100000000000000,
1000000000000000,
10000000000000000,
100000000000000000,
};
man = man * pow10[fract_len] + sum;
man_nd += fract_len;
i += fract_len;
Expand Down
2 changes: 1 addition & 1 deletion include/sonic/dom/serialize.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include "sonic/dom/flags.h"
#include "sonic/dom/type.h"
#include "sonic/error.h"
#include "sonic/internal/arch/simd_quote.h"
#include "sonic/internal/ftoa.h"
#include "sonic/internal/itoa.h"
#include "sonic/internal/quote.h"
#include "sonic/writebuffer.h"

namespace sonic_json {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,21 @@

#pragma once

#include "sonic/internal/simd.h"
#include "sonic/macro.h"
#include <sonic/macro.h>

#include "simd.h"

namespace sonic_json {
namespace internal {
namespace haswell {
namespace avx2 {

using namespace simd;

// We sometimes call trailing_zero on inputs that are zero,
// but the algorithms do not end up using the returned value.
// Sadly, sanitizers are not smart enough to figure it out.

sonic_force_inline int trailing_zeroes(uint64_t input_num) {
sonic_force_inline int TrailingZeroes(uint64_t input_num) {
////////
// You might expect the next line to be equivalent to
// return (int)_tzcnt_u64(input_num);
Expand All @@ -40,7 +41,7 @@ sonic_force_inline int trailing_zeroes(uint64_t input_num) {
}

/* result might be undefined when input_num is zero */
sonic_force_inline uint64_t clear_lowest_bit(uint64_t input_num) {
sonic_force_inline uint64_t ClearLowestBit(uint64_t input_num) {
#if __BMI__
return _blsr_u64(input_num);
#else
Expand All @@ -49,21 +50,21 @@ sonic_force_inline uint64_t clear_lowest_bit(uint64_t input_num) {
}

/* result might be undefined when input_num is zero */
sonic_force_inline int leading_zeroes(uint64_t input_num) {
sonic_force_inline int LeadingZeroes(uint64_t input_num) {
return __builtin_clzll(input_num);
}

sonic_force_inline long long int count_ones(uint64_t input_num) {
sonic_force_inline long long int CountOnes(uint64_t input_num) {
return __builtin_popcountll(input_num);
}

sonic_force_inline bool add_overflow(uint64_t value1, uint64_t value2,
uint64_t* result) {
sonic_force_inline bool AddOverflow(uint64_t value1, uint64_t value2,
uint64_t* result) {
return __builtin_uaddll_overflow(
value1, value2, reinterpret_cast<unsigned long long*>(result));
}

sonic_force_inline uint64_t prefix_xor(const uint64_t bitmask) {
sonic_force_inline uint64_t PrefixXor(const uint64_t bitmask) {
// There should be no such thing with a processor supporting avx2
// but not clmul.
#if __PCLMUL__
Expand All @@ -77,17 +78,17 @@ sonic_force_inline uint64_t prefix_xor(const uint64_t bitmask) {
#endif
}

sonic_force_inline bool is_ascii(const simd8x64<uint8_t>& input) {
sonic_force_inline bool IsAscii(const simd8x64<uint8_t>& input) {
return input.reduce_or().is_ascii();
}

template <size_t ChunkSize>
sonic_force_inline void xmemcpy(void* dst_, const void* src_, size_t chunks) {
sonic_force_inline void Xmemcpy(void* dst_, const void* src_, size_t chunks) {
std::memcpy(dst_, src_, chunks * ChunkSize);
}

template <>
sonic_force_inline void xmemcpy<32>(void* dst_, const void* src_,
sonic_force_inline void Xmemcpy<32>(void* dst_, const void* src_,
size_t chunks) {
uint8_t* dst = reinterpret_cast<uint8_t*>(dst_);
const uint8_t* src = reinterpret_cast<const uint8_t*>(src_);
Expand Down Expand Up @@ -121,7 +122,7 @@ sonic_force_inline void xmemcpy<32>(void* dst_, const void* src_,
}

template <>
sonic_force_inline void xmemcpy<16>(void* dst_, const void* src_,
sonic_force_inline void Xmemcpy<16>(void* dst_, const void* src_,
size_t chunks) {
uint8_t* dst = reinterpret_cast<uint8_t*>(dst_);
const uint8_t* src = reinterpret_cast<const uint8_t*>(src_);
Expand Down Expand Up @@ -160,6 +161,6 @@ sonic_force_inline void xmemcpy<16>(void* dst_, const void* src_,
}
}

} // namespace haswell
} // namespace avx2
} // namespace internal
} // namespace sonic_json
31 changes: 31 additions & 0 deletions include/sonic/internal/arch/avx2/itoa.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "../common/x86_common/itoa.h"

namespace sonic_json {
namespace internal {
namespace avx2 {

// Make the helpers from sonic_json::internal::x86_common (presumably declared
// in ../common/x86_common/itoa.h, included above) visible under the avx2
// namespace, so callers can reach them as avx2::Utoa_8, avx2::Utoa_16 and
// avx2::UtoaSSE.
using ::sonic_json::internal::x86_common::Utoa_8;
using ::sonic_json::internal::x86_common::Utoa_16;
using ::sonic_json::internal::x86_common::UtoaSSE;

}  // namespace avx2
}  // namespace internal
}  // namespace sonic_json
Loading