Skip to content

Commit e933fd1

Browse files
authored
Fix: GetOnDemand forgot copying " for parseStringInplace when key is escaped string (#89)
1 parent 4250a05 commit e933fd1

File tree

5 files changed

+30
-69
lines changed

5 files changed

+30
-69
lines changed

include/sonic/internal/arch/common/base.h

Lines changed: 0 additions & 44 deletions
This file was deleted.

include/sonic/internal/arch/common/unicode_common.h

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
#pragma once
1919

20-
#include "../common/base.h"
21-
2220
namespace sonic_json {
2321
namespace internal {
2422
namespace common {
@@ -268,25 +266,19 @@ sonic_force_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
268266
return offset > 0;
269267
}
270268

271-
template <size_t BLOK_SIZE>
272-
sonic_force_inline uint64_t GetEscapedBranchless(uint64_t &prev_escaped,
273-
uint64_t backslash) {
274-
static_assert(BLOK_SIZE == 32 || BLOK_SIZE == 64,
275-
"escaped branchless block only support 32 or 64 bytes");
276-
backslash &= ~prev_escaped;
277-
uint64_t follows_escape = backslash << 1 | prev_escaped;
278-
const uint64_t even_bits = 0x5555555555555555ULL;
279-
uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape;
280-
uint64_t sequences_starting_on_even_bits;
281-
if (BLOK_SIZE == 64) {
282-
prev_escaped = AddOverflow64(odd_sequence_starts, backslash,
283-
&sequences_starting_on_even_bits);
284-
} else if (BLOK_SIZE == 32) {
285-
prev_escaped = AddOverflow32(odd_sequence_starts, backslash,
286-
&sequences_starting_on_even_bits);
287-
}
288-
uint64_t invert_mask = sequences_starting_on_even_bits << 1;
289-
return (even_bits ^ invert_mask) & follows_escape;
269+
template <size_t BLOCK_SIZE>
270+
sonic_force_inline uint64_t GetEscaped(uint64_t &prev_escaped,
271+
uint64_t backslash) {
272+
uint64_t with_prev_backslash = backslash & ~prev_escaped;
273+
static const uint64_t odd_bits = 0xAAAAAAAAAAAAAAAAULL;
274+
// see: https://github.com/simdjson/simdjson/pull/2042
275+
uint64_t escaped =
276+
(((with_prev_backslash << 1) | odd_bits) - with_prev_backslash) ^
277+
odd_bits;
278+
// & backslash to clear escaped char
279+
uint64_t escaped_with_prev = (escaped ^ (backslash | prev_escaped));
280+
prev_escaped = ((escaped & backslash) >> (BLOCK_SIZE - 1)) & 0x1ULL;
281+
return escaped_with_prev;
290282
}
291283

292284
} // namespace common

include/sonic/internal/arch/common/x86_common/skip.inc.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ sonic_force_inline uint64_t GetStringBits(const uint8_t *data,
2121
uint64_t escaped = 0;
2222
uint64_t bs_bits = v.eq('\\');
2323
if (bs_bits) {
24-
escaped = common::GetEscapedBranchless<64>(prev_escaped, bs_bits);
24+
escaped = common::GetEscaped<64>(prev_escaped, bs_bits);
2525
} else {
2626
escaped = prev_escaped;
2727
prev_escaped = 0;
@@ -72,12 +72,14 @@ sonic_force_inline int SkipString(const uint8_t *data, size_t &pos,
7272
bool found = false;
7373
while (pos + VEC_LEN <= len) {
7474
const VecUint8Type v(data + pos);
75+
76+
// bs_bits and quote_bits only has VEC_LEN bits used
7577
bs_bits = (v == '\\').to_bitmask();
7678
quote_bits = (v == '"').to_bitmask();
7779

7880
// maybe has escaped quotes
7981
if (((quote_bits - 1) & bs_bits) || prev_escaped) {
80-
escaped = common::GetEscapedBranchless<32>(prev_escaped, bs_bits);
82+
escaped = common::GetEscaped<VEC_LEN>(prev_escaped, bs_bits);
8183
// NOTE: maybe mark the normal string as escaped, example "abc":"\\",
8284
// abc will marked as escaped.
8385
found = true;

include/sonic/internal/arch/simd_skip.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,9 @@ class SkipScanner {
184184
// parse escaped key
185185
kbuf.resize(sn + 32);
186186
uint8_t *nsrc = &kbuf[0];
187-
std::memcpy(nsrc, sp, sn);
187+
188+
// parseStringInplace need `"` as the end
189+
std::memcpy(nsrc, sp, sn + 1);
188190
sn = parseStringInplace(nsrc, err);
189191
if (err) {
190192
pos = (sp - data) + (nsrc - &kbuf[0]);

tests/skip_test.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,15 @@ TEST(GetOnDemand, SuccessEscapeCharacters) {
7979
R"("\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")");
8080
TestGetOnDemand(R"({"a":"\"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\""})", {"a"},
8181
R"("\"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\"")");
82+
TestGetOnDemand(R"({"\"a\"":"\n\tHello,\nworld!\n"})", {"\"a\""},
83+
R"("\n\tHello,\nworld!\n")");
84+
TestGetOnDemand(R"({"123456789012345\"123":"sse_string",
85+
"1234567890123456789012345678901\"123":"avx2_string",
86+
"obj\n\t\\":{"name\\\\\\\\":"string\\\\"},
87+
"array\"\t\n\b\r":["\n\tHello,\nworld!\n",
88+
"{\" / \b \f \n \r \t } [景] 测试中文 😀")],
89+
"\"a\"":"\n\tHello,\nworld!\n"})",
90+
{"\"a\""}, R"("\n\tHello,\nworld!\n")");
8291
}
8392

8493
TEST(GetOnDemand, Failed) {
@@ -93,4 +102,4 @@ TEST(GetOnDemand, Failed) {
93102

94103
TestGetOnDemandFailed(R"("\")", {}, ParseResult(kParseErrorInvalidChar, 3));
95104
}
96-
} // namespace
105+
} // namespace

0 commit comments

Comments
 (0)