Skip to content

Commit 2e41568

Browse files
author
Brian Vaughn
authored
DevTools encoding supports multibyte characters (e.g. "🟩") (#22424)
Changes our text encoding approach to properly support multibyte characters (such as emoji, which JavaScript strings store as UTF-16 surrogate pairs) by combining each surrogate pair into a single code point, following the algorithm credited to Tim Down in utils.js. Based on benchmarking, this new approach is roughly equivalent in terms of performance (sometimes slightly faster, sometimes slightly slower). I also considered using TextEncoder/TextDecoder for this, but it was much slower (~85%).
1 parent c88fb49 commit 2e41568

File tree

3 files changed

+77
-23
lines changed

3 files changed

+77
-23
lines changed

‎packages/react-devtools-shared/src/__tests__/store-test.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,19 @@ describe('Store', () => {
101101
`);
102102
});
103103

104+
it('should handle multibyte character strings', () => {
105+
const Component = () => null;
106+
Component.displayName = '🟩💜🔵';
107+
108+
const container = document.createElement('div');
109+
110+
act(() => legacyRender(<Component />, container));
111+
expect(store).toMatchInlineSnapshot(`
112+
[root]
113+
<🟩💜🔵>
114+
`);
115+
});
116+
104117
describe('collapseNodesByDefault:false', () => {
105118
beforeEach(() => {
106119
store.collapseNodesByDefault = false;

‎packages/react-devtools-shared/src/backend/renderer.js

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1514,11 +1514,16 @@ export function attach(
15141514

15151515
type OperationsArray = Array<number>;
15161516

1517+
type StringTableEntry = {|
1518+
encodedString: Array<number>,
1519+
id: number,
1520+
|};
1521+
15171522
const pendingOperations: OperationsArray = [];
15181523
const pendingRealUnmountedIDs: Array<number> = [];
15191524
const pendingSimulatedUnmountedIDs: Array<number> = [];
15201525
let pendingOperationsQueue: Array<OperationsArray> | null = [];
1521-
const pendingStringTable: Map<string, number> = new Map();
1526+
const pendingStringTable: Map<string, StringTableEntry> = new Map();
15221527
let pendingStringTableLength: number = 0;
15231528
let pendingUnmountedRootID: number | null = null;
15241529

@@ -1736,13 +1741,19 @@ export function attach(
17361741
// Now fill in the string table.
17371742
// [stringTableLength, str1Length, ...str1, str2Length, ...str2, ...]
17381743
operations[i++] = pendingStringTableLength;
1739-
pendingStringTable.forEach((value, key) => {
1740-
operations[i++] = key.length;
1741-
const encodedKey = utfEncodeString(key);
1742-
for (let j = 0; j < encodedKey.length; j++) {
1743-
operations[i + j] = encodedKey[j];
1744+
pendingStringTable.forEach((entry, stringKey) => {
1745+
const encodedString = entry.encodedString;
1746+
1747+
// Don't use the string length.
1748+
// It won't work for multibyte characters (like emoji).
1749+
const length = encodedString.length;
1750+
1751+
operations[i++] = length;
1752+
for (let j = 0; j < length; j++) {
1753+
operations[i + j] = encodedString[j];
17441754
}
1745-
i += key.length;
1755+
1756+
i += length;
17461757
});
17471758

17481759
if (numUnmountIDs > 0) {
@@ -1789,21 +1800,31 @@ export function attach(
17891800
pendingStringTableLength = 0;
17901801
}
17911802

1792-
function getStringID(str: string | null): number {
1793-
if (str === null) {
1803+
function getStringID(string: string | null): number {
1804+
if (string === null) {
17941805
return 0;
17951806
}
1796-
const existingID = pendingStringTable.get(str);
1797-
if (existingID !== undefined) {
1798-
return existingID;
1799-
}
1800-
const stringID = pendingStringTable.size + 1;
1801-
pendingStringTable.set(str, stringID);
1802-
// The string table total length needs to account
1803-
// both for the string length, and for the array item
1804-
// that contains the length itself. Hence + 1.
1805-
pendingStringTableLength += str.length + 1;
1806-
return stringID;
1807+
const existingEntry = pendingStringTable.get(string);
1808+
if (existingEntry !== undefined) {
1809+
return existingEntry.id;
1810+
}
1811+
1812+
const id = pendingStringTable.size + 1;
1813+
const encodedString = utfEncodeString(string);
1814+
1815+
pendingStringTable.set(string, {
1816+
encodedString,
1817+
id,
1818+
});
1819+
1820+
// The string table total length needs to account both for the string length,
1821+
// and for the array item that contains the length itself.
1822+
//
1823+
// Don't use string length for this table.
1824+
// It won't work for multibyte characters (like emoji).
1825+
pendingStringTableLength += encodedString.length + 1;
1826+
1827+
return id;
18071828
}
18081829

18091830
function recordMount(fiber: Fiber, parentFiber: Fiber | null) {

‎packages/react-devtools-shared/src/utils.js

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,17 +138,37 @@ export function utfDecodeString(array: Array<number>): string {
138138
return string;
139139
}
140140

141+
function surrogatePairToCodePoint(
142+
charCode1: number,
143+
charCode2: number,
144+
): number {
145+
return ((charCode1 & 0x3ff) << 10) + (charCode2 & 0x3ff) + 0x10000;
146+
}
147+
148+
// Credit for this encoding approach goes to Tim Down:
149+
// https://stackoverflow.com/questions/4877326/how-can-i-tell-if-a-string-contains-multibyte-characters-in-javascript
141150
export function utfEncodeString(string: string): Array<number> {
142151
const cached = encodedStringCache.get(string);
143152
if (cached !== undefined) {
144153
return cached;
145154
}
146155

147-
const encoded = new Array(string.length);
148-
for (let i = 0; i < string.length; i++) {
149-
encoded[i] = string.codePointAt(i);
156+
const encoded = [];
157+
let i = 0;
158+
let charCode;
159+
while (i < string.length) {
160+
charCode = string.charCodeAt(i);
161+
// Handle multibyte unicode characters (like emoji).
162+
if ((charCode & 0xf800) === 0xd800) {
163+
encoded.push(surrogatePairToCodePoint(charCode, string.charCodeAt(++i)));
164+
} else {
165+
encoded.push(charCode);
166+
}
167+
++i;
150168
}
169+
151170
encodedStringCache.set(string, encoded);
171+
152172
return encoded;
153173
}
154174

0 commit comments

Comments
 (0)