Skip to content

Commit 5b8ea25

Browse files
author
Brian Vaughn
committed
Switched UTF string encoding approach to handle multibyte characters
1 parent 8464d69 commit 5b8ea25

File tree

4 files changed

+85
-53
lines changed

4 files changed

+85
-53
lines changed

packages/react-devtools-shared/src/__tests__/setupEnv.js

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,3 @@ global.process.env.DARK_MODE_DIMMED_LOG_COLOR = DARK_MODE_DIMMED_LOG_COLOR;
2424
global.process.env.LIGHT_MODE_DIMMED_WARNING_COLOR = LIGHT_MODE_DIMMED_WARNING_COLOR;
2525
global.process.env.LIGHT_MODE_DIMMED_ERROR_COLOR = LIGHT_MODE_DIMMED_ERROR_COLOR;
2626
global.process.env.LIGHT_MODE_DIMMED_LOG_COLOR = LIGHT_MODE_DIMMED_LOG_COLOR;
27-
28-
global.TextEncoder = require('util').TextEncoder;
29-
global.TextDecoder = require('util').TextDecoder;

packages/react-devtools-shared/src/__tests__/store-test.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,19 @@ describe('Store', () => {
101101
`);
102102
});
103103

104+
it('should handle multibyte character strings', () => {
105+
const Component = () => null;
106+
Component.displayName = '🟩💜🔵';
107+
108+
const container = document.createElement('div');
109+
110+
act(() => legacyRender(<Component />, container));
111+
expect(store).toMatchInlineSnapshot(`
112+
[root]
113+
<🟩💜🔵>
114+
`);
115+
});
116+
104117
describe('collapseNodesByDefault:false', () => {
105118
beforeEach(() => {
106119
store.collapseNodesByDefault = false;

packages/react-devtools-shared/src/backend/renderer.js

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,11 +1513,16 @@ export function attach(
15131513

15141514
type OperationsArray = Array<number>;
15151515

1516+
type StringTableEntry = {|
1517+
encodedString: Array<number>,
1518+
id: number,
1519+
|};
1520+
15161521
const pendingOperations: OperationsArray = [];
15171522
const pendingRealUnmountedIDs: Array<number> = [];
15181523
const pendingSimulatedUnmountedIDs: Array<number> = [];
15191524
let pendingOperationsQueue: Array<OperationsArray> | null = [];
1520-
const pendingStringTable: Map<string, number> = new Map();
1525+
const pendingStringTable: Map<string, StringTableEntry> = new Map();
15211526
let pendingStringTableLength: number = 0;
15221527
let pendingUnmountedRootID: number | null = null;
15231528

@@ -1735,13 +1740,19 @@ export function attach(
17351740
// Now fill in the string table.
17361741
// [stringTableLength, str1Length, ...str1, str2Length, ...str2, ...]
17371742
operations[i++] = pendingStringTableLength;
1738-
pendingStringTable.forEach((value, key) => {
1739-
operations[i++] = key.length;
1740-
const encodedKey = utfEncodeString(key);
1741-
for (let j = 0; j < encodedKey.length; j++) {
1742-
operations[i + j] = encodedKey[j];
1743+
pendingStringTable.forEach((entry, stringKey) => {
1744+
const encodedString = entry.encodedString;
1745+
1746+
// Don't use the string length.
1747+
// It won't work for multibyte characters (like emoji).
1748+
const length = encodedString.length;
1749+
1750+
operations[i++] = length;
1751+
for (let j = 0; j < length; j++) {
1752+
operations[i + j] = encodedString[j];
17431753
}
1744-
i += key.length;
1754+
1755+
i += length;
17451756
});
17461757

17471758
if (numUnmountIDs > 0) {
@@ -1788,21 +1799,31 @@ export function attach(
17881799
pendingStringTableLength = 0;
17891800
}
17901801

1791-
function getStringID(str: string | null): number {
1792-
if (str === null) {
1802+
function getStringID(string: string | null): number {
1803+
if (string === null) {
17931804
return 0;
17941805
}
1795-
const existingID = pendingStringTable.get(str);
1796-
if (existingID !== undefined) {
1797-
return existingID;
1798-
}
1799-
const stringID = pendingStringTable.size + 1;
1800-
pendingStringTable.set(str, stringID);
1801-
// The string table total length needs to account
1802-
// both for the string length, and for the array item
1803-
// that contains the length itself. Hence + 1.
1804-
pendingStringTableLength += str.length + 1;
1805-
return stringID;
1806+
const existingEntry = pendingStringTable.get(string);
1807+
if (existingEntry !== undefined) {
1808+
return existingEntry.id;
1809+
}
1810+
1811+
const id = pendingStringTable.size + 1;
1812+
const encodedString = utfEncodeString(string);
1813+
1814+
pendingStringTable.set(string, {
1815+
encodedString,
1816+
id,
1817+
});
1818+
1819+
// The string table total length needs to account both for the string length,
1820+
// and for the array item that contains the length itself.
1821+
//
1822+
// Don't use string length for this table.
1823+
// It won't work for multibyte characters (like emoji).
1824+
pendingStringTableLength += encodedString.length + 1;
1825+
1826+
return id;
18061827
}
18071828

18081829
function recordMount(fiber: Fiber, parentFiber: Fiber | null) {

packages/react-devtools-shared/src/utils.js

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,7 @@ const cachedDisplayNames: WeakMap<Function, string> = new WeakMap();
5656

5757
// On large trees, encoding takes significant time.
5858
// Try to reuse the already encoded strings.
59-
const encodedStringCache: LRUCache<
60-
string,
61-
Array<number> | Uint8Array,
62-
> = new LRU({
59+
const encodedStringCache: LRUCache<string, Array<number>> = new LRU({
6360
max: 1000,
6461
});
6562

@@ -128,42 +125,46 @@ export function getUID(): number {
128125
return ++uidCounter;
129126
}
130127

131-
const isTextEncoderSupported =
132-
typeof TextDecoder === 'function' && typeof TextEncoder === 'function';
133-
134128
export function utfDecodeString(array: Array<number>): string {
135-
if (isTextEncoderSupported) {
136-
// Handles multi-byte characters; use if available.
137-
return new TextDecoder().decode(new Uint8Array(array));
138-
} else {
139-
// Avoid spreading the array (e.g. String.fromCodePoint(...array))
140-
// Function arguments are first placed on the stack before the function is called
141-
// which throws a RangeError for large arrays.
142-
// See github.com/facebook/react/issues/22293
143-
let string = '';
144-
for (let i = 0; i < array.length; i++) {
145-
const char = array[i];
146-
string += String.fromCodePoint(char);
147-
}
148-
return string;
129+
// Avoid spreading the array (e.g. String.fromCodePoint(...array))
130+
// Function arguments are first placed on the stack before the function is called
131+
// which throws a RangeError for large arrays.
132+
// See github.com/facebook/react/issues/22293
133+
let string = '';
134+
for (let i = 0; i < array.length; i++) {
135+
const char = array[i];
136+
string += String.fromCodePoint(char);
149137
}
138+
return string;
150139
}
151140

152-
export function utfEncodeString(string: string): Array<number> | Uint8Array {
141+
function surrogatePairToCodePoint(
142+
charCode1: number,
143+
charCode2: number,
144+
): number {
145+
return ((charCode1 & 0x3ff) << 10) + (charCode2 & 0x3ff) + 0x10000;
146+
}
147+
148+
// Credit for this encoding approach goes to Tim Down:
149+
// https://stackoverflow.com/questions/4877326/how-can-i-tell-if-a-string-contains-multibyte-characters-in-javascript
150+
export function utfEncodeString(string: string): Array<number> {
153151
const cached = encodedStringCache.get(string);
154152
if (cached !== undefined) {
155153
return cached;
156154
}
157155

158-
let encoded;
159-
if (isTextEncoderSupported) {
160-
// Handles multi-byte characters; use if available.
161-
encoded = new TextEncoder().encode(string);
162-
} else {
163-
encoded = new Array(string.length);
164-
for (let i = 0; i < string.length; i++) {
165-
encoded[i] = string.codePointAt(i);
156+
const encoded = [];
157+
let i = 0;
158+
let charCode;
159+
while (i < string.length) {
160+
charCode = string.charCodeAt(i);
161+
// Handle multibyte unicode characters (like emoji).
162+
if ((charCode & 0xf800) === 0xd800) {
163+
encoded.push(surrogatePairToCodePoint(charCode, string.charCodeAt(++i)));
164+
} else {
165+
encoded.push(charCode);
166166
}
167+
++i;
167168
}
168169

169170
encodedStringCache.set(string, encoded);

0 commit comments

Comments
 (0)