Remove IsGlyphFullWidth from InputBuffer (#17680)

lhecker · web-flow · commit 2c452e0fd6f2 · 2024-08-07T22:19:11.000+02:00
In several places the old conhost codebase appears to assume that any
wide glyph is represented by two codepoints. This is probably an
artifact of the ASCII/DBCS split that conhost used to have.
When conhost got merged into a single UCS2-aware application,
this artifact was apparently never properly resolved.

To my knowledge there are at least two places where this assumption
exists: The clipboard code which translates non-wide non-ascii
characters to Alt-numpad sequences, and this code. Both are wrong.
This is because in a Unicode-context there's no correlation between
the number of codepoints and the width of the glyph, even with UCS2.

In a post-UCS2-world the correct check is for surrogate pairs,
as they must be avoided for the same reason DBCS were avoided.

One could consider this a breaking change of the API,
as this can now result in repeat counts &gt;1 for wide glyphs.
If someone complained about this change in behavior, I'd probably
not change it back, as narrow complex Unicode characters exist too.
diff --git a/src/host/inputBuffer.cpp b/src/host/inputBuffer.cpp
@@ -10,7 +10,6 @@
 #include "misc.h"
 #include "stream.h"
 #include "../interactivity/inc/ServiceLocator.hpp"
-#include "../types/inc/GlyphWidth.hpp"
 
 #define INPUT_BUFFER_DEFAULT_INPUT_MODE (ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT | ENABLE_MOUSE_INPUT)
 
@@ -804,13 +803,9 @@ bool InputBuffer::_CoalesceEvent(const INPUT_RECORD& inEvent) noexcept
             (lastKey.wVirtualScanCode == inKey.wVirtualScanCode || WI_IsFlagSet(inKey.dwControlKeyState, NLS_IME_CONVERSION)) &&
             lastKey.uChar.UnicodeChar == inKey.uChar.UnicodeChar &&
             lastKey.dwControlKeyState == inKey.dwControlKeyState &&
-            // TODO:GH#8000 This behavior is an import from old conhost v1 and has been broken for decades.
-            // This is probably the outdated idea that any wide glyph is being represented by 2 characters (DBCS) and likely
-            // resulted from conhost originally being split into a ASCII/OEM and a DBCS variant with preprocessor flags.
-            // You can't update the repeat count of such a A,B pair, because they're stored as A,A,B,B (down-down, up-up).
-            // I believe the proper approach is to store pairs of characters as pairs, update their combined
-            // repeat count and only when they're being read de-coalesce them into their alternating form.
-            !IsGlyphFullWidth(inKey.uChar.UnicodeChar))
+            // A single repeat count cannot represent two INPUT_RECORDs simultaneously,
+            // and so it cannot represent a surrogate pair either.
+            !til::is_surrogate(inKey.uChar.UnicodeChar))
         {
             lastKey.wRepeatCount += inKey.wRepeatCount;
             return true;
diff --git a/src/host/ut_host/InputBufferTests.cpp b/src/host/ut_host/InputBufferTests.cpp
@@ -219,22 +219,19 @@ class InputBufferTests
         }
     }
 
-    TEST_METHOD(InputBufferDoesNotCoalesceFullWidthChars)
+    TEST_METHOD(InputBufferDoesNotCoalesceSurrogatePairs)
     {
         InputBuffer inputBuffer;
-        WCHAR hiraganaA = 0x3042; // U+3042 hiragana A
-        auto record = MakeKeyEvent(true, 1, hiraganaA, 0, hiraganaA, 0);
 
-        // send a bunch of identical events
-        inputBuffer.Flush();
-        for (size_t i = 0; i < RECORD_INSERT_COUNT; ++i)
-        {
-            VERIFY_IS_GREATER_THAN(inputBuffer.Write(record), 0u);
-            VERIFY_ARE_EQUAL(inputBuffer._storage.back(), record);
-        }
+        // U+1F44D thumbs up emoji
+        inputBuffer.Write(MakeKeyEvent(true, 1, 0xD83D, 0, 0xD83D, 0));
+        inputBuffer.Write(MakeKeyEvent(true, 1, 0xDC4D, 0, 0xDC4D, 0));
+
+        // Should not coalesce despite otherwise matching perfectly.
+        inputBuffer.Write(MakeKeyEvent(true, 1, 0xDC4D, 0, 0xDC4D, 0));
 
         // The events shouldn't be coalesced
-        VERIFY_ARE_EQUAL(inputBuffer.GetNumberOfReadyEvents(), RECORD_INSERT_COUNT);
+        VERIFY_ARE_EQUAL(3u, inputBuffer.GetNumberOfReadyEvents());
     }
 
     TEST_METHOD(CanFlushAllOutput)