Skip to content

Commit f20cd3a

Browse files
authored
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make our TextBuffer more performant and allow us to improve our Unicode correctness in the future, by getting rid of our dependence on `OutputCellIterator`. In the future we can then replace the simple UTF-16 code point iterator with a proper grapheme cluster iterator.

While my focus is technically on Unicode correctness, the ~4x VT throughput increase in OpenConsole is pretty nice too.

This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`). They're based on functions that align the cursor to the start/end of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able to bulk-copy already measured strings without re-measuring them.

Related to #8000

## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper wide glyph reflow at the end of a row ✅
* This produces "a ん" where only "a" has a white background:
  ```sh
  printf '\e7こん\e8\x1b[107ma\x1b[m\n'
  ```
* This produces "abん":
  ```sh
  stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
  ```
* This produces "xy" at the end of the line:
  ```sh
  stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
  ```
* This produces red whitespace followed by "こ " in the default background color at the end of the line, and "ん" on the next line:
  ```sh
  printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
  ```
1 parent f5e9e8e commit f20cd3a

File tree

18 files changed

+566
-217
lines changed

18 files changed

+566
-217
lines changed

.github/actions/spelling/expect/alphabet.txt

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ BBBBBBBB
1818
BBBBBCCC
1919
BBBBCCCCC
2020
BBGGRR
21+
efg
2122
EFG
2223
EFGh
2324
QQQQQQQQQQABCDEFGHIJ

.github/actions/spelling/expect/expect.txt

-2
Original file line numberDiff line numberDiff line change
@@ -2280,8 +2280,6 @@ xunit
22802280
xutr
22812281
XVIRTUALSCREEN
22822282
XWalk
2283-
xwwyzz
2284-
xxyyzz
22852283
yact
22862284
YCast
22872285
YCENTER

src/buffer/out/Row.cpp

+270-65
Large diffs are not rendered by default.

src/buffer/out/Row.hpp

+77-3
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ Revision History:
2020

2121
#pragma once
2222

23-
#include <span>
24-
2523
#include <til/rle.h>
2624

2725
#include "LineRendition.hpp"
@@ -37,6 +35,28 @@ enum class DelimiterClass
3735
RegularChar
3836
};
3937

38+
struct RowWriteState
39+
{
40+
// The text you want to write into the given ROW. When ReplaceText() returns,
41+
// this is updated to remove all text from the beginning that was successfully written.
42+
std::wstring_view text; // IN/OUT
43+
// The column at which to start writing.
44+
til::CoordType columnBegin = 0; // IN
45+
// The first column which should not be written to anymore.
46+
til::CoordType columnLimit = 0; // IN
47+
48+
// The column 1 past the last glyph that was successfully written into the row. If you need to call
49+
// ReplaceAttributes() to colorize the written range, etc., this is the columnEnd parameter you want.
50+
// If you want to continue writing where you left off, this is also the next columnBegin parameter.
51+
til::CoordType columnEnd = 0; // OUT
52+
// The first column that got modified by this write operation. In case that the first glyph we write overwrites
53+
// the trailing half of a wide glyph, one leading space is written and this value will be 1 less than columnBegin.
54+
til::CoordType columnBeginDirty = 0; // OUT
55+
// This is 1 past the last column that was modified and will be 1 past columnEnd if we overwrote
56+
// the leading half of a wide glyph and had to fill the trailing half with whitespace.
57+
til::CoordType columnEndDirty = 0; // OUT
58+
};
59+
4060
class ROW final
4161
{
4262
public:
@@ -62,16 +82,23 @@ class ROW final
6282
void Resize(wchar_t* charsBuffer, uint16_t* charOffsetsBuffer, uint16_t rowWidth, const TextAttribute& fillAttribute);
6383
void TransferAttributes(const til::small_rle<TextAttribute, uint16_t, 1>& attr, til::CoordType newWidth);
6484

85+
til::CoordType NavigateToPrevious(til::CoordType column) const noexcept;
86+
til::CoordType NavigateToNext(til::CoordType column) const noexcept;
87+
6588
void ClearCell(til::CoordType column);
6689
OutputCellIterator WriteCells(OutputCellIterator it, til::CoordType columnBegin, std::optional<bool> wrap = std::nullopt, std::optional<til::CoordType> limitRight = std::nullopt);
6790
bool SetAttrToEnd(til::CoordType columnBegin, TextAttribute attr);
6891
void ReplaceAttributes(til::CoordType beginIndex, til::CoordType endIndex, const TextAttribute& newAttr);
6992
void ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars);
93+
void ReplaceText(RowWriteState& state);
94+
til::CoordType CopyRangeFrom(til::CoordType columnBegin, til::CoordType columnLimit, const ROW& other, til::CoordType& otherBegin, til::CoordType otherLimit);
7095

96+
til::small_rle<TextAttribute, uint16_t, 1>& Attributes() noexcept;
7197
const til::small_rle<TextAttribute, uint16_t, 1>& Attributes() const noexcept;
7298
TextAttribute GetAttrByColumn(til::CoordType column) const;
7399
std::vector<uint16_t> GetHyperlinks() const;
74100
uint16_t size() const noexcept;
101+
til::CoordType LineRenditionColumns() const noexcept;
75102
til::CoordType MeasureLeft() const noexcept;
76103
til::CoordType MeasureRight() const noexcept;
77104
bool ContainsText() const noexcept;
@@ -89,6 +116,50 @@ class ROW final
89116
#endif
90117

91118
private:
119+
// WriteHelper exists because other forms of abstracting this functionality away (like templates with lambdas)
120+
// were only very poorly optimized by MSVC as it failed to inline the templates.
121+
struct WriteHelper
122+
{
123+
explicit WriteHelper(ROW& row, til::CoordType columnBegin, til::CoordType columnLimit, const std::wstring_view& chars) noexcept;
124+
bool IsValid() const noexcept;
125+
void ReplaceCharacters(til::CoordType width) noexcept;
126+
void ReplaceText() noexcept;
127+
void CopyRangeFrom(const std::span<const uint16_t>& charOffsets) noexcept;
128+
void Finish();
129+
130+
// Parent pointer.
131+
ROW& row;
132+
// The text given by the caller.
133+
const std::wstring_view& chars;
134+
135+
// This is the same as the columnBegin parameter for ReplaceText(), etc.,
136+
// but clamped to a valid range via _clampedColumnInclusive.
137+
uint16_t colBeg;
138+
// This is the same as the columnLimit parameter for ReplaceText(), etc.,
139+
// but clamped to a valid range via _clampedColumnInclusive.
140+
uint16_t colLimit;
141+
142+
// The column 1 past the last glyph that was successfully written into the row. If you need to call
143+
// ReplaceAttributes() to colorize the written range, etc., this is the columnEnd parameter you want.
144+
// If you want to continue writing where you left off, this is also the next columnBegin parameter.
145+
uint16_t colEnd;
146+
// The first column that got modified by this write operation. In case that the first glyph we write overwrites
147+
// the trailing half of a wide glyph, leadingSpaces will be 1 and this value will be 1 less than colBeg.
148+
uint16_t colBegDirty;
149+
// Similar to colBegDirty, this is 1 past the last column that was modified and will be 1 past colEnd if
150+
// we overwrote the leading half of a wide glyph and had to fill the trailing half with whitespace.
151+
uint16_t colEndDirty;
152+
// The offset in ROW::chars at which we start writing the contents of WriteHelper::chars.
153+
uint16_t chBeg;
154+
// The offset at which we start writing leadingSpaces-many whitespaces.
155+
uint16_t chBegDirty;
156+
// The same as `colBeg - colBegDirty`. This is the amount of whitespace
157+
// we write at chBegDirty, before the actual WriteHelper::chars content.
158+
uint16_t leadingSpaces;
159+
// The amount of characters copied from WriteHelper::chars.
160+
size_t charsConsumed;
161+
};
162+
92163
// To simplify the detection of wide glyphs, we don't just store the simple character offset as described
93164
// for _charOffsets. Instead we use the most significant bit to indicate whether any column is the
94165
// trailing half of a wide glyph. This simplifies many implementation details via _uncheckedIsTrailer.
@@ -102,13 +173,16 @@ class ROW final
102173
template<typename T>
103174
constexpr uint16_t _clampedColumnInclusive(T v) const noexcept;
104175

176+
uint16_t _adjustBackward(uint16_t column) const noexcept;
177+
uint16_t _adjustForward(uint16_t column) const noexcept;
178+
105179
wchar_t _uncheckedChar(size_t off) const noexcept;
106180
uint16_t _charSize() const noexcept;
107181
uint16_t _uncheckedCharOffset(size_t col) const noexcept;
108182
bool _uncheckedIsTrailer(size_t col) const noexcept;
109183

110184
void _init() noexcept;
111-
void _resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd, size_t chExtEndNew);
185+
void _resizeChars(uint16_t colEndDirty, uint16_t chBegDirty, size_t chEndDirty, uint16_t chEndDirtyOld);
112186

113187
// These fields are a bit "wasteful", but it makes all this a bit more robust against
114188
// programming errors during initial development (which is when this comment was written).

src/buffer/out/precomp.h

+4-38
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,7 @@
1-
/*++
2-
Copyright (c) Microsoft Corporation
3-
Licensed under the MIT license.
4-
5-
Module Name:
6-
- precomp.h
7-
8-
Abstract:
9-
- Contains external headers to include in the precompile phase of console build process.
10-
- Avoid including internal project headers. Instead include them only in the classes that need them (helps with test project building).
11-
--*/
12-
13-
// stdafx.h : include file for standard system include files,
14-
// or project specific include files that are used frequently, but
15-
// are changed infrequently
16-
//
17-
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT license.
183
#pragma once
194

20-
// clang-format off
21-
22-
// This includes support libraries from the CRT, STL, WIL, and GSL
23-
#include "LibraryIncludes.h"
24-
25-
#pragma warning(push)
26-
#ifndef WIN32_LEAN_AND_MEAN
27-
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
28-
#define NOMCX
29-
#define NOHELP
30-
#define NOCOMM
31-
#endif
32-
33-
// Windows Header Files:
34-
#include <windows.h>
35-
#include <intsafe.h>
36-
37-
// private dependencies
38-
#include "../inc/unicode.hpp"
39-
#pragma warning(pop)
5+
#include <LibraryIncludes.h>
406

41-
// clang-format on
7+
#include <unicode.hpp>

src/buffer/out/textBuffer.cpp

+26
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,32 @@ bool TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute
376376
return fSuccess;
377377
}
378378

379+
void TextBuffer::ConsumeGrapheme(std::wstring_view& chars) noexcept
380+
{
381+
// This function is supposed to mirror the behavior of ROW::Write, when it reads characters off of `chars`.
382+
// (I know that a UTF-16 code point is not a grapheme, but that's what we're working towards.)
383+
chars = til::utf16_pop(chars);
384+
}
385+
386+
// This function is intended for writing regular "lines" of text and only the `state.text` and `state.columnBegin`
387+
// fields are being used, whereas `state.columnLimit` is automatically overwritten by the line width of the given row.
388+
// This allows this function to automatically set the wrap-forced field of the row once the write reaches `state.columnLimit`.
389+
// Nothing is returned: to tell whether the cursor should be moved to the next line, compare `state.columnEnd` against `state.columnLimit`.
390+
void TextBuffer::WriteLine(til::CoordType row, bool wrapAtEOL, const TextAttribute& attributes, RowWriteState& state)
391+
{
392+
auto& r = GetRowByOffset(row);
393+
394+
r.ReplaceText(state);
395+
r.ReplaceAttributes(state.columnBegin, state.columnEnd, attributes);
396+
397+
if (state.columnEnd >= state.columnLimit)
398+
{
399+
r.SetWrapForced(wrapAtEOL);
400+
}
401+
402+
TriggerRedraw(Viewport::FromExclusive({ state.columnBeginDirty, row, state.columnEndDirty, row + 1 }));
403+
}
404+
379405
// Routine Description:
380406
// - Writes cells to the output buffer. Writes at the cursor.
381407
// Arguments:

src/buffer/out/textBuffer.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ class TextBuffer final
8989
TextBufferTextIterator GetTextDataAt(const til::point at, const Microsoft::Console::Types::Viewport limit) const;
9090

9191
// Text insertion functions
92+
static void ConsumeGrapheme(std::wstring_view& chars) noexcept;
93+
void WriteLine(til::CoordType row, bool wrapAtEOL, const TextAttribute& attributes, RowWriteState& state);
94+
9295
OutputCellIterator Write(const OutputCellIterator givenIt);
9396

9497
OutputCellIterator Write(const OutputCellIterator givenIt,

src/cascadia/LocalTests_SettingsModel/pch.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Author(s):
1818
// Manually include til after we include Windows.Foundation to give it winrt superpowers
1919
#define BLOCK_TIL
2020
// This includes support libraries from the CRT, STL, WIL, and GSL
21-
#include "LibraryIncludes.h"
21+
#include <LibraryIncludes.h>
2222
// This is inexplicable, but for whatever reason, cppwinrt conflicts with the
2323
// SDK definition of this function, so the only fix is to undef it.
2424
// from WinBase.h
@@ -28,8 +28,9 @@ Author(s):
2828
#endif
2929

3030
#include <wil/cppwinrt.h>
31-
#include <unknwn.h>
31+
#include <Unknwn.h>
3232
#include <hstring.h>
33+
#include <shellapi.h>
3334

3435
#include <WexTestClass.h>
3536
#include <json.h>

src/host/ut_host/ScreenBufferTests.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -5249,6 +5249,16 @@ void ScreenBufferTests::SetAutoWrapMode()
52495249
// Content should be clamped to the line width, overwriting the last char.
52505250
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abf", attributes));
52515251
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
5252+
// Writing a wide glyph into the last 2 columns and overwriting it with a narrow one.
5253+
cursor.SetPosition({ 80 - 3, startLine });
5254+
stateMachine.ProcessString(L"a\U0001F604b");
5255+
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"a b", attributes));
5256+
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
5257+
// Writing a wide glyph into the last column and overwriting it with a narrow one.
5258+
cursor.SetPosition({ 80 - 3, startLine });
5259+
stateMachine.ProcessString(L"ab\U0001F604c");
5260+
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abc", attributes));
5261+
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
52525262

52535263
Log::Comment(L"When DECAWM is set, output is wrapped again.");
52545264
stateMachine.ProcessString(L"\x1b[?7h");

src/host/ut_host/TextBufferTests.cpp

+82
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ class TextBufferTests
147147

148148
TEST_METHOD(TestBurrito);
149149
TEST_METHOD(TestOverwriteChars);
150+
TEST_METHOD(TestRowReplaceText);
150151

151152
TEST_METHOD(TestAppendRTFText);
152153

@@ -2046,6 +2047,87 @@ void TextBufferTests::TestOverwriteChars()
20462047
#undef complex1
20472048
}
20482049

2050+
void TextBufferTests::TestRowReplaceText()
2051+
{
2052+
static constexpr til::size bufferSize{ 10, 3 };
2053+
static constexpr UINT cursorSize = 12;
2054+
const TextAttribute attr{ 0x7f };
2055+
TextBuffer buffer{ bufferSize, attr, cursorSize, false, _renderer };
2056+
auto& row = buffer.GetRowByOffset(0);
2057+
2058+
#define complex L"\U0001F41B"
2059+
2060+
struct Test
2061+
{
2062+
const wchar_t* description;
2063+
struct
2064+
{
2065+
std::wstring_view text;
2066+
til::CoordType columnBegin = 0;
2067+
til::CoordType columnLimit = 0;
2068+
} input;
2069+
struct
2070+
{
2071+
std::wstring_view text;
2072+
til::CoordType columnEnd = 0;
2073+
til::CoordType columnBeginDirty = 0;
2074+
til::CoordType columnEndDirty = 0;
2075+
} expected;
2076+
std::wstring_view expectedRow;
2077+
};
2078+
2079+
static constexpr std::array tests{
2080+
Test{
2081+
L"Not enough space -> early exit",
2082+
{ complex, 2, 2 },
2083+
{ complex, 2, 2, 2 },
2084+
L" ",
2085+
},
2086+
Test{
2087+
L"Exact right amount of space",
2088+
{ complex, 2, 4 },
2089+
{ L"", 4, 2, 4 },
2090+
L" " complex L" ",
2091+
},
2092+
Test{
2093+
L"Not enough space -> columnEnd = columnLimit",
2094+
{ complex complex, 0, 3 },
2095+
{ complex, 3, 0, 4 },
2096+
complex L" ",
2097+
},
2098+
Test{
2099+
L"Too much to fit into the row",
2100+
{ complex L"b" complex L"c" complex L"abcd", 0, til::CoordTypeMax },
2101+
{ L"cd", 10, 0, 10 },
2102+
complex L"b" complex L"c" complex L"ab",
2103+
},
2104+
Test{
2105+
L"Overwriting wide glyphs dirties both cells, but leaves columnEnd at the end of the text",
2106+
{ L"efg", 1, til::CoordTypeMax },
2107+
{ L"", 4, 0, 5 },
2108+
L" efg c" complex L"ab",
2109+
},
2110+
};
2111+
2112+
for (const auto& t : tests)
2113+
{
2114+
Log::Comment(t.description);
2115+
RowWriteState actual{
2116+
.text = t.input.text,
2117+
.columnBegin = t.input.columnBegin,
2118+
.columnLimit = t.input.columnLimit,
2119+
};
2120+
row.ReplaceText(actual);
2121+
VERIFY_ARE_EQUAL(t.expected.text, actual.text);
2122+
VERIFY_ARE_EQUAL(t.expected.columnEnd, actual.columnEnd);
2123+
VERIFY_ARE_EQUAL(t.expected.columnBeginDirty, actual.columnBeginDirty);
2124+
VERIFY_ARE_EQUAL(t.expected.columnEndDirty, actual.columnEndDirty);
2125+
VERIFY_ARE_EQUAL(t.expectedRow, row.GetText());
2126+
}
2127+
2128+
#undef complex
2129+
}
2130+
20492131
void TextBufferTests::TestAppendRTFText()
20502132
{
20512133
{

0 commit comments

Comments
 (0)