Skip to content

Add an efficient text stream write function #14821

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
388 changes: 323 additions & 65 deletions src/buffer/out/Row.cpp

Large diffs are not rendered by default.

103 changes: 100 additions & 3 deletions src/buffer/out/Row.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ Revision History:

#pragma once

#include <span>

#include <til/rle.h>

#include "LineRendition.hpp"
Expand All @@ -37,6 +35,53 @@ enum class DelimiterClass
RegularChar
};

struct RowTextIterator
{
RowTextIterator(std::span<const wchar_t> chars, std::span<const uint16_t> charOffsets, uint16_t offset) noexcept;

bool operator==(const RowTextIterator& other) const noexcept;
RowTextIterator& operator++() noexcept;
const RowTextIterator& operator*() const noexcept;

std::wstring_view Text() const noexcept;
til::CoordType Cols() const noexcept;
DbcsAttribute DbcsAttr() const noexcept;

private:
uint16_t _uncheckedCharOffset(size_t col) const noexcept;
bool _uncheckedIsTrailer(size_t col) const noexcept;

// To simplify the detection of wide glyphs, we don't just store the simple character offset as described
// for _charOffsets. Instead we use the most significant bit to indicate whether any column is the
// trailing half of a wide glyph. This simplifies many implementation details via _uncheckedIsTrailer.
static constexpr uint16_t CharOffsetsTrailer = 0x8000;
static constexpr uint16_t CharOffsetsMask = 0x7fff;

std::span<const wchar_t> _chars;
std::span<const uint16_t> _charOffsets;
uint16_t _beg;
uint16_t _end;
};

struct RowWriteState
{
// The text you want to write into the given ROW. When ReplaceText() returns,
// this is updated to remove all text from the beginning that was successfully written.
std::wstring_view text; // IN/OUT
// The column at which to start writing.
til::CoordType columnBegin = 0; // IN
// The first column which should not be written to anymore.
til::CoordType columnLimit = 0; // IN

// The column 1 past the last glyph that was successfully written into the row. If you need to call
// ReplaceAttributes() to colorize the written range, etc., this is the columnEnd parameter you want.
// If you want to continue writing where you left off, this is also the next columnBegin parameter.
til::CoordType columnEnd = 0; // OUT
// This is 1 past the last column that was modified and will be 1 past columnEnd if we overwrote
// the leading half of a wide glyph and had to fill the trailing half with whitespace.
til::CoordType columnEndDirty = 0; // OUT
};

class ROW final
{
public:
Expand All @@ -57,17 +102,25 @@ class ROW final
bool WasDoubleBytePadded() const noexcept;
void SetLineRendition(const LineRendition lineRendition) noexcept;
LineRendition GetLineRendition() const noexcept;
RowTextIterator Begin() const noexcept;
RowTextIterator End() const noexcept;

void Reset(const TextAttribute& attr);
void Resize(wchar_t* charsBuffer, uint16_t* charOffsetsBuffer, uint16_t rowWidth, const TextAttribute& fillAttribute);
void TransferAttributes(const til::small_rle<TextAttribute, uint16_t, 1>& attr, til::CoordType newWidth);

til::CoordType NavigateToPrevious(til::CoordType column) const noexcept;
til::CoordType NavigateToNext(til::CoordType column) const noexcept;

void ClearCell(til::CoordType column);
OutputCellIterator WriteCells(OutputCellIterator it, til::CoordType columnBegin, std::optional<bool> wrap = std::nullopt, std::optional<til::CoordType> limitRight = std::nullopt);
bool SetAttrToEnd(til::CoordType columnBegin, TextAttribute attr);
void ReplaceAttributes(til::CoordType beginIndex, til::CoordType endIndex, const TextAttribute& newAttr);
void ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars);
void ReplaceText(RowWriteState& state);
til::CoordType CopyRangeFrom(til::CoordType columnBegin, til::CoordType columnLimit, const ROW& other, til::CoordType& otherBegin, til::CoordType otherLimit);

til::small_rle<TextAttribute, uint16_t, 1>& Attributes() noexcept;
const til::small_rle<TextAttribute, uint16_t, 1>& Attributes() const noexcept;
TextAttribute GetAttrByColumn(til::CoordType column) const;
std::vector<uint16_t> GetHyperlinks() const;
Expand All @@ -89,6 +142,47 @@ class ROW final
#endif

private:
// WriteHelper exists because other forms of abstracting this functionality away (like templates with lambdas)
// where only very poorly optimized by MSVC as it failed to inline the templates.
struct WriteHelper
{
explicit WriteHelper(ROW& row, til::CoordType columnBegin, til::CoordType columnLimit, const std::wstring_view& chars) noexcept;
bool IsValid() const noexcept;
void ReplaceCharacters(til::CoordType width) noexcept;
void ReplaceText() noexcept;
void CopyRangeFrom(const std::span<const uint16_t>& charOffsets) noexcept;
void Finish();

// Parent pointer.
ROW& row;
// The text given by the caller.
const std::wstring_view& chars;

// This is the same as the columnBegin parameter for ReplaceText(), etc.,
// but clamped to a valid range via _clampedColumnInclusive.
uint16_t colBeg;
// This is the same as the columnLimit parameter for ReplaceText(), etc.,
// but clamped to a valid range via _clampedColumnInclusive.
uint16_t colLimit;

// The column 1 past the last glyph that was successfully written into the row. If you need to call
// ReplaceAttributes() to colorize the written range, etc., this is the columnEnd parameter you want.
// If you want to continue writing where you left off, this is also the next columnBegin parameter.
uint16_t colEnd;
// The first column that got modified by this write operation. In case that the first glyph we write, overwrites
// the trailing half of a wide glyph, leadingSpaces will be 1 and this value will be 1 less than colBeg.
uint16_t colBegDirty;
// Similar to dirtyBeg, this is 1 past the last column that was modified and will be 1 past colEnd if
// we overwrote the leading half of a wide glyph and had to fill the trailing half with whitespace.
uint16_t colEndDirty;
// The offset in ROW::chars at which we start writing the contents of WriteHelper::chars.
uint16_t chBeg;
// The offset at which we start writing leadingSpaces-many whitespaces.
uint16_t chBegDirty;
// The amount of characters copied from WriteHelper::chars.
size_t charsConsumed;
};

// To simplify the detection of wide glyphs, we don't just store the simple character offset as described
// for _charOffsets. Instead we use the most significant bit to indicate whether any column is the
// trailing half of a wide glyph. This simplifies many implementation details via _uncheckedIsTrailer.
Expand All @@ -102,13 +196,16 @@ class ROW final
template<typename T>
constexpr uint16_t _clampedColumnInclusive(T v) const noexcept;

uint16_t _adjustBackward(uint16_t column) const noexcept;
uint16_t _adjustForward(uint16_t column) const noexcept;

wchar_t _uncheckedChar(size_t off) const noexcept;
uint16_t _charSize() const noexcept;
uint16_t _uncheckedCharOffset(size_t col) const noexcept;
bool _uncheckedIsTrailer(size_t col) const noexcept;

void _init() noexcept;
void _resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd, size_t chExtEndNew);
void _resizeChars(uint16_t colEndDirty, uint16_t chBegDirty, size_t chEndDirty, uint16_t chEndDirtyOld);

// These fields are a bit "wasteful", but it makes all this a bit more robust against
// programming errors during initial development (which is when this comment was written).
Expand Down
42 changes: 4 additions & 38 deletions src/buffer/out/precomp.h
Original file line number Diff line number Diff line change
@@ -1,41 +1,7 @@
/*++
Copyright (c) Microsoft Corporation
Licensed under the MIT license.

Module Name:
- precomp.h

Abstract:
- Contains external headers to include in the precompile phase of console build process.
- Avoid including internal project headers. Instead include them only in the classes that need them (helps with test project building).
--*/

// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#pragma once

// clang-format off

// This includes support libraries from the CRT, STL, WIL, and GSL
#include "LibraryIncludes.h"

#pragma warning(push)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#define NOMCX
#define NOHELP
#define NOCOMM
#endif

// Windows Header Files:
#include <windows.h>
#include <intsafe.h>

// private dependencies
#include "../inc/unicode.hpp"
#pragma warning(pop)
#include <LibraryIncludes.h>

// clang-format on
#include <unicode.hpp>
18 changes: 18 additions & 0 deletions src/buffer/out/textBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,24 @@ bool TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute
return fSuccess;
}

void TextBuffer::ConsumeGrapheme(std::wstring_view& chars) noexcept
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kinda weird that this is a static on textbuffer when it just does a til::utf16_pop, but presumably this does more in the future?

Copy link
Member Author

@lhecker lhecker Mar 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future I'll replace this function with some ICU code that actually advances the string by an entire grapheme cluster. So, if you have something like å ("a˚") it'll advance by 2 code points and not just 1 like it does now.

It's possible to just put the ICU code into til and use it throughout the code directly, just like we do it right now with the UTF-16 helpers. But from now on, I'd like to avoid doing that, even if it means writing such static methods, because I'd like to keep everything string handling related as close and tight as possible in the future. I think OutputCellIterator had the same intention originally and was well meant, but it suffers from being a leaky abstraction. Lots of code is now built around an implicit assumption that OutputCellIterator will always advance by exactly 1 or 2 columns. Using the til UTF-16 helpers directly elsewhere in our code would have a similar effect in my opinion, because it would equally leak (and potentially incorrectly leak) any assumptions about how TextBuffer handles incoming text under normal circumstances.

{
// This function is supposed to mirror the behavior of ROW::Write, when it reads characters off of `chars`.
// (I know that a UTF-16 code point is not a grapheme, but that's what we're working towards.)
chars = til::utf16_pop(chars);
}

void TextBuffer::Write(til::CoordType row, bool wrapAtEOL, const TextAttribute& attributes, RowWriteState& state)
{
auto& r = GetRowByOffset(row);

r.ReplaceText(state);
r.ReplaceAttributes(state.columnBegin, state.columnEnd, attributes);
r.SetWrapForced(wrapAtEOL && state.columnEndDirty >= r.size());

TriggerRedraw(Viewport::FromExclusive({ state.columnBegin, row, state.columnEnd, row + 1 }));
}

// Routine Description:
// - Writes cells to the output buffer. Writes at the cursor.
// Arguments:
Expand Down
3 changes: 3 additions & 0 deletions src/buffer/out/textBuffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ class TextBuffer final
TextBufferTextIterator GetTextDataAt(const til::point at, const Microsoft::Console::Types::Viewport limit) const;

// Text insertion functions
static void ConsumeGrapheme(std::wstring_view& chars) noexcept;
void Write(til::CoordType row, bool wrapAtEOL, const TextAttribute& attributes, RowWriteState& state);

OutputCellIterator Write(const OutputCellIterator givenIt);

OutputCellIterator Write(const OutputCellIterator givenIt,
Expand Down
5 changes: 3 additions & 2 deletions src/cascadia/LocalTests_SettingsModel/pch.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Author(s):
// Manually include til after we include Windows.Foundation to give it winrt superpowers
#define BLOCK_TIL
// This includes support libraries from the CRT, STL, WIL, and GSL
#include "LibraryIncludes.h"
#include <LibraryIncludes.h>
// This is inexplicable, but for whatever reason, cppwinrt conflicts with the
// SDK definition of this function, so the only fix is to undef it.
// from WinBase.h
Expand All @@ -28,8 +28,9 @@ Author(s):
#endif

#include <wil/cppwinrt.h>
#include <unknwn.h>
#include <Unknwn.h>
#include <hstring.h>
#include <shellapi.h>

#include <WexTestClass.h>
#include <json.h>
Expand Down
10 changes: 10 additions & 0 deletions src/host/ut_host/ScreenBufferTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5248,6 +5248,16 @@ void ScreenBufferTests::SetAutoWrapMode()
// Content should be clamped to the line width, overwriting the last char.
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abf", attributes));
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
// Writing a wide glyph into the last 2 columns and overwriting it with a narrow one.
cursor.SetPosition({ 80 - 3, startLine });
stateMachine.ProcessString(L"a\U0001F604b");
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"a b", attributes));
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
// Writing a wide glyph into the last column and overwriting it with a narrow one.
cursor.SetPosition({ 80 - 3, startLine });
stateMachine.ProcessString(L"ab\U0001F604c");
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abc", attributes));
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());

Log::Comment(L"When DECAWM is set, output is wrapped again.");
stateMachine.ProcessString(L"\x1b[?7h");
Expand Down
53 changes: 53 additions & 0 deletions src/host/ut_host/TextBufferTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ class TextBufferTests

TEST_METHOD(TestBurrito);
TEST_METHOD(TestOverwriteChars);
TEST_METHOD(TestRowReplaceText);

TEST_METHOD(TestAppendRTFText);

Expand Down Expand Up @@ -2046,6 +2047,58 @@ void TextBufferTests::TestOverwriteChars()
#undef complex1
}

void TextBufferTests::TestRowReplaceText()
{
til::size bufferSize{ 10, 3 };
UINT cursorSize = 12;
TextAttribute attr{ 0x7f };
TextBuffer buffer{ bufferSize, attr, cursorSize, false, _renderer };
auto& row = buffer.GetRowByOffset(0);
RowWriteState state;

#define complex L"\U0001F41B"

// Not enough space -> early exit
state.text = complex;
state.columnBegin = 2;
state.columnLimit = 2;
row.ReplaceText(state);
VERIFY_ARE_EQUAL(2, state.columnEndDirty);
VERIFY_ARE_EQUAL(complex, state.text);
VERIFY_ARE_EQUAL(L" ", row.GetText());

// Writing with the exact right amount of space
state.text = complex;
state.columnBegin = 2;
state.columnLimit = 4;
row.ReplaceText(state);
VERIFY_ARE_EQUAL(4, state.columnEndDirty);
VERIFY_ARE_EQUAL(L"", state.text);
VERIFY_ARE_EQUAL(L" " complex L" ", row.GetText());

// Overwrite a wide character, but with not enough space left
state.text = complex complex;
state.columnBegin = 0;
state.columnLimit = 3;
row.ReplaceText(state);
// It's not quite clear what Write() should return in that case,
// so this simply asserts on what it happens to return right now.
VERIFY_ARE_EQUAL(4, state.columnEndDirty);
VERIFY_ARE_EQUAL(complex, state.text);
VERIFY_ARE_EQUAL(complex L" ", row.GetText());

// Various text, too much to fit into the row
state.text = L"a" complex L"b" complex L"c" complex L"foo";
state.columnBegin = 1;
state.columnLimit = til::CoordTypeMax;
row.ReplaceText(state);
VERIFY_ARE_EQUAL(10, state.columnEndDirty);
VERIFY_ARE_EQUAL(L"foo", state.text);
VERIFY_ARE_EQUAL(L" a" complex L"b" complex L"c" complex, row.GetText());

#undef complex
}

void TextBufferTests::TestAppendRTFText()
{
{
Expand Down
18 changes: 9 additions & 9 deletions src/inc/LibraryIncludes.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

// Block minwindef.h min/max macros to prevent <algorithm> conflict
#define NOMINMAX
// Exclude rarely-used stuff from Windows headers
#define WIN32_LEAN_AND_MEAN
Comment on lines +20 to +21
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This prevents the inclusion of commdlg.h in the BufferOut project, which causes a conflict with ROW::ReplaceText (ReplaceText is a macro). I wish we could somehow disable all these TCHAR macros in the /um/ headers.


#include <algorithm>
#include <atomic>
Expand Down Expand Up @@ -52,21 +54,19 @@
#include <vector>

// WIL
#include <wil/Common.h>
#include <wil/Result.h>
#include <wil/nt_result_macros.h>
#include <wil/resource.h>
#include <wil/wistd_memory.h>
#include <wil/stl.h>
#include <wil/com.h>
#include <wil/stl.h>
#include <wil/filesystem.h>
#include <wil/win32_helpers.h>
// Due to the use of RESOURCE_SUPPRESS_STL in result.h, we need to include resource.h first, which happens
// implicitly through the includes above. If RESOURCE_SUPPRESS_STL is gone, the order doesn't matter anymore.
#include <wil/result.h>
#include <wil/nt_result_macros.h>

// GSL
// Block GSL Multi Span include because it both has C++17 deprecated iterators
// and uses the C-namespaced "max" which conflicts with Windows definitions.
#define GSL_MULTI_SPAN_H
#include <gsl/gsl>
#include <gsl/gsl_util>
#include <gsl/pointers>

// CppCoreCheck
#include <CppCoreCheck/Warnings.h>
Expand Down
Loading