Skip to content

Commit d5a7a41

Browse files
Adapt to latest changes in libunicode and its new grapheme_line_segmenter API
Signed-off-by: Christian Parpart <[email protected]>
1 parent 521b140 commit d5a7a41

File tree

9 files changed

+39
-22
lines changed

9 files changed

+39
-22
lines changed

cmake/presets/common.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
"hidden": true,
2828
"cacheVariables": {
2929
"CMAKE_BUILD_TYPE": "Debug",
30-
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}"
30+
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}",
31+
"LIBUNICODE_TABLEGEN_FASTBUILD": "ON"
3132
}
3233
}
3334
]

scripts/install-deps.ps1

+3-3
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ class ThirdParty {
1414
$ThirdParties =
1515
@(
1616
[ThirdParty]@{
17-
Folder = "libunicode-23d7b30166a914b10526bb8fe7a469a9610c07dc";
18-
Archive = "libunicode-23d7b30166a914b10526bb8fe7a469a9610c07dc.zip";
19-
URI = "https://github.com/contour-terminal/libunicode/archive/23d7b30166a914b10526bb8fe7a469a9610c07dc.zip";
17+
Folder = "libunicode-dabfea48f7fd2a8bf6ae19e37581de5c127c607f";
18+
Archive = "libunicode-dabfea48f7fd2a8bf6ae19e37581de5c127c607f.zip";
19+
URI = "https://github.com/contour-terminal/libunicode/archive/dabfea48f7fd2a8bf6ae19e37581de5c127c607f.zip";
2020
Macro = "libunicode"
2121
};
2222
[ThirdParty]@{

scripts/install-deps.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ fetch_and_unpack_boxed()
121121
fetch_and_unpack_libunicode()
122122
{
123123
if test x$LIBUNICODE_SRC_DIR = x; then
124-
local libunicode_git_sha="23d7b30166a914b10526bb8fe7a469a9610c07dc"
124+
local libunicode_git_sha="dabfea48f7fd2a8bf6ae19e37581de5c127c607f"
125125
fetch_and_unpack \
126126
libunicode-$libunicode_git_sha \
127127
libunicode-$libunicode_git_sha.tar.gz \

src/vtbackend/CellUtil.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,12 @@ CRISPY_REQUIRES(CellConcept<Cell>)
9494
if (!AllowWidthChange)
9595
return 0;
9696

97-
auto const newWidth = [codepoint]() {
97+
auto const newWidth = [codepoint]() -> int {
9898
switch (codepoint)
9999
{
100100
case 0xFE0E: return 1;
101101
case 0xFE0F: return 2;
102-
default: return unicode::width(codepoint);
102+
default: return static_cast<int>(unicode::width(codepoint));
103103
}
104104
}();
105105

src/vtbackend/cell/CompactCell.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <libunicode/convert.h>
1818
#include <libunicode/width.h>
1919

20+
#include <algorithm>
2021
#include <memory>
2122
#include <string>
2223

@@ -312,7 +313,7 @@ inline void CompactCell::setCharacter(char32_t codepoint) noexcept
312313
_extra->imageFragment = {};
313314
}
314315
if (codepoint)
315-
setWidth(static_cast<uint8_t>(std::max(unicode::width(codepoint), 1)));
316+
setWidth(static_cast<uint8_t>(std::max(unicode::width(codepoint), 1u)));
316317
else
317318
setWidth(1);
318319
}

src/vtbackend/cell/SimpleCell.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ inline void SimpleCell::setCharacter(char32_t codepoint)
158158
if (codepoint)
159159
{
160160
_codepoints.push_back(codepoint);
161-
setWidth(static_cast<uint8_t>(std::max(unicode::width(codepoint), 1)));
161+
setWidth(static_cast<uint8_t>(std::max(unicode::width(codepoint), 1u)));
162162
}
163163
else
164164
setWidth(1);

src/vtparser/Parser-impl.h

+13-9
Original file line numberDiff line numberDiff line change
@@ -373,11 +373,15 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
373373
if (!maxCharCount)
374374
return { ProcessKind::FallbackToFSM, 0 };
375375

376-
_scanState.next = nullptr;
377376
auto const chunk = std::string_view(input, static_cast<size_t>(std::distance(input, end)));
378-
auto const [cellCount, subStart, subEnd] = unicode::scan_text(_scanState, chunk, maxCharCount);
379377

380-
if (_scanState.next == input)
378+
_graphemeLineSegmenter.reset(chunk);
379+
unicode::grapheme_segmentation_result result = _graphemeLineSegmenter.process(maxCharCount);
380+
auto const cellCount = result.width;
381+
auto const* subStart = result.text.data();
382+
auto const* subEnd = subStart + result.text.size();
383+
384+
if (result.text.empty())
381385
return { ProcessKind::FallbackToFSM, 0 };
382386

383387
// We do not test on cellCount>0 because the scan could contain only a ZWJ (zero width
@@ -390,10 +394,10 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
390394

391395
assert(cellCount <= maxCharCount);
392396
assert(subEnd <= chunk.data() + chunk.size());
393-
assert(_scanState.next <= chunk.data() + chunk.size());
397+
assert(_graphemeLineSegmenter.next() <= chunk.data() + chunk.size());
394398

395399
auto const text = std::string_view { subStart, byteCount };
396-
if (_scanState.utf8.expectedLength == 0)
400+
if (!_graphemeLineSegmenter.is_utf8_byte_pending())
397401
{
398402
if (!text.empty())
399403
_eventListener.print(text, cellCount);
@@ -407,22 +411,22 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
407411
_eventListener.execute(*input++);
408412
}
409413

410-
auto const count = static_cast<size_t>(std::distance(input, _scanState.next));
414+
auto const count = static_cast<size_t>(std::distance(input, _graphemeLineSegmenter.next()));
411415
return { ProcessKind::ContinueBulk, count };
412416
}
413417

414418
template <typename EventListener, bool TraceStateChanges>
415419
void Parser<EventListener, TraceStateChanges>::printUtf8Byte(char ch)
416420
{
417-
unicode::ConvertResult const r = unicode::from_utf8(_scanState.utf8, (uint8_t) ch);
421+
unicode::ConvertResult const r = _graphemeLineSegmenter.process_single_byte(static_cast<uint8_t>(ch));
418422
if (std::holds_alternative<unicode::Incomplete>(r))
419423
return;
420424

421425
auto constexpr ReplacementCharacter = char32_t { 0xFFFD };
422426
auto const codepoint = std::holds_alternative<unicode::Success>(r) ? std::get<unicode::Success>(r).value
423427
: ReplacementCharacter;
424428
_eventListener.print(codepoint);
425-
_scanState.lastCodepointHint = codepoint;
429+
_graphemeLineSegmenter.reset_last_codepoint_hint(codepoint);
426430
}
427431

428432
template <typename EventListener, bool TraceStateChanges>
@@ -435,7 +439,7 @@ void Parser<EventListener, TraceStateChanges>::handle(ActionClass actionClass,
435439

436440
switch (action)
437441
{
438-
case Action::GroundStart: _scanState.lastCodepointHint = 0; break;
442+
case Action::GroundStart: _graphemeLineSegmenter.reset_last_codepoint_hint(); break;
439443
case Action::Clear: _eventListener.clear(); break;
440444
case Action::CollectLeader: _eventListener.collectLeader(ch); break;
441445
case Action::Collect: _eventListener.collect(ch); break;

src/vtparser/Parser.h

+6-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#pragma once
33

44
#include <libunicode/convert.h>
5-
#include <libunicode/scan.h>
5+
#include <libunicode/grapheme_line_segmenter.h>
66

77
#include <fmt/core.h>
88

@@ -531,7 +531,10 @@ class Parser
531531

532532
[[nodiscard]] State state() const noexcept { return _state; }
533533

534-
[[nodiscard]] char32_t precedingGraphicCharacter() const noexcept { return _scanState.lastCodepointHint; }
534+
[[nodiscard]] char32_t precedingGraphicCharacter() const noexcept
535+
{
536+
return _graphemeLineSegmenter.last_codepoint_hint();
537+
}
535538

536539
void printUtf8Byte(char ch);
537540

@@ -553,7 +556,7 @@ class Parser
553556
//
554557
State _state = State::Ground;
555558
EventListener& _eventListener;
556-
unicode::scan_state _scanState {};
559+
unicode::grapheme_line_segmenter<void> _graphemeLineSegmenter;
557560
};
558561

559562
/// @returns parsed tuple with OSC code and offset to first data parameter byte.

src/vtparser/Parser_test.cpp

+9-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#include <vtparser/Parser.h>
33
#include <vtparser/ParserEvents.h>
44

5+
#include <crispy/escape.h>
6+
57
#include <libunicode/convert.h>
68

79
#include <catch2/catch_test_macros.hpp>
@@ -17,9 +19,15 @@ class MockParserEvents final: public vtparser::NullParserEvents
1719
size_t maxCharCount = 80;
1820

1921
void error(string_view const& msg) override { INFO(fmt::format("Parser error received. {}", msg)); }
20-
void print(char32_t ch) override { text += unicode::convert_to<char>(ch); }
22+
23+
void print(char32_t ch) override
24+
{
25+
UNSCOPED_INFO(fmt::format("print: U+{:X}", (unsigned) ch));
26+
text += unicode::convert_to<char>(ch);
27+
}
2128
size_t print(std::string_view s, size_t cellCount) override
2229
{
30+
UNSCOPED_INFO(fmt::format("print: {}", crispy::escape(s)));
2331
text += s;
2432
return maxCharCount -= cellCount;
2533
}

0 commit comments

Comments
 (0)