@@ -373,11 +373,15 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
373
373
if (!maxCharCount)
374
374
return { ProcessKind::FallbackToFSM, 0 };
375
375
376
- _scanState.next = nullptr ;
377
376
auto const chunk = std::string_view (input, static_cast <size_t >(std::distance (input, end)));
378
- auto const [cellCount, subStart, subEnd] = unicode::scan_text (_scanState, chunk, maxCharCount);
379
377
380
- if (_scanState.next == input)
378
+ _graphemeLineSegmenter.reset (chunk);
379
+ unicode::grapheme_segmentation_result result = _graphemeLineSegmenter.process (maxCharCount);
380
+ auto const cellCount = result.width ;
381
+ auto const * subStart = result.text .data ();
382
+ auto const * subEnd = subStart + result.text .size ();
383
+
384
+ if (result.text .empty ())
381
385
return { ProcessKind::FallbackToFSM, 0 };
382
386
383
387
// We do not test on cellCount>0 because the scan could contain only a ZWJ (zero width
@@ -390,10 +394,10 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
390
394
391
395
assert (cellCount <= maxCharCount);
392
396
assert (subEnd <= chunk.data () + chunk.size ());
393
- assert (_scanState .next <= chunk.data () + chunk.size ());
397
+ assert (_graphemeLineSegmenter .next () <= chunk.data () + chunk.size ());
394
398
395
399
auto const text = std::string_view { subStart, byteCount };
396
- if (_scanState. utf8 . expectedLength == 0 )
400
+ if (!_graphemeLineSegmenter. is_utf8_byte_pending () )
397
401
{
398
402
if (!text.empty ())
399
403
_eventListener.print (text, cellCount);
@@ -407,22 +411,22 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
407
411
_eventListener.execute (*input++);
408
412
}
409
413
410
- auto const count = static_cast <size_t >(std::distance (input, _scanState .next ));
414
+ auto const count = static_cast <size_t >(std::distance (input, _graphemeLineSegmenter .next ()));
411
415
return { ProcessKind::ContinueBulk, count };
412
416
}
413
417
414
418
template <typename EventListener, bool TraceStateChanges>
415
419
void Parser<EventListener, TraceStateChanges>::printUtf8Byte(char ch)
416
420
{
417
- unicode::ConvertResult const r = unicode::from_utf8 (_scanState. utf8 , ( uint8_t ) ch );
421
+ unicode::ConvertResult const r = _graphemeLineSegmenter. process_single_byte ( static_cast < uint8_t >(ch) );
418
422
if (std::holds_alternative<unicode::Incomplete>(r))
419
423
return ;
420
424
421
425
auto constexpr ReplacementCharacter = char32_t { 0xFFFD };
422
426
auto const codepoint = std::holds_alternative<unicode::Success>(r) ? std::get<unicode::Success>(r).value
423
427
: ReplacementCharacter;
424
428
_eventListener.print (codepoint);
425
- _scanState. lastCodepointHint = codepoint;
429
+ _graphemeLineSegmenter. reset_last_codepoint_hint ( codepoint) ;
426
430
}
427
431
428
432
template <typename EventListener, bool TraceStateChanges>
@@ -435,7 +439,7 @@ void Parser<EventListener, TraceStateChanges>::handle(ActionClass actionClass,
435
439
436
440
switch (action)
437
441
{
438
- case Action::GroundStart: _scanState. lastCodepointHint = 0 ; break ;
442
+ case Action::GroundStart: _graphemeLineSegmenter. reset_last_codepoint_hint () ; break ;
439
443
case Action::Clear: _eventListener.clear (); break ;
440
444
case Action::CollectLeader: _eventListener.collectLeader (ch); break ;
441
445
case Action::Collect: _eventListener.collect (ch); break ;
0 commit comments