@@ -53,6 +53,17 @@ _STL_DISABLE_CLANG_WARNINGS
53
53
#endif // ^^^ !defined(_DEBUG) ^^^
54
54
#endif // !defined(_ENHANCED_REGEX_VISUALIZER)
55
55
56
// Prototypes of the helpers that back regex_traits::transform_primary. Only declarations appear here (no bodies
// are visible in this header section), and they are only declared when RTTI is enabled (_CPPRTTI), matching the
// _CPPRTTI guards around their callers.
+ #ifdef _CPPRTTI
57
+ extern "C" {
58
// char variant. Per the SAL annotations: output is written to [_First1, _Last1), input is read from
// [_First2, _Last2), the _Collvec pointer is optional, and the return value is the readable size of the output.
// The caller in this header treats static_cast<size_t>(-1) as failure and retries with a larger buffer when the
// returned count exceeds the buffer size it passed in.
+ _STD size_t __stdcall __std_regex_transform_primary_char(
59
+ _Out_writes_(_Last1 - _First1) _Post_readable_size_(return) char* _First1, char* _Last1,
60
+ _In_reads_(_Last2 - _First2) const char* _First2, const char* _Last2, _In_opt_ const _Collvec*) noexcept;
61
// wchar_t variant; same parameter layout and annotations as the char variant, with wchar_t buffers.
+ _STD size_t __stdcall __std_regex_transform_primary_wchar_t(
62
+ _Out_writes_(_Last1 - _First1) _Post_readable_size_(return) wchar_t* _First1, wchar_t* _Last1,
63
+ _In_reads_(_Last2 - _First2) const wchar_t* _First2, const wchar_t* _Last2, _In_opt_ const _Collvec*) noexcept;
64
+ } // extern "C"
65
+ #endif // ^^^ defined(_CPPRTTI) ^^^
66
+
56
67
_STD_BEGIN
57
68
58
69
enum _Meta_type : int { // meta character representations for parser
@@ -267,6 +278,20 @@ struct _Regex_traits_base { // base of all regular expression traits
267
278
using char_class_type = ctype_base::mask;
268
279
};
269
280
281
+ #ifdef _CPPRTTI
282
// char overload: forwards every argument unchanged to __std_regex_transform_primary_char and returns its result.
// Having one overloaded name lets code templated on the element type pick the matching helper through overload
// resolution on the buffer pointer type.
+ inline size_t _Regex_transform_primary(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) char* _First1,
283
+ char* _Last1, _In_reads_(_Last2 - _First2) const char* _First2, const char* _Last2,
284
+ _In_opt_ const _Locinfo::_Collvec* _Vector) noexcept {
285
+ return __std_regex_transform_primary_char(_First1, _Last1, _First2, _Last2, _Vector);
286
+ }
287
+
288
// wchar_t overload: forwards every argument unchanged to __std_regex_transform_primary_wchar_t and returns its
// result. Selected by overload resolution when the buffers are wchar_t pointers.
+ inline size_t _Regex_transform_primary(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) wchar_t* _First1,
289
+ wchar_t* _Last1, _In_reads_(_Last2 - _First2) const wchar_t* _First2, const wchar_t* _Last2,
290
+ _In_opt_ const _Locinfo::_Collvec* _Vector) noexcept {
291
+ return __std_regex_transform_primary_wchar_t(_First1, _Last1, _First2, _Last2, _Vector);
292
+ }
293
+ #endif // ^^^ defined(_CPPRTTI) ^^^
294
+
270
295
template <class _Elem>
271
296
class _Regex_traits : public _Regex_traits_base { // base class for regular expression traits
272
297
public:
@@ -312,13 +337,38 @@ public:
312
337
// Produces the transformed key for the character range [_First, _Last).
// The result stays empty when the range is empty, when the collate facet's dynamic type is neither
// collate<_Elem> nor collate_byname<_Elem>, when the helper reports an error, or when RTTI is disabled.
// Lines marked '-' below are the implementation removed by this change; lines marked '+' replace it.
string_type transform_primary(_FwdIt _First, _FwdIt _Last) const {
312
337
// apply locale-specific case-insensitive transformation
313
338
string_type _Res;
314
339
315
-
316
// (removed) previous approach: lower-case a temporary copy via the ctype facet, then call collate::transform
- if (_First != _Last) { // non-empty string, transform it
317
- vector<_Elem> _Temp(_First, _Last);
318
-
319
- _Getctype()->tolower(_Temp.data(), _Temp.data() + _Temp.size());
320
- _Res = _Getcoll()->transform(_Temp.data(), _Temp.data() + _Temp.size());
340
+ #ifdef _CPPRTTI
341
+ if (_First != _Last) {
342
+ const collate<_Elem>* _Coll = _Getcoll();
343
// typeid is applied to the dereferenced facet so the comparisons below see the facet's actual type;
// this use of typeid is why the whole branch is guarded by _CPPRTTI
+ const auto& _Coll_type = typeid(*_Coll);
344
+ // TRANSITION, ABI: GH-5394: locale creates collate objects of type collate, not collate_byname.
345
+ // Depending on the resolution of LWG-2338, comparison to typeid(collate) might also become
346
+ // required by the standard.
347
+ if (_Coll_type == typeid(collate_byname<_Elem>) || _Coll_type == typeid(collate<_Elem>)) {
348
+ // non-empty string with known collate facet, transform it
349
// copy the input into contiguous storage so the helper can be given raw pointers
+ const string_type _Src(_First, _Last);
350
+ const auto _Src_first = _Src.data();
351
+ const auto _Src_last = _Src_first + _Src.size();
352
+
353
// first attempt uses an output buffer as large as the source; each pass grows _Res to the count the
// helper reported, and the loop stops once that count fits in the buffer already provided
+ size_t _Count = _Src.size();
354
+ while (_Res.size() < _Count) {
355
+ _Res.resize(_Count);
356
+ _Count = _STD _Regex_transform_primary(
357
+ &_Res[0], &_Res[0] + _Count, _Src_first, _Src_last, &_Coll->_Coll);
358
+
359
+ if (_Count == static_cast<size_t>(-1)) {
360
+ // return empty string in case of error
361
+ _Count = 0;
362
+ break;
363
+ }
364
+ }
365
// shrink to the final count reported by the helper (0 after an error)
+ _Res.resize(_Count);
366
+ }
321
367
}
368
+ #else // ^^^ defined(_CPPRTTI) / !defined(_CPPRTTI) vvv
369
// without RTTI no transformation is attempted; the casts only mark the parameters as intentionally unused
+ (void) _First;
370
+ (void) _Last;
371
+ #endif // ^^^ !defined(_CPPRTTI) ^^^
322
372
return _Res;
323
373
}
324
374
@@ -4211,26 +4261,30 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ex_class2(
4211
4261
4212
4262
_Elem* const _Coll_elem_first = &_Coll_elem.front();
4213
4263
const _Elem* const _Coll_elem_last = _Coll_elem_first + _Size;
4264
+
4265
+ if (_Size == 1 && _End_arg == _Meta_dot) {
4266
+ // process single-element collating elements like individual characters
4267
+ _Val = *_Coll_elem_first;
4268
+ return _Prs_chr;
4269
+ }
4270
+
4271
+ if (_Flags & regex_constants::icase) {
4272
+ for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) {
4273
+ *_Current = _Traits.translate_nocase(*_Current);
4274
+ }
4275
+ } else if (_Flags & regex_constants::collate) {
4276
+ for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) {
4277
+ *_Current = _Traits.translate(*_Current);
4278
+ }
4279
+ }
4280
+
4214
4281
if (_End_arg == _Meta_equal) { // process equivalence
4215
4282
_Nfa._Add_equiv2(_Coll_elem_first, _Coll_elem_last);
4216
4283
return _Prs_set;
4217
4284
} else { // process collating element
4218
- if (_Size == 1) {
4219
- _Val = *_Coll_elem_first;
4220
- return _Prs_chr;
4221
- }
4222
4285
4223
4286
// Character ranges with multi-character bounds cannot be represented in NFA nodes yet (see GH-5391).
4224
4287
// Provisionally treat multi-character collating elements as character sets.
4225
- if (_Flags & regex_constants::icase) {
4226
- for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) {
4227
- *_Current = _Traits.translate_nocase(*_Current);
4228
- }
4229
- } else if (_Flags & regex_constants::collate) {
4230
- for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) {
4231
- *_Current = _Traits.translate(*_Current);
4232
- }
4233
- }
4234
4288
_Nfa._Add_coll2(_Coll_elem_first, _Coll_elem_last);
4235
4289
return _Prs_set;
4236
4290
}
0 commit comments