Skip to content

Commit 842c988

Browse files
1 parent 90424b8 commit 842c988

File tree

11 files changed

+289
-31
lines changed

11 files changed

+289
-31
lines changed

stl/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ set(IMPLIB_SOURCES
213213
${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp
214214
${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp
215215
${CMAKE_CURRENT_LIST_DIR}/src/print.cpp
216+
${CMAKE_CURRENT_LIST_DIR}/src/regex.cpp
216217
${CMAKE_CURRENT_LIST_DIR}/src/sharedmutex.cpp
217218
${CMAKE_CURRENT_LIST_DIR}/src/stacktrace.cpp
218219
${CMAKE_CURRENT_LIST_DIR}/src/syserror_import_lib.cpp

stl/inc/locale

+5
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ inline size_t __CRTDECL _LStrxfrm(_Out_writes_(_Last1 - _First1) _Post_readable_
9494
}
9595
#endif // defined(_CRTBLD)
9696

97+
template <class _Elem>
98+
class _Regex_traits;
99+
97100
_EXPORT_STD template <class _Elem>
98101
class collate : public locale::facet { // facet for ordering sequences of elements
99102
public:
@@ -189,6 +192,8 @@ protected:
189192

190193
private:
191194
_Locinfo::_Collvec _Coll; // used by _LStrcoll and _XStrxfrm
195+
196+
friend _Regex_traits<_Elem>;
192197
};
193198

194199
#ifdef __clang__

stl/inc/regex

+73-19
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,17 @@ _STL_DISABLE_CLANG_WARNINGS
5353
#endif // ^^^ !defined(_DEBUG) ^^^
5454
#endif // !defined(_ENHANCED_REGEX_VISUALIZER)
5555

56+
// Declarations of the separately compiled helpers that regex_traits::transform_primary() forwards to
// (through the _Regex_transform_primary overloads). The definitions live in stl/src/regex.cpp.
// Each function writes a sort key for [_First2, _Last2) into [_First1, _Last1) and returns the number of
// elements the key needs, or static_cast<size_t>(-1) on failure. The last parameter is the collation
// vector of the collate facet; it may be null.
// They are only declared when RTTI is available, matching the _CPPRTTI guard around their only caller.
#ifdef _CPPRTTI
57+
extern "C" {
58+
_STD size_t __stdcall __std_regex_transform_primary_char(
59+
_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) char* _First1, char* _Last1,
60+
_In_reads_(_Last2 - _First2) const char* _First2, const char* _Last2, _In_opt_ const _Collvec*) noexcept;
61+
_STD size_t __stdcall __std_regex_transform_primary_wchar_t(
62+
_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) wchar_t* _First1, wchar_t* _Last1,
63+
_In_reads_(_Last2 - _First2) const wchar_t* _First2, const wchar_t* _Last2, _In_opt_ const _Collvec*) noexcept;
64+
} // extern "C"
65+
#endif // ^^^ defined(_CPPRTTI) ^^^
66+
5667
_STD_BEGIN
5768

5869
enum _Meta_type : int { // meta character representations for parser
@@ -267,6 +278,20 @@ struct _Regex_traits_base { // base of all regular expression traits
267278
using char_class_type = ctype_base::mask;
268279
};
269280

281+
#ifdef _CPPRTTI
282+
// char overload: lets the _Elem-generic code in _Regex_traits pick the right helper by overload resolution.
// Forwards all arguments unchanged to __std_regex_transform_primary_char and returns its result.
inline size_t _Regex_transform_primary(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) char* _First1,
283+
char* _Last1, _In_reads_(_Last2 - _First2) const char* _First2, const char* _Last2,
284+
_In_opt_ const _Locinfo::_Collvec* _Vector) noexcept {
285+
return __std_regex_transform_primary_char(_First1, _Last1, _First2, _Last2, _Vector);
286+
}
287+
288+
// wchar_t overload: lets the _Elem-generic code in _Regex_traits pick the right helper by overload resolution.
// Forwards all arguments unchanged to __std_regex_transform_primary_wchar_t and returns its result.
inline size_t _Regex_transform_primary(_Out_writes_(_Last1 - _First1) _Post_readable_size_(return) wchar_t* _First1,
289+
wchar_t* _Last1, _In_reads_(_Last2 - _First2) const wchar_t* _First2, const wchar_t* _Last2,
290+
_In_opt_ const _Locinfo::_Collvec* _Vector) noexcept {
291+
return __std_regex_transform_primary_wchar_t(_First1, _Last1, _First2, _Last2, _Vector);
292+
}
293+
#endif // ^^^ defined(_CPPRTTI) ^^^
294+
270295
template <class _Elem>
271296
class _Regex_traits : public _Regex_traits_base { // base class for regular expression traits
272297
public:
@@ -312,13 +337,38 @@ public:
312337
string_type transform_primary(_FwdIt _First, _FwdIt _Last) const {
313338
// apply locale-specific case-insensitive transformation
314339
string_type _Res;
315-
316-
if (_First != _Last) { // non-empty string, transform it
317-
vector<_Elem> _Temp(_First, _Last);
318-
319-
_Getctype()->tolower(_Temp.data(), _Temp.data() + _Temp.size());
320-
_Res = _Getcoll()->transform(_Temp.data(), _Temp.data() + _Temp.size());
340+
#ifdef _CPPRTTI
341+
if (_First != _Last) {
342+
const collate<_Elem>* _Coll = _Getcoll();
343+
const auto& _Coll_type = typeid(*_Coll);
344+
// TRANSITION, ABI: GH-5394: locale creates collate objects of type collate, not collate_byname.
345+
// Depending on the resolution of LWG-2338, comparison to typeid(collate) might also become
346+
// required by the standard.
347+
if (_Coll_type == typeid(collate_byname<_Elem>) || _Coll_type == typeid(collate<_Elem>)) {
348+
// non-empty string with known collate facet, transform it
349+
const string_type _Src(_First, _Last);
350+
const auto _Src_first = _Src.data();
351+
const auto _Src_last = _Src_first + _Src.size();
352+
353+
size_t _Count = _Src.size();
354+
while (_Res.size() < _Count) {
355+
_Res.resize(_Count);
356+
_Count = _STD _Regex_transform_primary(
357+
&_Res[0], &_Res[0] + _Count, _Src_first, _Src_last, &_Coll->_Coll);
358+
359+
if (_Count == static_cast<size_t>(-1)) {
360+
// return empty string in case of error
361+
_Count = 0;
362+
break;
363+
}
364+
}
365+
_Res.resize(_Count);
366+
}
321367
}
368+
#else // ^^^ defined(_CPPRTTI) / !defined(_CPPRTTI) vvv
369+
(void) _First;
370+
(void) _Last;
371+
#endif // ^^^ !defined(_CPPRTTI) ^^^
322372
return _Res;
323373
}
324374

@@ -4211,26 +4261,30 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ex_class2(
42114261

42124262
_Elem* const _Coll_elem_first = &_Coll_elem.front();
42134263
const _Elem* const _Coll_elem_last = _Coll_elem_first + _Size;
4264+
4265+
if (_Size == 1 && _End_arg == _Meta_dot) {
4266+
// process single-element collating elements like individual characters
4267+
_Val = *_Coll_elem_first;
4268+
return _Prs_chr;
4269+
}
4270+
4271+
if (_Flags & regex_constants::icase) {
4272+
for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) {
4273+
*_Current = _Traits.translate_nocase(*_Current);
4274+
}
4275+
} else if (_Flags & regex_constants::collate) {
4276+
for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) {
4277+
*_Current = _Traits.translate(*_Current);
4278+
}
4279+
}
4280+
42144281
if (_End_arg == _Meta_equal) { // process equivalence
42154282
_Nfa._Add_equiv2(_Coll_elem_first, _Coll_elem_last);
42164283
return _Prs_set;
42174284
} else { // process collating element
4218-
if (_Size == 1) {
4219-
_Val = *_Coll_elem_first;
4220-
return _Prs_chr;
4221-
}
42224285

42234286
// Character ranges with multi-character bounds cannot be represented in NFA nodes yet (see GH-5391).
42244287
// Provisionally treat multi-character collating elements as character sets.
4225-
if (_Flags & regex_constants::icase) {
4226-
for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) {
4227-
*_Current = _Traits.translate_nocase(*_Current);
4228-
}
4229-
} else if (_Flags & regex_constants::collate) {
4230-
for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) {
4231-
*_Current = _Traits.translate(*_Current);
4232-
}
4233-
}
42344288
_Nfa._Add_coll2(_Coll_elem_first, _Coll_elem_last);
42354289
return _Prs_set;
42364290
}

stl/inc/yvals.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
// Copyright (c) Microsoft Corporation.
44
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
// This header is used to compile the import library (via locale0_implib.cpp => locale0.cpp => xfacet => yvals.h).
6+
// This header is used to compile the import library
7+
// (via locale0_implib.cpp => locale0.cpp => xfacet => yvals.h and regex.cpp => awint.hpp => yvals.h).
78
// MAJOR LIMITATIONS apply to what can be included here!
89
// Before editing this file, read: /docs/import_library.md
910

stl/msbuild/stl_base/stl.files.settings.targets

+1
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
158158
$(CrtRoot)\github\stl\src\locale0_implib.cpp;
159159
$(CrtRoot)\github\stl\src\nothrow.cpp;
160160
$(CrtRoot)\github\stl\src\print.cpp;
161+
$(CrtRoot)\github\stl\src\regex.cpp;
161162
$(CrtRoot)\github\stl\src\sharedmutex.cpp;
162163
$(CrtRoot)\github\stl\src\stacktrace.cpp;
163164
$(CrtRoot)\github\stl\src\syserror_import_lib.cpp;

stl/src/awint.hpp

+5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
33

44
// Internal definitions for A&W Win32 wrapper routines.
5+
6+
// This file is compiled into the import library (via regex.cpp => awint.hpp).
7+
// MAJOR LIMITATIONS apply to what can be included here!
8+
// Before editing this file, read: /docs/import_library.md
9+
510
#pragma once
611

712
#include <yvals.h>

stl/src/regex.cpp

+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
4+
// This file is compiled into the import library.
5+
// MAJOR LIMITATIONS apply to what can be included here!
6+
// Before editing this file, read: /docs/import_library.md
7+
8+
#include <__msvc_xlocinfo_types.hpp>
9+
#include <clocale>
10+
#include <crtdefs.h>
11+
#include <cstdlib>
12+
#include <cstring>
13+
#include <internal_shared.h>
14+
15+
#include <Windows.h>
16+
17+
#undef _ENFORCE_ONLY_CORE_HEADERS
18+
#include "awint.hpp"
19+
20+
extern "C" {
21+
22+
// derived from xstrxfrm.cpp
23+
// Writes a sort key for the narrow characters in [string2, end2) into [string1, end1).
//
// ploc supplies the locale name and code page to collate with. When it is null, both are taken from
// ___lc_locale_name_func()[LC_COLLATE] and ___lc_collate_cp_func() instead.
//
// Returns the number of chars the key occupies. The key is only written when that number fits into
// [string1, end1); otherwise the caller is expected to retry with a buffer of at least the returned size.
// Returns static_cast<size_t>(-1) when the size query fails.
size_t __stdcall __std_regex_transform_primary_char(
24+
_Out_writes_(end1 - string1) _Post_readable_size_(return) char* string1, char* end1,
25+
_In_reads_(end2 - string2) const char* string2, const char* end2, _In_opt_ const _Collvec* ploc) noexcept {
26+
size_t n1 = end1 - string1;
27+
size_t n2 = end2 - string2;
28+
size_t retval = static_cast<size_t>(-1);
29+
UINT codepage;
30+
const wchar_t* locale_name;
31+
32+
if (ploc == nullptr) {
33+
locale_name = ___lc_locale_name_func()[LC_COLLATE];
34+
codepage = ___lc_collate_cp_func();
35+
} else {
36+
locale_name = ploc->_LocaleName;
37+
codepage = ploc->_Page;
38+
}
39+
40+
// No locale name and the default code page: the key is the input itself, copied verbatim when it fits.
if (locale_name == nullptr && codepage == CP_ACP) {
41+
if (n2 <= n1) {
42+
memcpy(string1, string2, n2);
43+
}
44+
retval = n2;
45+
} else {
46+
// Inquire size of dst string in BYTES
47+
// (null destination with size 0 requests the required size only; the flags ask for a sort key
// that ignores case, diacritics, kana type and character width)
const int dstlen = __crtLCMapStringA(locale_name,
48+
LCMAP_SORTKEY | LINGUISTIC_IGNORECASE | LINGUISTIC_IGNOREDIACRITIC | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH,
49+
string2, static_cast<int>(n2), nullptr, 0, codepage, TRUE);
50+
51+
// dstlen == 0 means the query failed; retval then keeps its error value
if (dstlen != 0) {
52+
retval = dstlen;
53+
54+
// if not enough room, return amount needed
55+
if (dstlen <= static_cast<int>(n1)) {
56+
// Map src string to dst string
57+
// (the result of this second call is not inspected; the size reported above is returned)
__crtLCMapStringA(locale_name,
58+
LCMAP_SORTKEY | LINGUISTIC_IGNORECASE | LINGUISTIC_IGNOREDIACRITIC | NORM_IGNOREKANATYPE
59+
| NORM_IGNOREWIDTH,
60+
string2, static_cast<int>(n2), string1, static_cast<int>(n1), codepage, TRUE);
61+
}
62+
}
63+
}
64+
65+
return retval;
66+
}
67+
68+
// derived from xwcsxfrm.cpp
69+
// Writes a sort key for the wide characters in [string2, end2) into [string1, end1).
//
// ploc supplies the locale name to collate with. When it is null, the name is taken from
// ___lc_locale_name_func()[LC_COLLATE] instead. Without a locale name the input is copied verbatim.
//
// The sort key is produced as a byte sequence; each byte is widened into one wchar_t of the output.
//
// Returns the number of wchar_t elements the key occupies. The key is only written when it fits into
// [string1, end1); otherwise the caller is expected to retry with a buffer of at least the returned size.
// Returns static_cast<size_t>(-1) on failure (allocation failure or failing size query).
size_t __stdcall __std_regex_transform_primary_wchar_t(
    _Out_writes_(end1 - string1) _Post_readable_size_(return) wchar_t* string1, wchar_t* end1,
    _In_reads_(end2 - string2) const wchar_t* string2, const wchar_t* end2, _In_opt_ const _Collvec* ploc) noexcept {
    size_t n1   = end1 - string1;
    size_t n2   = end2 - string2;
    size_t size = static_cast<size_t>(-1);
    const wchar_t* locale_name;

    if (ploc == nullptr) {
        locale_name = ___lc_locale_name_func()[LC_COLLATE];
    } else {
        locale_name = ploc->_LocaleName;
    }

    if (locale_name == nullptr) {
        if (n2 <= n1) {
            memcpy(string1, string2, n2 * sizeof(wchar_t));
        }
        size = n2;
    } else {
        // When using LCMAP_SORTKEY, LCMapStringW handles BYTES not wide
        // chars. We use a byte buffer to hold bytes and then convert the
        // byte string to a wide char string and return this so it can be
        // compared using wcscmp(). User's buffer is n1 wide chars, so
        // use an internal buffer of n1 bytes.

        auto bbuffer = _malloc_crt_t(unsigned char, n1);

        if (bbuffer) {
            // A destination size of 0 turns the mapping call into a pure size query: it returns the required
            // size without writing anything. That result must not be mistaken for a successful mapping,
            // or the copy loop below would read past the (empty) byte buffer and write past the (empty)
            // output range. Treat an empty destination like "buffer not big enough" instead.
            size = 0;

            if (n1 != 0) {
#pragma warning(push)
#pragma warning(disable : 6386) // PREfast doesn't understand LCMAP_SORTKEY
                size = __crtLCMapStringW(locale_name,
                    LCMAP_SORTKEY | LINGUISTIC_IGNORECASE | LINGUISTIC_IGNOREDIACRITIC | NORM_IGNOREKANATYPE
                        | NORM_IGNOREWIDTH,
                    string2, static_cast<int>(n2), reinterpret_cast<wchar_t*>(bbuffer.get()), static_cast<int>(n1));
#pragma warning(pop)
            }

            if (size == 0) {
                // buffer not big enough, get size required.
                size = __crtLCMapStringW(locale_name,
                    LCMAP_SORTKEY | LINGUISTIC_IGNORECASE | LINGUISTIC_IGNOREDIACRITIC | NORM_IGNOREKANATYPE
                        | NORM_IGNOREWIDTH,
                    string2, static_cast<int>(n2), nullptr, 0);

                if (size == 0) {
                    size = static_cast<size_t>(-1); // default error
                }
            } else {
                // string successfully mapped, convert to wide char

                for (size_t i = 0; i < size; ++i) {
                    string1[i] = static_cast<wchar_t>(bbuffer.get()[i]);
                }
            }
        }
    }

    return size;
}
128+
} // extern "C"

tests/libcxx/expected_results.txt

-1
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,6 @@ std/re/re.alg/re.alg.search/basic.pass.cpp FAIL
830830
std/re/re.alg/re.alg.search/ecma.pass.cpp FAIL
831831
std/re/re.alg/re.alg.search/extended.pass.cpp FAIL
832832
std/re/re.traits/lookup_collatename.pass.cpp FAIL
833-
std/re/re.traits/transform_primary.pass.cpp FAIL
834833

835834
# Not analyzed, likely STL bugs. Various assertions.
836835
std/numerics/complex.number/complex.ops/complex_divide_complex.pass.cpp FAIL

0 commit comments

Comments
 (0)