@@ -26,6 +26,7 @@ export default (XRegExp) => {
26
26
27
27
// Storage for Unicode data
28
28
const unicode = { } ;
29
+ const unicodeTypes = { } ;
29
30
30
31
// Reuse utils
31
32
const dec = XRegExp . _dec ;
@@ -123,41 +124,56 @@ export default (XRegExp) => {
123
124
*/
124
125
XRegExp . addToken (
125
126
// Use `*` instead of `+` to avoid capturing `^` as the token name in `\p{^}`
126
- / \\ ( [ p P ] ) (?: { ( \^ ? ) ( [ ^ } ] * ) } | ( [ A - Z a - z ] ) ) / ,
127
+ / \\ ( [ p P ] ) (?: { ( \^ ? ) (?: ( S c r i p t | s c ) = ) ? ( [ ^ } ] * ) } | ( [ A - Z a - z ] ) ) / ,
127
128
( match , scope , flags ) => {
128
129
const ERR_DOUBLE_NEG = 'Invalid double negation ' ;
129
130
const ERR_UNKNOWN_NAME = 'Unknown Unicode token ' ;
130
131
const ERR_UNKNOWN_REF = 'Unicode token missing data ' ;
131
132
const ERR_ASTRAL_ONLY = 'Astral mode required for Unicode token ' ;
132
133
const ERR_ASTRAL_IN_CLASS = 'Astral mode does not support Unicode tokens within character classes' ;
134
+ const [
135
+ fullToken ,
136
+ pPrefix ,
137
+ caretNegation ,
138
+ typePrefix ,
139
+ tokenName ,
140
+ tokenSingleCharName
141
+ ] = match ;
133
142
// Negated via \P{..} or \p{^..}
134
- let isNegated = match [ 1 ] === 'P' || ! ! match [ 2 ] ;
143
+ let isNegated = pPrefix === 'P' || ! ! caretNegation ;
135
144
// Switch from BMP (0-FFFF) to astral (0-10FFFF) mode via flag A
136
145
const isAstralMode = flags . includes ( 'A' ) ;
137
- // Token lookup name. Check `[4]` first to avoid passing `undefined` via `\p{}`
138
- let slug = normalize ( match [ 4 ] || match [ 3 ] ) ;
146
+ // Token lookup name. Check `tokenSingleCharName` first to avoid passing `undefined`
147
+ // via `\p{}`
148
+ let slug = normalize ( tokenSingleCharName || tokenName ) ;
139
149
// Token data object
140
150
let item = unicode [ slug ] ;
141
151
142
- if ( match [ 1 ] === 'P' && match [ 2 ] ) {
143
- throw new SyntaxError ( ERR_DOUBLE_NEG + match [ 0 ] ) ;
152
+ if ( pPrefix === 'P' && caretNegation ) {
153
+ throw new SyntaxError ( ERR_DOUBLE_NEG + fullToken ) ;
144
154
}
145
155
if ( ! unicode . hasOwnProperty ( slug ) ) {
146
- throw new SyntaxError ( ERR_UNKNOWN_NAME + match [ 0 ] ) ;
156
+ throw new SyntaxError ( ERR_UNKNOWN_NAME + fullToken ) ;
157
+ }
158
+
159
+ if ( typePrefix ) {
160
+ if ( ! ( unicodeTypes [ typePrefix ] && unicodeTypes [ typePrefix ] [ slug ] ) ) {
161
+ throw new SyntaxError ( ERR_UNKNOWN_NAME + fullToken ) ;
162
+ }
147
163
}
148
164
149
165
// Switch to the negated form of the referenced Unicode token
150
166
if ( item . inverseOf ) {
151
167
slug = normalize ( item . inverseOf ) ;
152
168
if ( ! unicode . hasOwnProperty ( slug ) ) {
153
- throw new ReferenceError ( `${ ERR_UNKNOWN_REF + match [ 0 ] } -> ${ item . inverseOf } ` ) ;
169
+ throw new ReferenceError ( `${ ERR_UNKNOWN_REF + fullToken } -> ${ item . inverseOf } ` ) ;
154
170
}
155
171
item = unicode [ slug ] ;
156
172
isNegated = ! isNegated ;
157
173
}
158
174
159
175
if ( ! ( item . bmp || isAstralMode ) ) {
160
- throw new SyntaxError ( ERR_ASTRAL_ONLY + match [ 0 ] ) ;
176
+ throw new SyntaxError ( ERR_ASTRAL_ONLY + fullToken ) ;
161
177
}
162
178
if ( isAstralMode ) {
163
179
if ( scope === 'class' ) {
@@ -196,6 +212,9 @@ export default (XRegExp) => {
196
212
* character classes and alternation, and should use surrogate pairs to represent astral code
197
213
* points. `inverseOf` can be used to avoid duplicating character data if a Unicode token is
198
214
* defined as the exact inverse of another token.
215
+ * @param {String} [typePrefix] Enables optionally using this type as a prefix for all of the
216
+ * provided Unicode tokens, e.g. if given `'Script'`, then `\p{TokenName}` can also be written
217
+ * as `\p{Script=TokenName}`. Case sensitive; the token syntax only recognizes `Script` and `sc`.
199
218
* @example
200
219
*
201
220
* // Basic use
@@ -206,20 +225,35 @@ export default (XRegExp) => {
206
225
* }]);
207
226
* XRegExp('\\p{XDigit}:\\p{Hexadecimal}+').test('0:3D'); // -> true
208
227
*/
209
- XRegExp . addUnicodeData = ( data ) => {
228
+ XRegExp . addUnicodeData = ( data , typePrefix ) => {
210
229
const ERR_NO_NAME = 'Unicode token requires name' ;
211
230
const ERR_NO_DATA = 'Unicode token has no character data ' ;
212
231
232
+ if ( typePrefix ) {
233
+ // Case sensitive to match ES2018
234
+ unicodeTypes [ typePrefix ] = { } ;
235
+ }
236
+
213
237
for ( const item of data ) {
214
238
if ( ! item . name ) {
215
239
throw new Error ( ERR_NO_NAME ) ;
216
240
}
217
241
if ( ! ( item . inverseOf || item . bmp || item . astral ) ) {
218
242
throw new Error ( ERR_NO_DATA + item . name ) ;
219
243
}
220
- unicode [ normalize ( item . name ) ] = item ;
244
+
245
+ const normalizedName = normalize ( item . name ) ;
246
+ unicode [ normalizedName ] = item ;
247
+ if ( typePrefix ) {
248
+ unicodeTypes [ typePrefix ] [ normalizedName ] = true ;
249
+ }
250
+
221
251
if ( item . alias ) {
222
- unicode [ normalize ( item . alias ) ] = item ;
252
+ const normalizedAlias = normalize ( item . alias ) ;
253
+ unicode [ normalizedAlias ] = item ;
254
+ if ( typePrefix ) {
255
+ unicodeTypes [ typePrefix ] [ normalizedAlias ] = true ;
256
+ }
223
257
}
224
258
}
225
259
0 commit comments