Skip to content

Commit dd7616a

Browse files
committed
fix: allow unicode to be identifiers, fixes #655
1 parent d75ac93 commit dd7616a

File tree

8 files changed

+41
-13
lines changed

8 files changed

+41
-13
lines changed

bin/character-gen.js

100644 → 100755
+12-4
Original file line number | Diff line number | Diff line change
@@ -4,18 +4,19 @@ const isQuote = c => c === '"' || c === "'"
44
const isOperator = c => '!=<>'.includes(c)
55
const isNumber = c => c >= '0' && c <= '9'
66
const isCharacter = c => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
7-
const isIdentifier = c => '_-?'.includes(c) || isCharacter(c) || isNumber(c)
8-
const isBlank = c => c === '\n' || c === '\t' || c === ' ' || c === '\r' || c === '\v' || c === '\f'
7+
const isWord = c => '_-?'.includes(c) || isCharacter(c) || isNumber(c)
8+
const isBlank = c => '\n\t \r\v\f'.includes(c)
99
const isInlineBlank = c => c === '\t' || c === ' ' || c === '\r'
1010
const isSign = c => c === '-' || c === '+'
1111
// See https://developer.mozilla.org/zh-CN/docs/Web/JavaScript/Reference/Global_Objects/RegExp
1212
const unicodeBlanks = '\u00a0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000'
13+
const unicodePunctuations = '“”'
1314

1415
const types = []
1516
for (let i = 0; i < 128; i++) {
1617
const c = String.fromCharCode(i)
1718
let n = 0
18-
if (isIdentifier(c)) n |= 1
19+
if (isWord(c)) n |= 1
1920
if (isOperator(c)) n |= 2
2021
if (isBlank(c)) n |= 4
2122
if (isQuote(c)) n |= 8
@@ -31,13 +32,20 @@ console.log(`
3132
// This file is generated by bin/character-gen.js
3233
// bitmask character types to boost performance
3334
export const TYPES = [${types.join(', ')}]
34-
export const IDENTIFIER = 1
35+
export const WORD = 1
3536
export const OPERATOR = 2
3637
export const BLANK = 4
3738
export const QUOTE = 8
3839
export const INLINE_BLANK = 16
3940
export const NUMBER = 32
4041
export const SIGN = 64
42+
export const PUNCTUATION = 128
43+
44+
export function isWord (char: string): boolean {
45+
const code = char.charCodeAt(0)
46+
return code >= 128 ? !TYPES[code] : !!(TYPES[code] & WORD)
47+
}
4148
`.trim())
4249

4350
console.log([...unicodeBlanks].map(char => `TYPES[${char.charCodeAt(0)}]`).join(' = ') + ' = BLANK')
51+
console.log([...unicodePunctuations].map(char => `TYPES[${char.charCodeAt(0)}]`).join(' = ') + ' = PUNCTUATION')

bin/perf-diff.sh

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
#!/usr/bin/env bash
22

3-
VERSION_LATEST=$(cat package.json | grep '"version":' | awk -F'"' '{print $4}')
3+
VERSION_LATEST=$(cat package.json | grep '"version":' | head -1 | awk -F'"' '{print $4}')
44
FILE_LOCAL=dist/liquid.node.cjs.js
55
FILE_LATEST=dist/liquid.node.cjs.$VERSION_LATEST.js
66
URL_LATEST=https://unpkg.com/liquidjs@$VERSION_LATEST/dist/liquid.node.cjs.js
77

8-
if [ ! -f $FILE_LATEST ]; then
8+
if [ ! -f "$FILE_LATEST" ]; then
99
curl $URL_LATEST > $FILE_LATEST
1010
fi
1111

docs/themes/navy/layout/partial/all-contributors.swig

+7
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,13 @@
6868
<td align="center" valign="top" width="0%"><a href="https://github.com/mahyar-pasarzangene"><img src="https://avatars.githubusercontent.com/u/16485039?v=4?s=100" width="100px;" alt="Mahyar Pasarzangene"/></a></td>
6969
<td align="center" valign="top" width="0%"><a href="https://hubelbauer.net/"><img src="https://avatars.githubusercontent.com/u/6831144?v=4?s=100" width="100px;" alt="Tomáš Hübelbauer"/></a></td>
7070
<td align="center" valign="top" width="0%"><a href="https://sixtwothree.org"><img src="https://avatars.githubusercontent.com/u/73866?v=4?s=100" width="100px;" alt="Jason Garber"/></a></td>
71+
<td align="center" valign="top" width="0%"><a href="https://www.checkoutblocks.com/"><img src="https://avatars.githubusercontent.com/u/114603307?v=4?s=100" width="100px;" alt="Checkout Blocks"/></a></td>
72+
<td align="center" valign="top" width="0%"><a href="https://www.dropkiq.com/"><img src="https://avatars.githubusercontent.com/u/69064?v=4?s=100" width="100px;" alt="Adam Darrah"/></a></td>
73+
<td align="center" valign="top" width="0%"><a href="https://www.11ty.dev/"><img src="https://avatars.githubusercontent.com/u/35147177?v=4?s=100" width="100px;" alt="Eleventy"/></a></td>
74+
<td align="center" valign="top" width="0%"><a href="http://nickreilingh.com/"><img src="https://avatars.githubusercontent.com/u/2458645?v=4?s=100" width="100px;" alt="Nick Reilingh"/></a></td>
75+
<td align="center" valign="top" width="0%"><a href="http://ebobby.org"><img src="https://avatars.githubusercontent.com/u/170356?v=4?s=100" width="100px;" alt="Francisco Soto"/></a></td>
76+
<td align="center" valign="top" width="0%"><a href="https://www.davidlj95.com"><img src="https://avatars.githubusercontent.com/u/8050648?v=4?s=100" width="100px;" alt="David LJ"/></a></td>
77+
<td align="center" valign="top" width="0%"><a href="https://github.com/RasmusWL"><img src="https://avatars.githubusercontent.com/u/1054041?v=4?s=100" width="100px;" alt="Rasmus Wriedt Larsen"/></a></td>
7178
</tr>
7279
</tbody>
7380
</table>

package.json

+1
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
"build:cjs": "BUNDLES=cjs rollup -c rollup.config.mjs",
2727
"build:min": "BUNDLES=min rollup -c rollup.config.mjs",
2828
"build:umd": "BUNDLES=umd rollup -c rollup.config.mjs",
29+
"build:charmap": "./bin/character-gen.js > src/util/character.ts",
2930
"build:docs": "bin/build-docs.sh"
3031
},
3132
"bin": {

src/parser/tokenizer.ts

+4-4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import { FilteredValueToken, TagToken, HTMLToken, HashToken, QuotedToken, LiquidTagToken, OutputToken, ValueToken, Token, RangeToken, FilterToken, TopLevelToken, PropertyAccessToken, OperatorToken, LiteralToken, IdentifierToken, NumberToken } from '../tokens'
22
import { OperatorHandler } from '../render/operator'
3-
import { TrieNode, LiteralValue, Trie, createTrie, ellipsis, literalValues, TokenizationError, TYPES, QUOTE, BLANK, IDENTIFIER, NUMBER, SIGN } from '../util'
3+
import { TrieNode, LiteralValue, Trie, createTrie, ellipsis, literalValues, TokenizationError, TYPES, QUOTE, BLANK, NUMBER, SIGN, isWord } from '../util'
44
import { Operators, Expression } from '../render'
55
import { NormalizedFullOptions, defaultOptions } from '../liquid-options'
66
import { FilterArg } from './filter-arg'
@@ -59,7 +59,7 @@ export class Tokenizer {
5959
if (node['end']) info = node
6060
}
6161
if (!info) return -1
62-
if (info['needBoundary'] && (this.peekType(i - this.p) & IDENTIFIER)) return -1
62+
if (info['needBoundary'] && isWord(this.peek(i - this.p))) return -1
6363
return i
6464
}
6565
readFilteredValue (): FilteredValueToken {
@@ -245,7 +245,7 @@ export class Tokenizer {
245245
readIdentifier (): IdentifierToken {
246246
this.skipBlank()
247247
const begin = this.p
248-
while (!this.end() && this.peekType() & IDENTIFIER) ++this.p
248+
while (!this.end() && isWord(this.peek())) ++this.p
249249
return new IdentifierToken(this.input, begin, this.p, this.file)
250250
}
251251

@@ -351,7 +351,7 @@ export class Tokenizer {
351351
n++
352352
} else break
353353
}
354-
if (digitFound && !(this.peekType(n) & IDENTIFIER)) {
354+
if (digitFound && !isWord(this.peek(n))) {
355355
const num = new NumberToken(this.input, this.p, this.p + n, this.file)
356356
this.advance(n)
357357
return num

src/util/character.ts

+8-1
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,18 @@
33
// This file is generated by bin/character-gen.js
44
// bitmask character types to boost performance
55
export const TYPES = [0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 4, 4, 4, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 2, 8, 0, 0, 0, 0, 8, 0, 0, 0, 64, 0, 65, 0, 0, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 0, 0, 2, 2, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
6-
export const IDENTIFIER = 1
6+
export const WORD = 1
77
export const OPERATOR = 2
88
export const BLANK = 4
99
export const QUOTE = 8
1010
export const INLINE_BLANK = 16
1111
export const NUMBER = 32
1212
export const SIGN = 64
13+
export const PUNCTUATION = 128
14+
15+
// Reports whether `char` counts as a word character, judged solely by its
// first UTF-16 code unit (`charCodeAt(0)`); any further characters are ignored.
//   - code < 128:  true only when the WORD bit is set in TYPES[code].
//   - code >= 128: true unless TYPES[code] holds a truthy flag. Indices that
//     were never assigned read as undefined, so they count as word characters.
// An empty string yields NaN from charCodeAt, which falls into the `< 128`
// branch and returns false.
// NOTE(review): a non-string argument (e.g. undefined) would throw on
// `charCodeAt` — confirm callers never pass one.
export function isWord (char: string): boolean {
16+
const code = char.charCodeAt(0)
17+
return code >= 128 ? !TYPES[code] : !!(TYPES[code] & WORD)
18+
}
1319
TYPES[160] = TYPES[5760] = TYPES[6158] = TYPES[8192] = TYPES[8193] = TYPES[8194] = TYPES[8195] = TYPES[8196] = TYPES[8197] = TYPES[8198] = TYPES[8199] = TYPES[8200] = TYPES[8201] = TYPES[8202] = TYPES[8232] = TYPES[8233] = TYPES[8239] = TYPES[8287] = TYPES[12288] = BLANK
20+
TYPES[8220] = TYPES[8221] = PUNCTUATION

src/util/operator-trie.ts

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import { IDENTIFIER, TYPES } from '../util/character'
1+
import { isWord } from '../util/character'
22

33
interface TrieInput<T> {
44
[key: string]: T
@@ -25,7 +25,7 @@ export function createTrie<T = any> (input: TrieInput<T>): Trie<T> {
2525
const c = name[i]
2626
node[c] = node[c] || {}
2727

28-
if (i === name.length - 1 && (TYPES[name.charCodeAt(i)] & IDENTIFIER)) {
28+
if (i === name.length - 1 && isWord(name[i])) {
2929
node[c].needBoundary = true
3030
}
3131

test/e2e/issues.spec.ts

+5
Original file line number | Diff line number | Diff line change
@@ -464,4 +464,9 @@ describe('Issues', function () {
464464
}
465465
expect(engine.parseAndRenderSync(tpl, ctx)).toEqual('FOO')
466466
})
467+
it('#655 Error in the tokenization process due to an invalid value expression', () => {
468+
const engine = new Liquid()
469+
const result = engine.parseAndRenderSync('{{ÜLKE}}', { ÜLKE: 'Türkiye' })
470+
expect(result).toEqual('Türkiye')
471+
})
467472
})

0 commit comments

Comments
 (0)