Skip to content

Commit c11b667

Browse files
gh-96346: Use double caching for re._compile() (#96347)
1 parent eed8045 commit c11b667

File tree

2 files changed

+47
-21
lines changed

2 files changed

+47
-21
lines changed

Lib/re/__init__.py

Lines changed: 46 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -229,6 +229,7 @@ def compile(pattern, flags=0):
229229
def purge():
230230
"Clear the regular expression caches"
231231
_cache.clear()
232+
_cache2.clear()
232233
_compile_repl.cache_clear()
233234

234235
def template(pattern, flags=0):
@@ -266,40 +267,64 @@ def escape(pattern):
266267
# --------------------------------------------------------------------
267268
# internals
268269

269-
_cache = {} # ordered!
270-
270+
# Use the fact that dict keeps the insertion order.
271+
# _cache2 uses the simple FIFO policy which has better latency.
272+
# _cache uses the LRU policy which has better hit rate.
273+
_cache = {} # LRU
274+
_cache2 = {} # FIFO
271275
_MAXCACHE = 512
276+
_MAXCACHE2 = 256
277+
assert _MAXCACHE2 < _MAXCACHE
278+
272279
def _compile(pattern, flags):
273280
# internal: compile pattern
274281
if isinstance(flags, RegexFlag):
275282
flags = flags.value
276283
try:
277-
return _cache[type(pattern), pattern, flags]
284+
return _cache2[type(pattern), pattern, flags]
278285
except KeyError:
279286
pass
280-
if isinstance(pattern, Pattern):
281-
if flags:
282-
raise ValueError(
283-
"cannot process flags argument with a compiled pattern")
284-
return pattern
285-
if not _compiler.isstring(pattern):
286-
raise TypeError("first argument must be string or compiled pattern")
287-
if flags & T:
288-
import warnings
289-
warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
290-
"as it is an undocumented flag "
291-
"without an obvious purpose. "
292-
"Don't use it.",
293-
DeprecationWarning)
294-
p = _compiler.compile(pattern, flags)
295-
if not (flags & DEBUG):
287+
288+
key = (type(pattern), pattern, flags)
289+
# Item in _cache should be moved to the end if found.
290+
p = _cache.pop(key, None)
291+
if p is None:
292+
if isinstance(pattern, Pattern):
293+
if flags:
294+
raise ValueError(
295+
"cannot process flags argument with a compiled pattern")
296+
return pattern
297+
if not _compiler.isstring(pattern):
298+
raise TypeError("first argument must be string or compiled pattern")
299+
if flags & T:
300+
import warnings
301+
warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
302+
"as it is an undocumented flag "
303+
"without an obvious purpose. "
304+
"Don't use it.",
305+
DeprecationWarning)
306+
p = _compiler.compile(pattern, flags)
307+
if flags & DEBUG:
308+
return p
296309
if len(_cache) >= _MAXCACHE:
297-
# Drop the oldest item
310+
# Drop the least recently used item.
311+
# next(iter(_cache)) is known to have linear amortized time,
312+
# but it is used here to avoid a dependency on OrderedDict.
313+
# For the small _MAXCACHE value it doesn't make much of a difference.
298314
try:
299315
del _cache[next(iter(_cache))]
300316
except (StopIteration, RuntimeError, KeyError):
301317
pass
302-
_cache[type(pattern), pattern, flags] = p
318+
# Append to the end.
319+
_cache[key] = p
320+
321+
if len(_cache2) >= _MAXCACHE2:
322+
# Drop the oldest item.
323+
try:
324+
del _cache2[next(iter(_cache2))]
325+
except (StopIteration, RuntimeError, KeyError):
326+
pass
327+
_cache2[key] = p
303328
return p
304329

305330
@functools.lru_cache(_MAXCACHE)
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Use double caching for compiled RE patterns.

0 commit comments

Comments
 (0)