Skip to content

Commit 9edba85

Browse files
authored
Refactor abbr escaping
An alternate fix to #1444. This does not exclude the use of carets or square brackets in abbreviations. It still excludes backslashes, however. I played with backslashes and it just doesn't make sense to support them: they are excluded because they have special meaning in Markdown, not because of their use in regular expressions.
1 parent e4ab4a6 commit 9edba85

File tree

4 files changed

+46
-33
lines changed

4 files changed

+46
-33
lines changed

docs/changelog.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3434
* Include `scripts/*.py` in the generated source tarballs (#1430).
3535
* Ensure lines after heading in loose list are properly detabbed (#1443).
3636
* Give smarty tree processor higher priority than toc (#1440).
37-
* Explicitly omit carrot (`^`) and backslash (`\`) from abbreviations (#1444).
37+
* Permit carets (`^`) and square brackets (`]`) but explicitly exclude
38+
backslashes (`\`) from abbreviations (#1444).
3839

3940
## [3.5.2] -- 2024-01-10
4041

docs/extensions/abbreviations.md

+3-7
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,9 @@ will be rendered as:
3636
is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p>
3737
```
3838

39-
The following three characters are not permitted in an abbreviation. Any
40-
abbreviation definitions which include one will not be recognized as an
41-
abbreviation definition.
42-
43-
1. carrot (`^`)
44-
2. backslash (`\`)
45-
3. left square bracket (`]`)
39+
The backslash (`\`) is not permitted in an abbreviation. Any abbreviation
40+
definition which includes one or more backslashes between the square brackets
41+
will not be recognized as an abbreviation definition.
4642

4743
Usage
4844
-----

markdown/extensions/abbr.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def extendMarkdown(self, md):
4141
class AbbrPreprocessor(BlockProcessor):
4242
""" Abbreviation Preprocessor - parse text for abbr references. """
4343

44-
RE = re.compile(r'^[*]\[(?P<abbr>[^\]\^\\]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
44+
RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
4545

4646
def test(self, parent: etree.Element, block: str) -> bool:
4747
return True
@@ -72,16 +72,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
7272
return False
7373

7474
def _generate_pattern(self, text: str) -> str:
75-
"""
76-
Given a string, returns a regex pattern to match that string.
77-
78-
'HTML' -> r'(?P<abbr>\b[H][T][M][L]\b)'
79-
80-
Note: we force each char as a literal match via a character set (in brackets)
81-
as we don't know what they will be beforehand.
82-
83-
"""
84-
return f"(?P<abbr>\\b{ ''.join(f'[{ c }]' for c in text) }\\b)"
75+
""" Given a string, returns a regex pattern to match that string. """
76+
return f"(?P<abbr>\\b{ re.escape(text) }\\b)"
8577

8678

8779
class AbbrInlineProcessor(InlineProcessor):

tests/test_syntax/extensions/test_abbr.py

+38-14
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525

2626
class TestAbbr(TestCase):
27+
maxDiff = None
2728

2829
default_kwargs = {'extensions': ['abbr']}
2930

@@ -260,28 +261,19 @@ def test_abbr_single_quoted(self):
260261
)
261262
)
262263

263-
def test_abbr_ignore_special_chars(self):
264+
def test_abbr_ignore_backslash(self):
264265
self.assertMarkdownRenders(
265266
self.dedent(
266267
r"""
267-
[^] [\\] [\]] []]
268+
\\foo
268269
269-
*[^]: Not an abbreviation
270-
271-
*[\\]: Not an abbreviation
272-
273-
*[\]]: Not an abbreviation
274-
275-
*[]]: Not an abbreviation
270+
*[\\foo]: Not an abbreviation
276271
"""
277272
),
278273
self.dedent(
279274
r"""
280-
<p>[^] [\] []] []]</p>
281-
<p>*[^]: Not an abbreviation</p>
282-
<p>*[\]: Not an abbreviation</p>
283-
<p>*[]]: Not an abbreviation</p>
284-
<p>*[]]: Not an abbreviation</p>
275+
<p>\foo</p>
276+
<p>*[\foo]: Not an abbreviation</p>
285277
"""
286278
)
287279
)
@@ -301,3 +293,35 @@ def test_abbr_hyphen(self):
301293
"""
302294
)
303295
)
296+
297+
def test_abbr_carrot(self):
298+
self.assertMarkdownRenders(
299+
self.dedent(
300+
"""
301+
ABBR^abbr
302+
303+
*[ABBR^abbr]: Abbreviation
304+
"""
305+
),
306+
self.dedent(
307+
"""
308+
<p><abbr title="Abbreviation">ABBR^abbr</abbr></p>
309+
"""
310+
)
311+
)
312+
313+
def test_abbr_bracket(self):
314+
self.assertMarkdownRenders(
315+
self.dedent(
316+
"""
317+
ABBR]abbr
318+
319+
*[ABBR]abbr]: Abbreviation
320+
"""
321+
),
322+
self.dedent(
323+
"""
324+
<p><abbr title="Abbreviation">ABBR]abbr</abbr></p>
325+
"""
326+
)
327+
)

0 commit comments

Comments
 (0)