Skip to content

Commit a18765c

Browse files
authored
Explicitly omit caret and backslash from abbr
Fixes #1444.
1 parent 421f1e8 commit a18765c

File tree

5 files changed

+76
-38
lines changed

5 files changed

+76
-38
lines changed

docs/changelog.md

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1515
* Include `scripts/*.py` in the generated source tarballs (#1430).
1616
* Ensure lines after heading in loose list are properly detabbed (#1443).
1717
* Give smarty tree processor higher priority than toc (#1440).
18+
* Explicitly omit caret (`^`) and backslash (`\`) from abbreviations (#1444).
1819

1920
## [3.5.2] -- 2024-01-10
2021

docs/extensions/abbreviations.md

+8
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@ will be rendered as:
3636
is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p>
3737
```
3838

39+
The following three characters are not permitted in an abbreviation. Any
40+
abbreviation definitions which include one will not be recognized as an
41+
abbreviation definition.
42+
43+
1. caret (`^`)
44+
2. backslash (`\`)
45+
3. right square bracket (`]`)
46+
3947
Usage
4048
-----
4149

markdown/extensions/abbr.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def extendMarkdown(self, md):
4141
class AbbrPreprocessor(BlockProcessor):
4242
""" Abbreviation Preprocessor - parse text for abbr references. """
4343

44-
RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
44+
RE = re.compile(r'^[*]\[(?P<abbr>[^\]\^\\]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
4545

4646
def test(self, parent: etree.Element, block: str) -> bool:
4747
return True
@@ -73,18 +73,15 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
7373

7474
def _generate_pattern(self, text: str) -> str:
7575
"""
76-
Given a string, returns an regex pattern to match that string.
76+
Given a string, returns a regex pattern to match that string.
7777
78-
'HTML' -> r'(?P<abbr>[H][T][M][L])'
78+
'HTML' -> r'(?P<abbr>\b[H][T][M][L]\b)'
7979
80-
Note: we force each char as a literal match (in brackets) as we don't
81-
know what they will be beforehand.
80+
Note: we force each char as a literal match via a character set (in brackets)
81+
as we don't know what they will be beforehand.
8282
8383
"""
84-
chars = list(text)
85-
for i in range(len(chars)):
86-
chars[i] = r'[%s]' % chars[i]
87-
return r'(?P<abbr>\b%s\b)' % (r''.join(chars))
84+
return f"(?P<abbr>\\b{ ''.join(f'[{ c }]' for c in text) }\\b)"
8885

8986

9087
class AbbrInlineProcessor(InlineProcessor):

tests/test_extensions.py

-29
Original file line numberDiff line numberDiff line change
@@ -85,35 +85,6 @@ def testConfigAsKwargsOnInit(self):
8585
self.assertEqual(ext.getConfigs(), {'foo': 'baz', 'bar': 'blah'})
8686

8787

88-
class TestAbbr(unittest.TestCase):
89-
""" Test abbr extension. """
90-
91-
def setUp(self):
92-
self.md = markdown.Markdown(extensions=['abbr'])
93-
94-
def testSimpleAbbr(self):
95-
""" Test Abbreviations. """
96-
text = 'Some text with an ABBR and a REF. Ignore REFERENCE and ref.' + \
97-
'\n\n*[ABBR]: Abbreviation\n' + \
98-
'*[REF]: Abbreviation Reference'
99-
self.assertEqual(
100-
self.md.convert(text),
101-
'<p>Some text with an <abbr title="Abbreviation">ABBR</abbr> '
102-
'and a <abbr title="Abbreviation Reference">REF</abbr>. Ignore '
103-
'REFERENCE and ref.</p>'
104-
)
105-
106-
def testNestedAbbr(self):
107-
""" Test Nested Abbreviations. """
108-
text = '[ABBR](/foo) and _ABBR_\n\n' + \
109-
'*[ABBR]: Abbreviation'
110-
self.assertEqual(
111-
self.md.convert(text),
112-
'<p><a href="/foo"><abbr title="Abbreviation">ABBR</abbr></a> '
113-
'and <em><abbr title="Abbreviation">ABBR</abbr></em></p>'
114-
)
115-
116-
11788
class TestMetaData(unittest.TestCase):
11889
""" Test `MetaData` extension. """
11990

tests/test_syntax/extensions/test_abbr.py

+61
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,25 @@ def test_abbr_override(self):
9595
)
9696
)
9797

98+
def test_abbr_nested(self):
99+
self.assertMarkdownRenders(
100+
self.dedent(
101+
"""
102+
[ABBR](/foo)
103+
104+
_ABBR_
105+
106+
*[ABBR]: Abbreviation
107+
"""
108+
),
109+
self.dedent(
110+
"""
111+
<p><a href="/foo"><abbr title="Abbreviation">ABBR</abbr></a></p>
112+
<p><em><abbr title="Abbreviation">ABBR</abbr></em></p>
113+
"""
114+
)
115+
)
116+
98117
def test_abbr_no_blank_Lines(self):
99118
self.assertMarkdownRenders(
100119
self.dedent(
@@ -240,3 +259,45 @@ def test_abbr_single_quoted(self):
240259
"""
241260
)
242261
)
262+
263+
def test_abbr_ignore_special_chars(self):
264+
self.assertMarkdownRenders(
265+
self.dedent(
266+
r"""
267+
[^] [\\] [\]] []]
268+
269+
*[^]: Not an abbreviation
270+
271+
*[\\]: Not an abbreviation
272+
273+
*[\]]: Not an abbreviation
274+
275+
*[]]: Not an abbreviation
276+
"""
277+
),
278+
self.dedent(
279+
r"""
280+
<p>[^] [\] []] []]</p>
281+
<p>*[^]: Not an abbreviation</p>
282+
<p>*[\]: Not an abbreviation</p>
283+
<p>*[]]: Not an abbreviation</p>
284+
<p>*[]]: Not an abbreviation</p>
285+
"""
286+
)
287+
)
288+
289+
def test_abbr_hyphen(self):
290+
self.assertMarkdownRenders(
291+
self.dedent(
292+
"""
293+
ABBR-abbr
294+
295+
*[ABBR-abbr]: Abbreviation
296+
"""
297+
),
298+
self.dedent(
299+
"""
300+
<p><abbr title="Abbreviation">ABBR-abbr</abbr></p>
301+
"""
302+
)
303+
)

0 commit comments

Comments
 (0)