Skip to content

MAINT: Conform ASCIIHexDecode implementation to specification #3274

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def decode(
logger_warning(
"missing EOD in ASCIIHexDecode, check if output is OK", __name__
)
break # Reached end of string even if no EOD
break # Reached end of string without an EOD
char = data[index : index + 1]
if char == b">":
break
Expand All @@ -306,7 +306,13 @@ def decode(
retval += bytes((int(hex_pair, base=16),))
hex_pair = b""
index += 1
assert hex_pair == b""
# If the filter encounters the EOD marker after reading
# an odd number of hexadecimal digits,
# it shall behave as if a 0 (zero) followed the last digit.
# For every even number of hexadecimal digits, hex_pair is reset to b"".
if hex_pair != b"":
hex_pair += b"0"
retval += bytes((int(hex_pair, base=16),))
return retval


Expand Down Expand Up @@ -351,7 +357,7 @@ def decode(
logger_warning(
"missing EOD in RunLengthDecode, check if output is OK", __name__
)
break # reach End Of String even if no EOD
break # Reached end of string without an EOD
length = data[index]
index += 1
if length == 128:
Expand Down
7 changes: 3 additions & 4 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ def test_flate_decode_decompress_with_array_params(params):
), # Same as previous, but whitespaced
("30313233343536373839616263646566414243444546>", string.hexdigits.encode()),
("20090a0d0b0c>", string.whitespace.encode()),
# Odd number of hexadecimal digits behaves as if a 0 (zero) followed the last digit
("3938373635343332313>", string.digits[::-1].encode()),
],
ids=[
"empty",
Expand All @@ -114,16 +116,13 @@ def test_flate_decode_decompress_with_array_params(params):
"digits_whitespace",
"hexdigits",
"whitespace",
"odd_number",
],
)
def test_ascii_hex_decode_method(data, expected):
"""
Feeds a bunch of values to ASCIIHexDecode.decode() and ensures the
correct output is returned.

TODO What is decode() supposed to do for such inputs as ">>", ">>>" or
any other not terminated by ">"? (For the latter case, an exception
is currently raised.)
"""
assert ASCIIHexDecode.decode(data) == expected

Expand Down
Loading