Skip to content

STY: Reorder filters in decode_stream_data #3289

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 14 additions & 18 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE.


"""
Implementation of stream filters for PDF.

See TABLE H.1 Abbreviations for standard filter names
"""
"""Implementation of stream filters for PDF."""
__author__ = "Mathieu Fenniak"
__author_email__ = "[email protected]"

Expand Down Expand Up @@ -132,18 +128,17 @@ def decode(
raise DeprecationError("decode_parms as ArrayObject is deprecated")

str_data = decompress(data)
predictor = 1

predictor = 1
if decode_parms:
try:
predictor = decode_parms.get("/Predictor", 1)
except (AttributeError, TypeError): # Type Error is NullObject
pass # Usually an array with a null object was read
# predictor 1 == no predictor
# predictor == 1: no predictor
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the old comment was clearer here.

if predictor != 1:
# /Columns, the number of samples in each row, has a default value of 1;
# §7.4.4.3, ISO 32000.
DEFAULT_BITS_PER_COMPONENT = 8
try:
columns = cast(int, decode_parms[LZW.COLUMNS].get_object()) # type: ignore
except (TypeError, KeyError):
Expand All @@ -152,6 +147,7 @@ def decode(
colors = cast(int, decode_parms[LZW.COLORS].get_object()) # type: ignore
except (TypeError, KeyError):
colors = 1
DEFAULT_BITS_PER_COMPONENT = 8
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Defining this constant inside the method seems odd in the first place. I would propose making it an internal class-level constant instead.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Magic constants tend to be discouraged, so I am not sure whether your proposed solution fits here.

try:
bits_per_component = cast(
int,
Expand All @@ -165,17 +161,17 @@ def decode(
math.ceil(columns * colors * bits_per_component / 8) + 1
) # number of bytes

# TIFF prediction:
if predictor == 2:
# TIFF prediction
rowlength -= 1 # remove the predictor byte
bpp = rowlength // columns
str_data = bytearray(str_data)
for i in range(len(str_data)):
if i % rowlength >= bpp:
str_data[i] = (str_data[i] + str_data[i - bpp]) % 256
str_data = bytes(str_data)
# PNG prediction:
elif 10 <= predictor <= 15:
# PNG prediction
str_data = FlateDecode._decode_png_prediction(
str_data, columns, rowlength
)
Expand All @@ -198,18 +194,18 @@ def _decode_png_prediction(data: bytes, columns: int, rowlength: int) -> bytes:
filter_byte = rowdata[0]

if filter_byte == 0:
# PNG None Predictor
# None Predictor
pass
elif filter_byte == 1:
# PNG Sub Predictor
# Sub Predictor
for i in range(bpp + 1, rowlength):
rowdata[i] = (rowdata[i] + rowdata[i - bpp]) % 256
elif filter_byte == 2:
# PNG Up Predictor
# Up Predictor
for i in range(1, rowlength):
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
elif filter_byte == 3:
# PNG Average Predictor
# Average Predictor
for i in range(1, bpp + 1):
floor = prev_rowdata[i] // 2
rowdata[i] = (rowdata[i] + floor) % 256
Expand All @@ -218,7 +214,7 @@ def _decode_png_prediction(data: bytes, columns: int, rowlength: int) -> bytes:
floor = (left + prev_rowdata[i]) // 2
rowdata[i] = (rowdata[i] + floor) % 256
elif filter_byte == 4:
# PNG Paeth Predictor
# Paeth Predictor
for i in range(1, bpp + 1):
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
for i in range(bpp + 1, rowlength):
Expand Down Expand Up @@ -755,12 +751,12 @@ def decode_stream_data(stream: Any) -> bytes:
elif filter_name == FT.CCITT_FAX_DECODE:
height = stream.get(IA.HEIGHT, ())
data = CCITTFaxDecode.decode(data, params, height)
elif filter_name == FT.JBIG2_DECODE:
data = JBIG2Decode.decode(data, params)
elif filter_name == FT.DCT_DECODE:
data = DCTDecode.decode(data)
elif filter_name == FT.JPX_DECODE:
data = JPXDecode.decode(data)
elif filter_name == FT.JBIG2_DECODE:
data = JBIG2Decode.decode(data, params)
elif filter_name == "/Crypt":
if "/Name" in params or "/Type" in params:
raise NotImplementedError(
Expand Down Expand Up @@ -810,7 +806,7 @@ def _apply_alpha(
f"image and mask size not matching: {obj_as_text}", __name__
)
else:
# TODO : implement mask
# TODO: implement mask
if alpha.mode != "L":
alpha = alpha.convert("L")
if img.mode == "P":
Expand Down
Loading