Skip to content

Commit aed50ad

Browse files
authored
BUG: Using compress_identical_objects on transformed content duplicates differing content (#3197)
1 parent a6f9153 commit aed50ad

File tree

3 files changed: 20 additions and 2 deletions.

pypdf/generic/_data_structures.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -994,7 +994,7 @@ def set_data(self, data: bytes) -> None:
994994

995995
def hash_value_data(self) -> bytes:
996996
data = super().hash_value_data()
997-
data += self._data
997+
data += self.get_data()
998998
return data
999999

10001000
def write_to_stream(

resources/two-different-pages.pdf

7.5 KB
Binary file not shown.

tests/test_workflows.py

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
from PIL import Image, ImageChops
1515
from PIL import __version__ as pil_version
1616

17-
from pypdf import PdfReader, PdfWriter
17+
from pypdf import PdfReader, PdfWriter, Transformation
1818
from pypdf.constants import PageAttributes as PG
1919
from pypdf.errors import PdfReadError, PdfReadWarning
2020
from pypdf.generic import (
@@ -272,6 +272,24 @@ def test_extract_textbench(enable, url, pages, print_result=False):
272272
pass
273273

274274

275+
def test_transform_compress_identical_objects():
276+
reader = PdfReader(RESOURCE_ROOT / "two-different-pages.pdf")
277+
writer = PdfWriter()
278+
279+
for page in reader.pages:
280+
op = Transformation().scale(sx=0.8, sy=0.8)
281+
page.add_transformation(op)
282+
writer.add_page(page)
283+
writer.compress_identical_objects()
284+
bytes_out = BytesIO()
285+
writer.write(bytes_out)
286+
result_reader = PdfReader(bytes_out)
287+
pg1_text = result_reader.pages[0].extract_text()
288+
pg2_text = result_reader.pages[1].extract_text()
289+
assert pg1_text.strip() == "1"
290+
assert pg2_text.strip() == "2"
291+
292+
275293
@pytest.mark.slow
276294
def test_orientations():
277295
p = PdfReader(RESOURCE_ROOT / "test Orient.pdf").pages[0]

0 commit comments

Comments
 (0)