Skip to content

Commit 249dad3

Browse files
committed
feat: Add support for BrotliDecode filter (PDF 2.0)
Implements the BrotliDecode filter as specified in ISO 32000-2:2020, Section 7.4.11. Adds necessary constants, integrates the filter into the decoding logic, includes brotli as an optional dependency, adds unit tests, and updates documentation. Closes py-pdf#3223
1 parent 96ba79c commit 249dad3

File tree

6 files changed

+73
-2
lines changed

6 files changed

+73
-2
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# CHANGELOG
22

3+
## Unreleased
4+
5+
### New Features (ENH)
6+
- Add support for BrotliDecode filter (PDF 2.0) (#3223)
7+
38
## Version 5.4.0, 2025-03-16
49

510
### New Features (ENH)
@@ -1680,7 +1685,7 @@ e.g. Russian / Chinese / Japanese / Korean / Arabic.
16801685
### Documentation (DOC)
16811686
- Notes on annotations (#982)
16821687
- Who uses PyPDF2
1683-
- intendet ➔ in robustness page (#958)
1688+
- inteted ➔ in robustness page (#958)
16841689

16851690
### Maintenance (MAINT)
16861691
- pre-commit / requirements.txt updates (#977)

docs/modules/constants.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,13 @@ Constants
2626
:undoc-members:
2727
:exclude-members: FT, Parent, Kids, T, TU, TM, V, DV, AA, Opt, attributes, attributes_dict
2828
:show-inheritance:
29+
30+
.. autoclass:: pypdf.constants.FilterTypes
31+
:members:
32+
:undoc-members:
33+
:show-inheritance:
34+
35+
.. autoclass:: pypdf.constants.FilterTypeAbbreviations
36+
:members:
37+
:undoc-members:
38+
:show-inheritance:

pypdf/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ class FilterTypes(StrEnum):
245245
CCITT_FAX_DECODE = "/CCITTFaxDecode" # abbreviation: CCF
246246
DCT_DECODE = "/DCTDecode" # abbreviation: DCT
247247
JPX_DECODE = "/JPXDecode"
248+
BROTLI_DECODE = "/BrotliDecode" # abbreviation: Br, PDF 2.0
248249

249250

250251
class FilterTypeAbbreviations:
@@ -257,6 +258,7 @@ class FilterTypeAbbreviations:
257258
RL = "/RL"
258259
CCF = "/CCF"
259260
DCT = "/DCT"
261+
BR = "/Br" # BrotliDecode
260262

261263

262264
class LzwFilterParameters:

pypdf/filters.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@
6565
NullObject,
6666
)
6767

68+
try:
69+
import brotli
70+
except ImportError:
71+
brotli = None
72+
6873

6974
def decompress(data: bytes) -> bytes:
7075
"""
@@ -481,6 +486,50 @@ def decode(
481486
return data
482487

483488

489+
class BrotliDecode:
    """
    Stream filter for Brotli-compressed data.

    Both directions delegate to the optional ``brotli`` module. When that
    module could not be imported, the module-level name ``brotli`` is ``None``
    and every call raises :class:`PdfStreamError`.
    """

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decompress a Brotli-compressed byte string.

        Args:
            data: The Brotli-compressed input.
            decode_parms: Optional filter parameters; not used.
            **kwargs: Not used.

        Returns:
            The decompressed bytes.

        Raises:
            PdfStreamError: If the brotli library is not installed.

        """
        # Happy path first: only fall through to the error when the optional
        # dependency is missing.
        if brotli is not None:
            return brotli.decompress(data)
        raise PdfStreamError("Brotli library not installed. Required for BrotliDecode filter.")

    @staticmethod
    def encode(data: bytes, **kwargs: Any) -> bytes:
        """
        Compress a byte string with Brotli.

        Args:
            data: The raw bytes to compress.
            **kwargs: Not used.

        Returns:
            The Brotli-compressed bytes.

        Raises:
            PdfStreamError: If the brotli library is not installed.

        """
        if brotli is not None:
            return brotli.compress(data)
        raise PdfStreamError("Brotli library not installed. Required for BrotliDecode filter.")
531+
532+
484533
@dataclass
485534
class CCITTParameters:
486535
"""§7.4.6, optional parameters for the CCITTFaxDecode filter."""
@@ -666,6 +715,8 @@ def decode_stream_data(stream: Any) -> bytes:
666715
data = DCTDecode.decode(data)
667716
elif filter_name == FT.JPX_DECODE:
668717
data = JPXDecode.decode(data)
718+
elif filter_name == FT.BROTLI_DECODE: # Add BrotliDecode
719+
data = BrotliDecode.decode(data)
669720
elif filter_name == "/Crypt":
670721
if "/Name" in params or "/Type" in params:
671722
raise NotImplementedError(

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,11 @@ Source = "https://github.com/py-pdf/pypdf"
4242
crypto = ["cryptography"]
4343
cryptodome = ["PyCryptodome"]
4444
image = ["Pillow>=8.0.0"]
45+
brotli = ["Brotli"] # Add brotli dependency
4546
full = [
4647
"cryptography",
47-
"Pillow>=8.0.0"
48+
"Pillow>=8.0.0",
49+
"Brotli", # Add brotli to full dependencies
4850
]
4951
dev = [
5052
"black",

tests/test_filters.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from pypdf.filters import (
1616
ASCII85Decode,
1717
ASCIIHexDecode,
18+
BrotliDecode, # Add BrotliDecode
1819
CCITParameters,
1920
CCITTFaxDecode,
2021
CCITTParameters,

0 commit comments

Comments
 (0)