Skip to content

Commit 26170b4

Browse files
authored
GH-46198: [Python] Remove deprecated PyExtensionType (#46199)
### Rationale for this change

`PyExtensionType` has been deprecated in #38608 and can now be removed.

### What changes are included in this PR?

`PyExtensionType` functionality is removed from PyArrow code.

### Are these changes tested?

Existing tests should pass.

### Are there any user-facing changes?

Deprecated classes are removed.

* GitHub Issue: #46198

Authored-by: AlenkaF <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
1 parent e6d0edc commit 26170b4

File tree

5 files changed

+7
-162
lines changed

5 files changed

+7
-162
lines changed

docs/source/python/api/datatypes.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ Specific classes and functions for extension types.
131131

132132
BaseExtensionType
133133
ExtensionType
134-
PyExtensionType
135134
UnknownExtensionType
136135
register_extension_type
137136
unregister_extension_type

python/pyarrow/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def print_entry(label, value):
178178
BaseExtensionType, ExtensionType,
179179
RunEndEncodedType, Bool8Type, FixedShapeTensorType,
180180
JsonType, OpaqueType, UuidType,
181-
PyExtensionType, UnknownExtensionType,
181+
UnknownExtensionType,
182182
register_extension_type, unregister_extension_type,
183183
DictionaryMemo,
184184
KeyValueMetadata,

python/pyarrow/lib.pxd

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,6 @@ cdef class JsonType(BaseExtensionType):
207207
const CJsonType* json_ext_type
208208

209209

210-
cdef class PyExtensionType(ExtensionType):
211-
pass
212-
213-
214210
cdef class _Metadata(_Weakrefable):
215211
# required because KeyValueMetadata also extends collections.abc.Mapping
216212
# and the first parent class must be an extension type

python/pyarrow/tests/test_extension_type.py

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,6 @@ def registered_extension_type(ext_type):
4242
pa.unregister_extension_type(ext_type.extension_name)
4343

4444

45-
@contextlib.contextmanager
46-
def enabled_auto_load():
47-
pa.PyExtensionType.set_auto_load(True)
48-
try:
49-
yield
50-
finally:
51-
pa.PyExtensionType.set_auto_load(False)
52-
53-
5445
class TinyIntType(pa.ExtensionType):
5546

5647
def __init__(self):
@@ -233,15 +224,6 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
233224
return cls(storage_type)
234225

235226

236-
class LegacyIntType(pa.PyExtensionType):
237-
238-
def __init__(self):
239-
pa.PyExtensionType.__init__(self, pa.int8())
240-
241-
def __reduce__(self):
242-
return LegacyIntType, ()
243-
244-
245227
def ipc_write_batch(batch):
246228
stream = pa.BufferOutputStream()
247229
writer = pa.RecordBatchStreamWriter(stream, batch.schema)
@@ -1735,25 +1717,6 @@ def test_tensor_type_str(tensor_type, text):
17351717
assert text in tensor_type_str
17361718

17371719

1738-
def test_legacy_int_type():
1739-
with pytest.warns(FutureWarning, match="PyExtensionType is deprecated"):
1740-
ext_ty = LegacyIntType()
1741-
arr = pa.array([1, 2, 3], type=ext_ty.storage_type)
1742-
ext_arr = pa.ExtensionArray.from_storage(ext_ty, arr)
1743-
batch = pa.RecordBatch.from_arrays([ext_arr], names=['ext'])
1744-
buf = ipc_write_batch(batch)
1745-
1746-
with pytest.warns((RuntimeWarning, FutureWarning)):
1747-
batch = ipc_read_batch(buf)
1748-
assert isinstance(batch.column(0).type, pa.UnknownExtensionType)
1749-
1750-
with enabled_auto_load():
1751-
with pytest.warns(FutureWarning, match="PyExtensionType is deprecated"):
1752-
batch = ipc_read_batch(buf)
1753-
assert isinstance(batch.column(0).type, LegacyIntType)
1754-
assert batch.column(0) == ext_arr
1755-
1756-
17571720
@pytest.mark.parametrize("storage_type,storage", [
17581721
(pa.null(), [None] * 4),
17591722
(pa.int64(), [1, 2, None, 4]),

python/pyarrow/types.pxi

Lines changed: 6 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -2136,91 +2136,7 @@ cdef class OpaqueType(BaseExtensionType):
21362136
return OpaqueScalar
21372137

21382138

2139-
_py_extension_type_auto_load = False
2140-
2141-
2142-
cdef class PyExtensionType(ExtensionType):
2143-
"""
2144-
Concrete base class for Python-defined extension types based on pickle
2145-
for (de)serialization.
2146-
2147-
.. warning::
2148-
This class is deprecated and its deserialization is disabled by default.
2149-
:class:`ExtensionType` is recommended instead.
2150-
2151-
Parameters
2152-
----------
2153-
storage_type : DataType
2154-
The storage type for which the extension is built.
2155-
"""
2156-
2157-
def __cinit__(self):
2158-
if type(self) is PyExtensionType:
2159-
raise TypeError("Can only instantiate subclasses of "
2160-
"PyExtensionType")
2161-
2162-
def __init__(self, DataType storage_type):
2163-
warnings.warn(
2164-
"pyarrow.PyExtensionType is deprecated "
2165-
"and will refuse deserialization by default. "
2166-
"Instead, please derive from pyarrow.ExtensionType and implement "
2167-
"your own serialization mechanism.",
2168-
FutureWarning)
2169-
ExtensionType.__init__(self, storage_type, "arrow.py_extension_type")
2170-
2171-
def __reduce__(self):
2172-
raise NotImplementedError("Please implement {0}.__reduce__"
2173-
.format(type(self).__name__))
2174-
2175-
def __arrow_ext_serialize__(self):
2176-
return pickle.dumps(self)
2177-
2178-
@classmethod
2179-
def __arrow_ext_deserialize__(cls, storage_type, serialized):
2180-
if not _py_extension_type_auto_load:
2181-
warnings.warn(
2182-
"pickle-based deserialization of pyarrow.PyExtensionType subclasses "
2183-
"is disabled by default; if you only ingest "
2184-
"trusted data files, you may re-enable this using "
2185-
"`pyarrow.PyExtensionType.set_auto_load(True)`.\n"
2186-
"In the future, Python-defined extension subclasses should "
2187-
"derive from pyarrow.ExtensionType (not pyarrow.PyExtensionType) "
2188-
"and implement their own serialization mechanism.\n",
2189-
RuntimeWarning)
2190-
return UnknownExtensionType(storage_type, serialized)
2191-
try:
2192-
ty = pickle.loads(serialized)
2193-
except Exception:
2194-
# For some reason, it's impossible to deserialize the
2195-
# ExtensionType instance. Perhaps the serialized data is
2196-
# corrupt, or more likely the type is being deserialized
2197-
# in an environment where the original Python class or module
2198-
# is not available. Fall back on a generic BaseExtensionType.
2199-
return UnknownExtensionType(storage_type, serialized)
2200-
2201-
if ty.storage_type != storage_type:
2202-
raise TypeError("Expected storage type {0} but got {1}"
2203-
.format(ty.storage_type, storage_type))
2204-
return ty
2205-
2206-
# XXX Cython marks extension types as immutable, so cannot expose this
2207-
# as a writable class attribute.
2208-
@classmethod
2209-
def set_auto_load(cls, value):
2210-
"""
2211-
Enable or disable auto-loading of serialized PyExtensionType instances.
2212-
2213-
Parameters
2214-
----------
2215-
value : bool
2216-
Whether to enable auto-loading.
2217-
"""
2218-
global _py_extension_type_auto_load
2219-
assert isinstance(value, bool)
2220-
_py_extension_type_auto_load = value
2221-
2222-
2223-
cdef class UnknownExtensionType(PyExtensionType):
2139+
cdef class UnknownExtensionType(ExtensionType):
22242140
"""
22252141
A concrete class for Python-defined extension types that refer to
22262142
an unknown Python implementation.
@@ -2238,11 +2154,15 @@ cdef class UnknownExtensionType(PyExtensionType):
22382154

22392155
def __init__(self, DataType storage_type, serialized):
22402156
self.serialized = serialized
2241-
PyExtensionType.__init__(self, storage_type)
2157+
super().__init__(storage_type, "pyarrow.unknown")
22422158

22432159
def __arrow_ext_serialize__(self):
22442160
return self.serialized
22452161

2162+
@classmethod
2163+
def __arrow_ext_deserialize__(cls, storage_type, serialized):
2164+
return UnknownExtensionType()
2165+
22462166

22472167
_python_extension_types_registry = []
22482168

@@ -6094,39 +6014,6 @@ cdef class _ExtensionRegistryNanny(_Weakrefable):
60946014
_registry_nanny = _ExtensionRegistryNanny()
60956015

60966016

6097-
def _register_py_extension_type():
6098-
cdef:
6099-
DataType storage_type
6100-
shared_ptr[CExtensionType] cpy_ext_type
6101-
c_string c_extension_name = tobytes("arrow.py_extension_type")
6102-
6103-
# Make a dummy C++ ExtensionType
6104-
storage_type = null()
6105-
check_status(CPyExtensionType.FromClass(
6106-
storage_type.sp_type, c_extension_name, PyExtensionType,
6107-
&cpy_ext_type))
6108-
check_status(
6109-
RegisterPyExtensionType(<shared_ptr[CDataType]> cpy_ext_type))
6110-
6111-
6112-
def _unregister_py_extension_types():
6113-
# This needs to be done explicitly before the Python interpreter is
6114-
# finalized. If the C++ type is destroyed later in the process
6115-
# teardown stage, it will invoke CPython APIs such as Py_DECREF
6116-
# with a destroyed interpreter.
6117-
unregister_extension_type("arrow.py_extension_type")
6118-
for ext_type in _python_extension_types_registry:
6119-
try:
6120-
unregister_extension_type(ext_type.extension_name)
6121-
except KeyError:
6122-
pass
6123-
_registry_nanny.release_registry()
6124-
6125-
6126-
_register_py_extension_type()
6127-
atexit.register(_unregister_py_extension_types)
6128-
6129-
61306017
#
61316018
# PyCapsule export utilities
61326019
#

0 commit comments

Comments
 (0)