Skip to content

Commit 2d50549

Browse files
authored
[#20489] Add support for re.Pattern and re.Match type checking (#34604)
* [#20489] Add support for re.Pattern and re.Match type checking * remove debugging statement
1 parent 28d4617 commit 2d50549

File tree

3 files changed

+66
-9
lines changed

3 files changed

+66
-9
lines changed

sdks/python/apache_beam/typehints/native_type_compatibility_test.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import collections.abc
2323
import enum
24+
import re
2425
import typing
2526
import unittest
2627

@@ -300,16 +301,17 @@ def test_newtype(self):
300301
typehints.Any, convert_to_beam_type(typing.NewType('Number', int)))
301302

302303
def test_pattern(self):
303-
# TODO(https://github.com/apache/beam/issues/20489): Unsupported.
304-
self.assertEqual(typehints.Any, convert_to_beam_type(typing.Pattern))
305-
self.assertEqual(typehints.Any, convert_to_beam_type(typing.Pattern[str]))
306-
self.assertEqual(typehints.Any, convert_to_beam_type(typing.Pattern[bytes]))
304+
self.assertEqual(re.Pattern, convert_to_beam_type(re.Pattern))
305+
self.assertEqual(re.Pattern[str], convert_to_beam_type(re.Pattern[str]))
306+
self.assertEqual(re.Pattern[bytes], convert_to_beam_type(re.Pattern[bytes]))
307+
self.assertNotEqual(
308+
re.Pattern[bytes], convert_to_beam_type(re.Pattern[str]))
307309

308310
def test_match(self):
309-
# TODO(https://github.com/apache/beam/issues/20489): Unsupported.
310-
self.assertEqual(typehints.Any, convert_to_beam_type(typing.Match))
311-
self.assertEqual(typehints.Any, convert_to_beam_type(typing.Match[str]))
312-
self.assertEqual(typehints.Any, convert_to_beam_type(typing.Match[bytes]))
311+
self.assertEqual(re.Match, convert_to_beam_type(re.Match))
312+
self.assertEqual(re.Match[str], convert_to_beam_type(re.Match[str]))
313+
self.assertEqual(re.Match[bytes], convert_to_beam_type(re.Match[bytes]))
314+
self.assertNotEqual(re.Match[bytes], convert_to_beam_type(re.Match[str]))
313315

314316
def test_forward_reference(self):
315317
self.assertEqual(typehints.Any, convert_to_beam_type('int'))

sdks/python/apache_beam/typehints/typehints.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -1516,15 +1516,40 @@ def is_consistent_with(sub, base):
15161516
elif isinstance(sub, TypeConstraint):
15171517
# Nothing but object lives above any type constraints.
15181518
return base == object
1519+
elif getattr(base, '__module__', None) == 're':
1520+
return regex_consistency(sub, base)
15191521
elif is_typing_generic(base):
15201522
# Cannot check unsupported parameterized generic which will cause issubclass
15211523
# to fail with an exception.
15221524
return False
15231525
return issubclass(sub, base)
15241526

15251527

1528+
def regex_consistency(sub, base) -> bool:
  """Checks whether two regular expression (re) type hints are consistent
  with each other.

  Either the sub or base hint can be a parameterized generic, since the set of
  possible parameters is restricted to str | bytes. A base hint without a
  parameter is treated as re.Class[str|bytes], so any sub hint whose class
  matches is consistent regardless of its parameter. Conversely, a sub hint
  without a parameter is treated as inconsistent with a parameterized base
  hint.

  Args:
    sub: The candidate hint, e.g. re.Pattern, re.Match[str] or any other
      object handed to the consistency check.
    base: The re hint to check against, e.g. re.Pattern or re.Match[bytes].

  Returns:
    True if sub is consistent with base, False otherwise. A sub hint that is
    neither a class nor a parameterization of a class is reported as
    inconsistent instead of letting issubclass raise TypeError.
  """
  base_origin = getattr(base, '__origin__', None)
  sub_origin = getattr(sub, '__origin__', None)
  sub_class = sub if sub_origin is None else sub_origin

  # issubclass() raises TypeError when its first argument is not a class
  # (e.g. a forward-reference string); such a hint can never be an re type.
  if not isinstance(sub_class, type):
    return False

  if base_origin is None:
    # Bare base (re.Pattern / re.Match): only the class has to match.
    return issubclass(sub_class, base)

  if sub_origin is None:
    # If the sub hint is not parameterized but the base hint is, we
    # auto-fail.
    return False

  return issubclass(sub_class,
                    base_origin) and (sub.__args__ == base.__args__)
1549+
1550+
15261551
def get_yielded_type(type_hint):
1527-
"""Obtains the type of elements yielded by an iterable.
1552+
"""Obtains the type of elements yielded by an iterable.
15281553
15291554
Note that "iterable" here means: can be iterated over in a for loop, excluding
15301555
strings and dicts.

sdks/python/apache_beam/typehints/typehints_test.py

+30
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import collections.abc
2323
import functools
24+
import re
2425
import sys
2526
import typing
2627
import unittest
@@ -1220,6 +1221,35 @@ def increment(a):
12201221
e.exception.args[0])
12211222

12221223

1224+
class RegexTestCase(TypeHintTestCase):
  """Compatibility checks between re.Pattern / re.Match type hints."""
  def _check_regex_hint(self, hint):
    # Runs the shared compatibility matrix for one re class (bare, [str]
    # and [bytes] forms), passing (base, sub) in that order.

    # A bare base accepts the bare hint and either parameterization.
    for candidate in (hint, hint[str], hint[bytes]):
      self.assertCompatible(hint, candidate)

    # A parameterized base accepts the identical parameterization.
    for param in (str, bytes):
      self.assertCompatible(hint[param], hint[param])

    # A parameterized base rejects an unparameterized sub hint.
    for param in (str, bytes):
      self.assertNotCompatible(hint[param], hint)

    # Differing parameters are never compatible.
    self.assertNotCompatible(hint[str], hint[bytes])
    self.assertNotCompatible(hint[bytes], hint[str])

  def test_pattern(self):
    self._check_regex_hint(re.Pattern)

  def test_match(self):
    self._check_regex_hint(re.Match)

  def test_mix_fails(self):
    # Pattern and Match hints must never be interchangeable.
    mixed_pairs = (
        (re.Pattern, re.Match),
        (re.Match, re.Pattern),
        (re.Pattern[str], re.Match[str]),
    )
    for base_hint, sub_hint in mixed_pairs:
      self.assertNotCompatible(base_hint, sub_hint)
1251+
1252+
12231253
class TakesDecoratorTestCase(TypeHintTestCase):
12241254
def test_must_be_primitive_type_or_constraint(self):
12251255
with self.assertRaises(TypeError) as e:

0 commit comments

Comments
 (0)