@@ -428,7 +428,7 @@ def __init__(
428
428
self ._known_coder_urns = set .union (
429
429
# Those which are required.
430
430
self ._REQUIRED_CODER_URNS ,
431
- # Those common coders which are understood by all environments.
431
+ # Those common coders which are understood by many environments.
432
432
self ._COMMON_CODER_URNS .intersection (
433
433
* (
434
434
set (env .capabilities )
@@ -515,8 +515,40 @@ def maybe_length_prefixed_and_safe_coder(self, coder_id):
515
515
# type: (str) -> Tuple[str, str]
516
516
coder = self .components .coders [coder_id ]
517
517
if coder .spec .urn == common_urns .coders .LENGTH_PREFIX .urn :
518
+ # If the coder is already length prefixed, we can use it as is, and
519
+ # have the runner treat it as opaque bytes.
518
520
return coder_id , self .bytes_coder_id
521
+ elif (coder .spec .urn == common_urns .coders .WINDOWED_VALUE .urn and
522
+ self .components .coders [coder .component_coder_ids [1 ]].spec .urn not in
523
+ self ._known_coder_urns ):
524
+ # A WindowedValue coder with an unknown window type.
525
+ # This needs to be encoded in such a way that we still have access to its
526
+ # timestamp.
527
+ lp_elem_coder = self .maybe_length_prefixed_coder (
528
+ coder .component_coder_ids [0 ])
529
+ tp_window_coder = self .timestamped_prefixed_window_coder (
530
+ coder .component_coder_ids [1 ])
531
+ new_coder_id = unique_name (
532
+ self .components .coders , coder_id + '_timestamp_prefixed' )
533
+ self .components .coders [new_coder_id ].CopyFrom (
534
+ beam_runner_api_pb2 .Coder (
535
+ spec = beam_runner_api_pb2 .FunctionSpec (
536
+ urn = common_urns .coders .WINDOWED_VALUE .urn ),
537
+ component_coder_ids = [lp_elem_coder , tp_window_coder ]))
538
+ safe_coder_id = unique_name (
539
+ self .components .coders , coder_id + '_timestamp_prefixed_opaque' )
540
+ self .components .coders [safe_coder_id ].CopyFrom (
541
+ beam_runner_api_pb2 .Coder (
542
+ spec = beam_runner_api_pb2 .FunctionSpec (
543
+ urn = common_urns .coders .WINDOWED_VALUE .urn ),
544
+ component_coder_ids = [
545
+ self .safe_coders [lp_elem_coder ],
546
+ self .safe_coders [tp_window_coder ]
547
+ ]))
548
+ return new_coder_id , safe_coder_id
519
549
elif coder .spec .urn in self ._known_coder_urns :
550
+ # A known coder type, but its components may still need to be length
551
+ # prefixed.
520
552
new_component_ids = [
521
553
self .maybe_length_prefixed_coder (c ) for c in coder .component_coder_ids
522
554
]
@@ -538,6 +570,7 @@ def maybe_length_prefixed_and_safe_coder(self, coder_id):
538
570
spec = coder .spec , component_coder_ids = safe_component_ids ))
539
571
return new_coder_id , safe_coder_id
540
572
else :
573
+ # A completely unknown coder. Wrap the entire thing in a length prefix.
541
574
new_coder_id = unique_name (
542
575
self .components .coders , coder_id + '_length_prefixed' )
543
576
self .components .coders [new_coder_id ].CopyFrom (
@@ -547,6 +580,25 @@ def maybe_length_prefixed_and_safe_coder(self, coder_id):
547
580
component_coder_ids = [coder_id ]))
548
581
return new_coder_id , self .bytes_coder_id
549
582
583
@memoize_on_instance
def timestamped_prefixed_window_coder(self, coder_id):
    # type: (str) -> str

    """Registers a timestamp-prefixed variant of the given window coder.

    Two coders are added to ``self.components.coders``:

    * a CUSTOM_WINDOW coder whose only component is the (possibly) length
      prefixed form of ``coder_id``; and
    * a component-less coder using the
      TIMESTAMP_PREFIXED_OPAQUE_WINDOW_CODER urn, recorded in
      ``self.safe_coders`` as the safe counterpart of the first.

    Args:
      coder_id: id of the window coder to wrap.

    Returns:
      The id of the newly registered CUSTOM_WINDOW coder.
    """
    coders = self.components.coders

    def register(suffix, coder_proto):
        # Pick an id that is not yet taken, then store the proto under it.
        fresh_id = unique_name(coders, coder_id + suffix)
        coders[fresh_id].CopyFrom(coder_proto)
        return fresh_id

    # Must happen first: it may itself add coders, which affects the unique
    # names chosen below.
    wrapped_component_id = self.maybe_length_prefixed_coder(coder_id)

    custom_window_spec = beam_runner_api_pb2.FunctionSpec(
        urn=common_urns.coders.CUSTOM_WINDOW.urn)
    prefixed_id = register(
        '_timestamp_prefixed',
        beam_runner_api_pb2.Coder(
            spec=custom_window_spec,
            component_coder_ids=[wrapped_component_id]))

    opaque_spec = beam_runner_api_pb2.FunctionSpec(
        urn=python_urns.TIMESTAMP_PREFIXED_OPAQUE_WINDOW_CODER)
    opaque_id = register(
        '_timestamp_prefixed_opaque',
        beam_runner_api_pb2.Coder(spec=opaque_spec))

    self.safe_coders[prefixed_id] = opaque_id
    return prefixed_id
601
+
550
602
def length_prefix_pcoll_coders (self , pcoll_id ):
551
603
# type: (str) -> None
552
604
self .components .pcollections [pcoll_id ].coder_id = (
0 commit comments