
Commit 4450031

Naireen and Naireen authored
Add sdf kafka poll latencies (#34275)
* add kafka sdf metrics
* address comments
* address more comments
* address comments

Co-authored-by: Naireen <[email protected]>
1 parent 4500499 commit 4450031

3 files changed: +122 −119 lines


runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MetricsContainerImpl.java

+1 −3
@@ -321,9 +321,7 @@ public MetricUpdates getUpdates() {

     // Add any metricKey labels to the monitoringInfoLabels.
     if (!metricName.getLabels().isEmpty()) {
-      for (Map.Entry<String, String> entry : metricName.getLabels().entrySet()) {
-        builder.setLabel(entry.getKey(), entry.getValue());
-      }
+      builder.setLabels(metricName.getLabels());
     }
     return builder;
   }
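The change above replaces a per-entry loop with a single bulk call on the builder. Below is a minimal stand-alone sketch of that pattern; LabelBuilderSketch is a hypothetical stand-in, not Beam's actual MonitoringInfo builder, and it only shows that the two shapes produce the same label map.

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Hypothetical stand-in for a MonitoringInfo-style builder with both a
// per-entry setter and a bulk setter; not Beam's SimpleMonitoringInfoBuilder.
final class LabelBuilderSketch {
  private final Map<String, String> labels = new LinkedHashMap<>();

  LabelBuilderSketch setLabel(String key, String value) {
    labels.put(key, value); // one label at a time, as the removed loop did
    return this;
  }

  LabelBuilderSketch setLabels(Map<String, String> newLabels) {
    labels.putAll(newLabels); // whole map at once, as the replacement line does
    return this;
  }

  Map<String, String> build() {
    return new LinkedHashMap<>(labels);
  }

  public static void main(String[] args) {
    Map<String, String> metricLabels = new LinkedHashMap<>();
    metricLabels.put("TOPIC", "orders");
    metricLabels.put("PARTITION", "3");

    // Old shape: copy entries one by one.
    LabelBuilderSketch perEntry = new LabelBuilderSketch();
    for (Map.Entry<String, String> entry : metricLabels.entrySet()) {
      perEntry.setLabel(entry.getKey(), entry.getValue());
    }

    // New shape: hand the whole map to the builder in one call.
    LabelBuilderSketch bulk = new LabelBuilderSketch().setLabels(metricLabels);

    System.out.println(perEntry.build().equals(bulk.build())); // prints: true
  }
}
```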

runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MonitoringInfoEncodings.java

+2 −2
@@ -182,12 +182,12 @@ public static double decodeDoubleCounter(ByteString payload) {
     }
   }

-  /** Encodes to {@link MonitoringInfoConstants.TypeUrns#PER_WORKER_HISTOGRAM}. */
+  /** Encodes to {@link MonitoringInfoConstants.TypeUrns#HISTOGRAM}. */
   public static ByteString encodeInt64Histogram(HistogramData inputHistogram) {
     return inputHistogram.toProto().toByteString();
   }

-  /** Decodes to {@link MonitoringInfoConstants.TypeUrns#PER_WORKER_HISTOGRAM}. */
+  /** Decodes to {@link MonitoringInfoConstants.TypeUrns#HISTOGRAM}. */
   public static HistogramData decodeInt64Histogram(ByteString payload) {
     try {
       return new HistogramData(HistogramValue.parseFrom(payload));
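The Javadoc fix above points encodeInt64Histogram and decodeInt64Histogram at the HISTOGRAM type URN; the methods themselves just round-trip a HistogramData through its proto byte payload. A toy sketch of that encode/decode symmetry follows, using only the JDK; the fixed bucket-count layout here is an illustrative assumption, not Beam's proto encoding.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

// Toy stand-in for a histogram payload: bucket counts written to bytes and
// read back, mirroring the encode/decode pairing of the methods above.
final class HistogramPayloadSketch {
  static byte[] encode(long[] bucketCounts) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (DataOutputStream out = new DataOutputStream(bytes)) {
      out.writeInt(bucketCounts.length); // number of buckets
      for (long count : bucketCounts) {
        out.writeLong(count); // one count per bucket
      }
    }
    return bytes.toByteArray();
  }

  static long[] decode(byte[] payload) throws IOException {
    try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(payload))) {
      long[] bucketCounts = new long[in.readInt()];
      for (int i = 0; i < bucketCounts.length; i++) {
        bucketCounts[i] = in.readLong();
      }
      return bucketCounts;
    }
  }

  public static void main(String[] args) throws IOException {
    long[] counts = {4L, 17L, 3L, 0L, 1L};
    byte[] payload = encode(counts);
    // Decoding the payload restores the original bucket counts.
    System.out.println(Arrays.equals(counts, decode(payload))); // prints: true
  }
}
```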

sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java

+119 −114
@@ -447,123 +447,126 @@ public ProcessContinuation processElement(
       long skippedRecords = 0L;
       final Stopwatch sw = Stopwatch.createStarted();

-      while (true) {
-        // Fetch the record size accumulator.
-        final MovingAvg avgRecordSize = avgRecordSizeCache.getUnchecked(kafkaSourceDescriptor);
-        rawRecords = poll(consumer, kafkaSourceDescriptor.getTopicPartition());
-        // When there are no records available for the current TopicPartition, self-checkpoint
-        // and move to process the next element.
-        if (rawRecords.isEmpty()) {
-          if (!topicPartitionExists(
-              kafkaSourceDescriptor.getTopicPartition(),
-              consumer.partitionsFor(kafkaSourceDescriptor.getTopic()))) {
-            return ProcessContinuation.stop();
-          }
-          if (timestampPolicy != null) {
-            updateWatermarkManually(timestampPolicy, watermarkEstimator, tracker);
-          }
-          return ProcessContinuation.resume();
-        }
-        for (ConsumerRecord<byte[], byte[]> rawRecord : rawRecords) {
-          // If the Kafka consumer returns a record with an offset that is already processed
-          // the record can be safely skipped. This is needed because there is a possibility
-          // that the seek() above fails to move the offset to the desired position. In which
-          // case poll() would return records that are already cnsumed.
-          if (rawRecord.offset() < startOffset) {
-            // If the start offset is not reached even after skipping the records for 10 seconds
-            // then the processing is stopped with a backoff to give the Kakfa server some time
-            // catch up.
-            if (sw.elapsed().getSeconds() > 10L) {
-              LOG.error(
-                  "The expected offset ({}) was not reached even after"
-                      + " skipping consumed records for 10 seconds. The offset we could"
-                      + " reach was {}. The processing of this bundle will be attempted"
-                      + " at a later time.",
-                  expectedOffset,
-                  rawRecord.offset());
-              return ProcessContinuation.resume()
-                  .withResumeDelay(org.joda.time.Duration.standardSeconds(10L));
-            }
-            skippedRecords++;
-            continue;
-          }
-          if (skippedRecords > 0L) {
-            LOG.warn(
-                "{} records were skipped due to seek returning an"
-                    + " earlier position than requested position of {}",
-                skippedRecords,
-                expectedOffset);
-            skippedRecords = 0L;
-          }
-          if (!tracker.tryClaim(rawRecord.offset())) {
-            return ProcessContinuation.stop();
-          }
-          try {
-            KafkaRecord<K, V> kafkaRecord =
-                new KafkaRecord<>(
-                    rawRecord.topic(),
-                    rawRecord.partition(),
-                    rawRecord.offset(),
-                    ConsumerSpEL.getRecordTimestamp(rawRecord),
-                    ConsumerSpEL.getRecordTimestampType(rawRecord),
-                    ConsumerSpEL.hasHeaders() ? rawRecord.headers() : null,
-                    ConsumerSpEL.deserializeKey(keyDeserializerInstance, rawRecord),
-                    ConsumerSpEL.deserializeValue(valueDeserializerInstance, rawRecord));
-            int recordSize =
-                (rawRecord.key() == null ? 0 : rawRecord.key().length)
-                    + (rawRecord.value() == null ? 0 : rawRecord.value().length);
-            avgRecordSize.update(recordSize);
-            rawSizes.update(recordSize);
-            expectedOffset = rawRecord.offset() + 1;
-            Instant outputTimestamp;
-            // The outputTimestamp and watermark will be computed by timestampPolicy, where the
-            // WatermarkEstimator should be a manual one.
-            if (timestampPolicy != null) {
-              TimestampPolicyContext context =
-                  updateWatermarkManually(timestampPolicy, watermarkEstimator, tracker);
-              outputTimestamp = timestampPolicy.getTimestampForRecord(context, kafkaRecord);
-            } else {
-              Preconditions.checkStateNotNull(this.extractOutputTimestampFn);
-              outputTimestamp = extractOutputTimestampFn.apply(kafkaRecord);
+      KafkaMetrics kafkaMetrics = KafkaSinkMetrics.kafkaMetrics();
+      try {
+        while (true) {
+          // Fetch the record size accumulator.
+          final MovingAvg avgRecordSize = avgRecordSizeCache.getUnchecked(kafkaSourceDescriptor);
+          rawRecords = poll(consumer, kafkaSourceDescriptor.getTopicPartition(), kafkaMetrics);
+          // When there are no records available for the current TopicPartition, self-checkpoint
+          // and move to process the next element.
+          if (rawRecords.isEmpty()) {
+            if (!topicPartitionExists(
+                kafkaSourceDescriptor.getTopicPartition(),
+                consumer.partitionsFor(kafkaSourceDescriptor.getTopic()))) {
+              return ProcessContinuation.stop();
             }
-            receiver
-                .get(recordTag)
-                .outputWithTimestamp(KV.of(kafkaSourceDescriptor, kafkaRecord), outputTimestamp);
-          } catch (SerializationException e) {
-            // This exception should only occur during the key and value deserialization when
-            // creating the Kafka Record
-            badRecordRouter.route(
-                receiver,
-                rawRecord,
-                null,
-                e,
-                "Failure deserializing Key or Value of Kakfa record reading from Kafka");
             if (timestampPolicy != null) {
               updateWatermarkManually(timestampPolicy, watermarkEstimator, tracker);
             }
+            return ProcessContinuation.resume();
+          }
+          for (ConsumerRecord<byte[], byte[]> rawRecord : rawRecords) {
+            // If the Kafka consumer returns a record with an offset that is already processed
+            // the record can be safely skipped. This is needed because there is a possibility
+            // that the seek() above fails to move the offset to the desired position. In which
+            // case poll() would return records that are already cnsumed.
+            if (rawRecord.offset() < startOffset) {
+              // If the start offset is not reached even after skipping the records for 10 seconds
+              // then the processing is stopped with a backoff to give the Kakfa server some time
+              // catch up.
+              if (sw.elapsed().getSeconds() > 10L) {
+                LOG.error(
+                    "The expected offset ({}) was not reached even after"
+                        + " skipping consumed records for 10 seconds. The offset we could"
+                        + " reach was {}. The processing of this bundle will be attempted"
+                        + " at a later time.",
+                    expectedOffset,
+                    rawRecord.offset());
+                return ProcessContinuation.resume()
+                    .withResumeDelay(org.joda.time.Duration.standardSeconds(10L));
+              }
+              skippedRecords++;
+              continue;
+            }
+            if (skippedRecords > 0L) {
+              LOG.warn(
+                  "{} records were skipped due to seek returning an"
+                      + " earlier position than requested position of {}",
+                  skippedRecords,
+                  expectedOffset);
+              skippedRecords = 0L;
+            }
+            if (!tracker.tryClaim(rawRecord.offset())) {
+              return ProcessContinuation.stop();
+            }
+            try {
+              KafkaRecord<K, V> kafkaRecord =
+                  new KafkaRecord<>(
+                      rawRecord.topic(),
+                      rawRecord.partition(),
+                      rawRecord.offset(),
+                      ConsumerSpEL.getRecordTimestamp(rawRecord),
+                      ConsumerSpEL.getRecordTimestampType(rawRecord),
+                      ConsumerSpEL.hasHeaders() ? rawRecord.headers() : null,
+                      ConsumerSpEL.deserializeKey(keyDeserializerInstance, rawRecord),
+                      ConsumerSpEL.deserializeValue(valueDeserializerInstance, rawRecord));
+              int recordSize =
+                  (rawRecord.key() == null ? 0 : rawRecord.key().length)
+                      + (rawRecord.value() == null ? 0 : rawRecord.value().length);
+              avgRecordSize.update(recordSize);
+              rawSizes.update(recordSize);
+              expectedOffset = rawRecord.offset() + 1;
+              Instant outputTimestamp;
+              // The outputTimestamp and watermark will be computed by timestampPolicy, where the
+              // WatermarkEstimator should be a manual one.
+              if (timestampPolicy != null) {
+                TimestampPolicyContext context =
+                    updateWatermarkManually(timestampPolicy, watermarkEstimator, tracker);
+                outputTimestamp = timestampPolicy.getTimestampForRecord(context, kafkaRecord);
+              } else {
+                Preconditions.checkStateNotNull(this.extractOutputTimestampFn);
+                outputTimestamp = extractOutputTimestampFn.apply(kafkaRecord);
+              }
+              receiver
+                  .get(recordTag)
+                  .outputWithTimestamp(KV.of(kafkaSourceDescriptor, kafkaRecord), outputTimestamp);
+            } catch (SerializationException e) {
+              // This exception should only occur during the key and value deserialization when
+              // creating the Kafka Record
+              badRecordRouter.route(
+                  receiver,
+                  rawRecord,
+                  null,
+                  e,
+                  "Failure deserializing Key or Value of Kakfa record reading from Kafka");
+              if (timestampPolicy != null) {
+                updateWatermarkManually(timestampPolicy, watermarkEstimator, tracker);
+              }
+            }
           }
-        }

-        backlogBytes.set(
-            (long)
-                (BigDecimal.valueOf(
-                        Preconditions.checkStateNotNull(
-                            offsetEstimatorCache.get(kafkaSourceDescriptor).estimate()))
-                    .subtract(BigDecimal.valueOf(expectedOffset), MathContext.DECIMAL128)
-                    .doubleValue()
-                    * avgRecordSize.get()));
-        KafkaMetrics kafkaResults = KafkaSinkMetrics.kafkaMetrics();
-        kafkaResults.updateBacklogBytes(
-            kafkaSourceDescriptor.getTopic(),
-            kafkaSourceDescriptor.getPartition(),
-            (long)
-                (BigDecimal.valueOf(
-                        Preconditions.checkStateNotNull(
-                            offsetEstimatorCache.get(kafkaSourceDescriptor).estimate()))
-                    .subtract(BigDecimal.valueOf(expectedOffset), MathContext.DECIMAL128)
-                    .doubleValue()
-                    * avgRecordSize.get()));
-        kafkaResults.flushBufferedMetrics();
+          backlogBytes.set(
+              (long)
+                  (BigDecimal.valueOf(
+                          Preconditions.checkStateNotNull(
+                              offsetEstimatorCache.get(kafkaSourceDescriptor).estimate()))
+                      .subtract(BigDecimal.valueOf(expectedOffset), MathContext.DECIMAL128)
+                      .doubleValue()
+                      * avgRecordSize.get()));
+          kafkaMetrics.updateBacklogBytes(
+              kafkaSourceDescriptor.getTopic(),
+              kafkaSourceDescriptor.getPartition(),
+              (long)
+                  (BigDecimal.valueOf(
+                          Preconditions.checkStateNotNull(
+                              offsetEstimatorCache.get(kafkaSourceDescriptor).estimate()))
+                      .subtract(BigDecimal.valueOf(expectedOffset), MathContext.DECIMAL128)
+                      .doubleValue()
+                      * avgRecordSize.get()));
+        }
+      } finally {
+        kafkaMetrics.flushBufferedMetrics();
       }
     }
   }
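The restructuring above creates one KafkaMetrics object per processElement call, threads it into poll(), and moves flushBufferedMetrics() into a finally block, so buffered updates are published on every exit path (stop, resume, or an exception). Below is a minimal sketch of that buffer-then-flush-in-finally pattern; BufferedKafkaMetrics and the simulated latencies are hypothetical stand-ins, not Beam's KafkaMetrics.

```java
import java.util.ArrayList;
import java.util.List;

// Minimal sketch: buffer metric updates while a loop runs, flush them in a
// finally block so early returns and exceptions still publish what was recorded.
final class FlushOnExitSketch {
  static final class BufferedKafkaMetrics {
    private final List<String> buffered = new ArrayList<>();

    void updatePollLatency(String topic, long millis) {
      buffered.add(topic + " pollLatencyMs=" + millis);
    }

    void flushBufferedMetrics() {
      buffered.forEach(System.out::println); // stand-in for reporting to a metrics backend
      buffered.clear();
    }
  }

  static String processElement(List<Long> simulatedPollLatencies) {
    BufferedKafkaMetrics metrics = new BufferedKafkaMetrics();
    try {
      for (long latency : simulatedPollLatencies) {
        metrics.updatePollLatency("orders", latency);
        if (latency > 100L) {
          return "resume"; // early exit: buffered updates must still be flushed
        }
      }
      return "stop";
    } finally {
      metrics.flushBufferedMetrics(); // runs on every exit path
    }
  }

  public static void main(String[] args) {
    System.out.println(processElement(List.of(12L, 250L)));
  }
}
```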
@@ -577,13 +580,16 @@ private boolean topicPartitionExists(

   // see https://github.com/apache/beam/issues/25962
   private ConsumerRecords<byte[], byte[]> poll(
-      Consumer<byte[], byte[]> consumer, TopicPartition topicPartition) {
+      Consumer<byte[], byte[]> consumer, TopicPartition topicPartition, KafkaMetrics kafkaMetrics) {
     final Stopwatch sw = Stopwatch.createStarted();
     long previousPosition = -1;
-    java.time.Duration elapsed = java.time.Duration.ZERO;
     java.time.Duration timeout = java.time.Duration.ofSeconds(this.consumerPollingTimeout);
+    java.time.Duration elapsed = java.time.Duration.ZERO;
     while (true) {
       final ConsumerRecords<byte[], byte[]> rawRecords = consumer.poll(timeout.minus(elapsed));
+      elapsed = sw.elapsed();
+      kafkaMetrics.updateSuccessfulRpcMetrics(
+          topicPartition.topic(), java.time.Duration.ofMillis(elapsed.toMillis()));
       if (!rawRecords.isEmpty()) {
         // return as we have found some entries
         return rawRecords;
@@ -592,7 +598,6 @@ private ConsumerRecords<byte[], byte[]> poll(
         // there was no progress on the offset/position, which indicates end of stream
         return rawRecords;
       }
-      elapsed = sw.elapsed();
       if (elapsed.toMillis() >= timeout.toMillis()) {
         // timeout is over
         LOG.warn(
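The poll() changes above give each consumer.poll(...) only the remaining budget (timeout minus elapsed) and, after every poll, report the stopwatch's cumulative elapsed time through updateSuccessfulRpcMetrics. The sketch below mirrors that timing loop under the assumption that Guava's Stopwatch is on the classpath; fakePoll and the println are placeholders for the Kafka consumer and the metrics sink.

```java
import com.google.common.base.Stopwatch;
import java.time.Duration;

// Sketch of the poll-timing pattern: one Stopwatch spans the retry loop, each
// poll gets only the remaining budget, and the elapsed value read after each
// poll is what gets reported as the poll latency.
final class PollLatencySketch {
  public static void main(String[] args) throws InterruptedException {
    Duration timeout = Duration.ofSeconds(2);
    Duration elapsed = Duration.ZERO;
    Stopwatch sw = Stopwatch.createStarted();

    while (true) {
      boolean gotRecords = fakePoll(timeout.minus(elapsed)); // budget shrinks each round
      elapsed = sw.elapsed();
      System.out.println("poll latency so far: " + elapsed.toMillis() + " ms"); // metric update
      if (gotRecords || elapsed.compareTo(timeout) >= 0) {
        break; // records found, or the overall polling budget is spent
      }
    }
  }

  // Pretends to poll a broker for up to the given budget and finds nothing.
  private static boolean fakePoll(Duration budget) throws InterruptedException {
    Thread.sleep(Math.min(200L, Math.max(0L, budget.toMillis())));
    return false;
  }
}
```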
