Skip to content

Commit 21dddef

Browse files
committed
feat: built in metrics for afe latency and connectivity error
1 parent 4cf5261 commit 21dddef

File tree

5 files changed

+119
-23
lines changed

5 files changed

+119
-23
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/BuiltInMetricsConstant.java

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import io.opentelemetry.sdk.metrics.InstrumentSelector;
2727
import io.opentelemetry.sdk.metrics.InstrumentType;
2828
import io.opentelemetry.sdk.metrics.View;
29+
import java.util.List;
2930
import java.util.Map;
3031
import java.util.Set;
3132
import java.util.stream.Collectors;
@@ -37,6 +38,9 @@ public class BuiltInMetricsConstant {
3738
public static final String GAX_METER_NAME = OpenTelemetryMetricsRecorder.GAX_METER_NAME;
3839
static final String SPANNER_METER_NAME = "spanner-java";
3940
static final String GFE_LATENCIES_NAME = "gfe_latencies";
41+
static final String AFE_LATENCIES_NAME = "afe_latencies";
42+
static final String GFE_CONNECTIVITY_ERROR_NAME = "gfe_connectivity_error_count";
43+
static final String AFE_CONNECTIVITY_ERROR_NAME = "afe_connectivity_error_count";
4044
static final String OPERATION_LATENCIES_NAME = "operation_latencies";
4145
static final String ATTEMPT_LATENCIES_NAME = "attempt_latencies";
4246
static final String OPERATION_LATENCY_NAME = "operation_latency";
@@ -50,7 +54,10 @@ public class BuiltInMetricsConstant {
5054
ATTEMPT_LATENCIES_NAME,
5155
OPERATION_COUNT_NAME,
5256
ATTEMPT_COUNT_NAME,
53-
GFE_LATENCIES_NAME)
57+
GFE_LATENCIES_NAME,
58+
AFE_LATENCIES_NAME,
59+
GFE_CONNECTIVITY_ERROR_NAME,
60+
AFE_CONNECTIVITY_ERROR_NAME)
5461
.stream()
5562
.map(m -> METER_NAME + '/' + m)
5663
.collect(Collectors.toSet());
@@ -102,14 +109,14 @@ public class BuiltInMetricsConstant {
102109
DIRECT_PATH_ENABLED_KEY,
103110
DIRECT_PATH_USED_KEY);
104111

112+
static List<Double> BUCKET_BOUNDARIES =
113+
ImmutableList.of(
114+
0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
115+
16.0, 17.0, 18.0, 19.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0, 160.0,
116+
200.0, 250.0, 300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0,
117+
50000.0, 100000.0, 200000.0, 400000.0, 800000.0, 1600000.0, 3200000.0);
105118
static Aggregation AGGREGATION_WITH_MILLIS_HISTOGRAM =
106-
Aggregation.explicitBucketHistogram(
107-
ImmutableList.of(
108-
0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
109-
15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0,
110-
160.0, 200.0, 250.0, 300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0,
111-
10000.0, 20000.0, 50000.0, 100000.0, 200000.0, 400000.0, 800000.0, 1600000.0,
112-
3200000.0));
119+
Aggregation.explicitBucketHistogram(BUCKET_BOUNDARIES);
113120

114121
static Map<InstrumentSelector, View> getAllViews() {
115122
ImmutableMap.Builder<InstrumentSelector, View> views = ImmutableMap.builder();
@@ -129,14 +136,6 @@ static Map<InstrumentSelector, View> getAllViews() {
129136
BuiltInMetricsConstant.AGGREGATION_WITH_MILLIS_HISTOGRAM,
130137
InstrumentType.HISTOGRAM,
131138
"ms");
132-
defineView(
133-
views,
134-
BuiltInMetricsConstant.SPANNER_METER_NAME,
135-
BuiltInMetricsConstant.GFE_LATENCIES_NAME,
136-
BuiltInMetricsConstant.GFE_LATENCIES_NAME,
137-
BuiltInMetricsConstant.AGGREGATION_WITH_MILLIS_HISTOGRAM,
138-
InstrumentType.HISTOGRAM,
139-
"ms");
140139
defineView(
141140
views,
142141
BuiltInMetricsConstant.GAX_METER_NAME,

google-cloud-spanner/src/main/java/com/google/cloud/spanner/BuiltInMetricsRecorder.java

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import io.opentelemetry.api.common.Attributes;
2424
import io.opentelemetry.api.common.AttributesBuilder;
2525
import io.opentelemetry.api.metrics.DoubleHistogram;
26+
import io.opentelemetry.api.metrics.LongCounter;
2627
import io.opentelemetry.api.metrics.Meter;
2728
import java.util.Map;
2829

@@ -35,6 +36,9 @@
3536
class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
3637

3738
private final DoubleHistogram gfeLatencyRecorder;
39+
private final DoubleHistogram afeLatencyRecorder;
40+
private final LongCounter gfeHeaderMissingCountRecorder;
41+
private final LongCounter afeHeaderMissingCountRecorder;
3842

3943
/**
4044
* Creates the following instruments for the following metrics:
@@ -59,6 +63,27 @@ class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
5963
.setDescription(
6064
"Latency between Google's network receiving an RPC and reading back the first byte of the response")
6165
.setUnit("ms")
66+
.setExplicitBucketBoundariesAdvice(BuiltInMetricsConstant.BUCKET_BOUNDARIES)
67+
.build();
68+
this.afeLatencyRecorder =
69+
meter
70+
.histogramBuilder(serviceName + '/' + BuiltInMetricsConstant.AFE_LATENCIES_NAME)
71+
.setDescription(
72+
"Latency between Spanner API Frontend receiving an RPC and starting to write back the response.")
73+
.setExplicitBucketBoundariesAdvice(BuiltInMetricsConstant.BUCKET_BOUNDARIES)
74+
.setUnit("ms")
75+
.build();
76+
this.gfeHeaderMissingCountRecorder =
77+
meter
78+
.counterBuilder(serviceName + '/' + BuiltInMetricsConstant.GFE_CONNECTIVITY_ERROR_NAME)
79+
.setDescription("Number of requests that failed to reach the Google network.")
80+
.setUnit("1")
81+
.build();
82+
this.afeHeaderMissingCountRecorder =
83+
meter
84+
.counterBuilder(serviceName + '/' + BuiltInMetricsConstant.AFE_CONNECTIVITY_ERROR_NAME)
85+
.setDescription("Number of requests that failed to reach the Spanner API Frontend.")
86+
.setUnit("1")
6287
.build();
6388
}
6489

@@ -69,8 +94,17 @@ class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
6994
* @param gfeLatency GFE latency in ms
7095
* @param attributes Map of the attributes to store
7196
*/
72-
void recordGFELatency(double gfeLatency, Map<String, String> attributes) {
73-
gfeLatencyRecorder.record(gfeLatency, toOtelAttributes(attributes));
97+
void recordServerTimingHeaderMetrics(
98+
double gfeLatency,
99+
double afeLatency,
100+
Long gfeHeaderMissingCount,
101+
Long afeHeaderMissingCount,
102+
Map<String, String> attributes) {
103+
io.opentelemetry.api.common.Attributes otelAttributes = toOtelAttributes(attributes);
104+
gfeLatencyRecorder.record(gfeLatency, otelAttributes);
105+
gfeHeaderMissingCountRecorder.add(gfeHeaderMissingCount, otelAttributes);
106+
afeLatencyRecorder.record(afeLatency, otelAttributes);
107+
afeHeaderMissingCountRecorder.add(afeHeaderMissingCount, otelAttributes);
74108
}
75109

76110
Attributes toOtelAttributes(Map<String, String> attributes) {

google-cloud-spanner/src/main/java/com/google/cloud/spanner/BuiltInMetricsTracer.java

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ class BuiltInMetricsTracer extends MetricsTracer implements ApiTracer {
4040

4141
private Long gfeLatency = null;
4242

43+
private Long afeLatency = null;
44+
45+
private long gfeHeaderMissingCount = 0;
46+
47+
private long afeHeaderMissingCount = 0;
48+
4349
BuiltInMetricsTracer(
4450
MethodName methodName, BuiltInMetricsRecorder builtInOpenTelemetryMetricsRecorder) {
4551
super(methodName, builtInOpenTelemetryMetricsRecorder);
@@ -56,7 +62,8 @@ public void attemptSucceeded() {
5662
super.attemptSucceeded();
5763
if (gfeLatency != null) {
5864
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.OK.toString());
59-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
65+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
66+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
6067
}
6168
}
6269

@@ -69,7 +76,8 @@ public void attemptCancelled() {
6976
super.attemptCancelled();
7077
if (gfeLatency != null) {
7178
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.CANCELLED.toString());
72-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
79+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
80+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
7381
}
7482
}
7583

@@ -86,7 +94,8 @@ public void attemptFailedDuration(Throwable error, java.time.Duration delay) {
8694
super.attemptFailedDuration(error, delay);
8795
if (gfeLatency != null) {
8896
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
89-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
97+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
98+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
9099
}
91100
}
92101

@@ -102,7 +111,8 @@ public void attemptFailedRetriesExhausted(Throwable error) {
102111
super.attemptFailedRetriesExhausted(error);
103112
if (gfeLatency != null) {
104113
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
105-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
114+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
115+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
106116
}
107117
}
108118

@@ -118,14 +128,27 @@ public void attemptPermanentFailure(Throwable error) {
118128
super.attemptPermanentFailure(error);
119129
if (gfeLatency != null) {
120130
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
121-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
131+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
132+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
122133
}
123134
}
124135

125136
void recordGFELatency(Long gfeLatency) {
126137
this.gfeLatency = gfeLatency;
127138
}
128139

140+
void recordAFELatency(Long afeLatency) {
141+
this.afeLatency = afeLatency;
142+
}
143+
144+
void recordGfeHeaderMissingCount(Long value) {
145+
this.gfeHeaderMissingCount = value;
146+
}
147+
148+
void recordAfeHeaderMissingCount(Long value) {
149+
this.afeHeaderMissingCount = value;
150+
}
151+
129152
@Override
130153
public void addAttributes(Map<String, String> attributes) {
131154
super.addAttributes(attributes);

google-cloud-spanner/src/main/java/com/google/cloud/spanner/CompositeTracer.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,4 +198,28 @@ public void recordGFELatency(Long gfeLatency) {
198198
}
199199
}
200200
}
201+
202+
public void recordGfeHeaderMissingCount(Long value) {
203+
for (ApiTracer child : children) {
204+
if (child instanceof BuiltInMetricsTracer) {
205+
((BuiltInMetricsTracer) child).recordGfeHeaderMissingCount(value);
206+
}
207+
}
208+
}
209+
210+
public void recordAFELatency(Long afeLatency) {
211+
for (ApiTracer child : children) {
212+
if (child instanceof BuiltInMetricsTracer) {
213+
((BuiltInMetricsTracer) child).recordAFELatency(afeLatency);
214+
}
215+
}
216+
}
217+
218+
public void recordAfeHeaderMissingCount(Long value) {
219+
for (ApiTracer child : children) {
220+
if (child instanceof BuiltInMetricsTracer) {
221+
((BuiltInMetricsTracer) child).recordAfeHeaderMissingCount(value);
222+
}
223+
}
224+
}
201225
}

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/HeaderInterceptor.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class HeaderInterceptor implements ClientInterceptor {
7272
private static final Metadata.Key<String> SERVER_TIMING_HEADER_KEY =
7373
Metadata.Key.of("server-timing", Metadata.ASCII_STRING_MARSHALLER);
7474
private static final String GFE_TIMING_HEADER = "gfet4t7";
75+
private static final String AFE_TIMING_HEADER = "afet4t7";
7576
private static final Metadata.Key<String> GOOGLE_CLOUD_RESOURCE_PREFIX_KEY =
7677
Metadata.Key.of("google-cloud-resource-prefix", Metadata.ASCII_STRING_MARSHALLER);
7778
private static final Pattern SERVER_TIMING_PATTERN =
@@ -179,6 +180,21 @@ private void processHeader(
179180
} else {
180181
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 1L).record(tagContext);
181182
spannerRpcMetrics.recordGfeHeaderMissingCount(1L, attributes);
183+
184+
if (compositeTracer != null) {
185+
compositeTracer.recordGfeHeaderMissingCount(1L);
186+
}
187+
}
188+
189+
// Record AFE latency
190+
// TODO: Add condition to check if AFE is enabled
191+
if (compositeTracer != null) {
192+
if (serverTimingMetrics.containsKey(AFE_TIMING_HEADER)) {
193+
long afeLatency = serverTimingMetrics.get(AFE_TIMING_HEADER);
194+
compositeTracer.recordAFELatency(afeLatency);
195+
} else {
196+
compositeTracer.recordAfeHeaderMissingCount(1L);
197+
}
182198
}
183199
} catch (NumberFormatException e) {
184200
LOGGER.log(LEVEL, "Invalid server-timing object in header: {}", serverTiming);

0 commit comments

Comments
 (0)