Skip to content

Commit da77ead

Browse files
committed
frontend/transport: log non-2xx replies from downstream as non-successful
Signed-off-by: Vladimir Varankin <[email protected]>
1 parent 5c736df commit da77ead

File tree

3 files changed

+62
-26
lines changed

3 files changed

+62
-26
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
* `other`: any other request
8484
* [BUGFIX] Fix performance regression introduced in Mimir 2.11.0 when uploading blocks to AWS S3. #7240
8585
* [BUGFIX] Query-frontend: fix race condition when sharding active series is enabled (see above) and response is compressed with snappy. #7290
86+
* [BUGFIX] Query-frontend: "query stats" logs now report unsuccessful (non-2xx) replies from downstream as "failed". #7296
8687

8788
### Mixin
8889

pkg/frontend/transport/handler.go

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ var (
4747
errRequestEntityTooLarge = httpgrpc.Errorf(http.StatusRequestEntityTooLarge, "http: request body too large")
4848
)
4949

50-
// Config for a Handler.
50+
// HandlerConfig is a config for the handler.
5151
type HandlerConfig struct {
5252
LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
5353
LogQueryRequestHeaders flagext.StringSliceCSV `yaml:"log_query_request_headers" category:"advanced"`
@@ -196,7 +196,7 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
196196

197197
if err != nil {
198198
writeError(w, err)
199-
f.reportQueryStats(r, params, startTime, queryResponseTime, 0, queryDetails, err)
199+
f.reportQueryStats(r, params, startTime, queryResponseTime, 0, queryDetails, 0, err)
200200
return
201201
}
202202

@@ -217,13 +217,13 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
217217
f.reportSlowQuery(r, params, queryResponseTime, queryDetails)
218218
}
219219
if f.cfg.QueryStatsEnabled {
220-
f.reportQueryStats(r, params, startTime, queryResponseTime, queryResponseSize, queryDetails, nil)
220+
f.reportQueryStats(r, params, startTime, queryResponseTime, queryResponseSize, queryDetails, resp.StatusCode, nil)
221221
}
222222
}
223223

224224
// reportSlowQuery reports slow queries.
225225
func (f *Handler) reportSlowQuery(r *http.Request, queryString url.Values, queryResponseTime time.Duration, details *querymiddleware.QueryDetails) {
226-
logMessage := append([]interface{}{
226+
logMessage := append([]any{
227227
"msg", "slow query detected",
228228
"method", r.Method,
229229
"host", r.Host,
@@ -238,7 +238,16 @@ func (f *Handler) reportSlowQuery(r *http.Request, queryString url.Values, query
238238
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
239239
}
240240

241-
func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, queryStartTime time.Time, queryResponseTime time.Duration, queryResponseSizeBytes int64, details *querymiddleware.QueryDetails, queryErr error) {
241+
func (f *Handler) reportQueryStats(
242+
r *http.Request,
243+
queryString url.Values,
244+
queryStartTime time.Time,
245+
queryResponseTime time.Duration,
246+
queryResponseSizeBytes int64,
247+
details *querymiddleware.QueryDetails,
248+
queryResponseStatusCode int,
249+
queryErr error,
250+
) {
242251
tenantIDs, err := tenant.TenantIDs(r.Context())
243252
if err != nil {
244253
return
@@ -266,12 +275,13 @@ func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, quer
266275
}
267276

268277
// Log stats.
269-
logMessage := append([]interface{}{
278+
logMessage := append([]any{
270279
"msg", "query stats",
271280
"component", "query-frontend",
272281
"method", r.Method,
273282
"path", r.URL.Path,
274283
"user_agent", r.UserAgent(),
284+
"status_code", queryResponseStatusCode,
275285
"response_time", queryResponseTime,
276286
"response_size_bytes", queryResponseSizeBytes,
277287
"query_wall_time_seconds", wallTime.Seconds(),
@@ -312,6 +322,11 @@ func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, quer
312322
logMessage = append(logMessage, formatRequestHeaders(&r.Header, f.cfg.LogQueryRequestHeaders)...)
313323
}
314324

325+
if queryErr == nil && queryResponseStatusCode/100 != 2 {
326+
// If downstream replied with non-2xx, log this as a failure.
327+
queryErr = fmt.Errorf("downstream replied with %s", http.StatusText(queryResponseStatusCode))
328+
}
329+
315330
if queryErr != nil {
316331
logStatus := "failed"
317332
if errors.Is(queryErr, context.Canceled) {
@@ -332,7 +347,7 @@ func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, quer
332347
}
333348

334349
// formatQueryString prefers printing start, end, and step from details if they are not nil.
335-
func formatQueryString(details *querymiddleware.QueryDetails, queryString url.Values) (fields []interface{}) {
350+
func formatQueryString(details *querymiddleware.QueryDetails, queryString url.Values) (fields []any) {
336351
for k, v := range queryString {
337352
var formattedValue string
338353
if details != nil {
@@ -368,7 +383,7 @@ func paramValueFromDetails(details *querymiddleware.QueryDetails, paramName stri
368383
return ""
369384
}
370385

371-
func formatRequestHeaders(h *http.Header, headersToLog []string) (fields []interface{}) {
386+
func formatRequestHeaders(h *http.Header, headersToLog []string) (fields []any) {
372387
for _, s := range headersToLog {
373388
if v := h.Get(s); v != "" {
374389
fields = append(fields, fmt.Sprintf("header_%s", strings.ReplaceAll(strings.ToLower(s), "-", "_")), v)

pkg/frontend/transport/handler_test.go

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -238,45 +238,66 @@ func TestHandler_FailedRoundTrip(t *testing.T) {
238238
for _, test := range []struct {
239239
name string
240240
cfg HandlerConfig
241-
expectedMetrics int
242241
path string
242+
queryResponseFunc roundTripperFunc
243+
expectedStatusCode int
244+
expectedMetrics int
245+
expectedStatusLog string
243246
expectQueryParamLog bool
244-
queryErr error
245247
}{
246248
{
247-
name: "Failed round trip with context cancelled",
248-
cfg: HandlerConfig{QueryStatsEnabled: false},
249+
name: "Failed round trip with context cancelled",
250+
cfg: HandlerConfig{QueryStatsEnabled: false},
251+
path: "/api/v1/query?query=up&time=2015-07-01T20:10:51.781Z",
252+
queryResponseFunc: func(*http.Request) (*http.Response, error) {
253+
return nil, context.Canceled
254+
},
255+
expectedStatusCode: StatusClientClosedRequest,
249256
expectedMetrics: 0,
250-
path: "/api/v1/query?query=up&time=2015-07-01T20:10:51.781Z",
257+
expectedStatusLog: "canceled",
251258
expectQueryParamLog: true,
252-
queryErr: context.Canceled,
253259
},
254260
{
255-
name: "Failed round trip with no query params",
256-
cfg: HandlerConfig{QueryStatsEnabled: true},
261+
name: "Failed round trip with no query params",
262+
cfg: HandlerConfig{QueryStatsEnabled: true},
263+
path: "/api/v1/query",
264+
queryResponseFunc: func(*http.Request) (*http.Response, error) {
265+
return nil, context.Canceled
266+
},
267+
expectedStatusCode: StatusClientClosedRequest,
257268
expectedMetrics: 5,
258-
path: "/api/v1/query",
269+
expectedStatusLog: "canceled",
270+
expectQueryParamLog: false,
271+
},
272+
{
273+
name: "Failed round trip with HTTP response",
274+
cfg: HandlerConfig{QueryStatsEnabled: true},
275+
path: "/api/v1/query",
276+
queryResponseFunc: func(*http.Request) (*http.Response, error) {
277+
return &http.Response{
278+
StatusCode: http.StatusInternalServerError,
279+
Body: io.NopCloser(strings.NewReader("{}")),
280+
}, nil
281+
},
282+
expectedStatusCode: http.StatusInternalServerError,
283+
expectedMetrics: 5,
284+
expectedStatusLog: "failed",
259285
expectQueryParamLog: false,
260-
queryErr: context.Canceled,
261286
},
262287
} {
263288
t.Run(test.name, func(t *testing.T) {
264-
roundTripper := roundTripperFunc(func(req *http.Request) (*http.Response, error) {
265-
return nil, test.queryErr
266-
})
267-
268289
reg := prometheus.NewPedanticRegistry()
269290
logs := &concurrency.SyncBuffer{}
270291
logger := log.NewLogfmtLogger(logs)
271-
handler := NewHandler(test.cfg, roundTripper, logger, reg, nil)
292+
handler := NewHandler(test.cfg, test.queryResponseFunc, logger, reg, nil)
272293

273294
ctx := user.InjectOrgID(context.Background(), "12345")
274295
req := httptest.NewRequest("GET", test.path, nil)
275296
req = req.WithContext(ctx)
276297
resp := httptest.NewRecorder()
277298

278299
handler.ServeHTTP(resp, req)
279-
require.Equal(t, StatusClientClosedRequest, resp.Code)
300+
require.Equal(t, test.expectedStatusCode, resp.Code)
280301

281302
count, err := promtest.GatherAndCount(
282303
reg,
@@ -286,11 +307,10 @@ func TestHandler_FailedRoundTrip(t *testing.T) {
286307
"cortex_query_fetched_chunks_total",
287308
"cortex_query_fetched_index_bytes_total",
288309
)
289-
290310
require.NoError(t, err)
291311

292312
assert.Contains(t, strings.TrimSpace(logs.String()), "sharded_queries")
293-
assert.Contains(t, strings.TrimSpace(logs.String()), "status=canceled")
313+
assert.Contains(t, strings.TrimSpace(logs.String()), fmt.Sprintf("status=%s", test.expectedStatusLog))
294314
if test.expectQueryParamLog {
295315
assert.Contains(t, strings.TrimSpace(logs.String()), "param_query")
296316
}

0 commit comments

Comments
 (0)