Skip to content

Commit ab97c9e

Browse files
committed
fix!: stop unbounded memory usage from query log
Resolves: prometheus#15433 When I converted prometheus to use slog in prometheus#14906, I update both the `QueryLogger` interface, as well as how the log calls to the `QueryLogger` were built up in `promql.Engine.exec()`. The backing logger for the `QueryLogger` in the engine is a `util/logging.JSONFileLogger`, and it's implementation of the `With()` method updates the logger the logger in place with the new keyvals added onto the underlying `slog.Logger`, which means they get inherited onto everything after. All subsequent calls to `With()`, even in later queries, would continue to then append on more and more keyvals for the various params and fields built up in the logger. In turn, this causes unbounded growth of the logger, leading to increased memory usage, and in at least one report was the likely cause of an OOM kill. More information can be found in the issue and the linked slack thread. This commit does a few things: - It was referenced in feedback in prometheus#14906 that it would've been better to not change the `QueryLogger` interface if possible, this PR proposes changes that bring it closer to alignment with the pre-3.0 `QueryLogger` interface contract - reverts `promql.Engine.exec()`'s usage of the query logger to the pattern of building up an array of args to pass at once to the end log call. Avoiding the repetitious calls to `.With()` are what resolve the issue with the logger growth/memory usage. - updates the scrape failure logger to use the update `QueryLogger` methods in the contract. - updates tests accordingly - cleans up unused methods Builds and passes tests successfully. Tested locally and confirmed I could no longer reproduce the issue/it resolved the issue. Signed-off-by: TJ Hoplock <[email protected]>
1 parent cc390aa commit ab97c9e

File tree

5 files changed

+22
-60
lines changed

5 files changed

+22
-60
lines changed

promql/engine.go

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,7 @@ type QueryEngine interface {
126126
// QueryLogger is an interface that can be used to log all the queries logged
127127
// by the engine.
128128
type QueryLogger interface {
129-
Error(msg string, args ...any)
130-
Info(msg string, args ...any)
131-
Debug(msg string, args ...any)
132-
Warn(msg string, args ...any)
129+
Log(context.Context, slog.Level, string, ...any)
133130
With(args ...any)
134131
Close() error
135132
}
@@ -637,20 +634,20 @@ func (ng *Engine) exec(ctx context.Context, q *query) (v parser.Value, ws annota
637634
// The step provided by the user is in seconds.
638635
params["step"] = int64(eq.Interval / (time.Second / time.Nanosecond))
639636
}
640-
l.With("params", params)
637+
f := []interface{}{"params", params}
641638
if err != nil {
642-
l.With("error", err)
639+
f = append(f, "error", err)
643640
}
644-
l.With("stats", stats.NewQueryStats(q.Stats()))
641+
f = append(f, "stats", stats.NewQueryStats(q.Stats()))
645642
if span := trace.SpanFromContext(ctx); span != nil {
646-
l.With("spanID", span.SpanContext().SpanID())
643+
f = append(f, "spanID", span.SpanContext().SpanID())
647644
}
648645
if origin := ctx.Value(QueryOrigin{}); origin != nil {
649646
for k, v := range origin.(map[string]interface{}) {
650-
l.With(k, v)
647+
f = append(f, k, v)
651648
}
652649
}
653-
l.Info("promql query logged")
650+
l.Log(context.Background(), slog.LevelInfo, "promql query logged", f...)
654651
// TODO: @tjhop -- do we still need this metric/error log if logger doesn't return errors?
655652
// ng.metrics.queryLogFailures.Inc()
656653
// ng.logger.Error("can't log query", "err", err)

promql/engine_test.go

Lines changed: 5 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"context"
1818
"errors"
1919
"fmt"
20+
"log/slog"
2021
"math"
2122
"sort"
2223
"strings"
@@ -2034,31 +2035,10 @@ func (f *FakeQueryLogger) Close() error {
20342035
}
20352036

20362037
// It implements the promql.QueryLogger interface.
2037-
func (f *FakeQueryLogger) Info(msg string, args ...any) {
2038-
log := append([]any{msg}, args...)
2039-
log = append(log, f.attrs...)
2040-
f.attrs = f.attrs[:0]
2041-
f.logs = append(f.logs, log...)
2042-
}
2043-
2044-
// It implements the promql.QueryLogger interface.
2045-
func (f *FakeQueryLogger) Error(msg string, args ...any) {
2046-
log := append([]any{msg}, args...)
2047-
log = append(log, f.attrs...)
2048-
f.attrs = f.attrs[:0]
2049-
f.logs = append(f.logs, log...)
2050-
}
2051-
2052-
// It implements the promql.QueryLogger interface.
2053-
func (f *FakeQueryLogger) Warn(msg string, args ...any) {
2054-
log := append([]any{msg}, args...)
2055-
log = append(log, f.attrs...)
2056-
f.attrs = f.attrs[:0]
2057-
f.logs = append(f.logs, log...)
2058-
}
2059-
2060-
// It implements the promql.QueryLogger interface.
2061-
func (f *FakeQueryLogger) Debug(msg string, args ...any) {
2038+
func (f *FakeQueryLogger) Log(ctx context.Context, level slog.Level, msg string, args ...any) {
2039+
// Test usage only really cares about existence of keyvals passed in
2040+
// via args, just append in the log message before handling the
2041+
// provided args and any embedded kvs added via `.With()` on f.attrs.
20622042
log := append([]any{msg}, args...)
20632043
log = append(log, f.attrs...)
20642044
f.attrs = f.attrs[:0]

scrape/scrape.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1421,7 +1421,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
14211421
sl.l.Debug("Scrape failed", "err", scrapeErr)
14221422
sl.scrapeFailureLoggerMtx.RLock()
14231423
if sl.scrapeFailureLogger != nil {
1424-
sl.scrapeFailureLogger.Error(scrapeErr.Error())
1424+
sl.scrapeFailureLogger.Log(context.Background(), slog.LevelError, scrapeErr.Error())
14251425
}
14261426
sl.scrapeFailureLoggerMtx.RUnlock()
14271427
if errc != nil {

util/logging/file.go

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
package logging
1515

1616
import (
17+
"context"
1718
"fmt"
1819
"log/slog"
1920
"os"
@@ -57,26 +58,8 @@ func (l *JSONFileLogger) With(args ...any) {
5758
l.logger = l.logger.With(args...)
5859
}
5960

60-
// Info calls the `Info()` method on the underlying `log/slog.Logger` with the
61+
// Log calls the `Log()` method on the underlying `log/slog.Logger` with the
6162
// provided msg and args. It implements the promql.QueryLogger interface.
62-
func (l *JSONFileLogger) Info(msg string, args ...any) {
63-
l.logger.Info(msg, args...)
64-
}
65-
66-
// Error calls the `Error()` method on the underlying `log/slog.Logger` with the
67-
// provided msg and args. It implements the promql.QueryLogger interface.
68-
func (l *JSONFileLogger) Error(msg string, args ...any) {
69-
l.logger.Error(msg, args...)
70-
}
71-
72-
// Debug calls the `Debug()` method on the underlying `log/slog.Logger` with the
73-
// provided msg and args. It implements the promql.QueryLogger interface.
74-
func (l *JSONFileLogger) Debug(msg string, args ...any) {
75-
l.logger.Debug(msg, args...)
76-
}
77-
78-
// Warn calls the `Warn()` method on the underlying `log/slog.Logger` with the
79-
// provided msg and args. It implements the promql.QueryLogger interface.
80-
func (l *JSONFileLogger) Warn(msg string, args ...any) {
81-
l.logger.Warn(msg, args...)
63+
func (l *JSONFileLogger) Log(ctx context.Context, level slog.Level, msg string, args ...any) {
64+
l.logger.Log(ctx, level, msg, args...)
8265
}

util/logging/file_test.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
package logging
1515

1616
import (
17+
"context"
18+
"log/slog"
1719
"os"
1820
"strings"
1921
"testing"
@@ -34,7 +36,7 @@ func TestJSONFileLogger_basic(t *testing.T) {
3436
require.NoError(t, err)
3537
require.NotNil(t, l, "logger can't be nil")
3638

37-
l.Info("test", "hello", "world")
39+
l.Log(context.Background(), slog.LevelInfo, "test", "hello", "world")
3840
require.NoError(t, err)
3941
r := make([]byte, 1024)
4042
_, err = f.Read(r)
@@ -64,14 +66,14 @@ func TestJSONFileLogger_parallel(t *testing.T) {
6466
require.NoError(t, err)
6567
require.NotNil(t, l, "logger can't be nil")
6668

67-
l.Info("test", "hello", "world")
69+
l.Log(context.Background(), slog.LevelInfo, "test", "hello", "world")
6870
require.NoError(t, err)
6971

7072
l2, err := NewJSONFileLogger(f.Name())
7173
require.NoError(t, err)
7274
require.NotNil(t, l, "logger can't be nil")
7375

74-
l2.Info("test", "hello", "world")
76+
l2.Log(context.Background(), slog.LevelInfo, "test", "hello", "world")
7577
require.NoError(t, err)
7678

7779
err = l.Close()

0 commit comments

Comments
 (0)