
Commit d85bef2

feature: add native histogram support to latency metrics (#3737)
Note that this does not stop exposing the classic metrics; for now it is up to the scrape config to decide whether to keep the classic series, the native histograms, or both. Signed-off-by: György Krajcsovits <[email protected]>
1 parent d1fe4b7 commit d85bef2
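
The diff below applies one pattern to every latency histogram it touches: the classic Buckets stay in place (or are made explicit via prometheus.DefBuckets) and the three NativeHistogram* options are added on top, so the same metric can serve both representations. A minimal, self-contained sketch of that pattern with client_golang follows; the metric name, port, and handler wiring are illustrative and not part of this commit.

package main

import (
    "log"
    "net/http"
    "time"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
    reg := prometheus.NewRegistry()

    // One histogram carrying both representations: classic cumulative buckets
    // (Buckets) and the sparse native histogram (NativeHistogram* options).
    latency := prometheus.NewHistogram(prometheus.HistogramOpts{
        Name:                            "example_request_duration_seconds", // illustrative name
        Help:                            "Duration of example requests.",
        Buckets:                         prometheus.DefBuckets,
        NativeHistogramBucketFactor:     1.1,           // adjacent bucket boundaries differ by at most ~10%
        NativeHistogramMaxBucketNumber:  100,           // cap on the number of sparse buckets
        NativeHistogramMinResetDuration: 1 * time.Hour, // earliest allowed reset once the cap is reached
    })
    reg.MustRegister(latency)

    // Observation code does not change; only the HistogramOpts do.
    latency.Observe(0.042)

    http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
    log.Fatal(http.ListenAndServe(":2112", nil))
}

Whether Prometheus then stores the classic series, the native histogram, or both is a scrape-side decision, which is what the commit message refers to: native histograms are only transferred when the scraper negotiates the protobuf exposition format, and scrape options such as scrape_classic_histograms control whether the classic series are kept alongside them.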

File tree

cluster/channel.go
cluster/delegate.go
cmd/alertmanager/main.go
nflog/nflog.go
notify/notify.go
silence/silence.go

6 files changed: +38 −17 lines

cluster/channel.go

Lines changed: 7 additions & 3 deletions
@@ -70,9 +70,13 @@ func NewChannel(
         ConstLabels: prometheus.Labels{"key": key},
     })
     oversizeGossipDuration := prometheus.NewHistogram(prometheus.HistogramOpts{
-        Name:        "alertmanager_oversize_gossip_message_duration_seconds",
-        Help:        "Duration of oversized gossip message requests.",
-        ConstLabels: prometheus.Labels{"key": key},
+        Name:                            "alertmanager_oversize_gossip_message_duration_seconds",
+        Help:                            "Duration of oversized gossip message requests.",
+        ConstLabels:                     prometheus.Labels{"key": key},
+        Buckets:                         prometheus.DefBuckets,
+        NativeHistogramBucketFactor:     1.1,
+        NativeHistogramMaxBucketNumber:  100,
+        NativeHistogramMinResetDuration: 1 * time.Hour,
     })

     reg.MustRegister(oversizeGossipDuration, oversizeGossipMessageFailureTotal, oversizeGossipMessageDroppedTotal, oversizeGossipMessageSentTotal)

cluster/delegate.go

Lines changed: 6 additions & 3 deletions
@@ -104,9 +104,12 @@ func newDelegate(l log.Logger, reg prometheus.Registerer, p *Peer, retransmit in
     }, []string{"peer"},
     )
     nodePingDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{
-        Name:    "alertmanager_cluster_pings_seconds",
-        Help:    "Histogram of latencies for ping messages.",
-        Buckets: []float64{.005, .01, .025, .05, .1, .25, .5},
+        Name:                            "alertmanager_cluster_pings_seconds",
+        Help:                            "Histogram of latencies for ping messages.",
+        Buckets:                         []float64{.005, .01, .025, .05, .1, .25, .5},
+        NativeHistogramBucketFactor:     1.1,
+        NativeHistogramMaxBucketNumber:  100,
+        NativeHistogramMinResetDuration: 1 * time.Hour,
     }, []string{"peer"},
     )

cmd/alertmanager/main.go

Lines changed: 6 additions & 3 deletions
@@ -64,9 +64,12 @@ import (
 var (
     requestDuration = prometheus.NewHistogramVec(
         prometheus.HistogramOpts{
-            Name:    "alertmanager_http_request_duration_seconds",
-            Help:    "Histogram of latencies for HTTP requests.",
-            Buckets: []float64{.05, 0.1, .25, .5, .75, 1, 2, 5, 20, 60},
+            Name:                            "alertmanager_http_request_duration_seconds",
+            Help:                            "Histogram of latencies for HTTP requests.",
+            Buckets:                         []float64{.05, 0.1, .25, .5, .75, 1, 2, 5, 20, 60},
+            NativeHistogramBucketFactor:     1.1,
+            NativeHistogramMaxBucketNumber:  100,
+            NativeHistogramMinResetDuration: 1 * time.Hour,
         },
         []string{"handler", "method"},
     )

nflog/nflog.go

Lines changed: 6 additions & 2 deletions
@@ -139,8 +139,12 @@ func newMetrics(r prometheus.Registerer) *metrics {
         Help: "Number notification log received queries that failed.",
     })
     m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
-        Name: "alertmanager_nflog_query_duration_seconds",
-        Help: "Duration of notification log query evaluation.",
+        Name:                            "alertmanager_nflog_query_duration_seconds",
+        Help:                            "Duration of notification log query evaluation.",
+        Buckets:                         prometheus.DefBuckets,
+        NativeHistogramBucketFactor:     1.1,
+        NativeHistogramMaxBucketNumber:  100,
+        NativeHistogramMinResetDuration: 1 * time.Hour,
     })
     m.propagatedMessagesTotal = prometheus.NewCounter(prometheus.CounterOpts{
         Name: "alertmanager_nflog_gossip_messages_propagated_total",

notify/notify.go

Lines changed: 7 additions & 4 deletions
@@ -291,10 +291,13 @@ func NewMetrics(r prometheus.Registerer, ff featurecontrol.Flagger) *Metrics {
             Help: "The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.",
         }, []string{"reason"}),
         notificationLatencySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{
-            Namespace: "alertmanager",
-            Name:      "notification_latency_seconds",
-            Help:      "The latency of notifications in seconds.",
-            Buckets:   []float64{1, 5, 10, 15, 20},
+            Namespace:                       "alertmanager",
+            Name:                            "notification_latency_seconds",
+            Help:                            "The latency of notifications in seconds.",
+            Buckets:                         []float64{1, 5, 10, 15, 20},
+            NativeHistogramBucketFactor:     1.1,
+            NativeHistogramMaxBucketNumber:  100,
+            NativeHistogramMinResetDuration: 1 * time.Hour,
         }, labels),
         ff: ff,
     }

silence/silence.go

Lines changed: 6 additions & 2 deletions
@@ -271,8 +271,12 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
         Help: "How many silence received queries did not succeed.",
     })
     m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
-        Name: "alertmanager_silences_query_duration_seconds",
-        Help: "Duration of silence query evaluation.",
+        Name:                            "alertmanager_silences_query_duration_seconds",
+        Help:                            "Duration of silence query evaluation.",
+        Buckets:                         prometheus.DefBuckets,
+        NativeHistogramBucketFactor:     1.1,
+        NativeHistogramMaxBucketNumber:  100,
+        NativeHistogramMinResetDuration: 1 * time.Hour,
     })
     m.propagatedMessagesTotal = prometheus.NewCounter(prometheus.CounterOpts{
         Name: "alertmanager_silences_gossip_messages_propagated_total",
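
To confirm that the classic and native data really coexist after a change like this, one can gather a registry and inspect the resulting protobuf: the same Histogram message carries both the cumulative bucket list and the sparse schema/spans. A small sketch of that check against the client_golang registry API; the registry, metric name, and printed fields are illustrative, not taken from the Alertmanager code.

package main

import (
    "fmt"
    "log"
    "time"

    "github.com/prometheus/client_golang/prometheus"
)

func main() {
    reg := prometheus.NewRegistry()
    h := prometheus.NewHistogram(prometheus.HistogramOpts{
        Name:                            "example_latency_seconds", // illustrative name
        Help:                            "Example latency histogram.",
        Buckets:                         prometheus.DefBuckets,
        NativeHistogramBucketFactor:     1.1,
        NativeHistogramMaxBucketNumber:  100,
        NativeHistogramMinResetDuration: 1 * time.Hour,
    })
    reg.MustRegister(h)
    h.Observe(0.3)

    mfs, err := reg.Gather()
    if err != nil {
        log.Fatal(err)
    }
    hist := mfs[0].GetMetric()[0].GetHistogram()
    // Classic representation: the cumulative le-buckets are still populated.
    fmt.Println("classic buckets:", len(hist.GetBucket()))
    // Native representation: sparse buckets described by a schema and spans.
    fmt.Println("native schema:", hist.GetSchema(), "positive spans:", len(hist.GetPositiveSpan()))
}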
