Skip to content

Commit c002c1e

Browse files
grab reason label from cortex_alertmanager_notifications_failed_total metric
Signed-off-by: Krishna Teja Puttagunta <[email protected]>
1 parent 52320c2 commit c002c1e

File tree

2 files changed: 70 additions (+70) and 69 deletions (-69).

pkg/alertmanager/alertmanager_metrics.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ func newAlertmanagerMetrics() *alertmanagerMetrics {
8484
numFailedNotifications: prometheus.NewDesc(
8585
"cortex_alertmanager_notifications_failed_total",
8686
"The total number of failed notifications.",
87-
[]string{"user", "integration"}, nil),
87+
[]string{"user", "integration", "reason"}, nil),
8888
numNotificationRequestsTotal: prometheus.NewDesc(
8989
"cortex_alertmanager_notification_requests_total",
9090
"The total number of attempted notification requests.",
@@ -292,7 +292,7 @@ func (m *alertmanagerMetrics) Collect(out chan<- prometheus.Metric) {
292292
data.SendSumOfCountersPerUser(out, m.alertsInvalid, "alertmanager_alerts_invalid_total")
293293

294294
data.SendSumOfCountersPerUserWithLabels(out, m.numNotifications, "alertmanager_notifications_total", "integration")
295-
data.SendSumOfCountersPerUserWithLabels(out, m.numFailedNotifications, "alertmanager_notifications_failed_total", "integration")
295+
data.SendSumOfCountersPerUserWithLabels(out, m.numFailedNotifications, "alertmanager_notifications_failed_total", "integration", "reason")
296296
data.SendSumOfCountersPerUserWithLabels(out, m.numNotificationRequestsTotal, "alertmanager_notification_requests_total", "integration")
297297
data.SendSumOfCountersPerUserWithLabels(out, m.numNotificationRequestsFailedTotal, "alertmanager_notification_requests_failed_total", "integration")
298298
data.SendSumOfHistograms(out, m.notificationLatencySeconds, "alertmanager_notification_latency_seconds")

pkg/alertmanager/alertmanager_metrics_test.go

Lines changed: 68 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ var integrations = []string{
2121
"webhook",
2222
"victorops",
2323
}
24+
var reason = "other"
2425

2526
func TestAlertmanagerMetricsStore(t *testing.T) {
2627
mainReg := prometheus.NewPedanticRegistry()
@@ -107,30 +108,30 @@ func TestAlertmanagerMetricsStore(t *testing.T) {
107108
cortex_alertmanager_notification_latency_seconds_count 24
108109
# HELP cortex_alertmanager_notifications_failed_total The total number of failed notifications.
109110
# TYPE cortex_alertmanager_notifications_failed_total counter
110-
cortex_alertmanager_notifications_failed_total{integration="email",user="user1"} 0
111-
cortex_alertmanager_notifications_failed_total{integration="email",user="user2"} 0
112-
cortex_alertmanager_notifications_failed_total{integration="email",user="user3"} 0
113-
cortex_alertmanager_notifications_failed_total{integration="opsgenie",user="user1"} 5
114-
cortex_alertmanager_notifications_failed_total{integration="opsgenie",user="user2"} 50
115-
cortex_alertmanager_notifications_failed_total{integration="opsgenie",user="user3"} 500
116-
cortex_alertmanager_notifications_failed_total{integration="pagerduty",user="user1"} 1
117-
cortex_alertmanager_notifications_failed_total{integration="pagerduty",user="user2"} 10
118-
cortex_alertmanager_notifications_failed_total{integration="pagerduty",user="user3"} 100
119-
cortex_alertmanager_notifications_failed_total{integration="pushover",user="user1"} 3
120-
cortex_alertmanager_notifications_failed_total{integration="pushover",user="user2"} 30
121-
cortex_alertmanager_notifications_failed_total{integration="pushover",user="user3"} 300
122-
cortex_alertmanager_notifications_failed_total{integration="slack",user="user1"} 4
123-
cortex_alertmanager_notifications_failed_total{integration="slack",user="user2"} 40
124-
cortex_alertmanager_notifications_failed_total{integration="slack",user="user3"} 400
125-
cortex_alertmanager_notifications_failed_total{integration="victorops",user="user1"} 7
126-
cortex_alertmanager_notifications_failed_total{integration="victorops",user="user2"} 70
127-
cortex_alertmanager_notifications_failed_total{integration="victorops",user="user3"} 700
128-
cortex_alertmanager_notifications_failed_total{integration="webhook",user="user1"} 6
129-
cortex_alertmanager_notifications_failed_total{integration="webhook",user="user2"} 60
130-
cortex_alertmanager_notifications_failed_total{integration="webhook",user="user3"} 600
131-
cortex_alertmanager_notifications_failed_total{integration="wechat",user="user1"} 2
132-
cortex_alertmanager_notifications_failed_total{integration="wechat",user="user2"} 20
133-
cortex_alertmanager_notifications_failed_total{integration="wechat",user="user3"} 200
111+
cortex_alertmanager_notifications_failed_total{integration="email",reason="other",user="user1"} 0
112+
cortex_alertmanager_notifications_failed_total{integration="email",reason="other",user="user2"} 0
113+
cortex_alertmanager_notifications_failed_total{integration="email",reason="other",user="user3"} 0
114+
cortex_alertmanager_notifications_failed_total{integration="opsgenie",reason="other",user="user1"} 5
115+
cortex_alertmanager_notifications_failed_total{integration="opsgenie",reason="other",user="user2"} 50
116+
cortex_alertmanager_notifications_failed_total{integration="opsgenie",reason="other",user="user3"} 500
117+
cortex_alertmanager_notifications_failed_total{integration="pagerduty",reason="other",user="user1"} 1
118+
cortex_alertmanager_notifications_failed_total{integration="pagerduty",reason="other",user="user2"} 10
119+
cortex_alertmanager_notifications_failed_total{integration="pagerduty",reason="other",user="user3"} 100
120+
cortex_alertmanager_notifications_failed_total{integration="pushover",reason="other",user="user1"} 3
121+
cortex_alertmanager_notifications_failed_total{integration="pushover",reason="other",user="user2"} 30
122+
cortex_alertmanager_notifications_failed_total{integration="pushover",reason="other",user="user3"} 300
123+
cortex_alertmanager_notifications_failed_total{integration="slack",reason="other",user="user1"} 4
124+
cortex_alertmanager_notifications_failed_total{integration="slack",reason="other",user="user2"} 40
125+
cortex_alertmanager_notifications_failed_total{integration="slack",reason="other",user="user3"} 400
126+
cortex_alertmanager_notifications_failed_total{integration="victorops",reason="other",user="user1"} 7
127+
cortex_alertmanager_notifications_failed_total{integration="victorops",reason="other",user="user2"} 70
128+
cortex_alertmanager_notifications_failed_total{integration="victorops",reason="other",user="user3"} 700
129+
cortex_alertmanager_notifications_failed_total{integration="webhook",reason="other",user="user1"} 6
130+
cortex_alertmanager_notifications_failed_total{integration="webhook",reason="other",user="user2"} 60
131+
cortex_alertmanager_notifications_failed_total{integration="webhook",reason="other",user="user3"} 600
132+
cortex_alertmanager_notifications_failed_total{integration="wechat",reason="other",user="user1"} 2
133+
cortex_alertmanager_notifications_failed_total{integration="wechat",reason="other",user="user2"} 20
134+
cortex_alertmanager_notifications_failed_total{integration="wechat",reason="other",user="user3"} 200
134135
# HELP cortex_alertmanager_notification_requests_total The total number of attempted notification requests.
135136
# TYPE cortex_alertmanager_notification_requests_total counter
136137
cortex_alertmanager_notification_requests_total{integration="email",user="user1"} 0
@@ -453,30 +454,30 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
453454
454455
# HELP cortex_alertmanager_notifications_failed_total The total number of failed notifications.
455456
# TYPE cortex_alertmanager_notifications_failed_total counter
456-
cortex_alertmanager_notifications_failed_total{integration="email",user="user1"} 0
457-
cortex_alertmanager_notifications_failed_total{integration="email",user="user2"} 0
458-
cortex_alertmanager_notifications_failed_total{integration="email",user="user3"} 0
459-
cortex_alertmanager_notifications_failed_total{integration="opsgenie",user="user1"} 5
460-
cortex_alertmanager_notifications_failed_total{integration="opsgenie",user="user2"} 50
461-
cortex_alertmanager_notifications_failed_total{integration="opsgenie",user="user3"} 500
462-
cortex_alertmanager_notifications_failed_total{integration="pagerduty",user="user1"} 1
463-
cortex_alertmanager_notifications_failed_total{integration="pagerduty",user="user2"} 10
464-
cortex_alertmanager_notifications_failed_total{integration="pagerduty",user="user3"} 100
465-
cortex_alertmanager_notifications_failed_total{integration="pushover",user="user1"} 3
466-
cortex_alertmanager_notifications_failed_total{integration="pushover",user="user2"} 30
467-
cortex_alertmanager_notifications_failed_total{integration="pushover",user="user3"} 300
468-
cortex_alertmanager_notifications_failed_total{integration="slack",user="user1"} 4
469-
cortex_alertmanager_notifications_failed_total{integration="slack",user="user2"} 40
470-
cortex_alertmanager_notifications_failed_total{integration="slack",user="user3"} 400
471-
cortex_alertmanager_notifications_failed_total{integration="victorops",user="user1"} 7
472-
cortex_alertmanager_notifications_failed_total{integration="victorops",user="user2"} 70
473-
cortex_alertmanager_notifications_failed_total{integration="victorops",user="user3"} 700
474-
cortex_alertmanager_notifications_failed_total{integration="webhook",user="user1"} 6
475-
cortex_alertmanager_notifications_failed_total{integration="webhook",user="user2"} 60
476-
cortex_alertmanager_notifications_failed_total{integration="webhook",user="user3"} 600
477-
cortex_alertmanager_notifications_failed_total{integration="wechat",user="user1"} 2
478-
cortex_alertmanager_notifications_failed_total{integration="wechat",user="user2"} 20
479-
cortex_alertmanager_notifications_failed_total{integration="wechat",user="user3"} 200
457+
cortex_alertmanager_notifications_failed_total{integration="email",reason="other",user="user1"} 0
458+
cortex_alertmanager_notifications_failed_total{integration="email",reason="other",user="user2"} 0
459+
cortex_alertmanager_notifications_failed_total{integration="email",reason="other",user="user3"} 0
460+
cortex_alertmanager_notifications_failed_total{integration="opsgenie",reason="other",user="user1"} 5
461+
cortex_alertmanager_notifications_failed_total{integration="opsgenie",reason="other",user="user2"} 50
462+
cortex_alertmanager_notifications_failed_total{integration="opsgenie",reason="other",user="user3"} 500
463+
cortex_alertmanager_notifications_failed_total{integration="pagerduty",reason="other",user="user1"} 1
464+
cortex_alertmanager_notifications_failed_total{integration="pagerduty",reason="other",user="user2"} 10
465+
cortex_alertmanager_notifications_failed_total{integration="pagerduty",reason="other",user="user3"} 100
466+
cortex_alertmanager_notifications_failed_total{integration="pushover",reason="other",user="user1"} 3
467+
cortex_alertmanager_notifications_failed_total{integration="pushover",reason="other",user="user2"} 30
468+
cortex_alertmanager_notifications_failed_total{integration="pushover",reason="other",user="user3"} 300
469+
cortex_alertmanager_notifications_failed_total{integration="slack",reason="other",user="user1"} 4
470+
cortex_alertmanager_notifications_failed_total{integration="slack",reason="other",user="user2"} 40
471+
cortex_alertmanager_notifications_failed_total{integration="slack",reason="other",user="user3"} 400
472+
cortex_alertmanager_notifications_failed_total{integration="victorops",reason="other",user="user1"} 7
473+
cortex_alertmanager_notifications_failed_total{integration="victorops",reason="other",user="user2"} 70
474+
cortex_alertmanager_notifications_failed_total{integration="victorops",reason="other",user="user3"} 700
475+
cortex_alertmanager_notifications_failed_total{integration="webhook",reason="other",user="user1"} 6
476+
cortex_alertmanager_notifications_failed_total{integration="webhook",reason="other",user="user2"} 60
477+
cortex_alertmanager_notifications_failed_total{integration="webhook",reason="other",user="user3"} 600
478+
cortex_alertmanager_notifications_failed_total{integration="wechat",reason="other",user="user1"} 2
479+
cortex_alertmanager_notifications_failed_total{integration="wechat",reason="other",user="user2"} 20
480+
cortex_alertmanager_notifications_failed_total{integration="wechat",reason="other",user="user3"} 200
480481
481482
# HELP cortex_alertmanager_notifications_total The total number of attempted notifications.
482483
# TYPE cortex_alertmanager_notifications_total counter
@@ -721,22 +722,22 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
721722
722723
# HELP cortex_alertmanager_notifications_failed_total The total number of failed notifications.
723724
# TYPE cortex_alertmanager_notifications_failed_total counter
724-
cortex_alertmanager_notifications_failed_total{integration="email",user="user1"} 0
725-
cortex_alertmanager_notifications_failed_total{integration="email",user="user2"} 0
726-
cortex_alertmanager_notifications_failed_total{integration="opsgenie",user="user1"} 5
727-
cortex_alertmanager_notifications_failed_total{integration="opsgenie",user="user2"} 50
728-
cortex_alertmanager_notifications_failed_total{integration="pagerduty",user="user1"} 1
729-
cortex_alertmanager_notifications_failed_total{integration="pagerduty",user="user2"} 10
730-
cortex_alertmanager_notifications_failed_total{integration="pushover",user="user1"} 3
731-
cortex_alertmanager_notifications_failed_total{integration="pushover",user="user2"} 30
732-
cortex_alertmanager_notifications_failed_total{integration="slack",user="user1"} 4
733-
cortex_alertmanager_notifications_failed_total{integration="slack",user="user2"} 40
734-
cortex_alertmanager_notifications_failed_total{integration="victorops",user="user1"} 7
735-
cortex_alertmanager_notifications_failed_total{integration="victorops",user="user2"} 70
736-
cortex_alertmanager_notifications_failed_total{integration="webhook",user="user1"} 6
737-
cortex_alertmanager_notifications_failed_total{integration="webhook",user="user2"} 60
738-
cortex_alertmanager_notifications_failed_total{integration="wechat",user="user1"} 2
739-
cortex_alertmanager_notifications_failed_total{integration="wechat",user="user2"} 20
725+
cortex_alertmanager_notifications_failed_total{integration="email",reason="other",user="user1"} 0
726+
cortex_alertmanager_notifications_failed_total{integration="email",reason="other",user="user2"} 0
727+
cortex_alertmanager_notifications_failed_total{integration="opsgenie",reason="other",user="user1"} 5
728+
cortex_alertmanager_notifications_failed_total{integration="opsgenie",reason="other",user="user2"} 50
729+
cortex_alertmanager_notifications_failed_total{integration="pagerduty",reason="other",user="user1"} 1
730+
cortex_alertmanager_notifications_failed_total{integration="pagerduty",reason="other",user="user2"} 10
731+
cortex_alertmanager_notifications_failed_total{integration="pushover",reason="other",user="user1"} 3
732+
cortex_alertmanager_notifications_failed_total{integration="pushover",reason="other",user="user2"} 30
733+
cortex_alertmanager_notifications_failed_total{integration="slack",reason="other",user="user1"} 4
734+
cortex_alertmanager_notifications_failed_total{integration="slack",reason="other",user="user2"} 40
735+
cortex_alertmanager_notifications_failed_total{integration="victorops",reason="other",user="user1"} 7
736+
cortex_alertmanager_notifications_failed_total{integration="victorops",reason="other",user="user2"} 70
737+
cortex_alertmanager_notifications_failed_total{integration="webhook",reason="other",user="user1"} 6
738+
cortex_alertmanager_notifications_failed_total{integration="webhook",reason="other",user="user2"} 60
739+
cortex_alertmanager_notifications_failed_total{integration="wechat",reason="other",user="user1"} 2
740+
cortex_alertmanager_notifications_failed_total{integration="wechat",reason="other",user="user2"} 20
740741
741742
# HELP cortex_alertmanager_notifications_total The total number of attempted notifications.
742743
# TYPE cortex_alertmanager_notifications_total counter
@@ -872,7 +873,7 @@ func populateAlertmanager(base float64) *prometheus.Registry {
872873
nm := newNotifyMetrics(reg)
873874
for i, integration := range integrations {
874875
nm.numNotifications.WithLabelValues(integration).Add(base * float64(i))
875-
nm.numFailedNotifications.WithLabelValues(integration).Add(base * float64(i))
876+
nm.numFailedNotifications.WithLabelValues(integration, reason).Add(base * float64(i))
876877
nm.numNotificationRequestsTotal.WithLabelValues(integration).Add(base * float64(i))
877878
nm.numNotificationRequestsFailedTotal.WithLabelValues(integration).Add(base * float64(i))
878879
nm.notificationLatencySeconds.WithLabelValues(integration).Observe(base * float64(i) * 0.025)
@@ -1034,7 +1035,7 @@ func newNotifyMetrics(r prometheus.Registerer) *notifyMetrics {
10341035
Namespace: "alertmanager",
10351036
Name: "notifications_failed_total",
10361037
Help: "The total number of failed notifications.",
1037-
}, []string{"integration"}),
1038+
}, []string{"integration", "reason"}),
10381039
numNotificationRequestsTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
10391040
Namespace: "alertmanager",
10401041
Name: "notification_requests_total",
@@ -1054,7 +1055,7 @@ func newNotifyMetrics(r prometheus.Registerer) *notifyMetrics {
10541055
}
10551056
for _, integration := range integrations {
10561057
m.numNotifications.WithLabelValues(integration)
1057-
m.numFailedNotifications.WithLabelValues(integration)
1058+
m.numFailedNotifications.WithLabelValues(integration, reason)
10581059
m.numNotificationRequestsTotal.WithLabelValues(integration)
10591060
m.numNotificationRequestsFailedTotal.WithLabelValues(integration)
10601061
m.notificationLatencySeconds.WithLabelValues(integration)

0 commit comments

Comments
 (0)