Skip to content

Commit 12b65a7

Browse files
committed
use status reason rather than code
Signed-off-by: Yijie Qin <[email protected]>
1 parent aec12dd commit 12b65a7

File tree

4 files changed

+46
-48
lines changed

4 files changed

+46
-48
lines changed

notify/notify.go

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,6 @@ type Peer interface {
5151
// to a notification pipeline.
5252
const MinTimeout = 10 * time.Second
5353

54-
// defaultStatusCodeCategory is the default status code category for numTotalFailedNotifications metric
55-
const defaultStatusCodeCategory = "5xx"
56-
57-
const (
58-
failure4xxCategoryCode = "4xx"
59-
failure5xxCategoryCode = "5xx"
60-
)
61-
62-
// possibleFailureStatusCategory is a list of possible failure status code category
63-
var possibleFailureStatusCategory = []string{failure4xxCategoryCode, failure5xxCategoryCode}
64-
6554
// Notifier notifies about alerts under constraints of the given context. It
6655
// returns an error if unsuccessful and a flag whether the error is
6756
// recoverable. This information is useful for a retry logic.
@@ -677,12 +666,12 @@ func (r RetryStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Ale
677666
r.metrics.numNotifications.WithLabelValues(r.integration.Name()).Inc()
678667
ctx, alerts, err := r.exec(ctx, l, alerts...)
679668

680-
statusCodeCategory := defaultStatusCodeCategory
669+
failureReason := defaultFailureReason
681670
if err != nil {
682-
if e, ok := errors.Cause(err).(*ErrorWithStatusCode); ok {
683-
statusCodeCategory = getFailureStatusCodeCategory(e.StatusCode)
671+
if e, ok := errors.Cause(err).(*ErrorWithReason); ok {
672+
failureReason = e.Reason
684673
}
685-
r.metrics.numTotalFailedNotifications.WithLabelValues(r.integration.Name(), statusCodeCategory).Inc()
674+
r.metrics.numTotalFailedNotifications.WithLabelValues(r.integration.Name(), failureReason).Inc()
686675
}
687676
return ctx, alerts, err
688677
}
@@ -895,17 +884,3 @@ func inTimeIntervals(now time.Time, intervals map[string][]timeinterval.TimeInte
895884
}
896885
return false, nil
897886
}
898-
899-
// getFailureStatusCodeCategory return the status code category for failure request
900-
// the status starts with 4 will return 4xx and starts with 5 will return 5xx
901-
// other than 4xx and 5xx input status will return an 5xx.
902-
func getFailureStatusCodeCategory(statusCode int) string {
903-
if statusCode/100 == 4 {
904-
return failure4xxCategoryCode
905-
}
906-
if statusCode/100 == 5 {
907-
return failure5xxCategoryCode
908-
}
909-
910-
return defaultStatusCodeCategory
911-
}

notify/notify_test.go

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -425,15 +425,13 @@ func TestRetryStageWithError(t *testing.T) {
425425

426426
func TestRetryStageWithErrorCode(t *testing.T) {
427427
testcases := map[string]struct {
428-
errorcode int
429-
codelabel string
428+
reason string
429+
reasonlabel string
430430
expectedCount int
431431
}{
432-
"for 400": {errorcode: 400, codelabel: failure4xxCategoryCode, expectedCount: 1},
433-
"for 402": {errorcode: 402, codelabel: failure4xxCategoryCode, expectedCount: 1},
434-
"for 500": {errorcode: 500, codelabel: failure5xxCategoryCode, expectedCount: 1},
435-
"for 502": {errorcode: 502, codelabel: failure5xxCategoryCode, expectedCount: 1},
436-
"for expected code 100": {errorcode: 100, codelabel: failure5xxCategoryCode, expectedCount: 1},
432+
"for clientError": {reason: clientError, reasonlabel: clientError, expectedCount: 1},
433+
"for serverError": {reason: serverError, reasonlabel: serverError, expectedCount: 1},
434+
"for unexpected code": {reason: "unexpected", reasonlabel: defaultFailureReason, expectedCount: 1},
437435
}
438436
for _, testData := range testcases {
439437
fail, retry := true, false
@@ -444,7 +442,7 @@ func TestRetryStageWithErrorCode(t *testing.T) {
444442
notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
445443
if fail {
446444
fail = false
447-
return retry, NewErrorWithStatusCode(testData.errorcode, errors.New("fail to deliver notification"))
445+
return retry, NewErrorWithReason(testData.reason, errors.New("fail to deliver notification"))
448446
}
449447
sent = append(sent, alerts...)
450448
return false, nil
@@ -471,7 +469,7 @@ func TestRetryStageWithErrorCode(t *testing.T) {
471469
resctx, _, err := r.Exec(ctx, log.NewNopLogger(), alerts...)
472470
counter := r.metrics.numTotalFailedNotifications
473471

474-
require.Equal(t, testData.expectedCount, int(prom_testutil.ToFloat64(counter.WithLabelValues(r.integration.Name(), testData.codelabel))))
472+
require.Equal(t, testData.expectedCount, int(prom_testutil.ToFloat64(counter.WithLabelValues(r.integration.Name(), testData.reasonlabel))))
475473

476474
require.NotNil(t, err)
477475
require.NotNil(t, resctx)

notify/sns/sns.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ func (n *Notifier) Notify(ctx context.Context, alert ...*types.Alert) (bool, err
8585
if e, ok := err.(awserr.RequestFailure); ok {
8686
retryable, error := n.retrier.Check(e.StatusCode(), strings.NewReader(e.Message()))
8787

88-
statusErr := notify.NewErrorWithStatusCode(e.StatusCode(), error)
89-
return retryable, statusErr
88+
reasonErr := notify.NewErrorWithReason(notify.GetFailureReasonFromStatusCode(e.StatusCode()), error)
89+
return retryable, reasonErr
9090
}
9191
return true, err
9292
}

notify/util.go

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,17 @@ import (
3030
"github.com/prometheus/alertmanager/types"
3131
)
3232

33+
// defaultFailureReason is the default reason for numTotalFailedNotifications metric
34+
const defaultFailureReason = "other"
35+
36+
const (
37+
clientError = "clientError"
38+
serverError = "ServerError"
39+
)
40+
41+
// possibleFailureStatusCategory is a list of possible failure reason
42+
var possibleFailureStatusCategory = []string{clientError, serverError, defaultFailureReason}
43+
3344
// UserAgentHeader is the default User-Agent for notification requests
3445
var UserAgentHeader = fmt.Sprintf("Alertmanager/%s", version.Version)
3546

@@ -210,20 +221,34 @@ func (r *Retrier) Check(statusCode int, body io.Reader) (bool, error) {
210221
return retry, errors.New(s)
211222
}
212223

213-
type ErrorWithStatusCode struct {
224+
type ErrorWithReason struct {
214225
Err error
215226

216-
// The status code of the HTTP response.
217-
StatusCode int
227+
// The reason of the failure.
228+
Reason string
218229
}
219230

220-
func NewErrorWithStatusCode(statusCode int, err error) *ErrorWithStatusCode {
221-
return &ErrorWithStatusCode{
222-
Err: err,
223-
StatusCode: statusCode,
231+
func NewErrorWithReason(reason string, err error) *ErrorWithReason {
232+
return &ErrorWithReason{
233+
Err: err,
234+
Reason: reason,
224235
}
225236
}
226237

227-
func (e *ErrorWithStatusCode) Error() string {
238+
func (e *ErrorWithReason) Error() string {
228239
return e.Err.Error()
229240
}
241+
242+
// GetFailureReasonFromStatusCode return the reason for failure request
243+
// the status starts with 4 will return 4xx and starts with 5 will return 5xx
244+
// other than 4xx and 5xx input status will return an 5xx.
245+
func GetFailureReasonFromStatusCode(statusCode int) string {
246+
if statusCode/100 == 4 {
247+
return clientError
248+
}
249+
if statusCode/100 == 5 {
250+
return serverError
251+
}
252+
253+
return defaultFailureReason
254+
}

0 commit comments

Comments
 (0)