Skip to content

Commit cd7ebce

Browse files
Merge pull request #95 from grafana/santihernandezc/cherry_pick_webhook_timeouts
Cherry-pick "Add timeout option for webhook notifier. (prometheus#4137)" 0f65e8f
2 parents 1841e9d + 1a8748c commit cd7ebce

File tree

4 files changed

+69
-0
lines changed

4 files changed

+69
-0
lines changed

config/notifiers.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,10 @@ type WebhookConfig struct {
501501
// Alerts exceeding this threshold will be truncated. Setting this to 0
502502
// allows an unlimited number of alerts.
503503
MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"`
504+
505+
// Timeout is the maximum time allowed to invoke the webhook. Setting this to 0
506+
// disables the timeout.
507+
Timeout time.Duration `yaml:"timeout" json:"timeout"`
504508
}
505509

506510
// UnmarshalYAML implements the yaml.Unmarshaler interface.

docs/configuration.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,6 +1390,13 @@ url_file: <filepath>
13901390
# above this threshold are truncated. When leaving this at its default value of
13911391
# 0, all alerts are included.
13921392
[ max_alerts: <int> | default = 0 ]
1393+
1394+
# The maximum time to wait for a webhook request to complete, before failing the
1395+
# request and allowing it to be retried. The default value of 0s indicates that
1396+
# no timeout should be applied.
1397+
# NOTE: This will have no effect if set higher than the group_interval.
1398+
[ timeout: <duration> | default = 0s ]
1399+
13931400
```
13941401

13951402
The Alertmanager

notify/webhook/webhook.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,17 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er
124124
url = strings.TrimSpace(string(content))
125125
}
126126

127+
if n.conf.Timeout > 0 {
128+
postCtx, cancel := context.WithTimeoutCause(ctx, n.conf.Timeout, fmt.Errorf("configured webhook timeout reached (%s)", n.conf.Timeout))
129+
defer cancel()
130+
ctx = postCtx
131+
}
132+
127133
resp, err := notify.PostJSON(ctx, n.client, url, &buf)
128134
if err != nil {
135+
if ctx.Err() != nil {
136+
err = fmt.Errorf("%w: %w", err, context.Cause(ctx))
137+
}
129138
return true, notify.RedactURL(err)
130139
}
131140
defer notify.Drain(resp)

test/with_api_v2/acceptance/send_test.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,3 +464,52 @@ receivers:
464464

465465
t.Log(co.Check())
466466
}
467+
468+
func TestWebhookTimeout(t *testing.T) {
469+
t.Parallel()
470+
471+
// This integration test uses an extended group_interval to check that
472+
// the webhook-level timeout has the desired effect, and that notification
473+
// sending is retried in this case.
474+
conf := `
475+
route:
476+
receiver: "default"
477+
group_by: [alertname]
478+
group_wait: 1s
479+
group_interval: 1m
480+
repeat_interval: 1m
481+
482+
receivers:
483+
- name: "default"
484+
webhook_configs:
485+
- url: 'http://%s'
486+
timeout: 500ms
487+
`
488+
489+
at := NewAcceptanceTest(t, &AcceptanceOpts{
490+
Tolerance: 150 * time.Millisecond,
491+
})
492+
493+
co := at.Collector("webhook")
494+
wh := NewWebhook(t, co)
495+
496+
wh.Func = func(ts float64) bool {
497+
// Make some webhook requests slow enough to hit the webhook
498+
// timeout, but not so slow as to hit the dispatcher timeout.
499+
if ts < 3 {
500+
time.Sleep(time.Second)
501+
return true
502+
}
503+
return false
504+
}
505+
506+
am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)
507+
508+
am.Push(At(1), Alert("alertname", "test1"))
509+
510+
co.Want(Between(3, 4), Alert("alertname", "test1").Active(1))
511+
512+
at.Run()
513+
514+
t.Log(co.Check())
515+
}

0 commit comments

Comments
 (0)