Skip to content

Commit b444381

Browse files
Fix MaxSilences limit causes incomplete updates of existing silences (#3877)
* Fix MaxSilences limit causes incomplete updates of existing silences This commit fixes a bug where the MaxSilences limit can cause an incomplete update of existing silences, where the old silence can be expired but the new silence cannot be created because it would exceeded the maximum number of silences. Signed-off-by: George Robinson <[email protected]> --------- Signed-off-by: George Robinson <[email protected]>
1 parent 36d653a commit b444381

File tree

2 files changed

+69
-41
lines changed

2 files changed

+69
-41
lines changed

silence/silence.go

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,13 @@ func (s *Silences) getSilence(id string) (*pb.Silence, bool) {
564564
return msil.Silence, true
565565
}
566566

567+
func (s *Silences) toMeshSilence(sil *pb.Silence) *pb.MeshSilence {
568+
return &pb.MeshSilence{
569+
Silence: sil,
570+
ExpiresAt: sil.EndsAt.Add(s.retention),
571+
}
572+
}
573+
567574
func (s *Silences) setSilence(sil *pb.Silence, now time.Time, skipValidate bool) error {
568575
sil.UpdatedAt = now
569576

@@ -573,10 +580,7 @@ func (s *Silences) setSilence(sil *pb.Silence, now time.Time, skipValidate bool)
573580
}
574581
}
575582

576-
msil := &pb.MeshSilence{
577-
Silence: sil,
578-
ExpiresAt: sil.EndsAt.Add(s.retention),
579-
}
583+
msil := s.toMeshSilence(sil)
580584
b, err := marshalMeshSilence(msil)
581585
if err != nil {
582586
return err
@@ -612,25 +616,27 @@ func (s *Silences) Set(sil *pb.Silence) error {
612616
return ErrNotFound
613617
}
614618

615-
if ok {
616-
if canUpdate(prev, sil, now) {
617-
return s.setSilence(sil, now, false)
618-
}
619-
if getState(prev, s.nowUTC()) != types.SilenceStateExpired {
620-
// We cannot update the silence, expire the old one.
621-
if err := s.expire(prev.Id); err != nil {
622-
return fmt.Errorf("expire previous silence: %w", err)
623-
}
624-
}
619+
if ok && canUpdate(prev, sil, now) {
620+
return s.setSilence(sil, now, false)
625621
}
626622

627-
// If we got here it's either a new silence or a replacing one.
623+
// If we got here it's either a new silence or a replacing one (which would
624+
// also create a new silence) so we need to make sure we have capacity for
625+
// the new silence.
628626
if s.limits.MaxSilences != nil {
629627
if m := s.limits.MaxSilences(); m > 0 && len(s.st)+1 > m {
630628
return fmt.Errorf("exceeded maximum number of silences: %d (limit: %d)", len(s.st), m)
631629
}
632630
}
633631

632+
if ok && getState(prev, s.nowUTC()) != types.SilenceStateExpired {
633+
// We cannot update the silence, expire the old one to leave a history of
634+
// the silence before modification.
635+
if err := s.expire(prev.Id); err != nil {
636+
return fmt.Errorf("expire previous silence: %w", err)
637+
}
638+
}
639+
634640
uid, err := uuid.NewV4()
635641
if err != nil {
636642
return fmt.Errorf("generate uuid: %w", err)

silence/silence_test.go

Lines changed: 48 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ package silence
1515

1616
import (
1717
"bytes"
18+
"fmt"
1819
"os"
1920
"runtime"
2021
"sort"
@@ -470,32 +471,24 @@ func TestSilenceLimits(t *testing.T) {
470471
EndsAt: time.Now().Add(5 * time.Minute),
471472
}
472473
require.NoError(t, s.Set(sil1))
473-
require.NotEqual(t, "", sil1.Id)
474474

475-
// Insert sil2 should fail because maximum number of silences
476-
// has been exceeded.
475+
// Insert sil2 should fail because maximum number of silences has been
476+
// exceeded.
477477
sil2 := &pb.Silence{
478-
Matchers: []*pb.Matcher{{Name: "a", Pattern: "b"}},
478+
Matchers: []*pb.Matcher{{Name: "c", Pattern: "d"}},
479479
StartsAt: time.Now(),
480480
EndsAt: time.Now().Add(5 * time.Minute),
481481
}
482482
require.EqualError(t, s.Set(sil2), "exceeded maximum number of silences: 1 (limit: 1)")
483-
require.Equal(t, "", sil2.Id)
484483

485-
// Expire sil1 and run the GC. This should allow sil2 to be
486-
// inserted.
484+
// Expire sil1 and run the GC. This should allow sil2 to be inserted.
487485
require.NoError(t, s.Expire(sil1.Id))
488486
n, err := s.GC()
489487
require.NoError(t, err)
490488
require.Equal(t, 1, n)
491-
492489
require.NoError(t, s.Set(sil2))
493-
require.NotEqual(t, "", sil2.Id)
494490

495-
// Should be able to update sil2 without hitting the limit.
496-
require.NoError(t, s.Set(sil2))
497-
498-
// Expire sil2.
491+
// Expire sil2 and run the GC.
499492
require.NoError(t, s.Expire(sil2.Id))
500493
n, err = s.GC()
501494
require.NoError(t, err)
@@ -505,26 +498,55 @@ func TestSilenceLimits(t *testing.T) {
505498
sil3 := &pb.Silence{
506499
Matchers: []*pb.Matcher{
507500
{
508-
Name: strings.Repeat("a", 2<<9),
509-
Pattern: strings.Repeat("b", 2<<9),
501+
Name: strings.Repeat("e", 2<<9),
502+
Pattern: strings.Repeat("f", 2<<9),
510503
},
511504
{
512-
Name: strings.Repeat("c", 2<<9),
513-
Pattern: strings.Repeat("d", 2<<9),
505+
Name: strings.Repeat("g", 2<<9),
506+
Pattern: strings.Repeat("h", 2<<9),
514507
},
515508
},
516-
CreatedBy: strings.Repeat("e", 2<<9),
517-
Comment: strings.Repeat("f", 2<<9),
509+
CreatedBy: strings.Repeat("i", 2<<9),
510+
Comment: strings.Repeat("j", 2<<9),
518511
StartsAt: time.Now(),
519512
EndsAt: time.Now().Add(5 * time.Minute),
520513
}
521-
err = s.Set(sil3)
522-
require.Error(t, err)
523-
// Do not check the exact size as it can change between consecutive runs
524-
// due to padding.
525-
require.Contains(t, err.Error(), "silence exceeded maximum size")
526-
// TODO: Once we fix https://github.com/prometheus/alertmanager/issues/3878 this should be require.Equal.
527-
require.NotEqual(t, "", sil3.Id)
514+
require.EqualError(t, s.Set(sil3), fmt.Sprintf("silence exceeded maximum size: %d bytes (limit: 4096 bytes)", s.toMeshSilence(sil3).Size()))
515+
516+
// Should be able to insert sil4.
517+
sil4 := &pb.Silence{
518+
Matchers: []*pb.Matcher{{Name: "k", Pattern: "l"}},
519+
StartsAt: time.Now(),
520+
EndsAt: time.Now().Add(5 * time.Minute),
521+
}
522+
require.NoError(t, s.Set(sil4))
523+
524+
// Should be able to update sil4 without modifications. It is expected to
525+
// keep the same ID.
526+
sil5 := cloneSilence(sil4)
527+
require.NoError(t, s.Set(sil5))
528+
require.Equal(t, sil4.Id, sil5.Id)
529+
530+
// Should be able to update the comment. It is also expected to keep the
531+
// same ID.
532+
sil6 := cloneSilence(sil5)
533+
sil6.Comment = "m"
534+
require.NoError(t, s.Set(sil6))
535+
require.Equal(t, sil5.Id, sil6.Id)
536+
537+
// Should not be able to update the start and end time as this requires
538+
// sil6 to be expired and a new silence to be created. However, this would
539+
// exceed the maximum number of silences, which counts both active and
540+
// expired silences.
541+
sil7 := cloneSilence(sil6)
542+
sil7.StartsAt = time.Now().Add(5 * time.Minute)
543+
sil7.EndsAt = time.Now().Add(10 * time.Minute)
544+
require.EqualError(t, s.Set(sil7), "exceeded maximum number of silences: 1 (limit: 1)")
545+
546+
// sil6 should not be expired because the update failed.
547+
sil6, err = s.QueryOne(QIDs(sil6.Id))
548+
require.NoError(t, err)
549+
require.Equal(t, types.SilenceStateActive, getState(sil6, s.nowUTC()))
528550
}
529551

530552
func TestSilenceNoLimits(t *testing.T) {

0 commit comments

Comments
 (0)