@@ -601,6 +601,25 @@ func (r *recordStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.A
	return ctx, nil, nil
}

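+// notifyStage signals on C each time it is executed, letting tests observe
+// every notification that makes it through the pipeline.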
+type notifyStage struct {
+	C chan struct{}
+}
+
+func newNotifyStage() *notifyStage {
+	return &notifyStage{C: make(chan struct{}, 1)}
+}
+
+func (s *notifyStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
+	select {
+	case <-ctx.Done():
+		return ctx, nil, ctx.Err()
+	case s.C <- struct{}{}:
+		return ctx, alerts, nil
+	}
+}
+
var (
	// Set the start time in the past to trigger a flush immediately.
	t0 = time.Now().Add(-time.Minute)
@@ -651,7 +668,7 @@ func TestDispatcherRaceOnFirstAlertNotDeliveredWhenGroupWaitIsZero(t *testing.T)
	route := &Route{
		RouteOpts: RouteOpts{
			Receiver:       "default",
-			GroupBy:        map[model.LabelName]struct{}{"alertname": {}},
+			GroupBy:        map[model.LabelName]struct{}{model.AlertNameLabel: {}},
			GroupWait:      0,
			GroupInterval:  1 * time.Hour, // Should never hit in this test.
			RepeatInterval: 1 * time.Hour, // Should never hit in this test.
@@ -666,7 +683,7 @@ func TestDispatcherRaceOnFirstAlertNotDeliveredWhenGroupWaitIsZero(t *testing.T)

	// Push all alerts.
	for i := 0; i < numAlerts; i++ {
-		alert := newAlert(model.LabelSet{"alertname": model.LabelValue(fmt.Sprintf("Alert_%d", i))})
+		alert := newAlert(model.LabelSet{model.AlertNameLabel: model.LabelValue(fmt.Sprintf("Alert_%d", i))})
		require.NoError(t, alerts.Put(alert))
	}

@@ -684,6 +701,142 @@ func TestDispatcherRaceOnFirstAlertNotDeliveredWhenGroupWaitIsZero(t *testing.T)
	require.Equal(t, numAlerts, len(recorder.Alerts()))
}

+func TestDispatcherReceiveAndNotifyRepeatedResolvedAlerts(t *testing.T) {
+	// More background here: https://github.com/prometheus/alertmanager/pull/3006
+
+	logger := log.NewNopLogger()
+	// logger := log.NewLogfmtLogger(os.Stdout)
+	marker := types.NewMarker(prometheus.NewRegistry())
+	alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, nil, logger, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer alerts.Close()
+
+	route := &Route{
+		RouteOpts: RouteOpts{
+			Receiver:       "default",
+			GroupBy:        map[model.LabelName]struct{}{model.AlertNameLabel: {}},
+			GroupWait:      0,
+			GroupInterval:  6 * time.Second,
+			RepeatInterval: 6 * time.Second,
+		},
+	}
+
+	recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
+	notifier := newNotifyStage()
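+	// The fanout hands every flush to both the recorder and the notifier,
+	// so notifier.C receives one signal per notification.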
+	dispatcher := NewDispatcher(alerts, route, notify.FanoutStage{recorder, notifier}, marker, nil, nil, logger, NewDispatcherMetrics(false, prometheus.NewRegistry()))
+	go dispatcher.Run()
+	defer dispatcher.Stop()
+
+	// Here we simulate the case when the alertmanager receives resolved alerts
+	// right after the aggrGroup is deleted.
+	t.Run("repeated alerts after aggrGroup deleted", func(t *testing.T) {
+		alert := newAlert(model.LabelSet{model.AlertNameLabel: "test-repeated-resolved-alerts-1"})
+		alert.Alert.EndsAt = alert.StartsAt.Add(time.Second)
+		alert.UpdatedAt = alert.Alert.EndsAt
+		require.True(t, alert.Resolved())
+		require.NoError(t, alerts.Put(alert))
+		select {
+		case <-time.After(20 * time.Second):
+		case <-notifier.C:
+			t.Errorf("unexpected repeated resolved alerts")
+		}
+	})
+
+	// Alertmanager receives repeated resolved alerts after aggrGroup.flush.
+	t.Run("repeated alerts after aggrGroup flush", func(t *testing.T) {
+		alert := newAlert(model.LabelSet{model.AlertNameLabel: "test-repeated-resolved-alerts-2"})
+		require.NoError(t, alerts.Put(alert))
+		select {
+		case <-time.After(20 * time.Second):
+			t.Errorf("wait active alert timed out")
+		case <-notifier.C:
+		}
+
+		alert.Alert.EndsAt = alert.StartsAt.Add(time.Second)
+		alert.UpdatedAt = alert.Alert.EndsAt
+		require.True(t, alert.Resolved())
+		require.NoError(t, alerts.Put(alert))
+		select {
+		case <-time.After(20 * time.Second):
+			t.Errorf("wait resolved alert timed out")
+		case <-notifier.C:
+		}
+
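+		// Only UpdatedAt moves forward here; the alert is still resolved, so
+		// putting it again must not produce a repeated notification.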
+		alert.UpdatedAt = alert.Alert.EndsAt.Add(time.Second)
+		require.True(t, alert.Resolved())
+		require.NoError(t, alerts.Put(alert))
+		select {
+		case <-time.After(20 * time.Second):
+		case <-notifier.C:
+			t.Errorf("unexpected repeated resolved alerts")
+		}
+	})
+}
+
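+// TestDispatcherRepeatedResolvedAlertsAfterAggrGroupGetsDeleted checks that a
+// resolved alert received after its aggregation group has been deleted does
+// not trigger another resolved notification.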
+func TestDispatcherRepeatedResolvedAlertsAfterAggrGroupGetsDeleted(t *testing.T) {
+	logger := log.NewNopLogger()
+	// logger := log.NewLogfmtLogger(os.Stdout)
+	marker := types.NewMarker(prometheus.NewRegistry())
+	alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, nil, logger, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer alerts.Close()
+
+	route := &Route{
+		RouteOpts: RouteOpts{
+			Receiver:       "default",
+			GroupBy:        map[model.LabelName]struct{}{model.AlertNameLabel: {}},
+			GroupWait:      0,
+			GroupInterval:  6 * time.Second,
+			RepeatInterval: 6 * time.Second,
+		},
+	}
+
+	recorder := &recordStage{alerts: make(map[string]map[model.Fingerprint]*types.Alert)}
+	notifier := newNotifyStage()
+	dispatcher := NewDispatcher(alerts, route, notify.FanoutStage{recorder, notifier}, marker, nil, nil, logger, NewDispatcherMetrics(false, prometheus.NewRegistry()))
+	go dispatcher.Run()
+	defer dispatcher.Stop()
+
+	alert := newAlert(model.LabelSet{model.AlertNameLabel: "test-repeated-resolved-alerts"})
+	require.NoError(t, alerts.Put(alert))
+	select {
+	case <-time.After(20 * time.Second):
+		t.Errorf("wait active alert timed out")
+	case <-notifier.C:
+	}
+
+	alert.Alert.EndsAt = alert.StartsAt.Add(time.Second)
+	require.True(t, alert.Alert.EndsAt.Before(time.Now()))
+	alert.UpdatedAt = alert.Alert.EndsAt
+	require.NoError(t, alerts.Put(alert))
+	select {
+	case <-time.After(20 * time.Second):
+		t.Errorf("wait resolved alert timed out")
+	case <-notifier.C:
+	}
+
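+	// Re-send the already-resolved alert and make sure no further
+	// notification goes out.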
+	alert.UpdatedAt = alert.Alert.EndsAt.Add(time.Second)
+	require.NoError(t, alerts.Put(alert))
+	select {
+	case <-time.After(20 * time.Second):
+	case <-notifier.C:
+		t.Errorf("unexpected repeated resolved alerts")
+	}
+}
+

type limits struct {
	groups int
}