@@ -2,7 +2,6 @@ package grpcclient
2
2
3
3
import (
4
4
"context"
5
- "errors"
6
5
"flag"
7
6
"fmt"
8
7
"io"
@@ -11,41 +10,49 @@ import (
11
10
12
11
"github.com/go-kit/log"
13
12
"github.com/go-kit/log/level"
13
+ "github.com/gogo/status"
14
14
"github.com/weaveworks/common/user"
15
15
"go.uber.org/atomic"
16
16
"google.golang.org/grpc"
17
+ "google.golang.org/grpc/codes"
17
18
"google.golang.org/grpc/health/grpc_health_v1"
18
19
19
20
"github.com/cortexproject/cortex/pkg/util/services"
20
21
)
21
22
22
23
var (
23
- unhealthyErr = errors . New ( "instance marked as unhealthy" )
24
+ unhealthyErr = status . Error ( codes . Unavailable , "instance marked as unhealthy" )
24
25
)
25
26
26
27
type HealthCheckConfig struct {
27
28
* HealthCheckInterceptors `yaml:"-"`
28
29
29
- UnhealthyThreshold int `yaml:"unhealthy_threshold"`
30
+ UnhealthyThreshold int64 `yaml:"unhealthy_threshold"`
30
31
Interval time.Duration `yaml:"interval"`
31
32
Timeout time.Duration `yaml:"timeout"`
32
33
}
33
34
34
35
// RegisterFlagsWithPrefix for Config.
35
36
func (cfg * HealthCheckConfig ) RegisterFlagsWithPrefix (prefix string , f * flag.FlagSet ) {
36
- f .IntVar (& cfg .UnhealthyThreshold , prefix + ".unhealthy-threshold" , 0 , "The number of consecutive failed health checks required before considering a target unhealthy. 0 means disabled." )
37
+ f .Int64Var (& cfg .UnhealthyThreshold , prefix + ".unhealthy-threshold" , 0 , "The number of consecutive failed health checks required before considering a target unhealthy. 0 means disabled." )
37
38
f .DurationVar (& cfg .Timeout , prefix + ".timeout" , 1 * time .Second , "The amount of time during which no response from a target means a failed health check." )
38
39
f .DurationVar (& cfg .Interval , prefix + ".interval" , 5 * time .Second , "The approximate amount of time between health checks of an individual target." )
39
40
}
40
41
41
- type healthCheckEntry struct {
42
- address string
43
- clientConfig * ConfigWithHealthCheck
42
+ type healthCheckClient struct {
43
+ grpc_health_v1.HealthClient
44
+ io.Closer
45
+ }
44
46
45
- sync.RWMutex
46
- unhealthyCount int
47
+ type healthCheckEntry struct {
48
+ address string
49
+ clientConfig * ConfigWithHealthCheck
47
50
lastCheckTime atomic.Time
48
51
lastTickTime atomic.Time
52
+ unhealthyCount atomic.Int64
53
+
54
+ healthCheckClientMutex sync.RWMutex
55
+ healthCheckClient * healthCheckClient
49
56
}
50
57
51
58
type HealthCheckInterceptors struct {
@@ -75,18 +82,14 @@ func NewHealthCheckInterceptors(logger log.Logger) *HealthCheckInterceptors {
75
82
}
76
83
77
84
func (e * healthCheckEntry ) isHealthy () bool {
78
- e .RLock ()
79
- defer e .RUnlock ()
80
- return e .unhealthyCount < e .clientConfig .HealthCheckConfig .UnhealthyThreshold
85
+ return e .unhealthyCount .Load () < e .clientConfig .HealthCheckConfig .UnhealthyThreshold
81
86
}
82
87
83
88
func (e * healthCheckEntry ) recordHealth (err error ) error {
84
- e .Lock ()
85
- defer e .Unlock ()
86
89
if err != nil {
87
- e .unhealthyCount ++
90
+ e .unhealthyCount . Inc ()
88
91
} else {
89
- e .unhealthyCount = 0
92
+ e .unhealthyCount . Store ( 0 )
90
93
}
91
94
92
95
return err
@@ -96,6 +99,51 @@ func (e *healthCheckEntry) tick() {
96
99
e .lastTickTime .Store (time .Now ())
97
100
}
98
101
102
+ func (e * healthCheckEntry ) close () error {
103
+ e .healthCheckClientMutex .Lock ()
104
+ defer e .healthCheckClientMutex .Unlock ()
105
+
106
+ if e .healthCheckClient != nil {
107
+ err := e .healthCheckClient .Close ()
108
+ e .healthCheckClient = nil
109
+ return err
110
+ }
111
+
112
+ return nil
113
+ }
114
+
115
+ func (e * healthCheckEntry ) getClient (factory func (cc * grpc.ClientConn ) (grpc_health_v1.HealthClient , io.Closer )) (* healthCheckClient , error ) {
116
+ e .healthCheckClientMutex .RLock ()
117
+ c := e .healthCheckClient
118
+ e .healthCheckClientMutex .RUnlock ()
119
+
120
+ if c != nil {
121
+ return c , nil
122
+ }
123
+
124
+ e .healthCheckClientMutex .Lock ()
125
+ defer e .healthCheckClientMutex .Unlock ()
126
+
127
+ if e .healthCheckClient == nil {
128
+ dialOpts , err := e .clientConfig .Config .DialOption (nil , nil )
129
+ if err != nil {
130
+ return nil , err
131
+ }
132
+ conn , err := grpc .NewClient (e .address , dialOpts ... )
133
+ if err != nil {
134
+ return nil , err
135
+ }
136
+
137
+ client , closer := factory (conn )
138
+ e .healthCheckClient = & healthCheckClient {
139
+ HealthClient : client ,
140
+ Closer : closer ,
141
+ }
142
+ }
143
+
144
+ return e .healthCheckClient , nil
145
+ }
146
+
99
147
func (h * HealthCheckInterceptors ) registeredInstances () []* healthCheckEntry {
100
148
h .RLock ()
101
149
defer h .RUnlock ()
@@ -112,6 +160,9 @@ func (h *HealthCheckInterceptors) iteration(ctx context.Context) error {
112
160
for _ , instance := range h .registeredInstances () {
113
161
if time .Since (instance .lastTickTime .Load ()) >= h .instanceGcTimeout {
114
162
h .Lock ()
163
+ if err := instance .close (); err != nil {
164
+ level .Warn (h .logger ).Log ("msg" , "Error closing health check" , "err" , err )
165
+ }
115
166
delete (h .activeInstances , instance .address )
116
167
h .Unlock ()
117
168
continue
@@ -124,25 +175,13 @@ func (h *HealthCheckInterceptors) iteration(ctx context.Context) error {
124
175
instance .lastCheckTime .Store (time .Now ())
125
176
126
177
go func (i * healthCheckEntry ) {
127
- dialOpts , err := i .clientConfig .Config .DialOption (nil , nil )
128
- if err != nil {
129
- level .Error (h .logger ).Log ("msg" , "error creating dialOpts to perform healthcheck" , "address" , i .address , "err" , err )
130
- return
131
- }
132
- conn , err := grpc .NewClient (i .address , dialOpts ... )
178
+ client , err := i .getClient (h .healthClientFactory )
179
+
133
180
if err != nil {
134
- level .Error (h .logger ).Log ("msg" , "error creating client to perform healthcheck" , "address" , i .address , "err" , err )
181
+ level .Error (h .logger ).Log ("msg" , "error creating healthcheck client to perform healthcheck" , "address" , i .address , "err" , err )
135
182
return
136
183
}
137
184
138
- client , closer := h .healthClientFactory (conn )
139
-
140
- defer func () {
141
- if err := closer .Close (); err != nil {
142
- level .Warn (h .logger ).Log ("msg" , "error closing connection" , "address" , i .address , "err" , err )
143
- }
144
- }()
145
-
146
185
if err := i .recordHealth (healthCheck (client , i .clientConfig .HealthCheckConfig .Timeout )); ! i .isHealthy () {
147
186
level .Warn (h .logger ).Log ("msg" , "instance marked as unhealthy" , "address" , i .address , "err" , err )
148
187
}
0 commit comments