@@ -221,6 +221,57 @@ let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) :
221
221
else if ! backoff_delay > 256.0 then
222
222
backoff_delay := 256.0
223
223
in
224
let reconnect () =
  (* Recovery path used when an RPC to the master has failed: drop the
     current connection, decide whether the overall retry budget has been
     exhausted, sleep for the current backoff delay, then try to open a new
     secure connection.

     Raises [Cannot_connect_to_master] when [connection_timeout] is
     non-negative, has been exceeded, and [restart_on_connection_timeout] is
     false. When [restart_on_connection_timeout] is true, the function stored
     in [Db_globs.restart_fn] is called instead.
     NOTE(review): if [restart_fn] returns, execution carries on with the
     sleep and reopen below; confirm that this is intended. *)
  ( match !my_connection with
  | None ->
      ()
  | Some st_proc -> (
      (* Clear the reference first so that we never try to close the same
         connection more than once. *)
      my_connection := None ;
      (* Best-effort close: a failure to disconnect is deliberately ignored. *)
      try Stunnel.disconnect st_proc with _ -> ()
    )
  ) ;
  let time_sofar = Unix.gettimeofday () -. time_call_started in
  if !connection_timeout < 0. then (
    (* A negative timeout means "retry forever"; only report this once. *)
    if not !surpress_no_timeout_logs then (
      debug
        " Connection to master died. I will continue to retry indefinitely \
         (supressing future logging of this message)." ;
      error
        " Connection to master died. I will continue to retry indefinitely \
         (supressing future logging of this message)."
    ) ;
    surpress_no_timeout_logs := true
  ) else
    (* Here [connection_timeout] is known to be non-negative, so the timeout
       description is always the "timeout after" form. *)
    debug
      " Connection to master died: time taken so far in this call '%f'; will \
       %s"
      time_sofar
      (Printf.sprintf " timeout after '%f'" !connection_timeout) ;
  if time_sofar > !connection_timeout && !connection_timeout >= 0. then
    if !restart_on_connection_timeout then (
      debug " Exceeded timeout for retrying master connection: restarting xapi" ;
      !Db_globs.restart_fn ()
    ) else (
      debug
        " Exceeded timeout for retrying master connection: raising \
         Cannot_connect_to_master" ;
      raise Cannot_connect_to_master
    ) ;
  debug " Sleeping %f seconds before retrying master connection..."
    !backoff_delay ;
  (* [wait] yields false when the sleep ended before the delay elapsed. *)
  let timed_out = Scheduler.PipeDelay.wait delay !backoff_delay in
  if not timed_out then
    debug " %s: Sleep interrupted, retrying master connection now" __FUNCTION__ ;
  update_backoff_delay () ;
  (* Failures to reopen are logged and ignored here. *)
  D.log_and_ignore_exn open_secure_connection
in
274
+
224
275
while not ! write_ok do
225
276
try
226
277
let req_string = req in
@@ -266,67 +317,13 @@ let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) :
266
317
Db_globs. http_limit_max_rpc_size ;
267
318
debug " Re-raising exception to caller." ;
268
319
raise Http. Client_requested_size_over_limit
269
- (* TODO: This http exception handler caused CA-36936 and can probably be removed now that there's backoff delay in the generic handler _ below *)
270
320
| Http_client. Http_error (http_code , err_msg ) ->
271
- error
272
- " Received HTTP error %s (%s) from master. This suggests our master \
273
- address is wrong. Sleeping for %.0fs and then executing restart_fn."
274
- http_code err_msg
275
- ! Db_globs. permanent_master_failure_retry_interval ;
276
- Thread. delay ! Db_globs. permanent_master_failure_retry_interval ;
277
- ! Db_globs. restart_fn ()
321
+ error " Received HTTP error %s (%s) from the coordinator" http_code
322
+ err_msg ;
323
+ reconnect ()
278
324
| e ->
279
325
error " Caught %s" (Printexc. to_string e) ;
280
- (* RPC failed - there's no way we can recover from this so try reopening connection every 2s + backoff delay *)
281
- ( match ! my_connection with
282
- | None ->
283
- ()
284
- | Some st_proc -> (
285
- my_connection := None ;
286
- (* don't want to try closing multiple times *)
287
- try Stunnel. disconnect st_proc with _ -> ()
288
- )
289
- ) ;
290
- let time_sofar = Unix. gettimeofday () -. time_call_started in
291
- if ! connection_timeout < 0. then (
292
- if not ! surpress_no_timeout_logs then (
293
- debug
294
- " Connection to master died. I will continue to retry \
295
- indefinitely (supressing future logging of this message)." ;
296
- error
297
- " Connection to master died. I will continue to retry \
298
- indefinitely (supressing future logging of this message)."
299
- ) ;
300
- surpress_no_timeout_logs := true
301
- ) else
302
- debug
303
- " Connection to master died: time taken so far in this call '%f'; \
304
- will %s"
305
- time_sofar
306
- ( if ! connection_timeout < 0. then
307
- " never timeout"
308
- else
309
- Printf. sprintf " timeout after '%f'" ! connection_timeout
310
- ) ;
311
- if time_sofar > ! connection_timeout && ! connection_timeout > = 0. then
312
- if ! restart_on_connection_timeout then (
313
- debug
314
- " Exceeded timeout for retrying master connection: restarting xapi" ;
315
- ! Db_globs. restart_fn ()
316
- ) else (
317
- debug
318
- " Exceeded timeout for retrying master connection: raising \
319
- Cannot_connect_to_master" ;
320
- raise Cannot_connect_to_master
321
- ) ;
322
- debug " Sleeping %f seconds before retrying master connection..."
323
- ! backoff_delay ;
324
- let timed_out = Scheduler.PipeDelay. wait delay ! backoff_delay in
325
- if not timed_out then
326
- debug " %s: Sleep interrupted, retrying master connection now"
327
- __FUNCTION__ ;
328
- update_backoff_delay () ;
329
- D. log_and_ignore_exn open_secure_connection
326
+ reconnect ()
330
327
done ;
331
328
! result
332
329
0 commit comments