Skip to content

CA-404013: do not relock the mutex when backing up rrds #6215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions ocaml/database/db_xml.ml
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,9 @@ module From = struct
D.warn "no lifetime information about %s.%s, ignoring" tblname k ;
false
in
if do_not_load then (
D.info
{|dropping column "%s.%s": it has been removed from the datamodel|}
tblname k ;
if do_not_load then
row
) else
else
let column_schema = Schema.Table.find k table_schema in
let value =
Schema.Value.unmarshal column_schema.Schema.Column.ty
Expand Down
52 changes: 21 additions & 31 deletions ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ let archive_rrd vm_uuid (remote_address : string option) : unit =
master host, exclusively. Any attempt to send the rrds to pools outside
the host will fail. *)
let backup_rrds (remote_address : string option) () : unit =
let __FUN = __FUNCTION__ in
let transport =
Option.map
(fun address ->
Expand All @@ -119,50 +120,39 @@ let backup_rrds (remote_address : string option) () : unit =
| Some address ->
Printf.sprintf "host %s" address
in
info "%s: trying to back up RRDs to %s" __FUNCTION__ destination ;
info "%s: trying to back up RRDs to %s" __FUN destination ;
let total_cycles = 5 in
let cycles_tried = ref 0 in
let host_uuid = Inventory.lookup Inventory._installation_uuid in
while !cycles_tried < total_cycles do
if Mutex.try_lock mutex then (
cycles_tried := total_cycles ;
let vrrds =
try Hashtbl.fold (fun k v acc -> (k, v.rrd) :: acc) vm_rrds []
with exn -> Mutex.unlock mutex ; raise exn
in
Mutex.unlock mutex ;
List.iter
(fun (uuid, rrd) ->
debug "%s: saving RRD for VM uuid=%s" __FUNCTION__ uuid ;
let rrd = with_lock mutex (fun () -> Rrd.copy_rrd rrd) in
archive_rrd_internal ~transport ~uuid ~rrd ()
)
vrrds ;
Mutex.lock mutex ;
let srrds =
try Hashtbl.fold (fun k v acc -> (k, v.rrd) :: acc) sr_rrds []
with exn -> Mutex.unlock mutex ; raise exn
let rrds_copy =
[
Hashtbl.fold
(fun k v acc -> ("VM", k, Rrd.copy_rrd v.rrd) :: acc)
vm_rrds []
; Hashtbl.fold
(fun k v acc -> ("SR", k, Rrd.copy_rrd v.rrd) :: acc)
sr_rrds []
; Option.fold ~none:[]
~some:(fun rrdi -> [("host", host_uuid, Rrd.copy_rrd rrdi.rrd)])
!host_rrd
]
|> List.concat
in
Mutex.unlock mutex ;

List.iter
(fun (uuid, rrd) ->
debug "%s: saving RRD for SR uuid=%s" __FUNCTION__ uuid ;
let rrd = with_lock mutex (fun () -> Rrd.copy_rrd rrd) in
(fun (cls, uuid, rrd) ->
debug "%s: saving RRD for %s uuid=%s" __FUN cls uuid ;
archive_rrd_internal ~transport ~uuid ~rrd ()
)
srrds ;
match !host_rrd with
| Some rrdi ->
debug "%s: saving RRD for host" __FUNCTION__ ;
let rrd = with_lock mutex (fun () -> Rrd.copy_rrd rrdi.rrd) in
archive_rrd_internal ~transport
~uuid:(Inventory.lookup Inventory._installation_uuid)
~rrd ()
| None ->
()
rrds_copy
) else (
cycles_tried := 1 + !cycles_tried ;
if !cycles_tried >= total_cycles then
warn "%s: Could not acquire RRD lock, skipping RRD backup" __FUNCTION__
warn "%s: Could not acquire RRD lock, skipping RRD backup" __FUN
else
Thread.delay 1.
)
Expand Down
Loading