Skip to content

Commit 43d01ca

Browse files
authored
CA-404597 - rrd: Fix incorrect processing of Gauge and Absolute data sources (#6233)
Some recent changes related to RRDs likely exposed a long-standing latent issue where the RRD library would process the passed-in values for Gauge and Absolute data sources incorrectly, leading to constant values changing from update to update. For example:

```
$ rrd2csv memory_total_kib
timestamp, AVERAGE:host:8b533333-91e1-4698-bd17-95b9732ffbb6:memory_total_kib
2025-01-15T08:41:40Z, 33351000
2025-01-15T08:41:45Z, 33350000
2025-01-15T08:41:50Z, 33346000
2025-01-15T08:41:55Z, 33352000
```

Instead of treating Gauge and Absolute data sources as a variation on the rate-based Derive data source type and expecting time-based calculations to cancel each other out, do not undertake any calculations on non-rate data sources at all.

The first commit adds a failing unit test; the second makes it pass.

===

I've verified these changes through manual testing; they've also passed the testcases that discovered this issue: SNMP memory testcases (JobIDs 4197305, 4196759, 4196744) and ShimMemory testcase (4197050). This branch also passed Ring3 BST+BVT (210577).
2 parents 6c1e7ea + 73ca3cc commit 43d01ca

File tree

2 files changed

+134
-11
lines changed

2 files changed

+134
-11
lines changed

ocaml/libs/xapi-rrd/lib/rrd.ml

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,10 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps =
341341
Array.iter updatefn rrd.rrd_rras
342342

343343
(* We assume that the data being given is of the form of a rate; that is,
344-
it's dependent on the time interval between updates. To be able to
345-
deal with gauge DSs, we multiply by the interval so that it cancels
346-
the subsequent divide by interval later on *)
344+
it's dependent on the time interval between updates.
345+
Gauge and Absolute data sources are simply kept as is without any
346+
time-based calculations, while Derive data sources will be changed according
347+
to the time passed since the last measurement. (see CA-404597) *)
347348
let process_ds_value ds value interval new_rrd =
348349
if interval > ds.ds_mrhb then
349350
nan
@@ -360,10 +361,8 @@ let process_ds_value ds value interval new_rrd =
360361

361362
let rate =
362363
match (ds.ds_ty, new_rrd) with
363-
| Absolute, _ | Derive, true ->
364+
| Absolute, _ | Derive, true | Gauge, _ ->
364365
value_raw
365-
| Gauge, _ ->
366-
value_raw *. interval
367366
| Derive, false -> (
368367
match (ds.ds_last, value) with
369368
| VT_Int64 x, VT_Int64 y ->
@@ -433,7 +432,14 @@ let ds_update rrd timestamp valuesandtransforms new_rrd =
433432
if Utils.isnan value then
434433
ds.ds_unknown_sec <- pre_int
435434
else
436-
ds.ds_value <- ds.ds_value +. (pre_int *. value /. interval)
435+
(* CA-404597 - Gauge and Absolute values should be passed as-is,
436+
without being involved in time-based calculations at all.
437+
This applies to calculations below as well *)
438+
match ds.ds_ty with
439+
| Gauge | Absolute ->
440+
ds.ds_value <- value
441+
| Derive ->
442+
ds.ds_value <- ds.ds_value +. (pre_int *. value /. interval)
437443
)
438444
v2s ;
439445

@@ -450,7 +456,13 @@ let ds_update rrd timestamp valuesandtransforms new_rrd =
450456
let raw =
451457
let proc_pdp_st = get_float_time last_updated rrd.timestep in
452458
let occu_pdp_st = get_float_time timestamp rrd.timestep in
453-
ds.ds_value /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec)
459+
460+
match ds.ds_ty with
461+
| Gauge | Absolute ->
462+
ds.ds_value
463+
| Derive ->
464+
ds.ds_value
465+
/. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec)
454466
in
455467
(* Apply the transform after the raw value has been calculated *)
456468
let raw = apply_transform_function transform raw in
@@ -473,8 +485,12 @@ let ds_update rrd timestamp valuesandtransforms new_rrd =
473485
ds.ds_value <- 0.0 ;
474486
ds.ds_unknown_sec <- post_int
475487
) else (
476-
ds.ds_value <- post_int *. value /. interval ;
477-
ds.ds_unknown_sec <- 0.0
488+
ds.ds_unknown_sec <- 0.0 ;
489+
match ds.ds_ty with
490+
| Gauge | Absolute ->
491+
ds.ds_value <- value
492+
| Derive ->
493+
ds.ds_value <- post_int *. value /. interval
478494
)
479495
)
480496
v2s

ocaml/libs/xapi-rrd/lib_test/unit_tests.ml

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,110 @@ let test_length_invariants rrd () =
107107
let check_length dss rra = check_length_of_fring dss rra.rra_data in
108108
Array.iter (check_length rrd.rrd_dss) rrd.rrd_rras
109109

110+
let absolute_rrd =
111+
let rra = rra_create CF_Average 100 1 0.5 in
112+
let rra2 = rra_create CF_Average 100 10 0.5 in
113+
let rra3 = rra_create CF_Average 100 100 0.5 in
114+
let rra4 = rra_create CF_Average 100 1000 0.5 in
115+
let ts = 1000000000.0 in
116+
let ds = ds_create "foo" Absolute ~mrhb:10.0 (VT_Float 0.0) in
117+
let ds2 = ds_create "bar" Absolute ~mrhb:10.0 (VT_Float 0.0) in
118+
let ds3 = ds_create "baz" Absolute ~mrhb:10.0 (VT_Float 0.0) in
119+
let ds4 = ds_create "boo" Absolute ~mrhb:10.0 (VT_Float 0.0) in
120+
let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in
121+
let id = Identity in
122+
for i = 1 to 100000 do
123+
let t = 1000000.0 +. (0.7 *. float_of_int i) in
124+
let v1 =
125+
(0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id})
126+
in
127+
let v2 =
128+
(1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id})
129+
in
130+
let v3 =
131+
(2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id})
132+
in
133+
let v4 =
134+
(3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id})
135+
in
136+
ds_update rrd t [|v1; v2; v3; v4|] false
137+
done ;
138+
rrd
139+
140+
let absolute_rrd_CA_404597 () =
141+
let rra = rra_create CF_Average 100 1 0.5 in
142+
let rra2 = rra_create CF_Average 100 10 0.5 in
143+
let rra3 = rra_create CF_Average 100 100 0.5 in
144+
let rra4 = rra_create CF_Average 100 1000 0.5 in
145+
let ts = 1000000000.0 in
146+
let ds = ds_create "foo" Absolute ~mrhb:10.0 (VT_Float 0.0) in
147+
let ds2 = ds_create "bar" Absolute ~mrhb:10.0 (VT_Float 0.0) in
148+
let ds3 = ds_create "baz" Absolute ~mrhb:10.0 (VT_Float 0.0) in
149+
let ds4 = ds_create "boo" Absolute ~mrhb:10.0 (VT_Float 0.0) in
150+
let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in
151+
let id = Identity in
152+
for i = 1 to 100000 do
153+
let t = 1000000.0 +. (0.7 *. float_of_int i) in
154+
let ((_, val1) as v1) =
155+
(0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id})
156+
in
157+
let ((_, val2) as v2) =
158+
(1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id})
159+
in
160+
let ((_, val3) as v3) =
161+
(2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id})
162+
in
163+
let ((_, val4) as v4) =
164+
(3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id})
165+
in
166+
ds_update rrd t [|v1; v2; v3; v4|] false ;
167+
168+
Array.iter2
169+
(fun ds value ->
170+
compare_float __LOC__ ds.ds_value
171+
(float_of_string (ds_value_to_string value.value))
172+
)
173+
rrd.rrd_dss [|val1; val2; val3; val4|]
174+
done
175+
176+
(** Verify that Gauge data source values are correctly handled by the RRD lib
177+
and that the timing of updates does not cause the stored values to fluctuate *)
178+
let gauge_rrd_CA_404597 () =
179+
let rra = rra_create CF_Average 100 1 0.5 in
180+
let rra2 = rra_create CF_Average 100 10 0.5 in
181+
let rra3 = rra_create CF_Average 100 100 0.5 in
182+
let rra4 = rra_create CF_Average 100 1000 0.5 in
183+
let ts = 1000000000.0 in
184+
let ds = ds_create "foo" Gauge ~mrhb:10.0 (VT_Float 0.0) in
185+
let ds2 = ds_create "bar" Gauge ~mrhb:10.0 (VT_Float 0.0) in
186+
let ds3 = ds_create "baz" Gauge ~mrhb:10.0 (VT_Float 0.0) in
187+
let ds4 = ds_create "boo" Gauge ~mrhb:10.0 (VT_Float 0.0) in
188+
let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in
189+
let id = Identity in
190+
for i = 1 to 100000 do
191+
let t = 1000000.0 +. (0.7 *. float_of_int i) in
192+
let ((_, val1) as v1) =
193+
(0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id})
194+
in
195+
let ((_, val2) as v2) =
196+
(1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id})
197+
in
198+
let ((_, val3) as v3) =
199+
(2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id})
200+
in
201+
let ((_, val4) as v4) =
202+
(3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id})
203+
in
204+
ds_update rrd t [|v1; v2; v3; v4|] false ;
205+
206+
Array.iter2
207+
(fun ds value ->
208+
compare_float __LOC__ ds.ds_value
209+
(float_of_string (ds_value_to_string value.value))
210+
)
211+
rrd.rrd_dss [|val1; val2; val3; val4|]
212+
done
213+
110214
let gauge_rrd =
111215
let rra = rra_create CF_Average 100 1 0.5 in
112216
let rra2 = rra_create CF_Average 100 10 0.5 in
@@ -328,12 +432,15 @@ let regression_suite =
328432
; ("CA-329043 (1)", `Quick, test_ranges ca_329043_rrd_1)
329433
; ("CA-329043 (2)", `Quick, test_ranges ca_329043_rrd_2)
330434
; ("CA-329813", `Quick, test_ranges ca_329813_rrd)
435+
; ("CA-404597 (1)", `Quick, gauge_rrd_CA_404597)
436+
; ("CA-404597 (2)", `Quick, absolute_rrd_CA_404597)
331437
]
332438

333439
let () =
334440
Alcotest.run "Test RRD library"
335441
[
336-
("Gauge RRD", rrd_suite gauge_rrd)
442+
("Absolute RRD", rrd_suite absolute_rrd)
443+
; ("Gauge RRD", rrd_suite gauge_rrd)
337444
; ("RRD for CA-322008", rrd_suite ca_322008_rrd)
338445
; ("RRD for CA-329043", rrd_suite ca_329043_rrd_1)
339446
; ("RRD for CA-329813", rrd_suite ca_329813_rrd)

0 commit comments

Comments
 (0)