Skip to content

Commit f84c908

Browse files
committed
feat(metrics/net): add tap tx latency metric
This metric measures how long the Firecracker VMM thread is blocked on write syscalls when accessing a tap device. By looking at it, we will be able to see what portion of tx net latency is attributed to factors external to Firecracker. (cherry picked from commit f7d8a33) Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent 1bfe717 commit f84c908

File tree

3 files changed

+19
-2
lines changed

3 files changed

+19
-2
lines changed

src/vmm/src/devices/virtio/net/device.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,7 @@ impl Net {
490490
});
491491
}
492492

493+
let _metric = net_metrics.tap_write_agg.record_latency_metrics();
493494
match Self::write_tap(tap, frame_iovec) {
494495
Ok(_) => {
495496
let len = frame_iovec.len() as u64;

src/vmm/src/devices/virtio/net/metrics.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ use std::sync::{Arc, RwLock};
8585
use serde::ser::SerializeMap;
8686
use serde::{Serialize, Serializer};
8787

88-
use crate::logger::{IncMetric, SharedIncMetric};
88+
use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric};
8989

9090
/// map of network interface id and metrics
9191
/// this should be protected by a lock before accessing.
@@ -107,7 +107,7 @@ impl NetMetricsPerDevice {
107107
.write()
108108
.unwrap()
109109
.metrics
110-
.insert(iface_id.clone(), Arc::new(NetDeviceMetrics::default()));
110+
.insert(iface_id.clone(), Arc::new(NetDeviceMetrics::new()));
111111
}
112112
METRICS
113113
.read()
@@ -184,6 +184,8 @@ pub struct NetDeviceMetrics {
184184
pub tap_read_fails: SharedIncMetric,
185185
/// Number of times writing to TAP failed.
186186
pub tap_write_fails: SharedIncMetric,
187+
/// Duration of all tap write operations.
188+
pub tap_write_agg: LatencyAggregateMetrics,
187189
/// Number of transmitted bytes.
188190
pub tx_bytes_count: SharedIncMetric,
189191
/// Number of malformed TX frames.
@@ -207,6 +209,14 @@ pub struct NetDeviceMetrics {
207209
}
208210

209211
impl NetDeviceMetrics {
212+
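/// Creates metrics with `tap_write_agg` built via `LatencyAggregateMetrics::new()` and every other field set to its `Default` value.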
213+
pub fn new() -> Self {
214+
Self {
215+
tap_write_agg: LatencyAggregateMetrics::new(),
216+
..Default::default()
217+
}
218+
}
219+
210220
/// Net metrics are SharedIncMetric where the diff of current vs
211221
/// old is serialized i.e. serialize_u64(current-old).
212222
/// So to have the aggregate serialized in same way we need to
@@ -239,6 +249,9 @@ impl NetDeviceMetrics {
239249
self.rx_count.add(other.rx_count.fetch_diff());
240250
self.tap_read_fails.add(other.tap_read_fails.fetch_diff());
241251
self.tap_write_fails.add(other.tap_write_fails.fetch_diff());
252+
self.tap_write_agg
253+
.sum_us
254+
.add(other.tap_write_agg.sum_us.fetch_diff());
242255
self.tx_bytes_count.add(other.tx_bytes_count.fetch_diff());
243256
self.tx_malformed_frames
244257
.add(other.tx_malformed_frames.fetch_diff());

tests/host_tools/fcmetrics.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,9 @@ def validate_fc_metrics(metrics):
240240
"read_agg",
241241
"write_agg",
242242
],
243+
"net": [
244+
"tap_write_agg",
245+
],
243246
}
244247

245248
# validate timestamp before jsonschema validation, which takes some more time

0 commit comments

Comments
 (0)