Skip to content

Commit e557c8d

Browse files
committed
virtio: allow IoVecBufferMut to hold multiple DescriptorChain objects
Allow IoVecBufferMut objects to store multiple DescriptorChain objects, so that we can describe guest memory meant to be used for receiving data (for example memory used for network RX) as a single (sparse) memory region. This will allow us to always keep track all the available memory we have for performing RX and use `readv` for copying memory from the TAP device inside guest memory avoiding the extra copy. In the future, it will also facilitate the implementation of mergeable buffers for the RX path of the network device. Signed-off-by: Babis Chalios <[email protected]>
1 parent d8789df commit e557c8d

File tree

4 files changed

+162
-43
lines changed

4 files changed

+162
-43
lines changed

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 147 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@ use std::io::ErrorKind;
55

66
use libc::{c_void, iovec, size_t};
77
use smallvec::SmallVec;
8+
#[cfg(not(kani))]
89
use vm_memory::bitmap::Bitmap;
910
use vm_memory::{
1011
GuestMemory, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
1112
};
1213

14+
use super::iov_deque::{IovDeque, IovDequeError};
1315
use crate::devices::virtio::queue::DescriptorChain;
1416
use crate::vstate::memory::GuestMemoryMmap;
1517

@@ -23,6 +25,8 @@ pub enum IoVecError {
2325
OverflowedDescriptor,
2426
/// Guest memory error: {0}
2527
GuestMemory(#[from] GuestMemoryError),
28+
/// Error with underlying `IovDeque`: {0}
29+
IovDeque(#[from] IovDequeError),
2630
}
2731

2832
// Using SmallVec in the kani proofs causes kani to use unbounded amounts of memory
@@ -219,28 +223,24 @@ impl IoVecBuffer {
219223
/// It describes a write-only buffer passed to us by the guest that is scattered across multiple
220224
/// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges
221225
/// of data from that buffer.
222-
#[derive(Debug, Default, Clone)]
223-
pub struct IoVecBufferMut {
226+
#[derive(Debug)]
227+
pub struct IoVecBufferMut<'a> {
224228
// container of the memory regions included in this IO vector
225-
vecs: IoVecVec,
229+
vecs: IovDeque<'a>,
226230
// Total length of the IoVecBufferMut
227-
len: u32,
231+
len: usize,
228232
}
229233

230-
impl IoVecBufferMut {
231-
/// Create an `IoVecBuffer` from a `DescriptorChain`
232-
///
233-
/// # Safety
234-
///
235-
/// The descriptor chain cannot be referencing the same memory location as another chain
236-
pub unsafe fn load_descriptor_chain(
234+
impl<'a> IoVecBufferMut<'a> {
235+
/// Parse a `DescriptorChain` object and append the memory regions it describes in the
236+
/// underlying ring buffer.
237+
fn parse_descriptor(
237238
&mut self,
238239
mem: &GuestMemoryMmap,
239240
head: DescriptorChain,
240-
) -> Result<(), IoVecError> {
241-
self.clear();
242-
241+
) -> Result<u32, IoVecError> {
243242
let mut next_descriptor = Some(head);
243+
let mut length = 0u32;
244244
while let Some(desc) = next_descriptor {
245245
if !desc.is_write_only() {
246246
return Err(IoVecError::ReadOnlyDescriptor);
@@ -257,18 +257,78 @@ impl IoVecBufferMut {
257257
slice.bitmap().mark_dirty(0, desc.len as usize);
258258

259259
let iov_base = slice.ptr_guard_mut().as_ptr().cast::<c_void>();
260-
self.vecs.push(iovec {
260+
self.vecs.push_back(iovec {
261261
iov_base,
262262
iov_len: desc.len as size_t,
263263
});
264-
self.len = self
265-
.len
264+
length = length
266265
.checked_add(desc.len)
267266
.ok_or(IoVecError::OverflowedDescriptor)?;
268267

269268
next_descriptor = desc.next_descriptor();
270269
}
271270

271+
self.len = self
272+
.len
273+
.checked_add(length as usize)
274+
.ok_or(IoVecError::OverflowedDescriptor)?;
275+
276+
Ok(length)
277+
}
278+
279+
/// Create an empty `IoVecBufferMut`.
280+
pub(crate) fn new() -> Result<Self, IovDequeError> {
281+
let vecs = IovDeque::new()?;
282+
Ok(Self { vecs, len: 0 })
283+
}
284+
285+
/// Create an `IoVecBufferMut` from a `DescriptorChain`
286+
///
287+
/// This will clear any previous `iovec` objects in the buffer and load the new
288+
/// [`DescriptorChain`].
289+
///
290+
/// # Safety
291+
///
292+
/// The descriptor chain cannot be referencing the same memory location as another chain
293+
pub unsafe fn load_descriptor_chain(
294+
&mut self,
295+
mem: &GuestMemoryMmap,
296+
head: DescriptorChain,
297+
) -> Result<(), IoVecError> {
298+
self.clear();
299+
let _ = self.parse_descriptor(mem, head)?;
300+
Ok(())
301+
}
302+
303+
/// Append a `DescriptorChain` in this `IoVecBufferMut`
304+
///
305+
/// # Safety
306+
///
307+
/// The descriptor chain cannot be referencing the same memory location as another chain
308+
pub unsafe fn append_descriptor_chain(
309+
&mut self,
310+
mem: &GuestMemoryMmap,
311+
head: DescriptorChain,
312+
) -> Result<u32, IoVecError> {
313+
self.parse_descriptor(mem, head)
314+
}
315+
316+
/// Drop memory from the `IoVecBufferMut`
317+
///
318+
/// This will drop memory described by the `IoVecBufferMut` starting from the beginning.
319+
pub fn drop_iovecs(&mut self, size: u32) -> Result<(), IoVecError> {
320+
let dropped = self.vecs.drop_iovs(size as usize);
321+
322+
// Users should ask us to drop a `size` of memory that is not exactly covered by `iovec`
323+
// objects. In other words, the sum of the lengths of all dropped `iovec` objects should be
324+
// equal to the `size` we were asked to drop. If it isn't, something is seriously wrong
325+
// with the VirtIO queue or the emulation logic, so fail at this point.
326+
assert_eq!(u32::try_from(dropped).unwrap(), size);
327+
self.len = self
328+
.len
329+
.checked_sub(size as usize)
330+
.ok_or(IoVecError::OverflowedDescriptor)?;
331+
272332
Ok(())
273333
}
274334

@@ -281,20 +341,34 @@ impl IoVecBufferMut {
281341
mem: &GuestMemoryMmap,
282342
head: DescriptorChain,
283343
) -> Result<Self, IoVecError> {
284-
let mut new_buffer = Self::default();
344+
let mut new_buffer = Self::new()?;
285345
new_buffer.load_descriptor_chain(mem, head)?;
286346
Ok(new_buffer)
287347
}
288348

289349
/// Get the total length of the memory regions covered by this `IoVecBuffer`
290-
pub(crate) fn len(&self) -> u32 {
350+
///
351+
/// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
352+
/// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
353+
/// the limit that the length of a buffer is limited by `u32`.
354+
pub(crate) fn len(&self) -> usize {
291355
self.len
292356
}
293357

358+
/// Returns a pointer to the memory keeping the `iovec` structs
359+
pub fn as_iovec_ptr(&mut self) -> *mut iovec {
360+
self.vecs.as_mut_slice().as_mut_ptr()
361+
}
362+
363+
/// Returns the length of the `iovec` array.
364+
pub fn iovec_count(&self) -> usize {
365+
self.vecs.len()
366+
}
367+
294368
/// Clears the `iovec` array
295369
pub fn clear(&mut self) {
296370
self.vecs.clear();
297-
self.len = 0u32;
371+
self.len = 0;
298372
}
299373

300374
/// Writes a number of bytes into the `IoVecBufferMut` starting at a given offset.
@@ -313,7 +387,7 @@ impl IoVecBufferMut {
313387
mut buf: &[u8],
314388
offset: usize,
315389
) -> Result<(), VolatileMemoryError> {
316-
if offset < self.len() as usize {
390+
if offset < self.len() {
317391
let expected = buf.len();
318392
let bytes_written = self.write_volatile_at(&mut buf, offset, expected)?;
319393

@@ -342,7 +416,7 @@ impl IoVecBufferMut {
342416
) -> Result<usize, VolatileMemoryError> {
343417
let mut total_bytes_read = 0;
344418

345-
for iov in &self.vecs {
419+
for iov in self.vecs.as_mut_slice() {
346420
if len == 0 {
347421
break;
348422
}
@@ -391,6 +465,7 @@ mod tests {
391465
use vm_memory::VolatileMemoryError;
392466

393467
use super::{IoVecBuffer, IoVecBufferMut};
468+
use crate::devices::virtio::iov_deque::IovDeque;
394469
use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE};
395470
use crate::devices::virtio::test_utils::VirtQueue;
396471
use crate::utilities::test_utils::multi_region_mem;
@@ -427,15 +502,17 @@ mod tests {
427502
}
428503
}
429504

430-
impl From<&mut [u8]> for IoVecBufferMut {
505+
impl<'a> From<&mut [u8]> for IoVecBufferMut<'a> {
431506
fn from(buf: &mut [u8]) -> Self {
507+
let mut vecs = IovDeque::new().unwrap();
508+
vecs.push_back(iovec {
509+
iov_base: buf.as_mut_ptr().cast::<c_void>(),
510+
iov_len: buf.len(),
511+
});
512+
432513
Self {
433-
vecs: vec![iovec {
434-
iov_base: buf.as_mut_ptr().cast::<c_void>(),
435-
iov_len: buf.len(),
436-
}]
437-
.into(),
438-
len: buf.len().try_into().unwrap(),
514+
vecs,
515+
len: buf.len(),
439516
}
440517
}
441518
}
@@ -528,8 +605,19 @@ mod tests {
528605
let head = q.pop().unwrap();
529606

530607
// SAFETY: This descriptor chain is only loaded once in this test
531-
let iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
608+
let mut iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
532609
assert_eq!(iovec.len(), 4 * 64);
610+
611+
// We are creating a new queue where we can get descriptors from. Probably, this is not
612+
// something that we will ever want to do, as `IoVecBufferMut`s are typically
613+
// (concpetually) associated with a single `Queue`. We just do this here to be able to test
614+
// the appending logic.
615+
let (mut q, _) = write_only_chain(&mem);
616+
let head = q.pop().unwrap();
617+
// SAFETY: it is actually unsafe, but we just want to check the length of the
618+
// `IoVecBufferMut` after appending.
619+
let _ = unsafe { iovec.append_descriptor_chain(&mem, head).unwrap() };
620+
assert_eq!(iovec.len(), 8 * 64);
533621
}
534622

535623
#[test]
@@ -687,6 +775,7 @@ mod verification {
687775
use vm_memory::VolatileSlice;
688776

689777
use super::{IoVecBuffer, IoVecBufferMut, IoVecVec};
778+
use crate::devices::virtio::iov_deque::IovDeque;
690779

691780
// Maximum memory size to use for our buffers. For the time being 1KB.
692781
const GUEST_MEMORY_SIZE: usize = 1 << 10;
@@ -702,10 +791,10 @@ mod verification {
702791
let mut vecs: Vec<iovec> = Vec::with_capacity(nr_descs);
703792
let mut len = 0u32;
704793
for _ in 0..nr_descs {
705-
// The `IoVecBuffer(Mut)` constructors ensure that the memory region described by every
794+
// The `IoVecBuffer` constructors ensure that the memory region described by every
706795
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
707796
// mmap. The assumption, here, that the last address is within the memory object's
708-
// bound substitutes these checks that `IoVecBuffer(Mut)::new() performs.`
797+
// bound substitutes these checks that `IoVecBuffer::new() performs.`
709798
let addr: usize = kani::any();
710799
let iov_len: usize =
711800
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
@@ -728,7 +817,27 @@ mod verification {
728817
}
729818
}
730819

731-
impl IoVecBufferMut {
820+
fn create_iovecs_mut(mem: *mut u8, size: usize, nr_descs: usize) -> (IovDeque<'static>, u32) {
821+
let mut vecs = IovDeque::new().unwrap();
822+
let mut len = 0u32;
823+
for _ in 0..nr_descs {
824+
// The `IoVecBufferMut` constructors ensure that the memory region described by every
825+
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
826+
// mmap. The assumption, here, that the last address is within the memory object's
827+
// bound substitutes these checks that `IoVecBufferMut::new() performs.`
828+
let addr: usize = kani::any();
829+
let iov_len: usize =
830+
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
831+
let iov_base = unsafe { mem.offset(addr.try_into().unwrap()) } as *mut c_void;
832+
833+
vecs.push_back(iovec { iov_base, iov_len });
834+
len += u32::try_from(iov_len).unwrap();
835+
}
836+
837+
(vecs, len)
838+
}
839+
840+
impl IoVecBufferMut<'_> {
732841
fn any_of_length(nr_descs: usize) -> Self {
733842
// We only write into `IoVecBufferMut` objects, so we can simply create a guest memory
734843
// object initialized to zeroes, trying to be nice to Kani.
@@ -739,8 +848,11 @@ mod verification {
739848
))
740849
};
741850

742-
let (vecs, len) = create_iovecs(mem, GUEST_MEMORY_SIZE, nr_descs);
743-
Self { vecs, len }
851+
let (vecs, len) = create_iovecs_mut(mem, GUEST_MEMORY_SIZE, nr_descs);
852+
Self {
853+
vecs,
854+
len: len.try_into().unwrap(),
855+
}
744856
}
745857
}
746858

src/vmm/src/devices/virtio/rng/device.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,15 @@ impl Entropy {
112112
return Ok(0);
113113
}
114114

115-
let mut rand_bytes = vec![0; iovec.len() as usize];
115+
let mut rand_bytes = vec![0; iovec.len()];
116116
rand::fill(&mut rand_bytes).map_err(|err| {
117117
METRICS.host_rng_fails.inc();
118118
err
119119
})?;
120120

121121
// It is ok to unwrap here. We are writing `iovec.len()` bytes at offset 0.
122122
iovec.write_all_volatile_at(&rand_bytes, 0).unwrap();
123-
Ok(iovec.len())
123+
Ok(u32::try_from(iovec.len()).unwrap())
124124
}
125125

126126
fn process_entropy_queue(&mut self) {
@@ -145,7 +145,10 @@ impl Entropy {
145145
// Check for available rate limiting budget.
146146
// If not enough budget is available, leave the request descriptor in the queue
147147
// to handle once we do have budget.
148-
if !Self::rate_limit_request(&mut self.rate_limiter, u64::from(iovec.len())) {
148+
if !Self::rate_limit_request(
149+
&mut self.rate_limiter,
150+
u64::try_from(iovec.len()).unwrap(),
151+
) {
149152
debug!("entropy: throttling entropy queue");
150153
METRICS.entropy_rate_limiter_throttled.inc();
151154
self.queues[RNG_QUEUE].undo_pop();

src/vmm/src/devices/virtio/vsock/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ pub use self::defs::uapi::VIRTIO_ID_VSOCK as TYPE_VSOCK;
3030
pub use self::defs::VSOCK_DEV_ID;
3131
pub use self::device::Vsock;
3232
pub use self::unix::{VsockUnixBackend, VsockUnixBackendError};
33+
use super::iov_deque::IovDequeError;
3334
use crate::devices::virtio::iovec::IoVecError;
3435
use crate::devices::virtio::persist::PersistError as VirtioStateError;
3536

@@ -138,6 +139,8 @@ pub enum VsockError {
138139
VirtioState(VirtioStateError),
139140
/// Vsock uds backend error: {0}
140141
VsockUdsBackend(VsockUnixBackendError),
142+
/// Underlying IovDeque error: {0}
143+
IovDeque(IovDequeError),
141144
}
142145

143146
impl From<IoVecError> for VsockError {
@@ -147,6 +150,7 @@ impl From<IoVecError> for VsockError {
147150
IoVecError::ReadOnlyDescriptor => VsockError::UnwritableDescriptor,
148151
IoVecError::GuestMemory(err) => VsockError::GuestMemoryMmap(err),
149152
IoVecError::OverflowedDescriptor => VsockError::DescChainOverflow,
153+
IoVecError::IovDeque(err) => VsockError::IovDeque(err),
150154
}
151155
}
152156
}

0 commit comments

Comments
 (0)