Skip to content

Commit f98af8a

Browse files
committed
virtio: allow IoVecBufferMut to hold multiple DescriptorChain objects
Allow IoVecBufferMut objects to store multiple DescriptorChain objects, so that we can describe guest memory meant to be used for receiving data (for example memory used for network RX) as a single (sparse) memory region. This will allow us to always keep track of all the available memory we have for performing RX and to use `readv` for copying data from the TAP device directly into guest memory, avoiding the extra copy. In the future, it will also facilitate the implementation of mergeable buffers for the RX path of the network device. Signed-off-by: Babis Chalios <[email protected]>
1 parent 02a261f commit f98af8a

File tree

4 files changed

+143
-43
lines changed

4 files changed

+143
-43
lines changed

src/vmm/src/devices/virtio/iovec.rs

Lines changed: 128 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use vm_memory::{
1010
GuestMemory, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
1111
};
1212

13+
use super::iov_deque::{IovDeque, IovDequeError};
1314
use crate::devices::virtio::queue::DescriptorChain;
1415
use crate::vstate::memory::GuestMemoryMmap;
1516

@@ -23,6 +24,8 @@ pub enum IoVecError {
2324
OverflowedDescriptor,
2425
/// Guest memory error: {0}
2526
GuestMemory(#[from] GuestMemoryError),
27+
/// Error with underlying `IovDeque`: {0}
28+
IovDeque(#[from] IovDequeError),
2629
}
2730

2831
// Using SmallVec in the kani proofs causes kani to use unbounded amounts of memory
@@ -219,28 +222,24 @@ impl IoVecBuffer {
219222
/// It describes a write-only buffer passed to us by the guest that is scattered across multiple
220223
/// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges
221224
/// of data from that buffer.
222-
#[derive(Debug, Default, Clone)]
223-
pub struct IoVecBufferMut {
225+
#[derive(Debug)]
226+
pub struct IoVecBufferMut<'a> {
224227
// container of the memory regions included in this IO vector
225-
vecs: IoVecVec,
228+
vecs: IovDeque<'a>,
226229
// Total length of the IoVecBufferMut
227-
len: u32,
230+
len: usize,
228231
}
229232

230-
impl IoVecBufferMut {
231-
/// Create an `IoVecBuffer` from a `DescriptorChain`
232-
///
233-
/// # Safety
234-
///
235-
/// The descriptor chain cannot be referencing the same memory location as another chain
236-
pub unsafe fn load_descriptor_chain(
233+
impl<'a> IoVecBufferMut<'a> {
234+
/// Parse a `DescriptorChain` object and append the memory regions it describes to the
235+
/// underlying ring buffer.
236+
fn parse_descriptor(
237237
&mut self,
238238
mem: &GuestMemoryMmap,
239239
head: DescriptorChain,
240-
) -> Result<(), IoVecError> {
241-
self.clear();
242-
240+
) -> Result<u32, IoVecError> {
243241
let mut next_descriptor = Some(head);
242+
let mut length = 0u32;
244243
while let Some(desc) = next_descriptor {
245244
if !desc.is_write_only() {
246245
return Err(IoVecError::ReadOnlyDescriptor);
@@ -257,18 +256,80 @@ impl IoVecBufferMut {
257256
slice.bitmap().mark_dirty(0, desc.len as usize);
258257

259258
let iov_base = slice.ptr_guard_mut().as_ptr().cast::<c_void>();
260-
self.vecs.push(iovec {
261-
iov_base,
262-
iov_len: desc.len as size_t,
263-
});
264-
self.len = self
265-
.len
259+
self.vecs
260+
.push_back(iovec {
261+
iov_base,
262+
iov_len: desc.len as size_t,
263+
})
264+
.unwrap();
265+
length = length
266266
.checked_add(desc.len)
267267
.ok_or(IoVecError::OverflowedDescriptor)?;
268268

269269
next_descriptor = desc.next_descriptor();
270270
}
271271

272+
self.len = self
273+
.len
274+
.checked_add(length as usize)
275+
.ok_or(IoVecError::OverflowedDescriptor)?;
276+
277+
Ok(length)
278+
}
279+
280+
/// Create an empty `IoVecBufferMut`.
281+
pub(crate) fn new() -> Result<Self, IovDequeError> {
282+
let vecs = IovDeque::new()?;
283+
Ok(Self { vecs, len: 0 })
284+
}
285+
286+
/// Create an `IoVecBufferMut` from a `DescriptorChain`
287+
///
288+
/// This will clear any previous `iovec` objects in the buffer and load the new
289+
/// [`DescriptorChain`].
290+
///
291+
/// # Safety
292+
///
293+
/// The descriptor chain cannot be referencing the same memory location as another chain
294+
pub unsafe fn load_descriptor_chain(
295+
&mut self,
296+
mem: &GuestMemoryMmap,
297+
head: DescriptorChain,
298+
) -> Result<(), IoVecError> {
299+
self.clear();
300+
let _ = self.parse_descriptor(mem, head)?;
301+
Ok(())
302+
}
303+
304+
/// Append a `DescriptorChain` to this `IoVecBufferMut`
305+
///
306+
/// # Safety
307+
///
308+
/// The descriptor chain cannot be referencing the same memory location as another chain
309+
pub unsafe fn append_descriptor_chain(
310+
&mut self,
311+
mem: &GuestMemoryMmap,
312+
head: DescriptorChain,
313+
) -> Result<u32, IoVecError> {
314+
self.parse_descriptor(mem, head)
315+
}
316+
317+
/// Drop memory from the `IoVecBufferMut`
318+
///
319+
/// This will drop memory described by the `IoVecBufferMut` starting from the beginning.
320+
pub fn drop_iovecs(&mut self, size: u32) -> Result<(), IoVecError> {
321+
let dropped = self.vecs.drop_iovs(size as usize);
322+
323+
// Users should ask us to drop a `size` of memory that is exactly covered by `iovec`
324+
// objects. In other words, the sum of the lengths of all dropped `iovec` objects should be
325+
// equal to the `size` we were asked to drop. If it isn't, something is seriously wrong
326+
// with the VirtIO queue or the emulation logic, so fail at this point.
327+
assert_eq!(u32::try_from(dropped).unwrap(), size);
328+
self.len = self
329+
.len
330+
.checked_sub(size as usize)
331+
.ok_or(IoVecError::OverflowedDescriptor)?;
332+
272333
Ok(())
273334
}
274335

@@ -281,20 +342,34 @@ impl IoVecBufferMut {
281342
mem: &GuestMemoryMmap,
282343
head: DescriptorChain,
283344
) -> Result<Self, IoVecError> {
284-
let mut new_buffer = Self::default();
345+
let mut new_buffer = Self::new()?;
285346
new_buffer.load_descriptor_chain(mem, head)?;
286347
Ok(new_buffer)
287348
}
288349

289350
/// Get the total length of the memory regions covered by this `IoVecBufferMut`
290-
pub(crate) fn len(&self) -> u32 {
351+
///
352+
/// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
353+
/// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
354+
/// the restriction that the total length of the buffer fits in a `u32`.
355+
pub(crate) fn len(&self) -> usize {
291356
self.len
292357
}
293358

359+
/// Returns a pointer to the memory keeping the `iovec` structs
360+
pub fn as_iovec_ptr(&mut self) -> *mut iovec {
361+
self.vecs.as_mut_slice().as_mut_ptr()
362+
}
363+
364+
/// Returns the length of the `iovec` array.
365+
pub fn iovec_count(&self) -> usize {
366+
self.vecs.len()
367+
}
368+
294369
/// Clears the `iovec` array
295370
pub fn clear(&mut self) {
296371
self.vecs.clear();
297-
self.len = 0u32;
372+
self.len = 0;
298373
}
299374

300375
/// Writes a number of bytes into the `IoVecBufferMut` starting at a given offset.
@@ -313,7 +388,7 @@ impl IoVecBufferMut {
313388
mut buf: &[u8],
314389
offset: usize,
315390
) -> Result<(), VolatileMemoryError> {
316-
if offset < self.len() as usize {
391+
if offset < self.len() {
317392
let expected = buf.len();
318393
let bytes_written = self.write_volatile_at(&mut buf, offset, expected)?;
319394

@@ -342,7 +417,7 @@ impl IoVecBufferMut {
342417
) -> Result<usize, VolatileMemoryError> {
343418
let mut total_bytes_read = 0;
344419

345-
for iov in &self.vecs {
420+
for iov in self.vecs.as_mut_slice() {
346421
if len == 0 {
347422
break;
348423
}
@@ -391,6 +466,7 @@ mod tests {
391466
use vm_memory::VolatileMemoryError;
392467

393468
use super::{IoVecBuffer, IoVecBufferMut};
469+
use crate::devices::virtio::iov_deque::IovDeque;
394470
use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE};
395471
use crate::devices::virtio::test_utils::VirtQueue;
396472
use crate::utilities::test_utils::multi_region_mem;
@@ -427,15 +503,18 @@ mod tests {
427503
}
428504
}
429505

430-
impl From<&mut [u8]> for IoVecBufferMut {
506+
impl<'a> From<&mut [u8]> for IoVecBufferMut<'a> {
431507
fn from(buf: &mut [u8]) -> Self {
508+
let mut vecs = IovDeque::new().unwrap();
509+
vecs.push_back(iovec {
510+
iov_base: buf.as_mut_ptr().cast::<c_void>(),
511+
iov_len: buf.len(),
512+
})
513+
.unwrap();
514+
432515
Self {
433-
vecs: vec![iovec {
434-
iov_base: buf.as_mut_ptr().cast::<c_void>(),
435-
iov_len: buf.len(),
436-
}]
437-
.into(),
438-
len: buf.len().try_into().unwrap(),
516+
vecs,
517+
len: buf.len(),
439518
}
440519
}
441520
}
@@ -528,8 +607,19 @@ mod tests {
528607
let head = q.pop().unwrap();
529608

530609
// SAFETY: This descriptor chain is only loaded once in this test
531-
let iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
610+
let mut iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
532611
assert_eq!(iovec.len(), 4 * 64);
612+
613+
// We are creating a new queue where we can get descriptors from. Probably, this is not
614+
// something that we will ever want to do, as `IoVecBufferMut`s are typically
615+
// (conceptually) associated with a single `Queue`. We just do this here to be able to test
616+
// the appending logic.
617+
let (mut q, _) = write_only_chain(&mem);
618+
let head = q.pop().unwrap();
619+
// SAFETY: it is actually unsafe, but we just want to check the length of the
620+
// `IoVecBufferMut` after appending.
621+
let _ = unsafe { iovec.append_descriptor_chain(&mem, head).unwrap() };
622+
assert_eq!(iovec.len(), 8 * 64);
533623
}
534624

535625
#[test]
@@ -728,7 +818,7 @@ mod verification {
728818
}
729819
}
730820

731-
impl IoVecBufferMut {
821+
impl IoVecBufferMut<'_> {
732822
fn any_of_length(nr_descs: usize) -> Self {
733823
// We only write into `IoVecBufferMut` objects, so we can simply create a guest memory
734824
// object initialized to zeroes, trying to be nice to Kani.
@@ -740,7 +830,10 @@ mod verification {
740830
};
741831

742832
let (vecs, len) = create_iovecs(mem, GUEST_MEMORY_SIZE, nr_descs);
743-
Self { vecs, len }
833+
Self {
834+
vecs,
835+
len: len.try_into().unwrap(),
836+
}
744837
}
745838
}
746839

src/vmm/src/devices/virtio/rng/device.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,15 @@ impl Entropy {
112112
return Ok(0);
113113
}
114114

115-
let mut rand_bytes = vec![0; iovec.len() as usize];
115+
let mut rand_bytes = vec![0; iovec.len()];
116116
rand::fill(&mut rand_bytes).map_err(|err| {
117117
METRICS.host_rng_fails.inc();
118118
err
119119
})?;
120120

121121
// It is ok to unwrap here. We are writing `iovec.len()` bytes at offset 0.
122122
iovec.write_all_volatile_at(&rand_bytes, 0).unwrap();
123-
Ok(iovec.len())
123+
Ok(u32::try_from(iovec.len()).unwrap())
124124
}
125125

126126
fn process_entropy_queue(&mut self) {
@@ -145,7 +145,10 @@ impl Entropy {
145145
// Check for available rate limiting budget.
146146
// If not enough budget is available, leave the request descriptor in the queue
147147
// to handle once we do have budget.
148-
if !Self::rate_limit_request(&mut self.rate_limiter, u64::from(iovec.len())) {
148+
if !Self::rate_limit_request(
149+
&mut self.rate_limiter,
150+
u64::try_from(iovec.len()).unwrap(),
151+
) {
149152
debug!("entropy: throttling entropy queue");
150153
METRICS.entropy_rate_limiter_throttled.inc();
151154
self.queues[RNG_QUEUE].undo_pop();

src/vmm/src/devices/virtio/vsock/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ pub use self::defs::uapi::VIRTIO_ID_VSOCK as TYPE_VSOCK;
3030
pub use self::defs::VSOCK_DEV_ID;
3131
pub use self::device::Vsock;
3232
pub use self::unix::{VsockUnixBackend, VsockUnixBackendError};
33+
use super::iov_deque::IovDequeError;
3334
use crate::devices::virtio::iovec::IoVecError;
3435
use crate::devices::virtio::persist::PersistError as VirtioStateError;
3536

@@ -138,6 +139,8 @@ pub enum VsockError {
138139
VirtioState(VirtioStateError),
139140
/// Vsock uds backend error: {0}
140141
VsockUdsBackend(VsockUnixBackendError),
142+
/// Underlying IovDeque error: {0}
143+
IovDeque(IovDequeError),
141144
}
142145

143146
impl From<IoVecError> for VsockError {
@@ -147,6 +150,7 @@ impl From<IoVecError> for VsockError {
147150
IoVecError::ReadOnlyDescriptor => VsockError::UnwritableDescriptor,
148151
IoVecError::GuestMemory(err) => VsockError::GuestMemoryMmap(err),
149152
IoVecError::OverflowedDescriptor => VsockError::DescChainOverflow,
153+
IoVecError::IovDeque(err) => VsockError::IovDeque(err),
150154
}
151155
}
152156
}

src/vmm/src/devices/virtio/vsock/packet.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ pub enum VsockPacketBuffer {
9292
/// Buffer holds a read-only guest-to-host (TX) packet
9393
Tx(IoVecBuffer),
9494
/// Buffer holds a write-only host-to-guest (RX) packet
95-
Rx(IoVecBufferMut),
95+
Rx(IoVecBufferMut<'static>),
9696
}
9797

9898
/// Struct describing a single vsock packet.
@@ -172,8 +172,8 @@ impl VsockPacket {
172172
// are live at the same time, meaning this has exclusive ownership over the memory
173173
let buffer = unsafe { IoVecBufferMut::from_descriptor_chain(mem, chain)? };
174174

175-
if buffer.len() < VSOCK_PKT_HDR_SIZE {
176-
return Err(VsockError::DescChainTooShortForHeader(buffer.len() as usize));
175+
if (u32::try_from(buffer.len()).unwrap()) < VSOCK_PKT_HDR_SIZE {
176+
return Err(VsockError::DescChainTooShortForHeader(buffer.len()));
177177
}
178178

179179
Ok(Self {
@@ -222,7 +222,7 @@ impl VsockPacket {
222222
pub fn buf_size(&self) -> usize {
223223
let chain_length = match self.buffer {
224224
VsockPacketBuffer::Tx(ref iovec_buf) => iovec_buf.len(),
225-
VsockPacketBuffer::Rx(ref iovec_buf) => iovec_buf.len(),
225+
VsockPacketBuffer::Rx(ref iovec_buf) => iovec_buf.len().try_into().unwrap(),
226226
};
227227
(chain_length - VSOCK_PKT_HDR_SIZE) as usize
228228
}
@@ -237,7 +237,7 @@ impl VsockPacket {
237237
VsockPacketBuffer::Tx(_) => Err(VsockError::UnwritableDescriptor),
238238
VsockPacketBuffer::Rx(ref mut buffer) => {
239239
if count
240-
> (buffer.len() as usize)
240+
> (buffer.len())
241241
.saturating_sub(VSOCK_PKT_HDR_SIZE as usize)
242242
.saturating_sub(offset)
243243
{

0 commit comments

Comments
 (0)