Skip to content

Allow directly writing snapshots on top of existing files #4301

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Removed support for creating Firecracker snapshots targeting older versions
of Firecracker. With this change, running 'firecracker --version' will not
print the supported snapshot versions.
- [#4301](https://github.com/firecracker-microvm/firecracker/pull/4301):
Allow merging of diff snapshots into base snapshots by directly writing
the diff snapshot on top of the base snapshot's memory file. This can be
done by setting the `mem_file_path` to the path of the pre-existing full
snapshot.

### Deprecated

Expand Down Expand Up @@ -87,6 +92,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Fixed a bug in the asynchronous virtio-block engine that rendered the device
non-functional after a PATCH request was issued to Firecracker for updating
the path to the host-side backing file of the device.
- [#4301](https://github.com/firecracker-microvm/firecracker/pull/4301):
Fixed a bug where if Firecracker was instructed to take a snapshot of a
microvm which itself was restored from a snapshot, specifying `mem_file_path`
to be the path of the memory file from which the microvm was restored would
result in both the microvm and the snapshot being corrupted. It now instead
performs a "write-back" of all memory that was updated since the snapshot
was originally loaded.

## [1.5.0]

Expand Down
4 changes: 4 additions & 0 deletions resources/seccomp/aarch64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
"default_action": "trap",
"filter_action": "allow",
"filter": [
{
"syscall": "newfstatat",
"comment": "Used when creating snapshots in vmm:persist::snapshot_memory_to_file through std::fs::File::metadata"
},
{
"syscall": "epoll_ctl"
},
Expand Down
4 changes: 4 additions & 0 deletions resources/seccomp/x86_64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
"default_action": "trap",
"filter_action": "allow",
"filter": [
{
"syscall": "stat",
"comment": "Used when creating snapshots in vmm:persist::snapshot_memory_to_file through std::fs::File::metadata"
},
{
"syscall": "epoll_ctl"
},
Expand Down
117 changes: 40 additions & 77 deletions src/vmm/src/persist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ pub enum CreateSnapshotError {
SerializeMicrovmState(snapshot::Error),
/// Cannot perform {0} on the snapshot backing file: {1}
SnapshotBackingFile(&'static str, io::Error),
#[rustfmt::skip]
#[doc = "Size mismatch when writing diff snapshot on top of base layer: base layer size is {0} but diff layer is size {1}."]
SnapshotBackingFileLengthMismatch(u64, u64),
#[cfg(target_arch = "x86_64")]
#[rustfmt::skip]
#[doc = "Too many devices attached: {0}. The maximum number allowed for the snapshot data version requested is {FC_V0_23_MAX_DEVICES:}."]
Expand All @@ -200,7 +203,7 @@ pub fn create_snapshot(
version_map,
)?;

snapshot_memory_to_file(vmm, &params.mem_file_path, &params.snapshot_type)?;
snapshot_memory_to_file(vmm, &params.mem_file_path, params.snapshot_type)?;

Ok(())
}
Expand Down Expand Up @@ -231,23 +234,54 @@ fn snapshot_state_to_file(
.map_err(|err| SnapshotBackingFile("sync_all", err))
}

/// Takes a snapshot of the virtual machine running inside the given [`Vmm`] and saves it to
/// `mem_file_path`.
///
/// If `snapshot_type` is [`SnapshotType::Diff`], and `mem_file_path` exists and is a snapshot file
/// of matching size, then the diff snapshot will be directly merged into the existing snapshot.
/// Otherwise, existing files are simply overwritten.
fn snapshot_memory_to_file(
vmm: &Vmm,
mem_file_path: &Path,
snapshot_type: &SnapshotType,
snapshot_type: SnapshotType,
) -> Result<(), CreateSnapshotError> {
use self::CreateSnapshotError::*;

// Need to check this here, as we create the file in the line below
let file_existed = mem_file_path.exists();

let mut file = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(mem_file_path)
.map_err(|err| MemoryBackingFile("open", err))?;

// Set the length of the file to the full size of the memory area.
// Determine what size our total memory area is.
let mem_size_mib = mem_size_mib(vmm.guest_memory());
file.set_len(mem_size_mib * 1024 * 1024)
.map_err(|err| MemoryBackingFile("set_length", err))?;
let expected_size = mem_size_mib * 1024 * 1024;

if file_existed {
let file_size = file
.metadata()
.map_err(|e| MemoryBackingFile("get_metadata", e))?
.len();

// Here we only truncate the file if the size mismatches.
// - For full snapshots, the entire file's contents will be overwritten anyway. We have to
// avoid truncating here to deal with the edge case where it represents the snapshot file
// from which this very microVM was loaded (as modifying the memory file would be
// reflected in the mmap of the file, meaning a truncate operation would zero out guest
// memory, and thus corrupt the VM).
// - For diff snapshots, we want to merge the diff layer directly into the file.
if file_size != expected_size {
file.set_len(0)
.map_err(|err| MemoryBackingFile("truncate", err))?;
}
}

// Set the length of the file to the full size of the memory area.
file.set_len(expected_size)
.map_err(|e| MemoryBackingFile("set_length", e))?;

match snapshot_type {
SnapshotType::Diff => {
Expand Down Expand Up @@ -578,7 +612,6 @@ fn guest_memory_from_uffd(
#[cfg(test)]
mod tests {
use snapshot::Persist;
use utils::errno;
use utils::tempfile::TempFile;

use super::*;
Expand Down Expand Up @@ -692,74 +725,4 @@ mod tests {
microvm_state.device_states
)
}

#[test]
fn test_create_snapshot_error_display() {
use crate::persist::CreateSnapshotError::*;
use crate::vstate::memory::MemoryError;

let err = DirtyBitmap(VmmError::DirtyBitmap(kvm_ioctls::Error::new(20)));
let _ = format!("{}{:?}", err, err);

let err = InvalidVersionFormat;
let _ = format!("{}{:?}", err, err);

let err = UnsupportedVersion;
let _ = format!("{}{:?}", err, err);

let err = Memory(MemoryError::WriteMemory(
vm_memory::GuestMemoryError::HostAddressNotAvailable,
));
let _ = format!("{}{:?}", err, err);

let err = MemoryBackingFile("open", io::Error::from_raw_os_error(0));
let _ = format!("{}{:?}", err, err);

let err = MicrovmState(MicrovmStateError::UnexpectedVcpuResponse);
let _ = format!("{}{:?}", err, err);

let err = SerializeMicrovmState(snapshot::Error::InvalidMagic(0));
let _ = format!("{}{:?}", err, err);

let err = SnapshotBackingFile("open", io::Error::from_raw_os_error(0));
let _ = format!("{}{:?}", err, err);

#[cfg(target_arch = "x86_64")]
{
let err = TooManyDevices(0);
let _ = format!("{}{:?}", err, err);
}
}

#[test]
fn test_microvm_state_error_display() {
use crate::persist::MicrovmStateError::*;

let err = InvalidInput;
let _ = format!("{}{:?}", err, err);

let err = NotAllowed(String::from(""));
let _ = format!("{}{:?}", err, err);

let err = RestoreDevices(DevicePersistError::MmioTransport);
let _ = format!("{}{:?}", err, err);

let err = RestoreVcpuState(vstate::vcpu::VcpuError::VcpuTlsInit);
let _ = format!("{}{:?}", err, err);

let err = RestoreVmState(vstate::vm::VmError::NotEnoughMemorySlots);
let _ = format!("{}{:?}", err, err);

let err = SaveVcpuState(vstate::vcpu::VcpuError::VcpuTlsNotPresent);
let _ = format!("{}{:?}", err, err);

let err = SaveVmState(vstate::vm::VmError::NotEnoughMemorySlots);
let _ = format!("{}{:?}", err, err);

let err = SignalVcpu(VcpuSendEventError(errno::Error::new(0)));
let _ = format!("{}{:?}", err, err);

let err = UnexpectedVcpuResponse;
let _ = format!("{}{:?}", err, err);
}
}
2 changes: 1 addition & 1 deletion src/vmm/src/vmm_config/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize};

/// The snapshot type options that are available when
/// creating a new snapshot.
#[derive(Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
pub enum SnapshotType {
/// Diff snapshot.
Diff,
Expand Down
21 changes: 14 additions & 7 deletions tests/framework/microvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,13 +807,20 @@ def resume(self):
"""Resume the microVM"""
self.api.vm.patch(state="Resumed")

def make_snapshot(self, snapshot_type: SnapshotType | str):
def make_snapshot(
self,
snapshot_type: SnapshotType | str,
*,
mem_path: str = "mem",
vmstate_path="vmstate",
):
"""Create a Snapshot object from a microvm.

The snapshot's memory and vstate files will be saved at the specified paths
relative to the Microvm's chroot.

It pauses the microvm before taking the snapshot.
"""
vmstate_path = "vmstate"
mem_path = "mem"
snapshot_type = SnapshotType(snapshot_type)
self.pause()
self.api.snapshot_create.put(
Expand All @@ -831,13 +838,13 @@ def make_snapshot(self, snapshot_type: SnapshotType | str):
snapshot_type=snapshot_type,
)

def snapshot_diff(self):
def snapshot_diff(self, *, mem_path: str = "mem", vmstate_path="vmstate"):
"""Make a Diff snapshot"""
return self.make_snapshot("Diff")
return self.make_snapshot("Diff", mem_path=mem_path, vmstate_path=vmstate_path)

def snapshot_full(self):
def snapshot_full(self, *, mem_path: str = "mem", vmstate_path="vmstate"):
"""Make a Full snapshot"""
return self.make_snapshot("Full")
return self.make_snapshot("Full", mem_path=mem_path, vmstate_path=vmstate_path)

def restore_from_snapshot(
self,
Expand Down
92 changes: 90 additions & 2 deletions tests/integration_tests/functional/test_snapshot_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import os
import re
import shutil
from pathlib import Path

import pytest
Expand Down Expand Up @@ -256,13 +257,14 @@ def test_cmp_full_and_first_diff_mem(microvm_factory, guest_kernel, rootfs):

logger.info("Create full snapshot.")
# Create full snapshot.
full_snapshot = vm.snapshot_full()
full_snapshot = vm.snapshot_full(mem_path="mem_full")

logger.info("Create diff snapshot.")
# Create diff snapshot.
diff_snapshot = vm.snapshot_diff()

assert filecmp.cmp(full_snapshot.mem, diff_snapshot.mem)
assert full_snapshot.mem != diff_snapshot.mem
assert filecmp.cmp(full_snapshot.mem, diff_snapshot.mem, shallow=False)


def test_negative_postload_api(test_microvm_with_api, microvm_factory):
Expand Down Expand Up @@ -410,3 +412,89 @@ def test_create_large_diff_snapshot(test_microvm_with_api):

# If the regression was not fixed, this would have failed. The Firecracker
# process would have been taken down.


def test_diff_snapshot_overlay(guest_kernel, rootfs, microvm_factory):
"""
Tests that if we take a diff snapshot and direct firecracker to write it on
top of an existing snapshot file, it will successfully merge them.
"""
basevm = microvm_factory.build(guest_kernel, rootfs)
basevm.spawn()
basevm.basic_config(track_dirty_pages=True)
basevm.add_net_iface()
basevm.start()

# Wait for microvm to be booted
rc, _, stderr = basevm.ssh.run("true")
assert rc == 0, stderr

# The first snapshot taken will always contain all memory (even if its specified as "diff").
# We use a diff snapshot here, as taking a full snapshot does not clear the dirty page tracking,
# meaning the `snapshot_diff()` call below would again dump the entire guest memory instead of
# only dirty regions.
full_snapshot = basevm.snapshot_diff()
basevm.resume()

# Run some command to dirty some pages
rc, _, stderr = basevm.ssh.run("true")
assert rc == 0, stderr

# First copy the base snapshot somewhere else, so we can make sure
# it will actually get updated
first_snapshot_backup = Path(basevm.chroot()) / "mem.old"
shutil.copyfile(full_snapshot.mem, first_snapshot_backup)

# One Microvm object will always write its snapshot files to the same location
merged_snapshot = basevm.snapshot_diff()
assert full_snapshot.mem == merged_snapshot.mem

assert not filecmp.cmp(merged_snapshot.mem, first_snapshot_backup, shallow=False)

new_vm = microvm_factory.build()
new_vm.spawn()
new_vm.restore_from_snapshot(merged_snapshot, resume=True)

# Run some command to check that the restored VM works
rc, _, stderr = new_vm.ssh.run("true")
assert rc == 0, stderr


def test_snapshot_overwrite_self(guest_kernel, rootfs, microvm_factory):
"""Tests that if we try to take a snapshot that would overwrite the
very file from which the current VM is stored, nothing happens.

Note that even though we map the file as MAP_PRIVATE, the documentation
of mmap does not specify what should happen if the file is changed after being
mmap'd (https://man7.org/linux/man-pages/man2/mmap.2.html). It seems that
these changes can propagate to the mmap'd memory region."""
base_vm = microvm_factory.build(guest_kernel, rootfs)
base_vm.spawn()
base_vm.basic_config()
base_vm.add_net_iface()
base_vm.start()

# Wait for microvm to be booted
rc, _, stderr = base_vm.ssh.run("true")
assert rc == 0, stderr

snapshot = base_vm.snapshot_full()
base_vm.kill()

vm = microvm_factory.build()
vm.spawn()
vm.restore_from_snapshot(snapshot, resume=True)

# When restoring a snapshot, vm.restore_from_snapshot first copies
# the memory file (inside of the jailer) to /mem.src
currently_loaded = Path(vm.chroot()) / "mem.src"

assert currently_loaded.exists()

vm.snapshot_full(mem_path="mem.src")
vm.resume()

# Check the overwriting the snapshot file from which this microvm was originally
# restored, with a new snapshot of this vm, does not break the VM
rc, _, stderr = vm.ssh.run("true")
assert rc == 0, stderr