firecracker-microvm · roypat · Dec 7, 2023 · Nov 24, 2023 · Nov 24, 2023 · Dec 5, 2023
@@ -59,6 +59,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   Removed support for creating Firecracker snapshots targeting older versions
   of Firecracker. With this change, running 'firecracker --version' will not
   print the supported snapshot versions.
+- [#4301](https://github.com/firecracker-microvm/firecracker/pull/4301):
+  Allow merging of diff snapshots into base snapshots by directly writing
+  the diff snapshot on top of the base snapshot's memory file. This can be
+  done by setting the `mem_file_path` to the path of the pre-existing full
+  snapshot.
 
 ### Deprecated
 
@@ -87,6 +92,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   Fixed a bug in the asynchronous virtio-block engine that rendered the device
   non-functional after a PATCH request was issued to Firecracker for updating
   the path to the host-side backing file of the device.
+- [#4301](https://github.com/firecracker-microvm/firecracker/pull/4301):
+  Fixed a bug where if Firecracker was instructed to take a snapshot of a
+  microvm which itself was restored from a snapshot, specifying `mem_file_path`
+  to be the path of the memory file from which the microvm was restored would
+  result in both the microvm and the snapshot being corrupted. It now instead
+  performs a "write-back" of all memory that was updated since the snapshot
+  was originally loaded.
 
 ## [1.5.0]
 

diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json
@@ -3,6 +3,10 @@
         "default_action": "trap",
         "filter_action": "allow",
         "filter": [
+            {
+                "syscall": "newfstatat",
+                "comment": "Used when creating snapshots in vmm:persist::snapshot_memory_to_file through std::fs::File::metadata"
+            },
             {
                 "syscall": "epoll_ctl"
             },

diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json
@@ -3,6 +3,10 @@
         "default_action": "trap",
         "filter_action": "allow",
         "filter": [
+            {
+                "syscall": "stat",
+                "comment": "Used when creating snapshots in vmm:persist::snapshot_memory_to_file through std::fs::File::metadata"
+            },
             {
                 "syscall": "epoll_ctl"
             },

diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs
@@ -174,6 +174,9 @@ pub enum CreateSnapshotError {
     SerializeMicrovmState(snapshot::Error),
     /// Cannot perform {0} on the snapshot backing file: {1}
     SnapshotBackingFile(&'static str, io::Error),
+    #[rustfmt::skip]
+    #[doc = "Size mismatch when writing diff snapshot on top of base layer: base layer size is {0} but diff layer is size {1}."]
+    SnapshotBackingFileLengthMismatch(u64, u64),
     #[cfg(target_arch = "x86_64")]
     #[rustfmt::skip]
     #[doc = "Too many devices attached: {0}. The maximum number allowed for the snapshot data version requested is {FC_V0_23_MAX_DEVICES:}."]
@@ -200,7 +203,7 @@ pub fn create_snapshot(
         version_map,
     )?;
 
-    snapshot_memory_to_file(vmm, &params.mem_file_path, &params.snapshot_type)?;
+    snapshot_memory_to_file(vmm, &params.mem_file_path, params.snapshot_type)?;
 
     Ok(())
 }
@@ -231,23 +234,54 @@ fn snapshot_state_to_file(
         .map_err(|err| SnapshotBackingFile("sync_all", err))
 }
 
+/// Takes a snapshot of the virtual machine running inside the given [`Vmm`] and saves it to
+/// `mem_file_path`.
+///
+/// If `snapshot_type` is [`SnapshotType::Diff`], and `mem_file_path` exists and is a snapshot file
+/// of matching size, then the diff snapshot will be directly merged into the existing snapshot.
+/// Otherwise, existing files are simply overwritten.
 fn snapshot_memory_to_file(
     vmm: &Vmm,
     mem_file_path: &Path,
-    snapshot_type: &SnapshotType,
+    snapshot_type: SnapshotType,
 ) -> Result<(), CreateSnapshotError> {
     use self::CreateSnapshotError::*;
+
+    // Need to check this here, as we create the file in the line below
+    let file_existed = mem_file_path.exists();
+
     let mut file = OpenOptions::new()
         .write(true)
         .create(true)
-        .truncate(true)
         .open(mem_file_path)
         .map_err(|err| MemoryBackingFile("open", err))?;
 
-    // Set the length of the file to the full size of the memory area.
+    // Determine what size our total memory area is.
     let mem_size_mib = mem_size_mib(vmm.guest_memory());
-    file.set_len(mem_size_mib * 1024 * 1024)
-        .map_err(|err| MemoryBackingFile("set_length", err))?;
+    let expected_size = mem_size_mib * 1024 * 1024;
+
+    if file_existed {
+        let file_size = file
+            .metadata()
+            .map_err(|e| MemoryBackingFile("get_metadata", e))?
+            .len();
+
+        // Here we only truncate the file if the size mismatches.
+        // - For full snapshots, the entire file's contents will be overwritten anyway. We have to
+        //   avoid truncating here to deal with the edge case where it represents the snapshot file
+        //   from which this very microVM was loaded (as modifying the memory file would be
+        //   reflected in the mmap of the file, meaning a truncate operation would zero out guest
+        //   memory, and thus corrupt the VM).
+        // - For diff snapshots, we want to merge the diff layer directly into the file.
+        if file_size != expected_size {
+            file.set_len(0)
+                .map_err(|err| MemoryBackingFile("truncate", err))?;
+        }
+    }
+
+    // Set the length of the file to the full size of the memory area.
+    file.set_len(expected_size)
+        .map_err(|e| MemoryBackingFile("set_length", e))?;
 
     match snapshot_type {
         SnapshotType::Diff => {
@@ -578,7 +612,6 @@ fn guest_memory_from_uffd(
 #[cfg(test)]
 mod tests {
     use snapshot::Persist;
-    use utils::errno;
     use utils::tempfile::TempFile;
 
     use super::*;
@@ -692,74 +725,4 @@ mod tests {
             microvm_state.device_states
         )
     }
-
-    #[test]
-    fn test_create_snapshot_error_display() {
-        use crate::persist::CreateSnapshotError::*;
-        use crate::vstate::memory::MemoryError;
-
-        let err = DirtyBitmap(VmmError::DirtyBitmap(kvm_ioctls::Error::new(20)));
-        let _ = format!("{}{:?}", err, err);
-
-        let err = InvalidVersionFormat;
-        let _ = format!("{}{:?}", err, err);
-
-        let err = UnsupportedVersion;
-        let _ = format!("{}{:?}", err, err);
-
-        let err = Memory(MemoryError::WriteMemory(
-            vm_memory::GuestMemoryError::HostAddressNotAvailable,
-        ));
-        let _ = format!("{}{:?}", err, err);
-
-        let err = MemoryBackingFile("open", io::Error::from_raw_os_error(0));
-        let _ = format!("{}{:?}", err, err);
-
-        let err = MicrovmState(MicrovmStateError::UnexpectedVcpuResponse);
-        let _ = format!("{}{:?}", err, err);
-
-        let err = SerializeMicrovmState(snapshot::Error::InvalidMagic(0));
-        let _ = format!("{}{:?}", err, err);
-
-        let err = SnapshotBackingFile("open", io::Error::from_raw_os_error(0));
-        let _ = format!("{}{:?}", err, err);
-
-        #[cfg(target_arch = "x86_64")]
-        {
-            let err = TooManyDevices(0);
-            let _ = format!("{}{:?}", err, err);
-        }
-    }
-
-    #[test]
-    fn test_microvm_state_error_display() {
-        use crate::persist::MicrovmStateError::*;
-
-        let err = InvalidInput;
-        let _ = format!("{}{:?}", err, err);
-
-        let err = NotAllowed(String::from(""));
-        let _ = format!("{}{:?}", err, err);
-
-        let err = RestoreDevices(DevicePersistError::MmioTransport);
-        let _ = format!("{}{:?}", err, err);
-
-        let err = RestoreVcpuState(vstate::vcpu::VcpuError::VcpuTlsInit);
-        let _ = format!("{}{:?}", err, err);
-
-        let err = RestoreVmState(vstate::vm::VmError::NotEnoughMemorySlots);
-        let _ = format!("{}{:?}", err, err);
-
-        let err = SaveVcpuState(vstate::vcpu::VcpuError::VcpuTlsNotPresent);
-        let _ = format!("{}{:?}", err, err);
-
-        let err = SaveVmState(vstate::vm::VmError::NotEnoughMemorySlots);
-        let _ = format!("{}{:?}", err, err);
-
-        let err = SignalVcpu(VcpuSendEventError(errno::Error::new(0)));
-        let _ = format!("{}{:?}", err, err);
-
-        let err = UnexpectedVcpuResponse;
-        let _ = format!("{}{:?}", err, err);
-    }
 }
diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs
@@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize};
 
 /// The snapshot type options that are available when
 /// creating a new snapshot.
-#[derive(Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
 pub enum SnapshotType {
     /// Diff snapshot.
     Diff,

diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py
@@ -807,13 +807,20 @@ def resume(self):
         """Resume the microVM"""
         self.api.vm.patch(state="Resumed")
 
-    def make_snapshot(self, snapshot_type: SnapshotType | str):
+    def make_snapshot(
+        self,
+        snapshot_type: SnapshotType | str,
+        *,
+        mem_path: str = "mem",
+        vmstate_path="vmstate",
+    ):
         """Create a Snapshot object from a microvm.
 
+        The snapshot's memory and vstate files will be saved at the specified paths
+        relative to the Microvm's chroot.
+
         It pauses the microvm before taking the snapshot.
         """
-        vmstate_path = "vmstate"
-        mem_path = "mem"
         snapshot_type = SnapshotType(snapshot_type)
         self.pause()
         self.api.snapshot_create.put(
@@ -831,13 +838,13 @@ def make_snapshot(self, snapshot_type: SnapshotType | str):
             snapshot_type=snapshot_type,
         )
 
-    def snapshot_diff(self):
+    def snapshot_diff(self, *, mem_path: str = "mem", vmstate_path="vmstate"):
         """Make a Diff snapshot"""
-        return self.make_snapshot("Diff")
+        return self.make_snapshot("Diff", mem_path=mem_path, vmstate_path=vmstate_path)
 
-    def snapshot_full(self):
+    def snapshot_full(self, *, mem_path: str = "mem", vmstate_path="vmstate"):
         """Make a Full snapshot"""
-        return self.make_snapshot("Full")
+        return self.make_snapshot("Full", mem_path=mem_path, vmstate_path=vmstate_path)
 
     def restore_from_snapshot(
         self,

diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py
@@ -6,6 +6,7 @@
 import logging
 import os
 import re
+import shutil
 from pathlib import Path
 
 import pytest
@@ -256,13 +257,14 @@ def test_cmp_full_and_first_diff_mem(microvm_factory, guest_kernel, rootfs):
 
     logger.info("Create full snapshot.")
     # Create full snapshot.
-    full_snapshot = vm.snapshot_full()
+    full_snapshot = vm.snapshot_full(mem_path="mem_full")
 
     logger.info("Create diff snapshot.")
     # Create diff snapshot.
     diff_snapshot = vm.snapshot_diff()
 
-    assert filecmp.cmp(full_snapshot.mem, diff_snapshot.mem)
+    assert full_snapshot.mem != diff_snapshot.mem
+    assert filecmp.cmp(full_snapshot.mem, diff_snapshot.mem, shallow=False)
 
 
 def test_negative_postload_api(test_microvm_with_api, microvm_factory):
@@ -410,3 +412,89 @@ def test_create_large_diff_snapshot(test_microvm_with_api):
 
     # If the regression was not fixed, this would have failed. The Firecracker
     # process would have been taken down.
+
+
+def test_diff_snapshot_overlay(guest_kernel, rootfs, microvm_factory):
+    """
+    Tests that if we take a diff snapshot and direct firecracker to write it on
+    top of an existing snapshot file, it will successfully merge them.
+    """
+    basevm = microvm_factory.build(guest_kernel, rootfs)
+    basevm.spawn()
+    basevm.basic_config(track_dirty_pages=True)
+    basevm.add_net_iface()
+    basevm.start()
+
+    # Wait for microvm to be booted
+    rc, _, stderr = basevm.ssh.run("true")
+    assert rc == 0, stderr
+
+    # The first snapshot taken will always contain all memory (even if its specified as "diff").
+    # We use a diff snapshot here, as taking a full snapshot does not clear the dirty page tracking,
+    # meaning the `snapshot_diff()` call below would again dump the entire guest memory instead of
+    # only dirty regions.
+    full_snapshot = basevm.snapshot_diff()
+    basevm.resume()
+
+    # Run some command to dirty some pages
+    rc, _, stderr = basevm.ssh.run("true")
+    assert rc == 0, stderr
+
+    # First copy the base snapshot somewhere else, so we can make sure
+    # it will actually get updated
+    first_snapshot_backup = Path(basevm.chroot()) / "mem.old"
+    shutil.copyfile(full_snapshot.mem, first_snapshot_backup)
+
+    # One Microvm object will always write its snapshot files to the same location
+    merged_snapshot = basevm.snapshot_diff()
+    assert full_snapshot.mem == merged_snapshot.mem
+
+    assert not filecmp.cmp(merged_snapshot.mem, first_snapshot_backup, shallow=False)
+
+    new_vm = microvm_factory.build()
+    new_vm.spawn()
+    new_vm.restore_from_snapshot(merged_snapshot, resume=True)
+
+    # Run some command to check that the restored VM works
+    rc, _, stderr = new_vm.ssh.run("true")
+    assert rc == 0, stderr
+
+
+def test_snapshot_overwrite_self(guest_kernel, rootfs, microvm_factory):
+    """Tests that if we try to take a snapshot that would overwrite the
+    very file from which the current VM is stored, nothing happens.
+
+    Note that even though we map the file as MAP_PRIVATE, the documentation
+    of mmap does not specify what should happen if the file is changed after being
+    mmap'd (https://man7.org/linux/man-pages/man2/mmap.2.html). It seems that
+    these changes can propagate to the mmap'd memory region."""
+    base_vm = microvm_factory.build(guest_kernel, rootfs)
+    base_vm.spawn()
+    base_vm.basic_config()
+    base_vm.add_net_iface()
+    base_vm.start()
+
+    # Wait for microvm to be booted
+    rc, _, stderr = base_vm.ssh.run("true")
+    assert rc == 0, stderr
+
+    snapshot = base_vm.snapshot_full()
+    base_vm.kill()
+
+    vm = microvm_factory.build()
+    vm.spawn()
+    vm.restore_from_snapshot(snapshot, resume=True)
+
+    # When restoring a snapshot, vm.restore_from_snapshot first copies
+    # the memory file (inside of the jailer) to /mem.src
+    currently_loaded = Path(vm.chroot()) / "mem.src"
+
+    assert currently_loaded.exists()
+
+    vm.snapshot_full(mem_path="mem.src")
+    vm.resume()
+
+    # Check the overwriting the snapshot file from which this microvm was originally
+    # restored, with a new snapshot of this vm, does not break the VM
+    rc, _, stderr = vm.ssh.run("true")
+    assert rc == 0, stderr