Skip to content

Commit 3acf37d

Browse files
committed
fix(vsock): save state after sending a notification
This is a fix for a fix introduced in #4796. The issue was the vsock device hanging after snapshot restoration due to the guest not being notified about the termination packet. But there was a bug in the fix: mainly, we saved the vsock state before the notification was sent, thus discarding all modifications made to send the notification. The reason the original fix worked is that we were only testing with 1 iteration of snap/restore. This way, even though we lost synchronization with the guest in the event queue state, it worked fine once. But doing more iterations causes vsock to hang as before. This commit fixes the issue by storing the vsock state after the notification is sent and modifies the vsock test to run multiple iterations of snap/restore. Signed-off-by: Egor Lazarchuk <[email protected]>
1 parent 5d762a8 commit 3acf37d

File tree

2 files changed

+62
-53
lines changed

2 files changed

+62
-53
lines changed

src/vmm/src/device_manager/persist.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -365,11 +365,6 @@ impl<'a> Persist<'a> for MMIODeviceManager {
365365
.downcast_mut::<Vsock<VsockUnixBackend>>()
366366
.unwrap();
367367

368-
let vsock_state = VsockState {
369-
backend: vsock.backend().save(),
370-
frontend: vsock.save(),
371-
};
372-
373368
// Send Transport event to reset connections if device
374369
// is activated.
375370
if vsock.is_activated() {
@@ -378,6 +373,13 @@ impl<'a> Persist<'a> for MMIODeviceManager {
378373
});
379374
}
380375

376+
// Save state after potential notification to the guest. This
377+
// way we save changes to the queue the notification can cause.
378+
let vsock_state = VsockState {
379+
backend: vsock.backend().save(),
380+
frontend: vsock.save(),
381+
};
382+
381383
states.vsock_device = Some(ConnectedVsockState {
382384
device_id: devid.clone(),
383385
device_state: vsock_state,

tests/integration_tests/functional/test_vsock.py

Lines changed: 55 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -225,54 +225,61 @@ def test_vsock_transport_reset_g2h(uvm_nano, microvm_factory):
225225
test_vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path=f"/{VSOCK_UDS_PATH}")
226226
test_vm.start()
227227

228-
host_socket_path = os.path.join(
229-
test_vm.path, f"{VSOCK_UDS_PATH}_{ECHO_SERVER_PORT}"
230-
)
231-
host_socat_commmand = [
232-
"socat",
233-
"-dddd",
234-
f"UNIX-LISTEN:{host_socket_path},fork",
235-
"STDOUT",
236-
]
237-
host_socat = subprocess.Popen(
238-
host_socat_commmand, stdout=subprocess.PIPE, stderr=subprocess.PIPE
239-
)
240-
241-
# Give some time for host socat to create socket
242-
time.sleep(0.5)
243-
assert Path(host_socket_path).exists()
244-
test_vm.create_jailed_resource(host_socket_path)
245-
246-
# Create a socat process in the guest which will connect to the host socat
247-
guest_socat_commmand = f"tmux new -d 'socat - vsock-connect:2:{ECHO_SERVER_PORT}'"
248-
test_vm.ssh.run(guest_socat_commmand)
249-
250-
# socat should be running in the guest now
251-
code, _, _ = test_vm.ssh.run("pidof socat")
252-
assert code == 0
253-
254-
# Create snapshot.
228+
# Create snapshot and terminate a VM.
255229
snapshot = test_vm.snapshot_full()
256-
test_vm.resume()
257-
258-
# After `create_snapshot` + 'restore' calls, connection should be dropped
259-
code, _, _ = test_vm.ssh.run("pidof socat")
260-
assert code == 1
261-
262-
# Kill host socat as it is not useful anymore
263-
host_socat.kill()
264-
host_socat.communicate()
265-
266-
# Terminate VM.
267230
test_vm.kill()
268231

269-
# Load snapshot.
270-
vm2 = microvm_factory.build()
271-
vm2.spawn()
272-
vm2.restore_from_snapshot(snapshot, resume=True)
273-
274-
# After snap restore all vsock connections should be
275-
# dropped. This means guest socat should exit same way
276-
# as it did after snapshot was taken.
277-
code, _, _ = vm2.ssh.run("pidof socat")
278-
assert code == 1
232+
for _ in range(5):
233+
# Load snapshot.
234+
new_vm = microvm_factory.build()
235+
new_vm.spawn()
236+
new_vm.restore_from_snapshot(snapshot, resume=True)
237+
238+
# After snap restore all vsock connections should be
239+
# dropped. This means guest socat should exit same way
240+
# as it did after snapshot was taken.
241+
code, _, _ = new_vm.ssh.run("pidof socat")
242+
assert code == 1
243+
244+
host_socket_path = os.path.join(
245+
new_vm.path, f"{VSOCK_UDS_PATH}_{ECHO_SERVER_PORT}"
246+
)
247+
host_socat_commmand = [
248+
"socat",
249+
"-dddd",
250+
f"UNIX-LISTEN:{host_socket_path},fork",
251+
"STDOUT",
252+
]
253+
host_socat = subprocess.Popen(
254+
host_socat_commmand, stdout=subprocess.PIPE, stderr=subprocess.PIPE
255+
)
256+
257+
# Give some time for host socat to create socket
258+
time.sleep(0.5)
259+
assert Path(host_socket_path).exists()
260+
new_vm.create_jailed_resource(host_socket_path)
261+
262+
# Create a socat process in the guest which will connect to the host socat
263+
guest_socat_commmand = (
264+
f"tmux new -d 'socat - vsock-connect:2:{ECHO_SERVER_PORT}'"
265+
)
266+
new_vm.ssh.run(guest_socat_commmand)
267+
268+
# socat should be running in the guest now
269+
code, _, _ = new_vm.ssh.run("pidof socat")
270+
assert code == 0
271+
272+
# Create snapshot.
273+
snapshot = new_vm.snapshot_full()
274+
new_vm.resume()
275+
276+
# After `create_snapshot` + 'restore' calls, connection should be dropped
277+
code, _, _ = new_vm.ssh.run("pidof socat")
278+
assert code == 1
279+
280+
# Kill host socat as it is not useful anymore
281+
host_socat.kill()
282+
host_socat.communicate()
283+
284+
# Terminate VM.
285+
new_vm.kill()

0 commit comments

Comments
 (0)