Skip to content

Commit 31323f9

Browse files
committed
fix: Ensure to exit with error if any vcpu exits with error
Previously, when a VM exited, we looked for the first vcpu that reported an exit status, and then indiscriminately reported that back. However, it is possible to one vcpu to exit successfully while another exits with an error, and this could lead us to report "Firecracker Exited Successfully" even though it did not. Now, we explicitly look for the status code of all vcpus. If any of them report an error, this takes precedence over non-error status codes. Signed-off-by: Patrick Roy <[email protected]>
1 parent 33ff7b0 commit 31323f9

File tree

1 file changed

+20
-16
lines changed

1 file changed

+20
-16
lines changed

src/vmm/src/lib.rs

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ pub mod vstate;
109109
use std::collections::HashMap;
110110
use std::io;
111111
use std::os::unix::io::AsRawFd;
112-
use std::sync::mpsc::{RecvTimeoutError, TryRecvError};
112+
use std::sync::mpsc::RecvTimeoutError;
113113
use std::sync::{Arc, Barrier, Mutex};
114114
use std::time::Duration;
115115

@@ -905,23 +905,27 @@ impl MutEventSubscriber for Vmm {
905905
// Exit event handling should never do anything more than call 'self.stop()'.
906906
let _ = self.vcpus_exit_evt.read();
907907

908-
let mut exit_code = None;
909-
// Query each vcpu for their exit_code.
910-
for handle in &self.vcpus_handles {
911-
match handle.response_receiver().try_recv() {
912-
Ok(VcpuResponse::Exited(status)) => {
913-
exit_code = Some(status);
914-
// Just use the first encountered exit-code.
915-
break;
916-
}
917-
Ok(_response) => {} // Don't care about these, we are exiting.
918-
Err(TryRecvError::Empty) => {} // Nothing pending in channel
919-
Err(err) => {
920-
panic!("Error while looking for VCPU exit status: {}", err);
908+
let exit_code = 'exit_code: {
909+
// Query each vcpu for their exit_code.
910+
for handle in &self.vcpus_handles {
911+
// Drain all vcpu responses that are pending from this vcpu until we find an
912+
// exit status.
913+
for response in handle.response_receiver().try_iter() {
914+
if let VcpuResponse::Exited(status) = response {
915+
// It could be that some vcpus exited successfully while others
916+
// errored out. Thus make sure that error exits from one vcpu always
917+
// takes precedence over "ok" exits
918+
if status != FcExitCode::Ok {
919+
break 'exit_code status;
920+
}
921+
}
921922
}
922923
}
923-
}
924-
self.stop(exit_code.unwrap_or(FcExitCode::Ok));
924+
925+
// No CPUs exited with error status code, report "Ok"
926+
FcExitCode::Ok
927+
};
928+
self.stop(exit_code);
925929
} else {
926930
error!("Spurious EventManager event for handler: Vmm");
927931
}

0 commit comments

Comments
 (0)