
Add and fix hyperv modules test #3853

Status: Open. Wants to merge 1 commit into base branch main.
9 changes: 9 additions & 0 deletions lisa/tools/dhclient.py
@@ -42,6 +42,15 @@ def _freebsd_tool(cls) -> Optional[Type[Tool]]:
     def can_install(self) -> bool:
         return False
 
+    def generate_renew_command(self, interface: str = "eth0") -> str:
+        if "dhclient" in self._command:
+            option = "-r"
+        elif "dhcpcd" in self._command:
+            option = "-k"
+        else:
+            raise LisaException(f"Unsupported command: {self._command}")
+        return f"{self._command} {option} {interface}; {self._command} {interface}"
+
     def get_timeout(self) -> int:
         is_default_value: bool = True
         if (

Member (on generate_renew_command): Please also reuse it inside of the dhclient to reduce duplicate code.
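For context, the new helper composes a release-then-renew command from whichever DHCP client the tool resolved to. Below is a minimal standalone sketch of the same composition; it is illustrative only and not part of the diff, with the two client names being the ones the helper supports:

    # Illustrative sketch of what generate_renew_command() produces.
    # dhclient releases a lease with -r; dhcpcd releases it with -k.
    def sketch_renew_command(client: str, interface: str = "eth0") -> str:
        option = {"dhclient": "-r", "dhcpcd": "-k"}[client]
        return f"{client} {option} {interface}; {client} {interface}"

    assert sketch_renew_command("dhclient") == "dhclient -r eth0; dhclient eth0"
    assert sketch_renew_command("dhcpcd") == "dhcpcd -k eth0; dhcpcd eth0"

Releasing before renewing forces a fresh lease for the interface that comes back after the reset, rather than relying on whatever state the client held before.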
53 changes: 28 additions & 25 deletions lisa/tools/modprobe.py
@@ -4,6 +4,7 @@
 
 from lisa.executable import ExecutableResult, Tool
 from lisa.tools.kernel_config import KLDStat
+from lisa.tools import Dhclient
 from lisa.util import UnsupportedOperationException

@@ -22,22 +23,6 @@ def _check_exists(self) -> bool:
     def _initialize(self, *args: Any, **kwargs: Any) -> None:
         self._command = "modprobe"
 

-    # hv_netvsc needs a special case, since reloading it has the potential
-    # to leave the node without a network connection if things go wrong.
-    def _reload_hv_netvsc(self) -> None:
-        # These commands must be sent together, bundle them up as one line
-        # If the VM is disconnected after running below command, wait 60s is enough.
-        # Don't need to wait the default timeout 600s. So set timeout 60.
-        self.node.execute(
-            "modprobe -r hv_netvsc; modprobe hv_netvsc; "
-            "ip link set eth0 down; ip link set eth0 up;"
-            "dhclient -r eth0; dhclient eth0",
-            sudo=True,
-            shell=True,
-            nohup=True,
-            timeout=60,
-        )
-
     def is_module_loaded(
         self,
         mod_name: str,

@@ -126,21 +111,39 @@ def module_exists(self, modules: Union[str, List[str]]) -> bool:
     def reload(
         self,
         mod_names: List[str],
+        times: int = 1,
+        verbose: bool = False,
+        timeout: int = 60,
+        nohup: bool = True,
     ) -> None:
         for mod_name in mod_names:
-            if self.is_module_loaded(mod_name, force_run=True):
-                # hv_netvsc reload requires resetting the network interface
-                if mod_name == "hv_netvsc":
-                    # handle special case
-                    self._reload_hv_netvsc()
-                else:
-                    # execute the command for regular non-network modules
-                    self.node.execute(
-                        f"modprobe -r {mod_name}; modprobe {mod_name};",
-                        sudo=True,
-                        shell=True,
-                    )
+            if verbose:
+                reload_command = f"modprobe -r -v {mod_name}; modprobe -v {mod_name};"
+            else:
+                reload_command = f"modprobe -r {mod_name}; modprobe {mod_name};"
+            if times > 1:
+                reload_command = (
+                    f"for i in $(seq 1 {times}); do " + reload_command + "done; "
+                )
+            # hv_netvsc needs a special case, since reloading it has the potential
+            # to leave the node without a network connection if things go wrong.
+            if mod_name == "hv_netvsc":
+                # These commands must be sent together, so bundle them up as one
+                # line. If the VM is disconnected after running the command below,
+                # waiting 60s is enough; go with a bigger timeout when times > 1
+                # (multiple reloads).
+                renew_command = self.node.tools[Dhclient].generate_renew_command()
+                reload_command = (
+                    reload_command
+                    + "ip link set eth0 down; ip link set eth0 up; "
+                    + renew_command
+                )
+
+            result = self.node.execute(
+                reload_command, sudo=True, nohup=nohup, shell=True, timeout=timeout
+            )
+            return result

     def load_by_file(
         self, file_name: str, ignore_error: bool = False
     ) -> ExecutableResult:
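To make the string-building concrete, here is a hypothetical trace (not part of the diff) of the command reload() assembles for hv_netvsc with times=100 and verbose=True, assuming the Dhclient tool resolves to dhclient:

    # Illustrative only: mirrors the concatenation in reload() above.
    module = "hv_netvsc"
    times = 100
    reload_command = f"modprobe -r -v {module}; modprobe -v {module};"
    reload_command = f"for i in $(seq 1 {times}); do " + reload_command + "done; "
    reload_command += (
        "ip link set eth0 down; ip link set eth0 up; "
        "dhclient -r eth0; dhclient eth0"  # from Dhclient.generate_renew_command()
    )
    # Result (one line):
    # for i in $(seq 1 100); do modprobe -r -v hv_netvsc; modprobe -v hv_netvsc;done; ip link set eth0 down; ip link set eth0 up; dhclient -r eth0; dhclient eth0

Everything must reach the shell in a single execute() call: the network may drop mid-sequence, and a second round-trip to the node would never arrive.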
173 changes: 138 additions & 35 deletions microsoft/testsuites/core/hv_module.py
@@ -1,8 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-from typing import List
-
+from typing import List, Literal
+from enum import Enum
 from assertpy import assert_that
 from semver import VersionInfo
@@ -18,9 +18,21 @@
 from lisa.operating_system import BSD, Redhat
 from lisa.sut_orchestrator import AZURE, HYPERV, READY
 from lisa.sut_orchestrator.azure.platform_ import AzurePlatform
-from lisa.tools import KernelConfig, LisDriver, Lsinitrd, Lsmod, Modinfo, Modprobe
+from lisa.tools import Cat, KernelConfig, LisDriver, Lsinitrd, Lsmod, Modinfo, Modprobe
 from lisa.util import LisaException, SkippedException

+ModulesType = Enum(
+    "ModulesType",
+    [
+        # Modules which don't have "=y" in the kernel config
+        # and therefore are not built into the kernel.
+        "NOT_BUILT_IN",
+        # Modules which have "=m" in the kernel config
+        # and therefore are built as loadable modules.
+        "LOADABLE",
+    ],
+)

Member (on "NOT_BUILT_IN"): It should be
  • BUILT_IN =y
  • MODULE =m
  • NOT_BUILD =n
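For reference, the distinction behind the enum (and the reviewer's =y/=m/=n taxonomy) comes straight from the kernel config. A hypothetical sketch, not LISA's KernelConfig tool, of classifying a symbol by scanning the running kernel's config file:

    # Hypothetical sketch (not LISA's KernelConfig): classify a kernel config
    # symbol as built-in (=y), loadable module (=m), or not built (=n/absent).
    import platform

    def classify_config(symbol: str, path: str = "") -> str:
        path = path or f"/boot/config-{platform.uname().release}"
        with open(path) as f:
            for line in f:
                if line.startswith(f"{symbol}="):
                    value = line.strip().split("=", 1)[1]
                    return {"y": "BUILT_IN", "m": "MODULE"}.get(value, "NOT_BUILT")
        return "NOT_BUILT"

    # classify_config("CONFIG_HYPERV_NET") -> "MODULE" when hv_netvsc is built
    # as a loadable module, the only case a reload test can exercise.

Only =m symbols correspond to modules that modprobe can remove and re-insert at runtime; =y code is part of the kernel image and cannot be reloaded.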


 @TestSuiteMetadata(
     area="core",

@@ -61,7 +73,7 @@ def verify_lis_modules_version(self, node: Node) -> None:
         lis_driver = node.tools[LisDriver]
         lis_version = lis_driver.get_version()
 
-        hv_modules = self._get_not_built_in_modules(node)
+        hv_modules = self._get_modules_by_type(node)
         for module in hv_modules:
             module_version = VersionInfo.parse(modinfo.get_version(module))
             assert_that(module_version).described_as(

@@ -154,7 +166,7 @@ def _get_built_in_modules(self, node: Node) -> List[str]:
     )
     def verify_hyperv_modules(self, log: Logger, environment: Environment) -> None:
         node = environment.nodes[0]
-        hv_modules = self._get_not_built_in_modules(node)
+        hv_modules = self._get_modules_by_type(node)
         distro_version = node.os.information.version
         if len(hv_modules) == 0:
             raise SkippedException(

@@ -206,45 +218,129 @@ def verify_hyperv_modules(self, log: Logger, environment: Environment) -> None:
         ),
     )
     def verify_reload_hyperv_modules(self, log: Logger, node: Node) -> None:
-        # Constants
-        module = "hv_netvsc"
-        loop_count = 100
-
         if isinstance(node.os, Redhat):
             try:
                 log.debug("Checking LIS installation before reload.")
                 node.tools.get(LisDriver)
             except Exception:
                 log.debug("Updating LIS failed. Moving on to attempt reload.")
 
-        if module not in self._get_not_built_in_modules(node):
-            raise SkippedException(
-                f"{module} is loaded statically into the "
-                "kernel and therefore can not be reloaded"
-            )
-
-        result = node.execute(
-            ("for i in $(seq 1 %i); do " % loop_count)
-            + f"modprobe -r -v {module}; modprobe -v {module}; "
-            "done; sleep 1; "
-            "ip link set eth0 down; ip link set eth0 up; dhclient eth0",
-            sudo=True,
-            shell=True,
-        )
-
-        if "is in use" in result.stdout:
-            raise SkippedException(
-                f"Module {module} is in use so it cannot be reloaded"
-            )
-
-        assert_that(result.stdout.count("rmmod")).described_as(
-            f"Expected {module} to be removed {loop_count} times"
-        ).is_equal_to(loop_count)
-        assert_that(result.stdout.count("insmod")).described_as(
-            f"Expected {module} to be inserted {loop_count} times"
-        ).is_equal_to(loop_count)
+        preferred_log_level = 4
+        log_level = int(
+            node.tools[Cat]
+            .read("/proc/sys/kernel/printk", force_run=True, sudo=True)
+            .split()[0]
+        )
+
+        # The 10-minute timeout specified in node.execute is not being honoured,
+        # as the local process in the spur library stops running after
+        # approximately 50 seconds. To mitigate this, we set the log level to 4
+        # for any VM where the log level exceeds 4. This helps reduce excessive
+        # logging during module reloads, which could otherwise cause command
+        # execution to time out due to the loop count being set to 100.

Review thread on the timeout comment above:

Member: Are you asking if the log floods cause the test to run longer, specifically up to 50 seconds? How many lines are printed when running this test case?

Author (kanchansenlaskar): Taking hv_netvsc as an example, we run the following command, which loops for 100 iterations:

    for i in $(seq 1 100); do modprobe -r -v hv_netvsc; modprobe -v hv_netvsc; done; sleep 1; ip link set eth0 down; ip link set eth0 up; dhclient -r eth0; dhclient eth0

I tried running it locally on the VMs without LISA. With log level 4, the execution finishes in around 10 seconds and prints 200 lines. With log level 7, it takes around 90 seconds and prints 200 + 800 lines.

Coming back to LISA: the process is reported as not running after around 50 seconds, even though the execution goes beyond that. This comes from the LocalProcess of the spur library. In the wait_result() method of its process.py there is a while-loop condition:

    while self.is_running() and timeout >= timer.elapsed(False):

The first condition returns False after approximately 50 seconds, so the loop stops waiting for the execution to complete even though the process is actually still running. Hence the 10-minute timeout is not considered either.

Member: I understand the reduced log level. Regarding the timeout, I need more information. Do you mean it happens only in LocalProcess, not SshProcess? If self.is_running() returns False, it means the process has exited. This is by design. How do you know the process is actually running?

Author (kanchansenlaskar, Jun 16, 2025): I tried with SshProcess as well in LISA and faced the same problem. I don't actually know that the process is still running; however, when I ran the same command locally on the same VM with the same log level, it ran without any issues. It only misbehaved when run through the spur library from LISA.

Member: Can you check whether the issue still exists after the move to modprobe? And please share the full log; I can take a look.
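To illustrate the loop the author quotes, here is a simplified sketch, an assumption based only on the quoted condition and not spur's actual code, of why an early False from is_running() ends the wait long before the caller's timeout:

    # Simplified wait_result-style polling loop: if is_running() spuriously
    # reports False, the loop exits immediately and the caller's timeout
    # (e.g. 600s) is never reached.
    import time

    def wait_result_sketch(is_running, timeout: float, poll: float = 1.0) -> None:
        start = time.monotonic()
        while is_running() and (time.monotonic() - start) < timeout:
            time.sleep(poll)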

+        if log_level > preferred_log_level:
+            log.debug(
+                f"Current dmesg log level is {log_level}, "
+                f"setting it to {preferred_log_level} for module reload."
+            )
+            self._set_dmesg_log_level(log, node, preferred_log_level)
+
+        skipped_modules = []
+        failed_modules = {}
+        hv_modules = [
+            "hv_vmbus",
+            "hv_netvsc",
+            "hv_storvsc",
+            "hv_utils",
+            "hv_balloon",
+            "hid_hyperv",
+            "hyperv_keyboard",
+            "hyperv_fb",
+        ]
+        loadable_modules = set(
+            self._get_modules_by_type(node, module_type=ModulesType.LOADABLE)
+        )
+        loop_count = 100
+        try:
+            for module in hv_modules:
+                if module not in loadable_modules:
+                    log.debug(f"{module} is not a reloadable module")
+                    skipped_modules.append(module)
+                    continue
+
+                log.debug(f"Reloading {module} for {loop_count} times")
+                modprobe = node.tools[Modprobe]
+
+                result = modprobe.reload(
+                    mod_names=[module],
+                    times=loop_count,
+                    verbose=True,
+                    timeout=600,
+                    nohup=False,
+                )
+
+                if (
+                    "is in use" in result.stdout
+                    or "Device or resource busy" in result.stdout
+                ):
+                    # If the module is in use, it cannot be reloaded.
+                    log.debug(f"Module {module} is in use so it cannot be reloaded")
+                    skipped_modules.append(module)
+                    continue
+
+                if (
+                    result.stdout.count("rmmod") != loop_count
+                    or result.stdout.count("insmod") != loop_count
+                ):
+                    failure_message = (
+                        f"Module {module} was not reloaded {loop_count} times. "
+                        f"rmmod count: {result.stdout.count('rmmod')}, "
+                        f"insmod count: {result.stdout.count('insmod')}"
+                    )
+                    failed_modules[module] = failure_message
+
+        finally:
+            if log_level > preferred_log_level:
+                log.debug("Restoring dmesg log level to original value")
+                self._set_dmesg_log_level(log, node, log_level)
+
+        if failed_modules:
+            raise AssertionError(
+                "The following modules have reload count mismatch:\n"
+                + ",\n".join(
+                    f"{module}: {msg}" for module, msg in failed_modules.items()
+                )
+            )
+
+        if skipped_modules:
+            raise SkippedException(
+                "The following modules were skipped during"
+                f" reload: {', '.join(skipped_modules)}. "
+                "This may be due to them being built into the kernel or in use."
+            )

+    def _set_dmesg_log_level(
+        self, log: Logger, node: Node, preferred_log_level: int
+    ) -> None:
+        log.debug(f"Setting dmesg log level to {preferred_log_level}")
+        node.execute(f"dmesg -n {preferred_log_level}", sudo=True, shell=True)
+
+        new_log_level = int(
+            node.tools[Cat]
+            .read("/proc/sys/kernel/printk", force_run=True, sudo=True)
+            .split()[0]
+        )
+
+        assert_that(new_log_level).described_as(
+            f"Expected dmesg log level to be set to {preferred_log_level}, "
+            f"but it is {new_log_level}."
+        ).is_equal_to(preferred_log_level)
+
-    def _get_not_built_in_modules(self, node: Node) -> List[str]:
+    def _get_modules_by_type(
+        self,
+        node: Node,
+        module_type: ModulesType = ModulesType.NOT_BUILT_IN,
+    ) -> List[str]:
         """
         Returns the hv_modules that are not directly loaded into the kernel and
         therefore would be expected to show up in lsmod.
@@ -264,12 +360,19 @@ def _get_not_built_in_modules(self, node: Node) -> List[str]:
"hid_hyperv": "CONFIG_HID_HYPERV_MOUSE",
"hv_balloon": "CONFIG_HYPERV_BALLOON",
"hyperv_keyboard": "CONFIG_HYPERV_KEYBOARD",
"hyperv_fb": "CONFIG_FB_HYPERV",
}
modules = []
for module in hv_modules_configuration:
if not node.tools[KernelConfig].is_built_in(
hv_modules_configuration[module]
):
modules.append(module)
if module_type == ModulesType.LOADABLE:
if node.tools[KernelConfig].is_built_as_module(
hv_modules_configuration[module]
):
modules.append(module)
elif module_type == ModulesType.NOT_BUILT_IN:
if not node.tools[KernelConfig].is_built_in(
hv_modules_configuration[module]
):
modules.append(module)

return modules
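As a usage illustration, the two selector modes partition the table differently; the values below are hypothetical and depend entirely on the target image's kernel configuration:

    # Hypothetical example: suppose the image's kernel config contains
    #   CONFIG_HYPERV=y, CONFIG_HYPERV_NET=m, CONFIG_FB_HYPERV=m, ...
    # Then:
    #   _get_modules_by_type(node)  # default: ModulesType.NOT_BUILT_IN
    #     -> every module whose CONFIG_* symbol is not "=y"
    #   _get_modules_by_type(node, module_type=ModulesType.LOADABLE)
    #     -> only modules whose CONFIG_* symbol is "=m",
    #        e.g. ["hv_netvsc", "hyperv_fb"]

The difference matters for the reload test: NOT_BUILT_IN also includes symbols that are absent or "=n" (the module does not exist at all), whereas LOADABLE names exactly the modules modprobe can remove and re-insert.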