Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Multi-host migration] Network subsystem #3987

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions avocado_vt/plugins/vt_bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,15 @@ def configure(self, parser):
"generating the host configuration entry."
),
)
parser.add_argument(
"--vt-cluster-config",
action="store",
metavar="CLUSTER_CONFIG",
help=(
"The cluster config json file to be used when "
"generating the cluster hosts configuration entry."
),
)

def run(self, config):
try:
Expand Down
111 changes: 111 additions & 0 deletions avocado_vt/plugins/vt_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import logging
import os
import sys

from avocado.core import exit_codes
from avocado.core.plugin_interfaces import JobPostTests as Post
from avocado.core.plugin_interfaces import JobPreTests as Pre
from avocado.utils.stacktrace import log_exc_info
from virttest.vt_cluster import cluster, node_metadata
from virttest.vt_imgr import vt_imgr
from virttest.vt_resmgr import resmgr
from virttest.vt_netmgr import vt_netmgr


class ClusterSetupError(Exception):
"""
Represents any error situation when attempting to create a cluster.
"""

pass


class ClusterManagerSetupError(ClusterSetupError):
pass


class ClusterCleanupError(Exception):
pass


class ClusterManagerCleanupError(ClusterCleanupError):
pass


class VTCluster(Pre, Post):

name = "vt-cluster"
description = "Avocado-VT Cluster Pre/Post"

def __init__(self, **kwargs):
self._log = logging.getLogger("avocado.app")

@staticmethod
def _pre_node_setup():
try:
for node in cluster.get_all_nodes():
node.start_agent_server()
node_metadata.load_metadata()
except Exception as err:
raise ClusterSetupError(err)

@staticmethod
def _pre_mgr_setup():
try:
# Pre-setup the cluster manager
resmgr.startup()
vt_imgr.startup()
vt_netmgr.startup()
except Exception as err:
raise ClusterManagerSetupError(err)

@staticmethod
def _post_mgr_cleanup():
try:
# Post-cleanup the cluster manager
vt_netmgr.teardown()
vt_imgr.teardown()
resmgr.teardown()
except Exception as err:
raise ClusterManagerCleanupError(err)

def _post_node_setup(self, job):
cluster_dir = os.path.join(job.logdir, "cluster")
for node in cluster.get_all_nodes():
node_dir = os.path.join(cluster_dir, node.name)
os.makedirs(node_dir)
try:
node.upload_agent_log(node_dir)
except Exception as err:
self._log.warning(err)
finally:
try:
node.stop_agent_server()
except Exception as detail:
err = ClusterCleanupError(detail)
msg = (
f"Failed to stop the agent "
f"server on node '{node.name}': {err}"
)
self._log.warning(msg)
node_metadata.unload_metadata()

def pre_tests(self, job):
if cluster.get_all_nodes():
try:
self._pre_node_setup()
self._pre_mgr_setup()
except Exception as detail:
msg = "Failure trying to set Avocado-VT job env: %s" % detail
self._log.error(msg)
log_exc_info(sys.exc_info(), self._log.name)
sys.exit(exit_codes.AVOCADO_JOB_FAIL | job.exitcode)

def post_tests(self, job):
if cluster.get_all_nodes():
try:
self._post_mgr_cleanup()
except ClusterManagerCleanupError as err:
self._log.warning(err)
finally:
self._post_node_setup(job)
85 changes: 85 additions & 0 deletions avocado_vt/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
version,
)
from virttest._wrappers import load_source
from virttest.vt_cluster import cluster, logger, selector
from virttest.vt_resmgr import resmgr

# avocado-vt no longer needs autotest for the majority of its functionality,
# except by:
Expand Down Expand Up @@ -115,6 +117,10 @@ def __init__(self, **kwargs):
utils_logfile.set_log_file_dir(self.logdir)
self.__status = None
self.__exc_info = None
self._cluster_partition = None
self._logger_server = logger.LoggerServer(
cluster.logger_server_host, cluster.logger_server_port, self.log
)

@property
def params(self):
Expand Down Expand Up @@ -262,6 +268,10 @@ def _runTest(self):

try:
try:
self._init_partition()
self._setup_partition()
self._logger_server.start()
self._start_logger_client()
try:
# Pre-process
try:
Expand Down Expand Up @@ -321,6 +331,14 @@ def _runTest(self):
or params.get("env_cleanup", "no") == "yes"
):
env.destroy() # Force-clean as it can't be stored
self._stop_logger_client()
self._logger_server.stop()
self._clear_partition()
if (
self._safe_env_save(env)
or params.get("env_cleanup", "no") == "yes"
):
env.destroy() # Force-clean as it can't be stored

except Exception as e:
if params.get("abort_on_error") != "yes":
Expand All @@ -345,3 +363,70 @@ def _runTest(self):
raise exceptions.JobError("Abort requested (%s)" % e)

return test_passed

def _init_partition(self):
self._cluster_partition = cluster.create_partition()

def _setup_partition(self):
for node in self.params.objects("nodes"):
node_params = self.params.object_params(node)
node_selectors = node_params.get("node_selectors")
_node = selector.select_node(cluster.free_nodes, node_selectors)
if not _node:
raise selector.SelectorError(
f'No available nodes for "{node}" with "{node_selectors}"'
)
_node.tag = node
self._cluster_partition.add_node(_node)

# Select the pools when the user specifies the pools param
for pool_tag in self.params.objects("pools"):
pool_params = self.params.object_params(pool_tag)
pool_selectors = pool_params.get("pool_selectors")

pools = set(resmgr.get_all_pools()) - set(cluster.partition.pools.values())
pool_id = selector.select_resource_pool(list(pools), pool_selectors)
if not pool_id:
raise selector.SelectorError(
f"No pool selected for {pool_tag} with {pool_selectors}"
)
self._cluster_partition.pools[pool_tag] = pool_id

def _clear_partition(self):
self._cluster_partition.pools.clear()
cluster_dir = os.path.join(self.resultsdir, "cluster")
if self._cluster_partition.nodes:
for node in self._cluster_partition.nodes:
node_dir = os.path.join(cluster_dir, node.tag)
os.makedirs(node_dir)
# node.upload_service_log(node_dir)
node.upload_logs(node_dir)
cluster.clear_partition(self._cluster_partition)
self._cluster_partition = None

def _start_logger_client(self):
if self._cluster_partition.nodes:
for node in self._cluster_partition.nodes:
try:
node.proxy.api.start_logger_client(
cluster.logger_server_host, cluster.logger_server_port
)
except ModuleNotFoundError:
pass

def _stop_logger_client(self):
if self._cluster_partition.nodes:
for node in self._cluster_partition.nodes:
try:
node.proxy.api.stop_logger_client()
except ModuleNotFoundError:
pass

@property
def nodes(self):
return self._cluster_partition.nodes

def get_node(self, node_tag):
for node in self._cluster_partition.nodes:
if node_tag == node.tag:
return node
11 changes: 11 additions & 0 deletions examples/tests/vt_node_test.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
- vt_node_test:
type = vt_node_test
start_vm = no
not_preprocess = yes
nodes = node1 node2 node3
node_selectors_node1 = [{"key": "cpu_vendor_id", "operator": "eq", "values": "AuthenticAMD"},
node_selectors_node1 += {"key": "hostname", "operator": "contains", "values": "redhat.com"}]
node_selectors_node2 = [{"key": "cpu_vendor_id", "operator": "==", "values": "AuthenticAMD"},
node_selectors_node2 += {"key": "hostname", "operator": "contains", "values": "redhat.com"}]
node_selectors_node3 = [{"key": "cpu_vendor_id", "operator": "==", "values": "AuthenticAMD"},
node_selectors_node3 += {"key": "hostname", "operator": "contains", "values": "redhat.com"}]
14 changes: 14 additions & 0 deletions examples/tests/vt_node_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
Simple vt node handling test.

Please put the configuration file vt_node_test.cfg into $tests/cfg/ directory.

"""


def run(test, params, env):
for node in test.nodes:
test.log.info("========Start test on %s========", node.name)
node.proxy.unittest.hello.say()
node.proxy.unittest.testcase.vm.boot_up()
test.log.info("========End test========")
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def run(self):
],
"avocado.plugins.result_events": [
"vt-joblock = avocado_vt.plugins.vt_joblock:VTJobLock",
"vt-cluster = avocado_vt.plugins.vt_cluster:VTCluster",
],
"avocado.plugins.init": [
"vt-init = avocado_vt.plugins.vt_init:VtInit",
Expand Down
49 changes: 49 additions & 0 deletions virttest/bootstrap.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import glob
import json
import logging
import os
import re
Expand All @@ -10,6 +11,9 @@
from avocado.utils import path as utils_path
from avocado.utils import process

from virttest.vt_cluster import cluster, node
from virttest.vt_resmgr import resmgr

from . import arch, asset, cartesian_config, data_dir, defaults, utils_selinux
from .compat import get_opt

Expand Down Expand Up @@ -875,6 +879,38 @@ def verify_selinux(datadir, imagesdir, isosdir, tmpdir, interactive, selinux=Fal
LOG.info("Corrected contexts on %d files/dirs", len(changes))


def _load_cluster_config(cluster_config):
"""Load the cluster config"""
with open(cluster_config, "r") as config:
return json.load(config)


def _register_hosts(hosts_configs):
"""Register the configs of the hosts into the cluster."""
if hosts_configs:
cluster.cleanup_env()
for host, host_params in hosts_configs.items():
_node = node.Node(host_params, host)
_node.setup_agent_env()
cluster.register_node(_node.name, _node)
LOG.debug("Host %s registered", host)


def _initialize_managers(pools_params):
resmgr.initialize(pools_params)


def _config_master_server(master_config):
"""Configure the master server."""
if master_config:
logger_server_host = master_config.get("logger_server_host")
if logger_server_host:
cluster.assign_logger_server_host(logger_server_host)
logger_server_port = master_config.get("logger_server_port")
if logger_server_port:
cluster.assign_logger_server_port(logger_server_port)


def bootstrap(options, interactive=False):
"""
Common virt test assistant module.
Expand Down Expand Up @@ -1042,6 +1078,19 @@ def bootstrap(options, interactive=False):
else:
LOG.debug("Module %s loaded", module)

# Setup the cluster environment.
vt_cluster_config = get_opt(options, "vt_cluster_config")
if vt_cluster_config:
LOG.info("")
step += 1
LOG.info(
"%d - Setting up the cluster environment via %s", step, vt_cluster_config
)
cluster_config = _load_cluster_config(vt_cluster_config)
_register_hosts(cluster_config.get("hosts"))
_config_master_server(cluster_config.get("master"))
_initialize_managers(cluster_config.get("pools"))

LOG.info("")
LOG.info("VT-BOOTSTRAP FINISHED")
LOG.debug("You may take a look at the following online docs for more info:")
Expand Down
2 changes: 1 addition & 1 deletion virttest/data_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def get_tmp_dir(public=True):
:param public: If public for all users' access
"""
persistent_dir = get_settings_value("vt.common", "tmp_dir", default="")
if persistent_dir != "":
if persistent_dir is not None:
return persistent_dir
tmp_dir = None
# apparmor deny /tmp/* /var/tmp/* and cause failure across tests
Expand Down
Loading
Loading