-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 8 changed files with 48 additions and 60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 0 additions & 1 deletion
1
tests/functional/requirments.txt → tests/functional/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1 @@ | ||
tenacity | ||
pyyaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,66 +1,78 @@ | ||
import json | ||
import subprocess | ||
import urllib.request | ||
from time import sleep | ||
|
||
from tenacity import retry, stop_after_delay, wait_fixed | ||
import pytest | ||
from tenacity import retry, stop_after_delay, wait_fixed, Retrying | ||
|
||
|
||
@retry(wait=wait_fixed(5), stop=stop_after_delay(30)) | ||
def test_dcgm_exporter(): | ||
"""Test of the dcgm-exporter service and its endpoint.""" | ||
dcgm_exporter_service = "snap.dcgm.dcgm-exporter" | ||
endpoint = "http://localhost:9400/metrics" | ||
|
||
assert 0 == subprocess.call( | ||
f"sudo systemctl is-active --quiet {dcgm_exporter_service}".split() | ||
), f"{dcgm_exporter_service} is not running" | ||
|
||
# Check the exporter endpoint, will raise an exception if the endpoint is not reachable | ||
urllib.request.urlopen(endpoint) | ||
response = urllib.request.urlopen(endpoint) | ||
|
||
# The output of the exporter endpoint is not tested | ||
# as in a virtual environment it will not have any GPU metrics | ||
assert 200 == response.getcode(), "DCGM exporter endpoint returned an error" | ||
|
||
|
||
def test_dcgm_nv_hostengine(): | ||
"""Check the dcgm-nv-hostengine service.""" | ||
nv_hostengine_service = "dcgm.nv-hostengine" | ||
nv_hostengine_service = "snap.dcgm.nv-hostengine" | ||
nv_hostengine_port = 5555 | ||
|
||
service = subprocess.run( | ||
f"snap services {nv_hostengine_service}".split(), | ||
check=True, | ||
capture_output=True, | ||
text=True, | ||
) | ||
assert 0 == subprocess.call( | ||
f"sudo systemctl is-active --quiet {nv_hostengine_service}".split() | ||
), f"{nv_hostengine_service} is not running" | ||
|
||
assert " active" in service.stdout.strip(), f"{nv_hostengine_service} service is not active" | ||
assert 0 == subprocess.call( | ||
f"nc -z localhost {nv_hostengine_port}".split() | ||
), f"{nv_hostengine_service} is not listening on port {nv_hostengine_port}" | ||
|
||
|
||
def test_dcgmi(): | ||
"""Test of the dcgmi command.""" | ||
result = subprocess.run( | ||
"dcgm.dcgmi discovery -l".split(), check=True, capture_output=True, text=True | ||
) | ||
assert "GPU ID" in result.stdout.strip(), "DCGMI is not working" | ||
|
||
# Test if the command is working and outputs a table with the GPU ID | ||
# The table will be empty in a virtual environment, but the command should still work | ||
assert "GPU ID" in result.stdout.strip(), "DCGMI didn't produce the expected table" | ||
|
||
def test_dcgm_bind_configs(): | ||
"""Test snap port configuratin.""" | ||
services = ["dcgm.dcgm-exporter", "dcgm.nv-hostengine"] | ||
configs = ["dcgm-exporter-address", "nv-hostengine-port"] | ||
new_values = [":9466", "5666"] | ||
|
||
bind_test_data = [ | ||
("dcgm.dcgm-exporter", "dcgm-exporter-address", ":9466"), | ||
("dcgm.nv-hostengine", "nv-hostengine-port", "5566"), | ||
] | ||
|
||
|
||
@pytest.mark.parametrize("service, config, new_value", bind_test_data) | ||
def test_dcgm_bind_config(service: str, config: str, new_value: str): | ||
"""Test snap bind configuration.""" | ||
result = subprocess.run( | ||
"sudo snap get dcgm -d".split(), check=True, capture_output=True, text=True | ||
) | ||
dcgm_snap_config = json.loads(result.stdout.strip()) | ||
assert all(config in dcgm_snap_config for config in configs), "Missing snap configuration keys" | ||
assert config in dcgm_snap_config, f"{config} is not in the snap configuration" | ||
|
||
for config, new_value in zip(configs, new_values): | ||
subprocess.run( | ||
f"sudo snap set dcgm {config}={new_value}".split(), check=True | ||
), f"Failed to set {config}" | ||
assert 0 == subprocess.call( | ||
f"sudo snap set dcgm {config}={new_value}".split() | ||
), f"Failed to set {config} to {new_value}" | ||
|
||
# restart the service to apply the new configuration | ||
for service in services: | ||
subprocess.run(f"sudo snap restart {service}".split(), check=True) | ||
|
||
sleep(5) | ||
subprocess.run(f"sudo snap restart {service}".split(), check=True) | ||
|
||
for service, port in zip(services, new_values): | ||
subprocess.run( | ||
f"sudo lsof -i :{port.lstrip(':')}".split(), check=True | ||
), f"{service} port is not listening" | ||
for attempt in Retrying(wait=wait_fixed(2), stop=stop_after_delay(10)): | ||
with attempt: | ||
assert 0 == subprocess.call( | ||
f"nc -z localhost {new_value.lstrip(':')}".split() | ||
), f"{service} is not listening on {new_value}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters