Skip to content

Commit

Permalink
Improve MPI hook
Browse files Browse the repository at this point in the history
  • Loading branch information
michele-brambilla committed Oct 2, 2023
1 parent 0193d0b commit a2b256d
Show file tree
Hide file tree
Showing 14 changed files with 470 additions and 428 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [Unreleased]

### Added

- MPI Hook: added support for the `com.hooks.mpi.mount_dir_parent` OCI annotation.
This annotation can be used to customize the parent directory of the folder where the hook adds symlinks and mounts for dependency libraries which don't have a suitable counterpart in the container.

### Changed

- The configuration files for the SSH hook and the Slurm sync hook are no longer generated automatically as part of the CMake installation process.
In other words, the aforementioned hooks are no longer configured and enabled by default.
- MPI Hook: dependency libraries and symlinks added by the hook in the container are now mounted or created in a dedicated directory.
Previously, the hook made mounts and created multiple duplicate links in several system directories. More details [here] (https://sarus.readthedocs.io/en/stable/config/mpi-hook.html#hook-configuration)
- Updated recommended runc version to 1.1.9

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion CI/src/integration_tests/test_device_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def test_no_additional_permissions(self):
out = util.run_command_in_container(is_centralized_repository=False,
image=self.CONTAINER_IMAGE,
command=container_args,
options_of_run_command=sarus_options)
options_of_run_command=sarus_options, timeout=15)
assert out[0] == device_path

# Restore full device access in host cgroup
Expand Down
21 changes: 20 additions & 1 deletion CI/src/integration_tests/test_mpi_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,19 @@
import common.util as util


def assert_annotation_overrides_default_mount_dir(image_tag, expected_libs, parent_mount_path):
try:
result = util.run_command_in_container(is_centralized_repository=False,
image="quay.io/ethcscs/sarus-integration-tests:" + image_tag,
command=["ls", parent_mount_path + "/mpi_hook"],
options_of_run_command=[
"--mpi", "--annotation=com.hooks.mpi.mount_dir_parent=" + parent_mount_path
])
assert (expected_libs == set(result))
except subprocess.CalledProcessError as _:
assert (False)


class TestMPIHook(unittest.TestCase):
"""
These tests verify that the host MPI libraries are properly brought into the container.
Expand Down Expand Up @@ -101,7 +114,6 @@ def _generate_hook_config(cls):
},
"stages": ["prestart"]
}

return hook_config

def setUp(self):
Expand Down Expand Up @@ -170,6 +182,13 @@ def _assert_sarus_raises_mpi_warning_containing_text(self, text, expected_occurr
number_of_occurrences = sum(["[WARN]" in line and text in line for line in output.split('\n')])
assert number_of_occurrences == expected_occurrences, 'Sarus didn\'t generate the expected MPI warnings containing the text "{}".'.format(text)

def test_cli_annotation_overrides_default_mount_path(self):
parent_mount_path = "/opt/xyz"
for image_tag, libs in zip(["mpich_compatible", "mpich_compatible_symlink"],
[{'libdependency0.so', 'libdependency1.so', 'libmpi.so', 'libmpi.so.12', 'libmpich.so', 'libmpich.so.12'},
{'libdependency0.so', 'libdependency1.so', 'libmpi.so', 'libmpi.so.12', 'libmpich.so', 'libmpich.so.12', 'libmpi.so.12.5.5'}]):
assert_annotation_overrides_default_mount_dir(image_tag, libs, parent_mount_path)


@pytest.mark.asroot
class TestMPIHookDevices(unittest.TestCase):
Expand Down
21 changes: 19 additions & 2 deletions doc/config/mpi-hook.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,25 @@ arguments, but its actions are controlled through a few environment variables:
schema.

* ``MPI_DEPENDENCY_LIBS``: Colon separated list of absolute paths to
libraries that are dependencies of the ``MPI_LIBS``. These libraries
are always bind mounted in the container under ``/usr/lib``.
libraries that are dependencies of the ``MPI_LIBS``. The hook attempts to
overlay these libraries onto corresponding libraries within the container.
However, if the version compatibility check fails due to differences in minor
versions or the inability to validate compatibility, the libraries are
mounted to an alternate location to prevent conflicts.
By default, the path of this directory in the container is ``/opt/mpi_hook``;
this location is also used for other files and symlinks which the hook may
need to create in the container.
By making use of the annotation ``com.hooks.mpi.mount_dir_parent=<libraries parent directory>``,
the user can customize the parent directory of the designated mount folder for the
libraries which don't have a suitable counterpart in the container,
for example:

.. code-block:: bash
$ sarus run --mpi --annotation=com.hooks.mpi.mount_dir_parent=<libraries parent directory> <repo name>/<image name>
When using the annotation as in the previous example, the hook constructs
the path for the mounts as ``<libraries parent directory>/mpi_hook/``.

* ``BIND_MOUNTS``: Colon separated list of absolute paths to generic
files or directories that are required for the correct functionality of the
Expand Down
19 changes: 19 additions & 0 deletions doc/user/user_guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1188,6 +1188,25 @@ enabled. The ``(default)`` qualifier alongside an MPI type indicates the
default MPI hook for the Sarus installation, which can be enabled just with
the ``sarus run --mpi`` option.
.. note::
The MPI hook attempts to overlay dependency libraries (not the MPI libraries
themselves, which are always substituted in-place) onto existing
corresponding libraries within the container.
However, if the version compatibility check fails due to differences
in minor versions or the inability to validate compatibility, the libraries
will be mounted to an alternate location to prevent conflicts.
By making use of the annotation ``com.hooks.mpi.mount_dir_parent=<libraries parent directory>``
the user can configure the parent directory of the designated mount folder
for the libraries, for example:
.. code-block:: bash
$ sarus run --mpi --annotation=com.hooks.mpi.mount_dir_parent=<libraries parent directory> <repo name>/<image name>
When using the annotation as in the previous example, the hook constructs
the path for the mounts as ``<libraries parent directory>/mpi_hook/``.
.. _user-nvidia-hook:
NVIDIA GPU support
Expand Down
6 changes: 6 additions & 0 deletions src/common/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <limits.h>
#include <fcntl.h>

#include <boost/filesystem.hpp>
#include <boost/regex.hpp>
#include <boost/algorithm/string.hpp>
#include <rapidjson/istreamwrapper.h>
Expand Down Expand Up @@ -787,6 +788,11 @@ std::vector<boost::filesystem::path> getSharedLibsFromDynamicLinker(
bool isSharedLib(const boost::filesystem::path& file) {
auto filename = file.filename().string();

// check that is not a directory, e.g. /etc/ld.so.conf.d
if(boost::filesystem::is_directory(file)){
return false;
}

// check that extension doesn't end with '.conf', e.g. /etc/ld.so.conf
if(boost::ends_with(filename, ".conf")) {
return false;
Expand Down
119 changes: 71 additions & 48 deletions src/hooks/mpi/MpiHook.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unordered_set>

#include <boost/format.hpp>
#include <boost/algorithm/string.hpp>
Expand Down Expand Up @@ -59,6 +60,15 @@ MpiHook::MpiHook() {
log("Successfully initialized hook", sarus::common::LogLevel::INFO);
}

static void updateLdconfigPath(const std::unordered_set<std::string>& uniquePaths, const boost::filesystem::path& ldconfigFile) {
sarus::common::createFileIfNecessary(ldconfigFile.string());
std::ofstream of{ldconfigFile.string()};
for(auto& path: uniquePaths) {
sarus::common::Logger::getInstance().log(boost::format("Adding %s to %s") % path % ldconfigFile.string(), "MPI Hook", sarus::common::LogLevel::DEBUG);
of << path << std::endl;
}
}

void MpiHook::activateMpiSupport() {
log("Activating MPI support", sarus::common::LogLevel::INFO);

Expand All @@ -75,6 +85,9 @@ void MpiHook::activateMpiSupport() {
injectHostLibraries(hostMpiLibs, hostToContainerMpiLibs);
injectHostLibraries(hostDepLibs, hostToContainerDependencyLibs);
performBindMounts();
updateLdconfigPath({hookMountRoot.string()}, rootfsDir / "/etc/ld.so.conf.d/mpi_hook.conf");

log("Execute LDCONFIG", sarus::common::LogLevel::DEBUG);
sarus::common::executeCommand(ldconfig.string() + " -r " + rootfsDir.string()); // update container's dynamic linker

log("Successfully activated MPI support", sarus::common::LogLevel::INFO);
Expand All @@ -99,6 +112,14 @@ void MpiHook::parseConfigJSONOfBundle() {
gid_t gidOfUser = json["process"]["user"]["gid"].GetInt();
userIdentity = sarus::common::UserIdentity(uidOfUser, gidOfUser, {});

if(json.HasMember("annotations")) {
if(json["annotations"].HasMember("com.hooks.mpi.mount_dir_parent")) {
hookMountRoot = boost::filesystem::path(json["annotations"]["com.hooks.mpi.mount_dir_parent"].GetString()) / "mpi_hook";
auto message = boost::format("Mount folder for mpi_hook is set to %s") % hookMountRoot.string();
log(message, sarus::common::LogLevel::INFO);
}
}

log("Successfully parsed bundle's config.json", sarus::common::LogLevel::INFO);
}

Expand Down Expand Up @@ -225,15 +246,14 @@ void MpiHook::checkHostContainerAbiCompatibility(const HostToContainerLibsMap& h
% hostLib.getRealName()
% containerLib.getRealName();
log(message, sarus::common::LogLevel::WARN);
continue;
}
else {
auto message = boost::format(
"Failed to activate MPI support. Host's MPI library %s is not ABI"
" compatible with container's MPI library %s.")
% hostLib.getRealName()
% containerLib.getRealName();
SARUS_THROW_ERROR(message.str());
}
auto message = boost::format(
"Failed to activate MPI support. Host's MPI library %s is not ABI"
" compatible with container's MPI library %s.")
% hostLib.getRealName()
% containerLib.getRealName();
SARUS_THROW_ERROR(message.str());
}
}

Expand All @@ -258,8 +278,8 @@ void MpiHook::injectHostLibrary(const SharedLibrary& hostLib,

const auto it = hostToContainerLibs.find(hostLib.getPath());
if (it == hostToContainerLibs.cend()) {
log(boost::format{"no corresponding libs in container => bind mount (%s) into /lib"} % hostLib.getPath(), sarus::common::LogLevel::DEBUG);
auto containerLib = "/lib" / hostLib.getPath().filename();
log(boost::format{"no corresponding libs in container => bind mount (%s) into %s"} % hostLib.getPath() % hookMountRoot.string(), sarus::common::LogLevel::DEBUG);
auto containerLib = hookMountRoot / hostLib.getPath().filename();
sarus::runtime::validatedBindMount(hostLib.getPath(), containerLib, userIdentity, rootfsDir);
createSymlinksInDynamicLinkerDefaultSearchDirs(containerLib, hostLib.getPath().filename(), false);
return;
Expand All @@ -278,17 +298,16 @@ void MpiHook::injectHostLibrary(const SharedLibrary& hostLib,
}
else if (bestCandidateLib.isMajorAbiCompatible(hostLib)){
// risky replacement, issue warning.
log(boost::format{"WARNING: container lib (%s) is major-only-abi-compatible => bind mount host lib (%s) into /lib"} % bestCandidateLib.getPath() % hostLib.getPath(), sarus::common::LogLevel::DEBUG);
auto containerLib = "/lib" / hostLib.getPath().filename();
sarus::runtime::validatedBindMount(hostLib.getPath(), containerLib, userIdentity, rootfsDir);
createSymlinksInDynamicLinkerDefaultSearchDirs(containerLib, hostLib.getPath().filename(), containerHasLibsWithIncompatibleVersion);
log(boost::format{"WARNING: container lib (%s) is major-only-abi-compatible"} % bestCandidateLib.getPath(), sarus::common::LogLevel::DEBUG);
sarus::runtime::validatedBindMount(hostLib.getPath(), bestCandidateLib.getPath(), userIdentity, rootfsDir);
createSymlinksInDynamicLinkerDefaultSearchDirs(bestCandidateLib.getPath(), hostLib.getPath().filename(), containerHasLibsWithIncompatibleVersion);
}
else {
// inject with warning
// NOTE: This branch is only for MPI dependency libraries. MPI libraries compatibility was already checked before at checkHostContainerAbiCompatibility. Hint for future refactoring.
log(boost::format{"WARNING: could not find ABI-compatible counterpart for host lib (%s) inside container (best candidate found: %s) => adding host lib (%s) into container's /lib via bind mount "}
% hostLib.getPath() % bestCandidateLib.getPath() % hostLib.getPath(), sarus::common::LogLevel::WARN);
auto containerLib = "/lib" / hostLib.getPath().filename();
log(boost::format{"WARNING: could not find ABI-compatible counterpart for host lib (%s) inside container (best candidate found: %s) => adding host lib (%s) into container's %s via bind mount "}
% hostLib.getPath() % bestCandidateLib.getPath() % hostLib.getPath() % hookMountRoot.string() , sarus::common::LogLevel::WARN);
auto containerLib = hookMountRoot / hostLib.getPath().filename();
sarus::runtime::validatedBindMount(hostLib.getPath(), containerLib, userIdentity, rootfsDir);
createSymlinksInDynamicLinkerDefaultSearchDirs(containerLib, hostLib.getPath().filename(), true);
}
Expand Down Expand Up @@ -327,10 +346,18 @@ void MpiHook::performBindMounts() const {
log("Successfully performed bind mounts", sarus::common::LogLevel::INFO);
}

boost::optional<std::string> parseSharedLibAbiToSoname(const boost::filesystem::path& linkFilename) {
auto versionNumbers = sarus::common::parseSharedLibAbi(linkFilename);
if (versionNumbers.size() == 0) {
return boost::none;
}
return versionNumbers[0];
}

void MpiHook::createSymlinksInDynamicLinkerDefaultSearchDirs(const boost::filesystem::path& target,
const boost::filesystem::path& linkFilename,
const bool preserveRootLink) const {
// Generate symlinks to the library in the container's /lib and /lib64, to make sure that:
// Generate symlinks to the library in the hook mount path, to make sure that:
//
// 1. ldconfig will find the library in the container, because the symlink will be in
// one of ldconfig's default search directories.
Expand All @@ -346,10 +373,6 @@ void MpiHook::createSymlinksInDynamicLinkerDefaultSearchDirs(const boost::filesy
// ld.so from dynamically linking MPI applications to libmpi.so.12, if libmpi.so.12 is not
// in one of the ld.so's default search paths.
//
// Some ldconfig/ld.so versions/builds only search in the default directories /lib or /lib64.
// So, let's create symlinks to the library in both /lib and /lib64 to make sure that they
// will be found.
//
// preserveRootLink:
// As explained above, this method helps you create also a chain of symlinks that go from your library version
// up to the root linkername link. (e.g. you inject libfoo.so.4.1 and you end up with links libfoo.so.4 and libfoo.so).
Expand All @@ -360,8 +383,10 @@ void MpiHook::createSymlinksInDynamicLinkerDefaultSearchDirs(const boost::filesy
// not the linker names, to avoid breaking the injected library for the same reason stated above.
auto libName = sarus::common::getSharedLibLinkerName(linkFilename);
auto linkNames = std::vector<std::string> { libName.string() };
for(const auto& versionNumber : sarus::common::parseSharedLibAbi(linkFilename)) {
linkNames.push_back(linkNames.back() + "." + versionNumber);

auto versionNumber = parseSharedLibAbiToSoname(linkFilename);
if(versionNumber) {
linkNames.push_back(linkNames.back() + "." + versionNumber.get());
}

// Preserve root links (when requested)
Expand All @@ -378,34 +403,32 @@ void MpiHook::createSymlinksInDynamicLinkerDefaultSearchDirs(const boost::filesy
}
}

// Let's create symlinks in /lib and /lib64
auto linkerDefaultSearchDirs = std::vector<boost::filesystem::path> {"/lib", "/lib64"};
for (const auto& dir: linkerDefaultSearchDirs) {
auto searchDir = rootfsDir / dir;
sarus::common::createFoldersIfNecessary(searchDir);
auto searchDir = rootfsDir / hookMountRoot;
sarus::common::createFoldersIfNecessary(searchDir);

// prevent writing as root where we are not allowed to
if (!sarus::runtime::isPathOnAllowedDevice(searchDir, rootfsDir)) {
log(boost::format("The hook is not allowed to write to %s. Ignoring symlinks creation in this path.") % searchDir, sarus::common::LogLevel::WARN);
// prevent writing as root where we are not allowed to
if (!sarus::runtime::isPathOnAllowedDevice(searchDir, rootfsDir)) {
log(boost::format("The hook is not allowed to write to %s. Ignoring symlinks creation in this path.") % searchDir, sarus::common::LogLevel::WARN);
}

for (const auto& linkName : linkNames) {
if(boost::filesystem::path{linkName}.filename() == target.filename()) {
continue;
}

for (const auto& linkName : linkNames) {
auto realLink = sarus::common::realpathWithinRootfs(rootfsDir, dir / linkName);
auto realTarget = sarus::common::realpathWithinRootfs(rootfsDir, target);
bool linkIsTarget = (realLink == realTarget);
bool preserveLink = (linkName == libName && preserveRootLink && rootLinkExists);
if (linkIsTarget || preserveLink) {
continue;
}

auto link = searchDir / linkName;
boost::filesystem::remove(link);
boost::filesystem::create_symlink(target, link);

auto message = boost::format("Created symlink in container %s -> %s") % link % target;
log(message, sarus::common::LogLevel::DEBUG);
auto realLink = sarus::common::realpathWithinRootfs(rootfsDir, hookMountRoot / linkName);
auto realTarget = sarus::common::realpathWithinRootfs(rootfsDir, target);
bool linkIsTarget = (realLink == realTarget);
bool preserveLink = (linkName == libName && preserveRootLink && rootLinkExists);
if (linkIsTarget || preserveLink) {
continue;
}

auto link = searchDir / linkName;
boost::filesystem::remove(link);
boost::filesystem::create_symlink(target, link);

auto message = boost::format("Created symlink in container %s -> %s") % link % target;
log(message, sarus::common::LogLevel::DEBUG);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/hooks/mpi/MpiHook.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class MpiHook {
private:
boost::filesystem::path bundleDir;
boost::filesystem::path rootfsDir;
boost::filesystem::path hookMountRoot{"/opt/mpi_hook"};
pid_t pidOfContainer;
sarus::common::UserIdentity userIdentity;
boost::filesystem::path ldconfig;
Expand Down
Loading

0 comments on commit a2b256d

Please sign in to comment.