Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overlay handling with fuse-overlayfs #1062

Merged
merged 36 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
be28a5f
feat: Implement FUSE-based overlay mount for containerexec and runexec
Jul 2, 2024
0facbcf
feat: Support FUSE-based overlay mount for benchexec
Jul 2, 2024
85f02ca
Only if the kernel overlay fails, try using fuse-overlayfs
Jul 7, 2024
69e929e
refactor: added user-visible messages and some refactoring
younghojan Jul 10, 2024
9818716
feat: Clear ambient capabilities in drop_capabilities() and add const…
Aug 4, 2024
3f69c41
Merge branch 'main' into gsoc-overlay-handling-with-fuse-overlayfs-dev
younghojan Aug 4, 2024
4ee99a3
Merge branch 'main' into gsoc-overlay-handling-with-fuse-overlayfs-dev
PhilippWendler Aug 6, 2024
a699b2f
chore: Fix bug in cap_permitted_to_ambient function
Aug 11, 2024
8006c20
fix: Use single fusermount for all fuse-based overlays, and avoid mix…
Aug 11, 2024
195e4d0
chore: Add functions and extracted some code into functions, add comm…
Aug 13, 2024
00f9cb8
chore: Refactor some functions related to fuse-based overlay mounts a…
Aug 13, 2024
328aad4
chore: Refactor functions related to fuse-based overlay mounts and im…
Aug 13, 2024
de86749
chore: Refactor functions related to fuse-based overlay mounts and im…
Aug 14, 2024
a308c46
chore: Replace f-string in logging.debug with %s formatting
Aug 15, 2024
b941329
Add fuse-overlayfs to our recommended dependencies
PhilippWendler Aug 16, 2024
2529120
Update documentation on kernel overlayfs vs. fuse-overlayfs
PhilippWendler Aug 16, 2024
dde34ea
test: Add tests for checking fuse-overlayfs functionality and triple-…
younghojan Aug 17, 2024
b1a02d6
fix: Specify stdin=subprocess.DEVNULL when launching the fuse-overlay…
Aug 22, 2024
1f6d696
feat: Check if fuse-overlayfs meets the minimum version requirement, …
Aug 26, 2024
ba6bb91
Merge branch 'main' into gsoc-overlay-handling-with-fuse-overlayfs-dev
PhilippWendler Aug 26, 2024
dc482b2
fix: fix issue of checking for fuse-overlayfs functionality outside o…
Aug 28, 2024
e0aec8c
chore: Refactor and improve test_triple_nested_runexec
Aug 29, 2024
e0833b3
chore: Refactor fuse-overlayfs setup and error handling
Aug 29, 2024
147b4e2
Merge 'main' into gsoc-overlay-handling-with-fuse-overlayfs-dev
PhilippWendler Sep 2, 2024
b63db00
Refactor and improve fuse-overlay related tests
Sep 2, 2024
38a0508
Omit test_triple_nested_runexec when coverage testing
Sep 4, 2024
5d2a349
Refactor COV_CORE_SOURCE environment variable handling
Sep 4, 2024
a8a3516
Safely encode string for fuse-overlayfs paths
Sep 5, 2024
34f57f1
Refactor determine_directory_mode function for fuse-overlayfs compati…
Sep 5, 2024
2fd26ff
Refactor file handling in test_runexecutor.py for better readability
Sep 5, 2024
88db419
Refactor overlay mount error handling for better compatibility
Sep 5, 2024
2f9d52e
Fix typo
Sep 15, 2024
1c49af2
Refactor handling of COV_CORE_SOURCE environment variable in TestRunE…
Sep 15, 2024
ea92000
Change internal paths used for fuse-overlayfs mounts
PhilippWendler Sep 19, 2024
ec11b7f
Add logging about why fuse-overlayfs is used
PhilippWendler Sep 19, 2024
33249f1
Detect and error out if temp is not hidden and we use fuse-overlayfs
PhilippWendler Sep 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
304 changes: 268 additions & 36 deletions benchexec/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@
import fcntl
import logging
import os
import re
import resource # noqa: F401 @UnusedImport necessary to eagerly import this module
import shlex
import shutil
import signal
import socket
import struct
import sys
import subprocess

from benchexec import libc
from benchexec import seccomp
Expand Down Expand Up @@ -475,6 +478,14 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes):

overlay_count = 0

# Check if we need to use fuse-overlayfs for all overlay mounts.
use_fuse = check_use_fuse_overlayfs(mount_base, dir_modes)

# Create overlay mounts for all mount points.
fuse_overlay_mount_path = (
setup_fuse_overlay(temp_base, work_base) if use_fuse else None
)

for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()):
if not util.path_is_below(full_mountpoint, mount_base):
continue
Expand Down Expand Up @@ -529,33 +540,76 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes):
temp_path = temp_base + mountpoint

if mode == DIR_OVERLAY:
overlay_count += 1
work_path = work_base + b"/" + str(overlay_count).encode()
os.makedirs(temp_path, exist_ok=True)
os.makedirs(work_path, exist_ok=True)
try:
# Previous mount in this place not needed if replaced with overlay dir.
libc.umount(mount_path)
except OSError as e:
logging.debug(e)
try:
make_overlay_mount(mount_path, mountpoint, temp_path, work_path)
except OSError as e:
mp = mountpoint.decode()
raise OSError(
e.errno,
f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. "
f"Please use other directory modes, "
f"for example '--read-only-dir {shlex.quote(mp)}'.",
)
if os.path.ismount(mount_path):
try:
# Previous mount in this place not needed if replaced with overlay dir.
libc.umount(mount_path)
except OSError as e:
logging.debug(e)

if use_fuse and fuse_overlay_mount_path:
fuse_mount_path = fuse_overlay_mount_path + mountpoint
make_bind_mount(fuse_mount_path, mount_path)
else:
overlay_count += 1
os.makedirs(temp_path, exist_ok=True)
work_path = work_base + b"/" + str(overlay_count).encode()
os.makedirs(work_path, exist_ok=True)
try:
make_overlay_mount(mount_path, mountpoint, temp_path, work_path)
except OSError as e:
# Resort to fuse-overlayfs if kernel overlayfs is not available.
PhilippWendler marked this conversation as resolved.
Show resolved Hide resolved
PhilippWendler marked this conversation as resolved.
Show resolved Hide resolved
# This part of the code (using fuse-overlayfs as a fallback) is intentionally
# kept as a workaround for triple-nested execution with kernel overlayfs.
PhilippWendler marked this conversation as resolved.
Show resolved Hide resolved
mp = mountpoint.decode()
PhilippWendler marked this conversation as resolved.
Show resolved Hide resolved
if fuse_overlay_mount_path:
logging.debug(
"Fallback to fuse-overlayfs for overlay mount at '%s'.",
mp,
)
fuse_mount_path = fuse_overlay_mount_path + mountpoint
make_bind_mount(fuse_mount_path, mount_path)
else:
if use_fuse:
# We already tried to set up fuse-overlayfs before, but it failed.
# No need to try again, just report the error.
raise OSError(
e.errno,
f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}, "
f"Please either install fuse-overlayfs in at least version 1.10, "
f"or use a different directory mode such as '--read-only-dir {shlex.quote(mp)}'.",
) from e
fuse_overlay_mount_path = setup_fuse_overlay(
temp_base, work_base
)
if fuse_overlay_mount_path:
logging.debug(
"Fallback to fuse-overlayfs for overlay mount at '%s'.",
mp,
)
fuse_mount_path = fuse_overlay_mount_path + mountpoint
make_bind_mount(fuse_mount_path, mount_path)
elif os.getenv("container") == "podman" or os.path.exists(
"/run/.containerenv"
):
# benchexec running in a container without /dev/fuse
raise OSError(
e.errno,
f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. "
f"Looks like you are running in a container, "
f"please either launch the container with --device /dev/fuse "
f"or use a different directory mode, "
f"such as '--read-only-dir {shlex.quote(mp)}'.",
) from e

elif mode == DIR_HIDDEN:
os.makedirs(temp_path, exist_ok=True)
try:
# Previous mount in this place not needed if replaced with hidden dir.
libc.umount(mount_path)
except OSError as e:
logging.debug(e)
if os.path.ismount(mount_path):
try:
# Previous mount in this place not needed if replaced with hidden dir.
libc.umount(mount_path)
except OSError as e:
logging.debug(e)
make_bind_mount(temp_path, mount_path)

elif mode == DIR_READ_ONLY:
Expand Down Expand Up @@ -716,6 +770,25 @@ def remount_with_additional_flags(mountpoint, fstype, existing_options, mountfla
libc.mount(None, mountpoint, None, mountflags, None)


def escape_overlayfs_parameters(s):
    """
    Safely encode a string for being used as a path for both kernel overlayfs
    and fuse-overlayfs.
    In addition to escaping ",", which separates mount options,
    we need to escape ":", which overlayfs uses to separate multiple lower dirs
    (cf. https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt).
    Also, the path shall be normalized to avoid issues with "//" in the beginning
    (cf. https://github.com/sosy-lab/benchexec/pull/1062).

    @param s: absolute path as bytes
    @return: the escaped path as bytes, starting with exactly one "/"
    """
    # Use startswith() instead of s[0] such that empty input fails the assertion
    # with the intended message instead of raising an IndexError.
    assert s.startswith(b"/"), "Path must be absolute"
    normalized_path = b"/" + s.lstrip(b"/")
    # Backslashes have to be escaped first, otherwise the backslashes
    # that are added for ":" and "," would get escaped again.
    return (
        normalized_path.replace(b"\\", rb"\\")
        .replace(b":", rb"\:")
        .replace(b",", rb"\,")
    )


def make_overlay_mount(mount, lower, upper, work):
logging.debug(
"Creating overlay mount: target=%s, lower=%s, upper=%s, work=%s",
Expand All @@ -725,28 +798,187 @@ def make_overlay_mount(mount, lower, upper, work):
work,
)

def escape(s):
"""
Safely encode a string for being used as a path for overlayfs.
In addition to escaping ",", which separates mount options,
we need to escape ":", which overlayfs uses to separate multiple lower dirs
(cf. https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt).
"""
return s.replace(b"\\", rb"\\").replace(b":", rb"\:").replace(b",", rb"\,")

libc.mount(
b"none",
mount,
b"overlay",
0,
b"lowerdir="
+ escape(lower)
+ escape_overlayfs_parameters(lower)
+ b",upperdir="
+ escape_overlayfs_parameters(upper)
+ b",workdir="
+ escape_overlayfs_parameters(work),
)


def check_use_fuse_overlayfs(mount_base, dir_modes):
    """
    Decide whether fuse-overlayfs has to be used for the overlay mounts.
    This is the case as soon as one mount point below mount_base
    that is configured as overlay directory has another mount point below it.

    @param mount_base: the base directory (bytes) below which mount points are considered
    @param dir_modes: the directory modes as passed to determine_directory_mode()
    @return: True if an overlay mount point with a sub-mount was found, False otherwise
    """
    # Collect the relevant mount points once, the list is traversed repeatedly.
    relevant_mounts = []
    for _unused_source, full_mountpoint, fstype, _options in get_mount_points():
        if util.path_is_below(full_mountpoint, mount_base):
            relevant_mounts.append((full_mountpoint, fstype))

    prefix_length = len(mount_base)
    for full_mountpoint, fstype in relevant_mounts:
        # Path of the mount point without the mount_base prefix.
        mountpoint = full_mountpoint[prefix_length:] or b"/"
        mode = determine_directory_mode(dir_modes, mountpoint, fstype)

        if mode and os.path.exists(mountpoint) and mode == DIR_OVERLAY:
            # NOTE(review): the candidates still carry the mount_base prefix
            # whereas mountpoint does not -- confirm that this comparison is intended.
            has_sub_mount = any(
                util.path_is_below(candidate, mountpoint) and candidate != mountpoint
                for candidate, _unused_fstype in relevant_mounts
            )
            if has_sub_mount:
                return True

    return False


@contextlib.contextmanager
def permitted_cap_as_ambient():
    """
    Context manager that temporarily widens the capability sets of this process.

    On entry, the inheritable set is replaced by the permitted set
    and every capability that is currently effective is raised in the ambient set.
    On exit (also in case of an exception), all ambient capabilities are cleared
    and the inheritable set gets its previous value back.

    Used by fuse-based overlay mounts needing temporary capability elevation.
    """
    cap_header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0)
    cap_data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)()
    libc.capget(cap_header, cap_data)

    # Remember the current inheritable set such that it can be restored later.
    saved_low = cap_data[0].inheritable
    saved_high = cap_data[1].inheritable

    # Number of the highest capability known to the kernel (0 if unknown).
    highest_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0")

    try:
        for part in (0, 1):
            cap_data[part].inheritable = cap_data[part].permitted
        libc.capset(cap_header, cap_data)

        # The capability sets are split into two 32-bit words,
        # combine them into one bit mask.
        effective_mask = cap_data[0].effective | (cap_data[1].effective << 32)
        for cap in range(highest_cap + 1):
            if (effective_mask >> cap) & 1:
                libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_RAISE, cap, 0, 0)

        yield
    finally:
        libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0)

        cap_data[0].inheritable = saved_low
        cap_data[1].inheritable = saved_high
        libc.capset(cap_header, cap_data)


def get_fuse_overlayfs_executable():
    """
    Retrieve the path to the fuse-overlayfs executable
    if it is available and meets the version requirement.

    The executable is looked up on the PATH and called with "--version".
    If no version number can be found in its output, it is used nevertheless.

    @return: The path to fuse-overlayfs executable if found and valid, None otherwise.
    """
    fuse = shutil.which("fuse-overlayfs")
    if fuse is None:
        return None

    try:
        result = subprocess.run(
            args=(fuse, "--version"),
            check=True,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError occurs if the executable cannot be started at all
        # (e.g., broken interpreter line or missing permissions),
        # CalledProcessError if it terminates with a non-zero exit code.
        logging.warning("%s not available: %s", fuse, e)
        return None
    output = result.stdout

    match = re.search(r"^fuse-overlayfs:.*?(\d+\.\d+(\.\d+)?)", output, re.MULTILINE)
    if not match:
        logging.warning(
            "Could not find version information of %s in output, but still attempt to use it.",
            fuse,
        )
        return fuse

    # Compare versions numerically (as lists of ints), such that 1.9 < 1.10.
    version = [int(part) for part in match[1].split(".")]
    if version >= [1, 10]:
        logging.debug("%s version: %s", fuse, match[1])
        return fuse

    logging.warning(
        "Ignoring %s because its version %s is broken. "
        "Please install version 1.10 or newer.",
        fuse,
        match[1],
    )
    return None


def setup_fuse_overlay(temp_base, work_base):
    """
    Check if fuse-overlayfs is available on the system and,
    if so, create an overlay filesystem with it
    by stacking the given temporary directory on top of the root directory.

    @param temp_base: directory (bytes) that is used as upper layer of the overlay;
        the overlay is mounted at the directory "fuse" below it
    @param work_base: directory (bytes) below which the work directory
        for the overlay is created
    @return: The path to the mounted overlay filesystem if successful, None otherwise.
    """
    fuse = get_fuse_overlayfs_executable()
    if fuse is None:
        return None
    temp_fuse = temp_base + b"/fuse"
    work_fuse = work_base + b"/fuse"
    os.makedirs(temp_fuse, exist_ok=True)
    os.makedirs(work_fuse, exist_ok=True)

    logging.debug(
        "Creating overlay mount with %s: target=%s, lower=%s, upper=%s, work=%s",
        fuse,
        temp_fuse,
        b"/",
        temp_base,
        work_fuse,
    )

    # All mount options need to be passed as a single argument after "-o",
    # the last argument is the mount target.
    cmd = (
        fuse,
        b"-o",
        b"lowerdir=/"
        + b",upperdir="
        + escape_overlayfs_parameters(temp_base)
        + b",workdir="
        + escape_overlayfs_parameters(work_fuse),
        escape_overlayfs_parameters(temp_fuse),
    )

    try:
        with permitted_cap_as_ambient():
            # Temporarily elevate permitted capabilities to the inheritable set
            # and raise them in the ambient set.
            result = subprocess.run(
                args=cmd,
                check=True,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
        if result.stdout:
            # Output is only used for logging, so never fail on undecodable bytes.
            logging.debug(
                "fuse-overlayfs: %s", result.stdout.decode(errors="replace")
            )
        return temp_fuse
    except subprocess.CalledProcessError as e:
        logging.critical("Failed to create overlay mount with %s: %s", fuse, e)
        if e.stdout:
            # The output of the tool was captured, so it would be lost otherwise.
            logging.debug("fuse-overlayfs: %s", e.stdout.decode(errors="replace"))
        return None
PhilippWendler marked this conversation as resolved.
Show resolved Hide resolved


def mount_proc(container_system_config):
"""Mount the /proc filesystem.
Expand Down
5 changes: 4 additions & 1 deletion benchexec/containerized_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ def _init_container(

# Container config
container.setup_user_mapping(os.getpid(), uid, gid)
_setup_container_filesystem(temp_dir, dir_modes, container_system_config)
if container_system_config:
socket.sethostname(container.CONTAINER_HOSTNAME)
if not network_access:
Expand All @@ -225,6 +224,10 @@ def _init_container(
os.waitpid(pid, 0)
os._exit(0)

# We set up the container's filesystem in the child process.
# Delaying this until after the fork can avoid the "Transport endpoint not connected" issue.
_setup_container_filesystem(temp_dir, dir_modes, container_system_config)

# Finalize container setup in child
container.mount_proc(container_system_config) # only possible in child
container.drop_capabilities()
Expand Down
Loading