Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[hotfix] seccomp: default to -ENOSYS for SECCOMP_RET_ERRNO #2746

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ RUN echo 'deb https://download.opensuse.org/repositories/devel:/tools:/criu/Debi
crossbuild-essential-ppc64el \
curl \
gawk \
gcc \
iptables \
jq \
kmod \
Expand Down
2 changes: 1 addition & 1 deletion Vagrantfile.centos7
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Vagrant.configure("2") do |config|
# install yum packages
yum install -y -q epel-release
(cd /etc/yum.repos.d && curl -O https://copr.fedorainfracloud.org/coprs/adrian/criu-el7/repo/epel-7/adrian-criu-el7-epel-7.repo)
yum install -y -q gcc git iptables jq libseccomp-devel make skopeo criu
yum install -y -q gcc git iptables jq glibc-static libseccomp-devel make skopeo criu
yum clean all

# install Go
Expand Down
2 changes: 1 addition & 1 deletion Vagrantfile.fedora33
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Vagrant.configure("2") do |config|
config exclude kernel,kernel-core
config install_weak_deps false
update
install iptables gcc make golang-go libseccomp-devel bats jq git-core criu skopeo
install iptables gcc make golang-go glibc-static libseccomp-devel bats jq git-core criu skopeo
ts run
EOF
done
Expand Down
61 changes: 57 additions & 4 deletions libcontainer/seccomp/seccomp_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ import (
"strings"

"github.com/opencontainers/runc/libcontainer/configs"
libseccomp "github.com/seccomp/libseccomp-golang"

libseccomp "github.com/seccomp/libseccomp-golang"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

Expand All @@ -29,6 +30,49 @@ const (
syscallMaxArguments int = 6
)

// enosysHotfixFilter appends an unconditional rule for every syscall that
// existed in Linux 3.0 (meaning its syscall number is no greater than that of
// "setns") and that has no rule at all in config.Syscalls. Each injected rule
// uses config.DefaultAction with an errno of EPERM, so that these basic
// syscalls are not caught by a default action that returns -ENOSYS.
//
// config.Syscalls is extended in place. nil entries are skipped here rather
// than dereferenced; they stay in the list so that InitSeccomp can reject
// them with its own error instead of this function panicking first.
//
// An error is returned only when the syscall number of "setns" cannot be
// resolved.
func enosysHotfixFilter(config *configs.Seccomp) error {
	// Our default actions for hotfixed syscalls. All injected rules share the
	// same errno value.
	defaultAction := config.DefaultAction
	defaultErrno := uint(unix.EPERM)

	// Collect all syscalls that had some rule.
	seenSyscalls := make(map[string]bool, len(config.Syscalls))
	for _, rule := range config.Syscalls {
		if rule == nil {
			// Not our job to report this; just don't crash on it.
			continue
		}
		seenSyscalls[rule.Name] = true
	}

	// And now we create unconditional rules for any syscalls not present in
	// the allow list at all, up to the last syscall number (which is currently
	// the last syscall added to Linux 3.0 -- "setns").
	lastSysNo, err := libseccomp.GetSyscallFromName("setns")
	if err != nil {
		// TODO: Maybe have a nicer fallback than this?
		return fmt.Errorf("cannot find syscall number for 'setns': %v", err)
	}
	for sysNo := 0; sysNo <= int(lastSysNo); sysNo++ {
		sysName, err := libseccomp.ScmpSyscall(sysNo).GetName()
		if err != nil {
			// No such syscall...
			continue
		}
		if seenSyscalls[sysName] {
			// Rule already exists.
			continue
		}
		logrus.Debugf("seccomp hotfix: injecting blanket EPERM rule for %s", sysName)
		config.Syscalls = append(config.Syscalls, &configs.Syscall{
			Name:     sysName,
			Action:   defaultAction,
			ErrnoRet: &defaultErrno,
		})
	}
	return nil
}

// Filters given syscalls in a container, preventing them from being used
// Started in the container init process, and carried over to all child processes
// Setns calls, however, require a separate invocation, as they are not children
Expand All @@ -38,7 +82,18 @@ func InitSeccomp(config *configs.Seccomp) error {
return errors.New("cannot initialize Seccomp - nil config passed")
}

defaultAction, err := getAction(config.DefaultAction, nil)
// Default to an errno of ENOSYS as the default action if the default
// action is SECCOMP_ACT_ERRNO. This is to avoid causing glibc headaches
// when new syscalls are added.
defaultErrno := uint(unix.EPERM)
if config.DefaultAction == configs.Errno {
defaultErrno = uint(unix.ENOSYS)
if err := enosysHotfixFilter(config); err != nil {
return fmt.Errorf("error hotfixing filter: %s", err)
}
}

defaultAction, err := getAction(config.DefaultAction, &defaultErrno)
if err != nil {
return errors.New("error initializing seccomp - invalid default action")
}
Expand All @@ -54,7 +109,6 @@ func InitSeccomp(config *configs.Seccomp) error {
if err != nil {
return fmt.Errorf("error validating Seccomp architecture: %s", err)
}

if err := filter.AddArch(scmpArch); err != nil {
return fmt.Errorf("error adding architecture to seccomp filter: %s", err)
}
Expand All @@ -70,7 +124,6 @@ func InitSeccomp(config *configs.Seccomp) error {
if call == nil {
return errors.New("encountered nil syscall while initializing Seccomp")
}

if err = matchCall(filter, call); err != nil {
return err
}
Expand Down
Empty file removed tests/integration/config.json
Empty file.
24 changes: 24 additions & 0 deletions tests/integration/seccomp.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env bats

load helpers

# Per-test bats hook: calls teardown_busybox and then setup_busybox, so each
# test presumably starts from a freshly prepared busybox bundle.
setup() {
	teardown_busybox
	setup_busybox
}

# Per-test bats hook: hands cleanup over to the teardown_busybox helper.
teardown() {
	teardown_busybox
}

# Compiles testdata/<TEST_NAME>.c into rootfs/seccomp_test, sets
# .linux.seccomp to the contents of testdata/<TEST_NAME>.json, makes
# /seccomp_test the container's process args, and runs the container.
# The test passes when "$status" is 0.
@test "runc run [seccomp -ENOSYS handling]" {
# Base name shared by the C source and the JSON seccomp profile in TESTDATA.
TEST_NAME="seccomp_syscall_test1"

# Compile the test binary and update the config to run it.
# Linked with -static; presumably the busybox rootfs has no shared libc for
# the binary to load -- TODO confirm.
gcc -static -o rootfs/seccomp_test "${TESTDATA}/${TEST_NAME}.c"
# $(<file) splices the profile JSON into the expression passed to update_config.
update_config ".linux.seccomp = $(<"${TESTDATA}/${TEST_NAME}.json")"
update_config '.process.args = ["/seccomp_test"]'

# NOTE(review): "$status" is presumably set by the runc helper loaded via
# "load helpers" -- verify against helpers.bash.
runc run test_busybox
[ "$status" -eq 0 ]
}
79 changes: 79 additions & 0 deletions tests/integration/testdata/seccomp_syscall_test1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/syscall.h>

/* Value returned from main(); syscall_assert sets it to 32 on any mismatch. */
static int exit_code = 0;

/*
 * raw(name, args...) invokes a syscall by number through syscall(), bypassing
 * the dedicated glibc wrapper. glibc tends to hide -ENOSYS where it can, and
 * that would mask exactly the errno value the seccomp filter returned -- which
 * is what this test needs to observe.
 */
#define raw(sys_name, ...) syscall(SYS_ ## sys_name, ##__VA_ARGS__)

/*
 * syscall_assert(sval, rval) evaluates the call expression, then the expected
 * value. A negative call result is replaced by -errno before the comparison.
 * On a mismatch a diagnostic line is printed and exit_code becomes 32;
 * execution continues either way so that every check is reported.
 */
#define syscall_assert(call_expr, want_expr) \
	do { \
		int got = (call_expr); \
		int want = (want_expr); \
		got = (got < 0) ? -errno : got; \
		if (got != want) { \
			printf("syscall_assert(%s == %s) failed: %d != %d\n", #call_expr, #want_expr, got, want); \
			exit_code = 32; \
		} \
	} while (0)

/*
 * Issues a fixed sequence of syscalls and checks each result with
 * syscall_assert against the value expected under the seccomp profile this
 * binary is run with (the profile is not part of this file). A failed check
 * does not stop the run. Returns exit_code: 0 when every check matched, 32
 * otherwise.
 */
int main(void)
{
// Basic permitted syscalls.
syscall_assert(write(-1, NULL, 0), -EBADF);

// Basic syscall with masked rules.
// The successful calls are expected to return descriptors 3 and then 4, so
// the order of these calls matters. Presumably this relies on only fds 0-2
// being open when the program starts -- TODO confirm.
syscall_assert(raw(socket, AF_UNIX, SOCK_STREAM, 0x000), 3);
syscall_assert(raw(socket, AF_UNIX, SOCK_STREAM, 0x0FF), -EPROTONOSUPPORT);
syscall_assert(raw(socket, AF_UNIX, SOCK_STREAM, 0x001), 4);
syscall_assert(raw(socket, AF_UNIX, SOCK_STREAM, 0x100), -ENOSYS); // FIXME :(
syscall_assert(raw(socket, AF_UNIX, SOCK_STREAM, 0xC00), -ENOSYS); // FIXME :(

// Multiple arguments with OR rules.
// NOTE(review): the last two calls are identical; possibly a copy-paste
// where a different pid was intended -- confirm.
syscall_assert(raw(process_vm_readv, 100, NULL, 0, NULL, 0, ~0), -EINVAL);
syscall_assert(raw(process_vm_readv, 9001, NULL, 0, NULL, 0, ~0), -EINVAL);
syscall_assert(raw(process_vm_readv, 0, NULL, 0, NULL, 0, ~0), -ENOSYS); // FIXME :(
syscall_assert(raw(process_vm_readv, 0, NULL, 0, NULL, 0, ~0), -ENOSYS); // FIXME :(

// Multiple arguments with OR rules -- rule is ERRNO(-ENOANO).
// NOTE(review): the last two calls are identical here as well -- confirm.
syscall_assert(raw(process_vm_writev, 1337, NULL, 0, NULL, 0, ~0), -ENOANO);
syscall_assert(raw(process_vm_writev, 2020, NULL, 0, NULL, 0, ~0), -ENOANO);
syscall_assert(raw(process_vm_writev, 0, NULL, 0, NULL, 0, ~0), -ENOSYS); // FIXME :(
syscall_assert(raw(process_vm_writev, 0, NULL, 0, NULL, 0, ~0), -ENOSYS); // FIXME :(

// Multiple arguments with AND rules.
syscall_assert(raw(kcmp, 0, 1337, 0, 0, 0), -ESRCH);
syscall_assert(raw(kcmp, 0, 0, 0, 0, 0), -ENOSYS); // FIXME :(
syscall_assert(raw(kcmp, 500, 1337, 0, 0, 0), -ENOSYS); // FIXME :(
syscall_assert(raw(kcmp, 500, 500, 0, 0, 0), -ENOSYS); // FIXME :(

// Multiple rules for the same syscall.
syscall_assert(raw(dup3, 0, -100, 0xFFFF), -ENOSYS); // FIXME :(
syscall_assert(raw(dup3, 1, -100, 0xFFFF), -EINVAL);
syscall_assert(raw(dup3, 2, -100, 0xFFFF), -ENOSYS); // FIXME :(
syscall_assert(raw(dup3, 3, -100, 0xFFFF), -EINVAL);

// Explicitly denied syscalls (those in Linux 3.0) get -EPERM.
syscall_assert(raw(unshare, 0), -EPERM);
syscall_assert(raw(setns, 0, 0), -EPERM);

// Out-of-bounds fake syscall.
// Called through syscall() directly because number 1000 has no SYS_ name.
syscall_assert(syscall(1000, 0xDEADBEEF, 0xCAFEFEED, 0x1337), -ENOSYS);

// Non-zero (32) if any syscall_assert above reported a mismatch.
return exit_code;
}
Loading