From 6d142e4d1706877548db2fad45ab8a64efe1ddd6 Mon Sep 17 00:00:00 2001
From: utam0k
Date: Fri, 24 Mar 2023 12:22:25 +0000
Subject: [PATCH] Add I/O priority

Signed-off-by: utam0k
---
 docs/spec-conformance.md                   |  1 +
 docs/terminals.md                          |  2 +-
 libcontainer/configs/config.go             | 11 +++++++
 libcontainer/configs/validate/validator.go | 12 ++++++++
 .../configs/validate/validator_test.go     | 29 ++++++++++++++++++
 libcontainer/process.go                    |  2 ++
 libcontainer/process_linux.go              | 25 ++++++++++++++++
 libcontainer/specconv/spec_linux.go        |  5 ++++
 libcontainer/standard_init_linux.go        |  5 ++++
 tests/integration/ioprio.bats              | 30 +++++++++++++++++++
 utils_linux.go                             |  5 ++++
 11 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100644 tests/integration/ioprio.bats

diff --git a/docs/spec-conformance.md b/docs/spec-conformance.md
index 7ef21d6f948..7de7476ecd8 100644
--- a/docs/spec-conformance.md
+++ b/docs/spec-conformance.md
@@ -9,6 +9,7 @@ Spec version | Feature                                  | PR
 -------------|------------------------------------------|----------------------------------------------------------
 v1.1.0       | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
 v1.1.0       | `.process.ioPriority`                    | [#3783](https://github.com/opencontainers/runc/pull/3783)
+v1.1.0       | rsvd hugetlb cgroup                      | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
 
 ## Architectures
 
diff --git a/docs/terminals.md b/docs/terminals.md
index aa9f71ee059..bec9a5fe2a9 100644
--- a/docs/terminals.md
+++ b/docs/terminals.md
@@ -58,7 +58,7 @@ you use `runc` directly in something like a `systemd` unit file. To disable this
 `LISTEN_FDS`-style passing just unset `LISTEN_FDS`.
 
 **Be very careful when passing file descriptors to a container process.** Due
-to some Linux kernel (mis)features, a container with access to certain types of
+to some Linux kernel misfeatures, a container with access to certain types of
 file descriptors (such as `O_PATH` descriptors) outside of the container's root
 file system can use these to break out of the container's pivoted mount
 namespace. [This has resulted in CVEs in the past.][CVE-2016-9962]
diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go
index a0a79d19d53..22fe0f9b4c1 100644
--- a/libcontainer/configs/config.go
+++ b/libcontainer/configs/config.go
@@ -222,6 +222,9 @@ type Config struct {
 
 	// Personality contains configuration for the Linux personality syscall.
 	Personality *LinuxPersonality `json:"personality,omitempty"`
+
+	// IOPriority is the container's I/O priority.
+	IOPriority *IOPriority `json:"io_priority,omitempty"`
 }
 
 // Scheduler is based on the Linux sched_setattr(2) syscall.
@@ -283,6 +286,14 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
 	}, nil
 }
 
+var IOPrioClassMapping = map[specs.IOPriorityClass]int{
+	specs.IOPRIO_CLASS_RT:   1,
+	specs.IOPRIO_CLASS_BE:   2,
+	specs.IOPRIO_CLASS_IDLE: 3,
+}
+
+type IOPriority = specs.LinuxIOPriority
+
 type (
 	HookName string
 	HookList []Hook
diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go
index ed63e950e43..37ece0aebbd 100644
--- a/libcontainer/configs/validate/validator.go
+++ b/libcontainer/configs/validate/validator.go
@@ -32,6 +32,7 @@ func Validate(config *configs.Config) error {
 		rootlessEUIDCheck,
 		mountsStrict,
 		scheduler,
+		ioPriority,
 	}
 	for _, c := range checks {
 		if err := c(config); err != nil {
@@ -396,3 +397,14 @@ func scheduler(config *configs.Config) error {
 	}
 	return nil
 }
+
+// ioPriority rejects an IOPriority whose Priority is outside the range 0-7.
+func ioPriority(config *configs.Config) error {
+	if config.IOPriority == nil {
+		return nil
+	}
+	if p := config.IOPriority.Priority; p < 0 || p > 7 {
+		return fmt.Errorf("invalid ioPriority.Priority: %d", p)
+	}
+	return nil
+}
diff --git a/libcontainer/configs/validate/validator_test.go b/libcontainer/configs/validate/validator_test.go
index 85b5e5eeade..b0b740a122d 100644
--- a/libcontainer/configs/validate/validator_test.go
+++ b/libcontainer/configs/validate/validator_test.go
@@ -842,3 +842,32 @@ func TestValidateScheduler(t *testing.T) {
 		}
 	}
 }
+
+// TestValidateIOPriority covers both ends of the accepted 0-7 range and the
+// first rejected value on each side of it.
+func TestValidateIOPriority(t *testing.T) {
+	testCases := []struct {
+		isErr    bool
+		priority int
+	}{
+		{isErr: false, priority: 0},
+		{isErr: false, priority: 7},
+		{isErr: true, priority: -1},
+		{isErr: true, priority: 8},
+	}
+
+	for _, tc := range testCases {
+		config := &configs.Config{
+			Rootfs:     "/var",
+			IOPriority: &configs.IOPriority{Priority: tc.priority},
+		}
+
+		err := Validate(config)
+		if tc.isErr && err == nil {
+			t.Errorf("iopriority: %d, expected error, got nil", tc.priority)
+		}
+		if !tc.isErr && err != nil {
+			t.Errorf("iopriority: %d, expected nil, got error %v", tc.priority, err)
+		}
+	}
+}
diff --git a/libcontainer/process.go b/libcontainer/process.go
index 8181062ae64..3663c7e0dd2 100644
--- a/libcontainer/process.go
+++ b/libcontainer/process.go
@@ -100,6 +100,8 @@ type Process struct {
 	SubCgroupPaths map[string]string
 
 	Scheduler *configs.Scheduler
+
+	IOPriority *configs.IOPriority
 }
 
 // Wait waits for the process to exit.
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
index ba916571489..6d51eada2b3 100644
--- a/libcontainer/process_linux.go
+++ b/libcontainer/process_linux.go
@@ -124,6 +124,13 @@ func (p *setnsProcess) signal(sig os.Signal) error {
 
 func (p *setnsProcess) start() (retErr error) {
 	defer p.comm.closeParent()
+
+	if p.process.IOPriority != nil {
+		if err := setIOPriority(p.process.IOPriority); err != nil {
+			return err
+		}
+	}
+
 	// get the "before" value of oom kill count
 	oom, _ := p.manager.OOMKillCount()
 	err := p.cmd.Start()
@@ -972,3 +979,21 @@ func initWaiter(r io.Reader) chan error {
 
 	return ch
 }
+
+// setIOPriority calls ioprio_set(2) with "which" = 1, i.e. IOPRIO_WHO_PROCESS
+// (IOPRIO_WHO_PGRP is 2), and "who" = 0, which targets the calling thread.
+func setIOPriority(ioprio *configs.IOPriority) error {
+	const ioprioWhoProcess = 1
+	class, ok := configs.IOPrioClassMapping[ioprio.Class]
+	if !ok {
+		return fmt.Errorf("invalid io priority class: %s", ioprio.Class)
+	}
+	// Combine class and priority into a single value
+	// https://github.com/torvalds/linux/blob/v5.18/include/uapi/linux/ioprio.h#L5-L17
+	iop := (class << 13) | ioprio.Priority
+	_, _, errno := unix.RawSyscall(unix.SYS_IOPRIO_SET, ioprioWhoProcess, 0, uintptr(iop))
+	if errno != 0 {
+		return fmt.Errorf("failed to set io priority: %w", errno)
+	}
+	return nil
+}
diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go
index fbb68c24d5d..10c28faa4fd 100644
--- a/libcontainer/specconv/spec_linux.go
+++ b/libcontainer/specconv/spec_linux.go
@@ -534,6 +534,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
 			s := *spec.Process.Scheduler
 			config.Scheduler = &s
 		}
+
+		if spec.Process.IOPriority != nil {
+			ioPriority := *spec.Process.IOPriority
+			config.IOPriority = &ioPriority
+		}
 	}
 	createHooks(spec, config)
 	config.Version = specs.Version
diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go
index 3096d0d81ee..496aeb94900 100644
--- a/libcontainer/standard_init_linux.go
+++ b/libcontainer/standard_init_linux.go
@@ -162,6 +162,11 @@ func (l *linuxStandardInit) Init() error {
 			return err
 		}
 	}
+	if l.config.Config.IOPriority != nil {
+		if err := setIOPriority(l.config.Config.IOPriority); err != nil {
+			return err
+		}
+	}
 
 	// Tell our parent that we're ready to Execv. This must be done before the
 	// Seccomp rules have been applied, because we need to be able to read and
diff --git a/tests/integration/ioprio.bats b/tests/integration/ioprio.bats
new file mode 100644
index 00000000000..a907d782f01
--- /dev/null
+++ b/tests/integration/ioprio.bats
@@ -0,0 +1,30 @@
+#!/usr/bin/env bats
+
+load helpers
+
+function setup() {
+	setup_debian
+}
+
+function teardown() {
+	teardown_bundle
+}
+
+@test "ioprio_set is applied to init and exec processes" {
+	# Create a container with a specific I/O priority.
+	update_config '.process.ioPriority = {"class": "IOPRIO_CLASS_BE", "priority": 4}'
+
+	runc run -d --console-socket "$CONSOLE_SOCKET" test_ioprio
+	[ "$status" -eq 0 ]
+
+	# Check the init process.
+	runc exec test_ioprio ionice -p 1
+	[ "$status" -eq 0 ]
+	[[ "$output" = *'best-effort: prio 4'* ]]
+
+	# Check the process made from the exec command.
+	runc exec test_ioprio ionice
+	[ "$status" -eq 0 ]
+
+	[[ "$output" = *'best-effort: prio 4'* ]]
+}
diff --git a/utils_linux.go b/utils_linux.go
index e7b362cdb2e..a59301c1874 100644
--- a/utils_linux.go
+++ b/utils_linux.go
@@ -67,6 +67,11 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
 		lp.Scheduler = &s
 	}
 
+	if p.IOPriority != nil {
+		ioPriority := *p.IOPriority
+		lp.IOPriority = &ioPriority
+	}
+
 	if p.Capabilities != nil {
 		lp.Capabilities = &configs.Capabilities{}
 		lp.Capabilities.Bounding = p.Capabilities.Bounding