From b0512bcee5602d4e5a241b77f2e1b838704b2de1 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 23 Jun 2024 16:31:57 -0700 Subject: [PATCH] libct: speedup process.Env handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation sets all the environment variables passed in Process.Env in the current process, one by one, then uses os.Environ to read those back. As pointed out in [1], this is slow, as runc calls os.Setenv for every variable, and there may be a few thousand of those. Looking into how os.Setenv is implemented, it is indeed slow, especially when cgo is enabled. Looking into why it was implemented, I found commit 9744d72c and traced it to [2], which discusses the actual reasons. At the time, the reasons were: - HOME is not passed into container as it is set in setupUser by os.Setenv and has no effect on config.Env; - there is no deduplication of environment variables. Yet it was decided to not go ahead with this patch, but later [3] was merged with the carry of this patch. Now, from what I see: 1. Passing environment to exec is way faster than using os.Setenv and os.Environ() (tests show ~20x faster in a simple Go test, and 2x faster in a real-world test, see below). 2. Setting environment variables in the runc context can result in ugly side effects (think GODEBUG). 3. Nothing in the runtime spec says that the environment needs to be deduplicated, or the order of preference (whether the first or the last value of a variable with the same name is to be used). In C (Linux/glibc), the first value is used. In Go, it's the last one. We should probably stick to what we have in order to maintain backward compatibility. This patch: - switches to passing env directly to exec; - adds deduplication mechanism to retain backward compatibility; - sets PATH from process.Env in the current process; - adds HOME to process.Env if not set; - moves the os.Clearenv call from startInitialization into prepareEnv.
The benchmark added by the previous commit shows 2x improvement: > name old time/op new time/op delta > ExecInBigEnv-20 61.7ms ± 4% 24.9ms ±14% -59.73% (p=0.000 n=10+10) The remaining questions are: - are there any potential regressions (for example, from not setting values from process.Env to the current process); - should deduplication show warnings (maybe promoted to errors later); - whether a default for PATH (e.g. "/bin:/usr/bin") should be added when PATH is not set. [1]: https://github.com/opencontainers/runc/pull/1983 [2]: https://github.com/docker-archive/libcontainer/pull/418 [3]: https://github.com/docker-archive/libcontainer/pull/432 Signed-off-by: Kir Kolyshkin --- libcontainer/env.go | 56 +++++++++++++++++++++++++++++ libcontainer/env_test.go | 40 +++++++++++++++++++++ libcontainer/init_linux.go | 55 ++++++++-------------------- libcontainer/setns_init_linux.go | 7 ++-- libcontainer/standard_init_linux.go | 7 ++-- 5 files changed, 118 insertions(+), 47 deletions(-) create mode 100644 libcontainer/env.go create mode 100644 libcontainer/env_test.go diff --git a/libcontainer/env.go b/libcontainer/env.go new file mode 100644 index 00000000000..36c5014f95d --- /dev/null +++ b/libcontainer/env.go @@ -0,0 +1,56 @@ +package libcontainer + +import ( + "errors" + "fmt" + "os" + "slices" + "strings" +) + +// prepareEnv checks supplied environment variables for validity, removes +// duplicates (leaving the last value only), and sets PATH from env, if found. +// Returns the deduplicated environment, and a flag telling if HOME is found. +func prepareEnv(env []string) ([]string, bool, error) { + // Clear the current environment (better be safe than sorry). + os.Clearenv() + + if env == nil { + return nil, false, nil + } + // Deduplication code based on dedupEnv from Go 1.22 os/exec. + + // Construct the output in reverse order, to preserve the + // last occurrence of each key. 
+ out := make([]string, 0, len(env)) + saw := make(map[string]bool, len(env)) + for n := len(env); n > 0; n-- { + kv := env[n-1] + i := strings.IndexByte(kv, '=') + if i == -1 { + return nil, false, errors.New("invalid environment variable: missing '='") + } + if i == 0 { + return nil, false, errors.New("invalid environment variable: name cannot be empty") + } + key := kv[:i] + if saw[key] { // Duplicate. + continue + } + saw[key] = true + if strings.IndexByte(kv, 0) >= 0 { + return nil, false, fmt.Errorf("invalid environment variable %q: contains nul byte (\\x00)", key) + } + if key == "PATH" { + // Needs to be set as it is used for binary lookup. + if err := os.Setenv("PATH", kv[5:]); err != nil { + return nil, false, err + } + } + out = append(out, kv) + } + // Restore the original order. + slices.Reverse(out) + + return out, saw["HOME"], nil +} diff --git a/libcontainer/env_test.go b/libcontainer/env_test.go new file mode 100644 index 00000000000..72d63b4af23 --- /dev/null +++ b/libcontainer/env_test.go @@ -0,0 +1,40 @@ +package libcontainer + +import ( + "slices" + "testing" +) + +func TestPrepareEnvDedup(t *testing.T) { + tests := []struct { + env, wantEnv []string + }{ + { + env: []string{}, + wantEnv: []string{}, + }, + { + env: []string{"HOME=/root", "FOO=bar"}, + wantEnv: []string{"HOME=/root", "FOO=bar"}, + }, + { + env: []string{"A=a", "A=b", "A=c"}, + wantEnv: []string{"A=c"}, + }, + { + env: []string{"TERM=vt100", "HOME=/home/one", "HOME=/home/two", "TERM=xterm", "HOME=/home/three", "FOO=bar"}, + wantEnv: []string{"TERM=xterm", "HOME=/home/three", "FOO=bar"}, + }, + } + + for _, tc := range tests { + env, _, err := prepareEnv(tc.env) + if err != nil { + t.Error(err) + continue + } + if !slices.Equal(env, tc.wantEnv) { + t.Errorf("want %v, got %v", tc.wantEnv, env) + } + } +} diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index e0a65d5cc8f..2e4cc521ddd 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ 
-11,7 +11,6 @@ import ( "runtime" "runtime/debug" "strconv" - "strings" "syscall" "github.com/containerd/console" @@ -196,10 +195,6 @@ func startInitialization() (retErr error) { dmzExe = os.NewFile(uintptr(dmzFd), "runc-dmz") } - // clear the current process's environment to clean any libcontainer - // specific env vars. - os.Clearenv() - defer func() { if err := recover(); err != nil { if err2, ok := err.(error); ok { @@ -220,9 +215,11 @@ func startInitialization() (retErr error) { } func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket, fifoFile, logPipe, dmzExe *os.File) error { - if err := populateProcessEnvironment(config.Env); err != nil { + env, homeSet, err := prepareEnv(config.Env) + if err != nil { return err } + config.Env = env // Clean the RLIMIT_NOFILE cache in go runtime. // Issue: https://github.com/opencontainers/runc/issues/4195 @@ -237,6 +234,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock config: config, logPipe: logPipe, dmzExe: dmzExe, + addHome: !homeSet, } return i.Init() case initStandard: @@ -249,37 +247,13 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock fifoFile: fifoFile, logPipe: logPipe, dmzExe: dmzExe, + addHome: !homeSet, } return i.Init() } return fmt.Errorf("unknown init type %q", t) } -// populateProcessEnvironment loads the provided environment variables into the -// current processes's environment. 
-func populateProcessEnvironment(env []string) error { - for _, pair := range env { - p := strings.SplitN(pair, "=", 2) - if len(p) < 2 { - return errors.New("invalid environment variable: missing '='") - } - name, val := p[0], p[1] - if name == "" { - return errors.New("invalid environment variable: name cannot be empty") - } - if strings.IndexByte(name, 0) >= 0 { - return fmt.Errorf("invalid environment variable %q: name contains nul byte (\\x00)", name) - } - if strings.IndexByte(val, 0) >= 0 { - return fmt.Errorf("invalid environment variable %q: value contains nul byte (\\x00)", name) - } - if err := os.Setenv(name, val); err != nil { - return err - } - } - return nil -} - // verifyCwd ensures that the current directory is actually inside the mount // namespace root of the current process. func verifyCwd() error { @@ -308,8 +282,8 @@ func verifyCwd() error { // finalizeNamespace drops the caps, sets the correct user // and working dir, and closes any leaked file descriptors -// before executing the command inside the namespace -func finalizeNamespace(config *initConfig) error { +// before executing the command inside the namespace. +func finalizeNamespace(config *initConfig, addHome bool) error { // Ensure that all unwanted fds we may have accidentally // inherited are marked close-on-exec so they stay out of the // container @@ -355,7 +329,7 @@ func finalizeNamespace(config *initConfig) error { if err := system.SetKeepCaps(); err != nil { return fmt.Errorf("unable to set keep caps: %w", err) } - if err := setupUser(config); err != nil { + if err := setupUser(config, addHome); err != nil { return fmt.Errorf("unable to setup user: %w", err) } // Change working directory AFTER the user has been set up, if we haven't done it yet. 
@@ -473,8 +447,9 @@ func syncParentSeccomp(pipe *syncSocket, seccompFd *os.File) error { return readSync(pipe, procSeccompDone) } -// setupUser changes the groups, gid, and uid for the user inside the container -func setupUser(config *initConfig) error { +// setupUser changes the groups, gid, and uid for the user inside the container, +// and appends user's HOME to config.Env if addHome is true. +func setupUser(config *initConfig, addHome bool) error { // Set up defaults. defaultExecUser := user.ExecUser{ Uid: 0, @@ -555,11 +530,9 @@ func setupUser(config *initConfig) error { return err } - // if we didn't get HOME already, set it based on the user's HOME - if envHome := os.Getenv("HOME"); envHome == "" { - if err := os.Setenv("HOME", execUser.Home); err != nil { - return err - } + // If we didn't get HOME already, set it based on the user's HOME. + if addHome { + config.Env = append(config.Env, "HOME="+execUser.Home) } return nil } diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index d14198772aa..c515df675e9 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -26,6 +26,7 @@ type linuxSetnsInit struct { config *initConfig logPipe *os.File dmzExe *os.File + addHome bool } func (l *linuxSetnsInit) getSessionRingName() string { @@ -101,7 +102,7 @@ func (l *linuxSetnsInit) Init() error { return err } } - if err := finalizeNamespace(l.config); err != nil { + if err := finalizeNamespace(l.config, l.addHome); err != nil { return err } if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { @@ -143,7 +144,7 @@ func (l *linuxSetnsInit) Init() error { if l.dmzExe != nil { l.config.Args[0] = name - return system.Fexecve(l.dmzExe.Fd(), l.config.Args, os.Environ()) + return system.Fexecve(l.dmzExe.Fd(), l.config.Args, l.config.Env) } // Close all file descriptors we are not passing to the container. 
This is // necessary because the execve target could use internal runc fds as the @@ -163,5 +164,5 @@ func (l *linuxSetnsInit) Init() error { if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { return err } - return system.Exec(name, l.config.Args, os.Environ()) + return system.Exec(name, l.config.Args, l.config.Env) } diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index ec2e814370a..edbb20e5b98 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -28,6 +28,7 @@ type linuxStandardInit struct { logPipe *os.File dmzExe *os.File config *initConfig + addHome bool } func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) { @@ -190,7 +191,7 @@ func (l *linuxStandardInit) Init() error { return err } } - if err := finalizeNamespace(l.config); err != nil { + if err := finalizeNamespace(l.config, l.addHome); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death @@ -277,7 +278,7 @@ func (l *linuxStandardInit) Init() error { if l.dmzExe != nil { l.config.Args[0] = name - return system.Fexecve(l.dmzExe.Fd(), l.config.Args, os.Environ()) + return system.Fexecve(l.dmzExe.Fd(), l.config.Args, l.config.Env) } // Close all file descriptors we are not passing to the container. This is // necessary because the execve target could use internal runc fds as the @@ -297,5 +298,5 @@ func (l *linuxStandardInit) Init() error { if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { return err } - return system.Exec(name, l.config.Args, os.Environ()) + return system.Exec(name, l.config.Args, l.config.Env) }