From 3c4b2ad07ce1b830bbfa00b3fdcb7dca0d95782a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 7 Nov 2024 20:22:27 -0800 Subject: [PATCH] gofer: open volumes from the initial userns The gofer process can encounter permission issues accessing volumes when custom user/group mappings are defined within its user namespace. To address this, the gofer now attempts to open volumes from the initial userns as a fallback mechanism. Fixes #11040 PiperOrigin-RevId: 694341928 --- runsc/cmd/gofer.go | 48 +++++++++--- runsc/container/BUILD | 3 + runsc/container/container.go | 22 ++++++ runsc/container/gofer_to_host_rpc.go | 106 +++++++++++++++++++++++++++ 4 files changed, 169 insertions(+), 10 deletions(-) create mode 100644 runsc/container/gofer_to_host_rpc.go diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 08cdc76c4b..645bbbe15c 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -32,9 +32,11 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/devices/tpuproxy/vfio" "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/cmd/util" "gvisor.dev/gvisor/runsc/config" + "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/flag" "gvisor.dev/gvisor/runsc/fsgofer" "gvisor.dev/gvisor/runsc/fsgofer/filter" @@ -89,11 +91,12 @@ type Gofer struct { setUpRoot bool mountConfs boot.GoferMountConfFlags - specFD int - mountsFD int - profileFDs profile.FDArgs - syncFDs goferSyncFDs - stopProfiling func() + specFD int + mountsFD int + goferToHostRPCFD int + profileFDs profile.FDArgs + syncFDs goferSyncFDs + stopProfiling func() } // Name implements subcommands.Command. 
@@ -123,6 +126,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) { f.IntVar(&g.devIoFD, "dev-io-fd", -1, "optional FD to connect /dev gofer server") f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec") f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).") + f.IntVar(&g.goferToHostRPCFD, "rpc-fd", -1, "gofer-to-host RPC file descriptor.") // Add synchronization FD flags. g.syncFDs.setFlags(f) @@ -153,8 +157,16 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcomm g.syncFDs.syncNVProxy() g.syncFDs.syncUsernsForRootless() + goferToHostRPCSock, err := unet.NewSocket(g.goferToHostRPCFD) + if err != nil { + util.Fatalf("creating rpc socket: %v", err) + } + + goferToHostRPC := urpc.NewClient(goferToHostRPCSock) + defer goferToHostRPC.Close() + if g.setUpRoot { - if err := g.setupRootFS(spec, conf); err != nil { + if err := g.setupRootFS(spec, conf, goferToHostRPC); err != nil { util.Fatalf("Error setting up root FS: %v", err) } if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { @@ -162,6 +174,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcomm defer cleanupUnmounter() } } + goferToHostRPC.Close() if g.applyCaps { overrides := g.syncFDs.flags() overrides["apply-caps"] = "false" @@ -369,7 +382,7 @@ func (g *Gofer) writeMounts(mounts []specs.Mount) error { // It is protected by selinux rules. const procFDBindMount = "/proc/fs" -func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error { +func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config, goferToHostRPC *urpc.Client) error { // Convert all shared mounts into slaves to be sure that nothing will be // propagated outside of our namespace. 
procPath := "/proc" @@ -437,7 +450,7 @@ func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error { } // Replace the current spec, with the clean spec with symlinks resolved. - if err := g.setupMounts(conf, spec.Mounts, root, procPath); err != nil { + if err := g.setupMounts(conf, spec.Mounts, root, procPath, goferToHostRPC); err != nil { util.Fatalf("error setting up FS: %v", err) } @@ -487,7 +500,7 @@ func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error { // setupMounts bind mounts all mounts specified in the spec in their correct // location inside root. It will resolve relative paths and symlinks. It also // creates directories as needed. -func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string) error { +func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string, goferToHostRPC *urpc.Client) error { mountIdx := 1 // First index is for rootfs. for _, m := range mounts { if !specutils.IsGoferMount(m) { @@ -511,7 +524,22 @@ func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, pro } log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) - if err := specutils.SafeSetupAndMount(m.Source, dst, m.Type, flags, procPath); err != nil { + src := m.Source + var srcFile *os.File + if err := unix.Access(src, unix.R_OK); err != nil { + // The current process doesn't have enough permissions + // to open the mount, so let's try to open it in the + // parent user namespace. 
+ var res container.OpenMountResult + if err := goferToHostRPC.Call("goferRPC.OpenMount", &m, &res); err != nil { + return fmt.Errorf("opening %s: %w", m.Source, err) + } + srcFile = res.Files[0] + src = fmt.Sprintf("%s/self/fd/%d", procPath, srcFile.Fd()) + } + err = specutils.SafeSetupAndMount(src, dst, m.Type, flags, procPath) + srcFile.Close() + if err != nil { return fmt.Errorf("mounting %+v: %v", m, err) } diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 135a918cbb..d1830de2a9 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -9,6 +9,7 @@ go_library( name = "container", srcs = [ "container.go", + "gofer_to_host_rpc.go", "hook.go", "state_file.go", "status.go", @@ -28,6 +29,8 @@ go_library( "//pkg/sighandling", "//pkg/state/statefile", "//pkg/sync", + "//pkg/unet", + "//pkg/urpc", "//runsc/boot", "//runsc/cgroup", "//runsc/config", diff --git a/runsc/container/container.go b/runsc/container/container.go index a87c48c865..3f46490ab8 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -41,6 +41,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sighandling" "gvisor.dev/gvisor/pkg/state/statefile" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/cgroup" "gvisor.dev/gvisor/runsc/config" @@ -1272,6 +1274,25 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu } donations.DonateAndClose("mounts-fd", mountsGofer) + rpcServ, rpcClnt, err := unet.SocketPair(false) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to create an rpc socket pair: %w", err) + } + rpcClntFD, _ := rpcClnt.Release() + donations.DonateAndClose("rpc-fd", os.NewFile(uintptr(rpcClntFD), "gofer-rpc")) + rpcPidCh := make(chan int, 1) + defer close(rpcPidCh) + go func() { + pid := <-rpcPidCh + if pid == 0 { + rpcServ.Close() + return + } + s := urpc.NewServer() + s.Register(&goferToHostRPC{goferPID: pid}) + 
s.StartHandling(rpcServ) + }() + // Count the number of mounts that needs an IO file. ioFileCount := 0 for _, cfg := range c.GoferMountConfs { @@ -1370,6 +1391,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu log.Infof("Gofer started, PID: %d", cmd.Process.Pid) c.GoferPid = cmd.Process.Pid c.goferIsChild = true + rpcPidCh <- cmd.Process.Pid // Set up and synchronize rootless mode userns mappings. if rootlessEUID { diff --git a/runsc/container/gofer_to_host_rpc.go b/runsc/container/gofer_to_host_rpc.go new file mode 100644 index 0000000000..51023dd0da --- /dev/null +++ b/runsc/container/gofer_to_host_rpc.go @@ -0,0 +1,106 @@ +// Copyright 2024 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package container + +import ( + "fmt" + "os" + "runtime" + "sync" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/urpc" +) + +type openMountRequest struct { + mount *specs.Mount + result *OpenMountResult + done chan error +} + +// goferToHostRPC is an rpc server that allows the gofer process to do RPC +// calls outside of its namespace container. It is used to configure sandbox +// mounts. +type goferToHostRPC struct { + mu sync.Mutex + openMountRequests chan *openMountRequest + goferPID int +} + +// OpenMountResult is a result of the goferToHostRPC.OpenMount call. 
+type OpenMountResult struct { + urpc.FilePayload +} + +func (rpc *goferToHostRPC) handleRequest(req *openMountRequest) { + defer close(req.done) + fd, err := os.OpenFile(req.mount.Source, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + req.done <- err + return + } + req.result.Files = []*os.File{fd} +} + +func (rpc *goferToHostRPC) openMountThread() error { + if err := unix.Unshare(unix.CLONE_FS); err != nil { + return fmt.Errorf("open mount thread: unshare filesystem attributes: %w", err) + } + nsFd, err := os.Open(fmt.Sprintf("/proc/%d/ns/mnt", rpc.goferPID)) + if err != nil { + return fmt.Errorf("open mount thread: open container mntns: %w", err) + } + defer nsFd.Close() + if err := unix.Setns(int(nsFd.Fd()), unix.CLONE_NEWNS); err != nil { + return fmt.Errorf("open mount thread: join container mntns: %w", err) + } + for req := range rpc.openMountRequests { + rpc.handleRequest(req) + } + return nil +} + +// OpenMount opens a specified mount and returns a file descriptor to it. It is +// used when the mount isn't accessible from the gofer user namespace. +func (rpc *goferToHostRPC) OpenMount(m *specs.Mount, res *OpenMountResult) error { + rpc.mu.Lock() + defer rpc.mu.Unlock() + + if rpc.openMountRequests == nil { + rpc.openMountRequests = make(chan *openMountRequest) + go func() { + // This goroutine holds the current thread forever. It + // never exits, because child processes can set + // PDEATHSIG. It can't serve other goroutines, because + // it does unshare CLONE_FS. + runtime.LockOSThread() + if err := rpc.openMountThread(); err != nil { + for req := range rpc.openMountRequests { + req.done <- err + } + } + panic("unreachable") + }() + } + req := openMountRequest{ + mount: m, + result: res, + done: make(chan error), + } + rpc.openMountRequests <- &req + err := <-req.done + return err +}