From 6e13016b72f2808eb255d46e34e6d9d960898d33 Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Mon, 10 Aug 2020 15:41:31 -0700 Subject: [PATCH 01/20] Refactor code in hcsoci into logical packages * Created a new package `cmd` that contains code for running processes in a compute system * Created a new package `resources` that contains code for creating, updating, releasing container resources * Created a new package `credentials` which handles container credential guard instances needed for gmsa * Created a new package `layers` that contains code for creating container image layers Signed-off-by: Kathryn Baldauf --- cmd/containerd-shim-runhcs-v1/exec_hcs.go | 10 +-- cmd/containerd-shim-runhcs-v1/task_hcs.go | 12 +-- .../task_wcow_podsandbox.go | 4 +- cmd/runhcs/container.go | 21 +++--- cmd/runhcs/shim.go | 4 +- internal/{hcsoci => cmd}/cmd.go | 4 +- internal/{hcsoci => cmd}/cmd_test.go | 2 +- internal/{hcsoci => cmd}/diag.go | 2 +- internal/{hcsoci => cmd}/io.go | 2 +- internal/{hcsoci => cmd}/io_npipe.go | 2 +- .../{hcsoci => credentials}/credentials.go | 9 ++- internal/hcsoci/create.go | 55 +++++++------- internal/hcsoci/hcsdoc_wcow.go | 6 +- internal/hcsoci/network.go | 9 ++- internal/hcsoci/resources_lcow.go | 47 +++++------- internal/hcsoci/resources_wcow.go | 34 ++++----- internal/{hcsoci => layers}/layers.go | 27 +++++-- internal/lcow/scratch.go | 10 +-- internal/{hcsoci => resources}/resources.go | 75 ++++++++++++------- internal/tools/uvmboot/lcow.go | 4 +- internal/tools/uvmboot/wcow.go | 4 +- internal/uvm/automanagedvhd.go | 27 +++++++ internal/uvm/constants.go | 13 ++++ internal/wclayer/wclayer.go | 3 + test/functional/lcow_test.go | 8 +- test/functional/test.go | 5 +- test/functional/wcow_test.go | 32 ++++---- 27 files changed, 257 insertions(+), 174 deletions(-) rename internal/{hcsoci => cmd}/cmd.go (97%) rename internal/{hcsoci => cmd}/cmd_test.go (99%) rename internal/{hcsoci => cmd}/diag.go (98%) rename internal/{hcsoci => cmd}/io.go (98%) 
rename internal/{hcsoci => cmd}/io_npipe.go (99%) rename internal/{hcsoci => credentials}/credentials.go (95%) rename internal/{hcsoci => layers}/layers.go (91%) rename internal/{hcsoci => resources}/resources.go (70%) create mode 100644 internal/uvm/automanagedvhd.go diff --git a/cmd/containerd-shim-runhcs-v1/exec_hcs.go b/cmd/containerd-shim-runhcs-v1/exec_hcs.go index f48260e3e1..728ef51c10 100644 --- a/cmd/containerd-shim-runhcs-v1/exec_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/exec_hcs.go @@ -6,9 +6,9 @@ import ( "sync" "time" + "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/guestrequest" - "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/signals" "github.com/Microsoft/hcsshim/internal/uvm" @@ -49,7 +49,7 @@ func newHcsExec( id, bundle string, isWCOW bool, spec *specs.Process, - io hcsoci.UpstreamIO) shimExec { + io cmd.UpstreamIO) shimExec { log.G(ctx).WithFields(logrus.Fields{ "tid": tid, "eid": id, // Init exec ID is always same as Task ID @@ -118,7 +118,7 @@ type hcsExec struct { // create time in order to be valid. // // This MUST be treated as read only in the lifetime of the exec. - io hcsoci.UpstreamIO + io cmd.UpstreamIO processDone chan struct{} processDoneOnce sync.Once @@ -129,7 +129,7 @@ type hcsExec struct { pid int exitStatus uint32 exitedAt time.Time - p *hcsoci.Cmd + p *cmd.Cmd // exited is a wait block which waits async for the process to exit. 
exited chan struct{} @@ -205,7 +205,7 @@ func (he *hcsExec) Start(ctx context.Context) (err error) { } }() } - cmd := &hcsoci.Cmd{ + cmd := &cmd.Cmd{ Host: he.c, Stdin: he.io.Stdin(), Stdout: he.io.Stdout(), diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 62965a01b9..303f16b403 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -10,11 +10,13 @@ import ( "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/schema2" "github.com/Microsoft/hcsshim/internal/shimdiag" @@ -120,7 +122,7 @@ func newHcsTask( owner := filepath.Base(os.Args[0]) - io, err := hcsoci.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal) + io, err := cmd.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal) if err != nil { return nil, err } @@ -220,7 +222,7 @@ type hcsTask struct { // // It MUST be treated as read only in the lifetime of this task EXCEPT after // a Kill to the init task in which all resources must be released. - cr *hcsoci.Resources + cr *resources.Resources // init is the init process of the container. // // Note: the invariant `container state == init.State()` MUST be true. 
IE: @@ -269,7 +271,7 @@ func (ht *hcsTask) CreateExec(ctx context.Context, req *task.ExecProcessRequest, return errors.Wrapf(errdefs.ErrFailedPrecondition, "exec: '' in task: '%s' must be running to create additional execs", ht.id) } - io, err := hcsoci.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal) + io, err := cmd.NewNpipeIO(ctx, req.Stdin, req.Stdout, req.Stderr, req.Terminal) if err != nil { return err } @@ -532,7 +534,7 @@ func (ht *hcsTask) close(ctx context.Context) { } // Release any resources associated with the container. - if err := hcsoci.ReleaseResources(ctx, ht.cr, ht.host, true); err != nil { + if err := resources.ReleaseResources(ctx, ht.cr, ht.host, true); err != nil { log.G(ctx).WithError(err).Error("failed to release container resources") } @@ -581,7 +583,7 @@ func (ht *hcsTask) ExecInHost(ctx context.Context, req *shimdiag.ExecProcessRequ if ht.host == nil { return 0, errors.New("task is not isolated") } - return hcsoci.ExecInUvm(ctx, ht.host, req) + return cmd.ExecInUvm(ctx, ht.host, req) } func (ht *hcsTask) DumpGuestStacks(ctx context.Context) string { diff --git a/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go b/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go index 2eb4ef3418..238e964f35 100644 --- a/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go +++ b/cmd/containerd-shim-runhcs-v1/task_wcow_podsandbox.go @@ -7,8 +7,8 @@ import ( "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/guestrequest" - "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/requesttype" hcsschema "github.com/Microsoft/hcsshim/internal/schema2" @@ -224,7 +224,7 @@ func (wpst *wcowPodSandboxTask) ExecInHost(ctx context.Context, req *shimdiag.Ex if wpst.host == nil { return 0, 
errors.New("task is not isolated") } - return hcsoci.ExecInUvm(ctx, wpst.host, req) + return cmd.ExecInUvm(ctx, wpst.host, req) } func (wpst *wcowPodSandboxTask) DumpGuestStacks(ctx context.Context) string { diff --git a/cmd/runhcs/container.go b/cmd/runhcs/container.go index dabe82a35b..5fcfbdc707 100644 --- a/cmd/runhcs/container.go +++ b/cmd/runhcs/container.go @@ -19,6 +19,7 @@ import ( "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/regstate" + "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/runhcs" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/osversion" @@ -81,7 +82,7 @@ type container struct { persistedState ShimPid int hc *hcs.System - resources *hcsoci.Resources + resources *resources.Resources } func startProcessShim(id, pidFile, logFile string, spec *specs.Process) (_ *os.Process, err error) { @@ -471,17 +472,17 @@ func (c *container) VMIsolated() bool { } func (c *container) unmountInHost(vm *uvm.UtilityVM, all bool) error { - resources := &hcsoci.Resources{} - err := stateKey.Get(c.ID, keyResources, resources) + r := &resources.Resources{} + err := stateKey.Get(c.ID, keyResources, r) if _, ok := err.(*regstate.NoStateError); ok { return nil } if err != nil { return err } - err = hcsoci.ReleaseResources(context.Background(), resources, vm, all) + err = resources.ReleaseResources(context.Background(), r, vm, all) if err != nil { - stateKey.Set(c.ID, keyResources, resources) + stateKey.Set(c.ID, keyResources, r) return err } @@ -537,7 +538,7 @@ func createContainerInHost(c *container, vm *uvm.UtilityVM) (err error) { logfields.ContainerID: c.ID, logfields.UVMID: vmid, }).Info("creating container in UVM") - hc, resources, err := hcsoci.CreateContainer(context.Background(), opts) + hc, r, err := hcsoci.CreateContainer(context.Background(), opts) if err != nil { return err } @@ -545,19 +546,19 @@ func 
createContainerInHost(c *container, vm *uvm.UtilityVM) (err error) { if err != nil { hc.Terminate(context.Background()) hc.Wait() - hcsoci.ReleaseResources(context.Background(), resources, vm, true) + resources.ReleaseResources(context.Background(), r, vm, true) } }() // Record the network namespace to support namespace sharing by container ID. - if resources.NetNS() != "" { - err = stateKey.Set(c.ID, keyNetNS, resources.NetNS()) + if r.NetNS() != "" { + err = stateKey.Set(c.ID, keyNetNS, r.NetNS()) if err != nil { return err } } - err = stateKey.Set(c.ID, keyResources, resources) + err = stateKey.Set(c.ID, keyResources, r) if err != nil { return err } diff --git a/cmd/runhcs/shim.go b/cmd/runhcs/shim.go index 0d052611f5..eaf84f0c97 100644 --- a/cmd/runhcs/shim.go +++ b/cmd/runhcs/shim.go @@ -13,8 +13,8 @@ import ( winio "github.com/Microsoft/go-winio" "github.com/Microsoft/hcsshim/internal/appargs" + cmdpkg "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/hcs" - "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/runhcs" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" @@ -162,7 +162,7 @@ var shimCommand = cli.Command{ } // Create the process in the container. - cmd := &hcsoci.Cmd{ + cmd := &cmdpkg.Cmd{ Host: c.hc, Stdin: stdin, Stdout: stdout, diff --git a/internal/hcsoci/cmd.go b/internal/cmd/cmd.go similarity index 97% rename from internal/hcsoci/cmd.go rename to internal/cmd/cmd.go index 9fa98698da..023e0b34d2 100644 --- a/internal/hcsoci/cmd.go +++ b/internal/cmd/cmd.go @@ -1,4 +1,6 @@ -package hcsoci +// Package cmd provides functionality used to execute commands inside of containers +// or UVMs, and to connect an upstream client to those commands for handling in/out/err IO. 
+package cmd import ( "bytes" diff --git a/internal/hcsoci/cmd_test.go b/internal/cmd/cmd_test.go similarity index 99% rename from internal/hcsoci/cmd_test.go rename to internal/cmd/cmd_test.go index 6cde867137..143aa99ce1 100644 --- a/internal/hcsoci/cmd_test.go +++ b/internal/cmd/cmd_test.go @@ -1,6 +1,6 @@ // build +windows -package hcsoci +package cmd import ( "bytes" diff --git a/internal/hcsoci/diag.go b/internal/cmd/diag.go similarity index 98% rename from internal/hcsoci/diag.go rename to internal/cmd/diag.go index fccad5d355..30fa18a7de 100644 --- a/internal/hcsoci/diag.go +++ b/internal/cmd/diag.go @@ -1,4 +1,4 @@ -package hcsoci +package cmd import ( "context" diff --git a/internal/hcsoci/io.go b/internal/cmd/io.go similarity index 98% rename from internal/hcsoci/io.go rename to internal/cmd/io.go index c46103994b..9c4f08ce92 100644 --- a/internal/hcsoci/io.go +++ b/internal/cmd/io.go @@ -1,4 +1,4 @@ -package hcsoci +package cmd import ( "context" diff --git a/internal/hcsoci/io_npipe.go b/internal/cmd/io_npipe.go similarity index 99% rename from internal/hcsoci/io_npipe.go rename to internal/cmd/io_npipe.go index 242e13d137..f4650538a9 100644 --- a/internal/hcsoci/io_npipe.go +++ b/internal/cmd/io_npipe.go @@ -1,4 +1,4 @@ -package hcsoci +package cmd import ( "context" diff --git a/internal/hcsoci/credentials.go b/internal/credentials/credentials.go similarity index 95% rename from internal/hcsoci/credentials.go rename to internal/credentials/credentials.go index 5226271c45..d5a49ae9a1 100644 --- a/internal/hcsoci/credentials.go +++ b/internal/credentials/credentials.go @@ -1,6 +1,9 @@ // +build windows -package hcsoci +// Package credentials holds the necessary structs and functions for adding +// and removing Container Credential Guard instances (shortened to CCG +// normally) for V2 HCS schema containers. 
+package credentials import ( "context" @@ -13,9 +16,7 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/schema2" ) -// This file holds the necessary structs and functions for adding and removing Container -// Credential Guard instances (shortened to CCG normally) for V2 HCS schema -// containers. Container Credential Guard is in HCS's own words "The solution to +// Container Credential Guard is in HCS's own words "The solution to // allowing windows containers to have access to domain credentials for the // applications running in their corresponding guest." It essentially acts as // a way to temporarily Active Directory join a given container with a Group diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go index f1ffab0ca2..48f58e0cab 100644 --- a/internal/hcsoci/create.go +++ b/internal/hcsoci/create.go @@ -15,6 +15,7 @@ import ( "github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/resources" hcsschema "github.com/Microsoft/hcsshim/internal/schema2" "github.com/Microsoft/hcsshim/internal/schemaversion" "github.com/Microsoft/hcsshim/internal/uvm" @@ -68,7 +69,7 @@ type createOptionsInternal struct { // case of an error. This provides support for the debugging option not to // release the resources on failure, so that the client can make the necessary // call to release resources that have been allocated as part of calling this function. 
-func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.Container, _ *Resources, err error) { +func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.Container, _ *resources.Resources, err error) { coi := &createOptionsInternal{ CreateOptions: createOptions, actualID: createOptions.ID, @@ -103,13 +104,11 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C "schema": coi.actualSchemaVersion, }).Debug("hcsshim::CreateContainer") - resources := &Resources{ - id: createOptions.ID, - } + r := resources.NewContainerResources(createOptions.ID) defer func() { if err != nil { if !coi.DoNotReleaseResourcesOnFailure { - ReleaseResources(ctx, resources, coi.HostingSystem, true) + resources.ReleaseResources(ctx, r, coi.HostingSystem, true) } } }() @@ -117,9 +116,9 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C if coi.HostingSystem != nil { n := coi.HostingSystem.ContainerCounter() if coi.Spec.Linux != nil { - resources.containerRootInUVM = fmt.Sprintf(lcowRootInUVM, createOptions.ID) + r.SetContainerRootInUVM(fmt.Sprintf(lcowRootInUVM, createOptions.ID)) } else { - resources.containerRootInUVM = fmt.Sprintf(wcowRootInUVM, strconv.FormatUint(n, 16)) + r.SetContainerRootInUVM(fmt.Sprintf(wcowRootInUVM, strconv.FormatUint(n, 16))) } } @@ -129,18 +128,18 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C schemaversion.IsV21(coi.actualSchemaVersion) { if coi.NetworkNamespace != "" { - resources.netNS = coi.NetworkNamespace + r.SetNetNS(coi.NetworkNamespace) } else { - err := createNetworkNamespace(ctx, coi, resources) + err := createNetworkNamespace(ctx, coi, r) if err != nil { - return nil, resources, err + return nil, r, err } } - coi.actualNetworkNamespace = resources.netNS + coi.actualNetworkNamespace = r.NetNS() if coi.HostingSystem != nil { ct, _, err := oci.GetSandboxTypeAndID(coi.Spec.Annotations) if err != nil { - return nil, 
resources, err + return nil, r, err } // Only add the network namespace to a standalone or sandbox // container but not a workload container in a sandbox that inherits @@ -148,19 +147,19 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C if ct == oci.KubernetesContainerTypeNone || ct == oci.KubernetesContainerTypeSandbox { endpoints, err := GetNamespaceEndpoints(ctx, coi.actualNetworkNamespace) if err != nil { - return nil, resources, err + return nil, r, err } err = coi.HostingSystem.AddNetNS(ctx, coi.actualNetworkNamespace) if err != nil { - return nil, resources, err + return nil, r, err } err = coi.HostingSystem.AddEndpointsToNS(ctx, coi.actualNetworkNamespace, endpoints) if err != nil { // Best effort clean up the NS coi.HostingSystem.RemoveNetNS(ctx, coi.actualNetworkNamespace) - return nil, resources, err + return nil, r, err } - resources.addedNetNSToVM = true + r.SetAddedNetNSToVM(true) } } } @@ -169,30 +168,30 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C log.G(ctx).Debug("hcsshim::CreateContainer allocating resources") if coi.Spec.Linux != nil { if schemaversion.IsV10(coi.actualSchemaVersion) { - return nil, resources, errors.New("LCOW v1 not supported") + return nil, r, errors.New("LCOW v1 not supported") } log.G(ctx).Debug("hcsshim::CreateContainer allocateLinuxResources") - err = allocateLinuxResources(ctx, coi, resources) + err = allocateLinuxResources(ctx, coi, r) if err != nil { log.G(ctx).WithError(err).Debug("failed to allocateLinuxResources") - return nil, resources, err + return nil, r, err } - gcsDocument, err = createLinuxContainerDocument(ctx, coi, resources.containerRootInUVM) + gcsDocument, err = createLinuxContainerDocument(ctx, coi, r.ContainerRootInUVM()) if err != nil { log.G(ctx).WithError(err).Debug("failed createHCSContainerDocument") - return nil, resources, err + return nil, r, err } } else { - err = allocateWindowsResources(ctx, coi, resources) + err = 
allocateWindowsResources(ctx, coi, r) if err != nil { log.G(ctx).WithError(err).Debug("failed to allocateWindowsResources") - return nil, resources, err + return nil, r, err } log.G(ctx).Debug("hcsshim::CreateContainer creating container document") v1, v2, err := createWindowsContainerDocument(ctx, coi) if err != nil { log.G(ctx).WithError(err).Debug("failed createHCSContainerDocument") - return nil, resources, err + return nil, r, err } if schemaversion.IsV10(coi.actualSchemaVersion) { @@ -219,16 +218,16 @@ func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.C if gcsDocument != nil { c, err := coi.HostingSystem.CreateContainer(ctx, coi.actualID, gcsDocument) if err != nil { - return nil, resources, err + return nil, r, err } - return c, resources, nil + return c, r, nil } system, err := hcs.CreateComputeSystem(ctx, coi.actualID, hcsDocument) if err != nil { - return nil, resources, err + return nil, r, err } - return system, resources, nil + return system, r, nil } // isV2Xenon returns true if the create options are for a HCS schema V2 xenon container diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 324596c7c0..5a04d5c98d 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -10,11 +10,11 @@ import ( "regexp" "strings" - "github.com/Microsoft/hcsshim/internal/processorinfo" - + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/processorinfo" "github.com/Microsoft/hcsshim/internal/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/schema2" "github.com/Microsoft/hcsshim/internal/uvm" @@ -208,7 +208,7 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter // Hosting system was supplied, so is v2 Xenon. 
v2Container.Storage.Path = coi.Spec.Root.Path if coi.HostingSystem.OS() == "windows" { - layers, err := computeV2Layers(ctx, coi.HostingSystem, coi.Spec.Windows.LayerFolders[:len(coi.Spec.Windows.LayerFolders)-1]) + layers, err := layers.GetHCSLayers(ctx, coi.HostingSystem, coi.Spec.Windows.LayerFolders[:len(coi.Spec.Windows.LayerFolders)-1]) if err != nil { return nil, nil, err } diff --git a/internal/hcsoci/network.go b/internal/hcsoci/network.go index 053aca9d7e..d33bd6b838 100644 --- a/internal/hcsoci/network.go +++ b/internal/hcsoci/network.go @@ -6,11 +6,12 @@ import ( "github.com/Microsoft/hcsshim/internal/hns" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/sirupsen/logrus" ) -func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, resources *Resources) error { +func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, r *resources.Resources) error { op := "hcsoci::createNetworkNamespace" l := log.G(ctx).WithField(logfields.ContainerID, coi.ID) l.Debug(op + " - Begin") @@ -26,8 +27,8 @@ func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, res "netID": netID, logfields.ContainerID: coi.ID, }).Info("created network namespace for container") - resources.netNS = netID - resources.createdNetNS = true + r.SetNetNS(netID) + r.SetCreatedNetNS(true) endpoints := make([]string, 0) for _, endpointID := range coi.Spec.Windows.Network.EndpointList { err = hns.AddNamespaceEndpoint(netID, endpointID) @@ -40,7 +41,7 @@ func createNetworkNamespace(ctx context.Context, coi *createOptionsInternal, res }).Info("added network endpoint to namespace") endpoints = append(endpoints, endpointID) } - resources.resources = append(resources.resources, &uvm.NetworkEndpoints{EndpointIDs: endpoints, Namespace: netID}) + r.Add(&uvm.NetworkEndpoints{EndpointIDs: endpoints, 
Namespace: netID}) return nil } diff --git a/internal/hcsoci/resources_lcow.go b/internal/hcsoci/resources_lcow.go index 4e2dae3a18..8671776b78 100644 --- a/internal/hcsoci/resources_lcow.go +++ b/internal/hcsoci/resources_lcow.go @@ -12,19 +12,15 @@ import ( "path/filepath" "strings" + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/uvm" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" ) -const lcowMountPathPrefix = "/mounts/m%d" -const lcowGlobalMountPrefix = "/run/mounts/m%d" - -// keep lcowNvidiaMountPath value in sync with opengcs -const lcowNvidiaMountPath = "/run/nvidia" - // getGPUVHDPath gets the gpu vhd path from the shim options or uses the default if no // shim option is set. Right now we only support Nvidia gpus, so this will default to // a gpu vhd with nvidia files @@ -39,34 +35,31 @@ func getGPUVHDPath(coi *createOptionsInternal) (string, error) { return gpuVHDPath, nil } -func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r *Resources) error { +func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r *resources.Resources) error { if coi.Spec.Root == nil { coi.Spec.Root = &specs.Root{} } + containerRootInUVM := r.ContainerRootInUVM() if coi.Spec.Windows != nil && len(coi.Spec.Windows.LayerFolders) > 0 { log.G(ctx).Debug("hcsshim::allocateLinuxResources mounting storage") - rootPath, err := MountContainerLayers(ctx, coi.Spec.Windows.LayerFolders, r.containerRootInUVM, coi.HostingSystem) + rootPath, err := layers.MountContainerLayers(ctx, coi.Spec.Windows.LayerFolders, containerRootInUVM, coi.HostingSystem) if err != nil { return fmt.Errorf("failed to mount container storage: %s", err) } coi.Spec.Root.Path = rootPath - layers := &ImageLayers{ - vm: coi.HostingSystem, - containerRootInUVM: 
r.containerRootInUVM, - layers: coi.Spec.Windows.LayerFolders, - } - r.layers = layers + layers := layers.NewImageLayers(coi.HostingSystem, containerRootInUVM, coi.Spec.Windows.LayerFolders) + r.SetLayers(layers) } else if coi.Spec.Root.Path != "" { // This is the "Plan 9" root filesystem. // TODO: We need a test for this. Ask @jstarks how you can even lay this out on Windows. hostPath := coi.Spec.Root.Path - uvmPathForContainersFileSystem := path.Join(r.containerRootInUVM, rootfsPath) + uvmPathForContainersFileSystem := path.Join(r.ContainerRootInUVM(), uvm.RootfsPath) share, err := coi.HostingSystem.AddPlan9(ctx, hostPath, uvmPathForContainersFileSystem, coi.Spec.Root.Readonly, false, nil) if err != nil { return fmt.Errorf("adding plan9 root: %s", err) } coi.Spec.Root.Path = uvmPathForContainersFileSystem - r.resources = append(r.resources, share) + r.Add(share) } else { return errors.New("must provide either Windows.LayerFolders or Root.Path") } @@ -87,7 +80,7 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * if coi.HostingSystem != nil { hostPath := mount.Source - uvmPathForShare := path.Join(r.containerRootInUVM, fmt.Sprintf(lcowMountPathPrefix, i)) + uvmPathForShare := path.Join(containerRootInUVM, fmt.Sprintf(uvm.LCOWMountPathPrefix, i)) uvmPathForFile := uvmPathForShare readOnly := false @@ -100,7 +93,7 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * l := log.G(ctx).WithField("mount", fmt.Sprintf("%+v", mount)) if mount.Type == "physical-disk" { l.Debug("hcsshim::allocateLinuxResources Hot-adding SCSI physical disk for OCI mount") - uvmPathForShare = fmt.Sprintf(lcowGlobalMountPrefix, coi.HostingSystem.UVMMountCounter()) + uvmPathForShare = fmt.Sprintf(uvm.LCOWGlobalMountPrefix, coi.HostingSystem.UVMMountCounter()) scsiMount, err := coi.HostingSystem.AddSCSIPhysicalDisk(ctx, hostPath, uvmPathForShare, readOnly) if err != nil { return fmt.Errorf("adding SCSI physical disk mount %+v: %s", 
mount, err) @@ -108,11 +101,11 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * uvmPathForFile = scsiMount.UVMPath uvmPathForShare = scsiMount.UVMPath - r.resources = append(r.resources, scsiMount) + r.Add(scsiMount) coi.Spec.Mounts[i].Type = "none" } else if mount.Type == "virtual-disk" || mount.Type == "automanage-virtual-disk" { l.Debug("hcsshim::allocateLinuxResources Hot-adding SCSI virtual disk for OCI mount") - uvmPathForShare = fmt.Sprintf(lcowGlobalMountPrefix, coi.HostingSystem.UVMMountCounter()) + uvmPathForShare = fmt.Sprintf(uvm.LCOWGlobalMountPrefix, coi.HostingSystem.UVMMountCounter()) // if the scsi device is already attached then we take the uvm path that the function below returns // that is where it was previously mounted in UVM @@ -124,9 +117,9 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * uvmPathForFile = scsiMount.UVMPath uvmPathForShare = scsiMount.UVMPath if mount.Type == "automanage-virtual-disk" { - r.resources = append(r.resources, &AutoManagedVHD{hostPath: scsiMount.HostPath}) + r.Add(uvm.NewAutoManagedVHD(scsiMount.HostPath)) } - r.resources = append(r.resources, scsiMount) + r.Add(scsiMount) coi.Spec.Mounts[i].Type = "none" } else if strings.HasPrefix(mount.Source, "sandbox://") { // Mounts that map to a path in UVM are specified with 'sandbox://' prefix. 
@@ -153,7 +146,7 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * if err != nil { return fmt.Errorf("adding plan9 mount %+v: %s", mount, err) } - r.resources = append(r.resources, share) + r.Add(share) } coi.Spec.Mounts[i].Source = uvmPathForFile } @@ -168,7 +161,7 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * if err != nil { return errors.Wrapf(err, "failed to assign gpu device %s to pod %s", d.ID, coi.HostingSystem.ID()) } - r.resources = append(r.resources, vpci) + r.Add(vpci) // update device ID on the spec to the assigned device's resulting vmbus guid so gcs knows which devices to // map into the container coi.Spec.Windows.Devices[i].ID = vpci.VMBusGUID @@ -185,11 +178,11 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * // use lcowNvidiaMountPath since we only support nvidia gpus right now // must use scsi here since DDA'ing a hyper-v pci device is not supported on VMs that have ANY virtual memory // gpuvhd must be granted VM Group access. 
- scsiMount, err := coi.HostingSystem.AddSCSI(ctx, gpuSupportVhdPath, lcowNvidiaMountPath, true, uvm.VMAccessTypeNoop) + scsiMount, err := coi.HostingSystem.AddSCSI(ctx, gpuSupportVhdPath, uvm.LCOWNvidiaMountPath, true, uvm.VMAccessTypeNoop) if err != nil { - return errors.Wrapf(err, "failed to add scsi device %s in the UVM %s at %s", gpuSupportVhdPath, coi.HostingSystem.ID(), lcowNvidiaMountPath) + return errors.Wrapf(err, "failed to add scsi device %s in the UVM %s at %s", gpuSupportVhdPath, coi.HostingSystem.ID(), uvm.LCOWNvidiaMountPath) } - r.resources = append(r.resources, scsiMount) + r.Add(scsiMount) } return nil } diff --git a/internal/hcsoci/resources_wcow.go b/internal/hcsoci/resources_wcow.go index 9104ef0be6..44e55c50b1 100644 --- a/internal/hcsoci/resources_wcow.go +++ b/internal/hcsoci/resources_wcow.go @@ -11,16 +11,17 @@ import ( "path/filepath" "strings" + "github.com/Microsoft/hcsshim/internal/credentials" + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/schemaversion" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/internal/wclayer" specs "github.com/opencontainers/runtime-spec/specs-go" ) -const wcowGlobalMountPrefix = "C:\\mounts\\m%d" - -func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r *Resources) error { +func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r *resources.Resources) error { if coi.Spec == nil || coi.Spec.Windows == nil || coi.Spec.Windows.LayerFolders == nil { return fmt.Errorf("field 'Spec.Windows.Layerfolders' is not populated") } @@ -49,17 +50,14 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r if coi.Spec.Root.Path == "" && (coi.HostingSystem != nil || coi.Spec.Windows.HyperV == nil) { log.G(ctx).Debug("hcsshim::allocateWindowsResources mounting storage") - 
containerRootPath, err := MountContainerLayers(ctx, coi.Spec.Windows.LayerFolders, r.containerRootInUVM, coi.HostingSystem) + containerRootInUVM := r.ContainerRootInUVM() + containerRootPath, err := layers.MountContainerLayers(ctx, coi.Spec.Windows.LayerFolders, containerRootInUVM, coi.HostingSystem) if err != nil { return fmt.Errorf("failed to mount container storage: %s", err) } coi.Spec.Root.Path = containerRootPath - layers := &ImageLayers{ - vm: coi.HostingSystem, - containerRootInUVM: r.containerRootInUVM, - layers: coi.Spec.Windows.LayerFolders, - } - r.layers = layers + layers := layers.NewImageLayers(coi.HostingSystem, containerRootInUVM, coi.Spec.Windows.LayerFolders) + r.SetLayers(layers) } // Validate each of the mounts. If this is a V2 Xenon, we have to add them as @@ -79,7 +77,7 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r } if coi.HostingSystem != nil && schemaversion.IsV21(coi.actualSchemaVersion) { - uvmPath := fmt.Sprintf(wcowGlobalMountPrefix, coi.HostingSystem.UVMMountCounter()) + uvmPath := fmt.Sprintf(uvm.WCOWGlobalMountPrefix, coi.HostingSystem.UVMMountCounter()) readOnly := false for _, o := range mount.Options { if strings.ToLower(o) == "ro" { @@ -95,7 +93,7 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r return fmt.Errorf("adding SCSI physical disk mount %+v: %s", mount, err) } coi.Spec.Mounts[i].Type = "" - r.resources = append(r.resources, scsiMount) + r.Add(scsiMount) } else if mount.Type == "virtual-disk" || mount.Type == "automanage-virtual-disk" { l.Debug("hcsshim::allocateWindowsResources Hot-adding SCSI virtual disk for OCI mount") scsiMount, err := coi.HostingSystem.AddSCSI(ctx, mount.Source, uvmPath, readOnly, uvm.VMAccessTypeIndividual) @@ -104,16 +102,16 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r } coi.Spec.Mounts[i].Type = "" if mount.Type == "automanage-virtual-disk" { - r.resources = append(r.resources, 
&AutoManagedVHD{hostPath: scsiMount.HostPath}) + r.Add(uvm.NewAutoManagedVHD(scsiMount.HostPath)) } - r.resources = append(r.resources, scsiMount) + r.Add(scsiMount) } else { if uvm.IsPipe(mount.Source) { pipe, err := coi.HostingSystem.AddPipe(ctx, mount.Source) if err != nil { return fmt.Errorf("failed to add named pipe to UVM: %s", err) } - r.resources = append(r.resources, pipe) + r.Add(pipe) } else { l.Debug("hcsshim::allocateWindowsResources Hot-adding VSMB share for OCI mount") options := coi.HostingSystem.DefaultVSMBOptions(readOnly) @@ -121,7 +119,7 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r if err != nil { return fmt.Errorf("failed to add VSMB share to utility VM for mount %+v: %s", mount, err) } - r.resources = append(r.resources, share) + r.Add(share) } } } @@ -131,12 +129,12 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r // Only need to create a CCG instance for v2 containers if schemaversion.IsV21(coi.actualSchemaVersion) { hypervisorIsolated := coi.HostingSystem != nil - ccgState, ccgInstance, err := CreateCredentialGuard(ctx, coi.actualID, cs, hypervisorIsolated) + ccgState, ccgInstance, err := credentials.CreateCredentialGuard(ctx, coi.actualID, cs, hypervisorIsolated) if err != nil { return err } coi.ccgState = ccgState - r.resources = append(r.resources, ccgInstance) + r.Add(ccgInstance) //TODO dcantah: If/when dynamic service table entries is supported register the RpcEndpoint with hvsocket here } } diff --git a/internal/hcsoci/layers.go b/internal/layers/layers.go similarity index 91% rename from internal/hcsoci/layers.go rename to internal/layers/layers.go index b50c9aba0d..b4ce6e6049 100644 --- a/internal/hcsoci/layers.go +++ b/internal/layers/layers.go @@ -1,6 +1,7 @@ // +build windows -package hcsoci +// Package layers deals with container layer mounting/unmounting for LCOW and WCOW +package layers import ( "context" @@ -23,6 +24,14 @@ type ImageLayers struct { 
layers []string } +func NewImageLayers(vm *uvm.UtilityVM, containerRootInUVM string, layers []string) *ImageLayers { + return &ImageLayers{ + vm: vm, + containerRootInUVM: containerRootInUVM, + layers: layers, + } +} + // Release unmounts all of the layers located in the layers array. func (layers *ImageLayers) Release(ctx context.Context, all bool) error { op := UnmountOperationSCSI @@ -159,14 +168,14 @@ func MountContainerLayers(ctx context.Context, layerFolders []string, guestRoot if uvm.OS() == "windows" { // Load the filter at the C:\s location calculated above. We pass into this request each of the // read-only layer folders. - layers, err := computeV2Layers(ctx, uvm, layersAdded) + layers, err := GetHCSLayers(ctx, uvm, layersAdded) if err != nil { return "", err } err = uvm.CombineLayersWCOW(ctx, layers, containerScratchPathInUVM) rootfs = containerScratchPathInUVM } else { - rootfs = ospath.Join(uvm.OS(), guestRoot, rootfsPath) + rootfs = ospath.Join(uvm.OS(), guestRoot, uvmpkg.RootfsPath) err = uvm.CombineLayersLCOW(ctx, lcowUvmLayerPaths, containerScratchPathInUVM, rootfs) } if err != nil { @@ -189,7 +198,7 @@ func addLCOWLayer(ctx context.Context, uvm *uvmpkg.UtilityVM, layerPath string) } } - uvmPath = fmt.Sprintf(lcowGlobalMountPrefix, uvm.UVMMountCounter()) + uvmPath = fmt.Sprintf(uvmpkg.LCOWGlobalMountPrefix, uvm.UVMMountCounter()) sm, err := uvm.AddSCSI(ctx, layerPath, uvmPath, true, uvmpkg.VMAccessTypeNoop) if err != nil { return "", fmt.Errorf("failed to add SCSI layer: %s", err) @@ -302,7 +311,8 @@ func UnmountContainerLayers(ctx context.Context, layerFolders []string, containe return retError } -func computeV2Layers(ctx context.Context, vm *uvm.UtilityVM, paths []string) (layers []hcsschema.Layer, err error) { +// GetHCSLayers converts host paths corresponding to container layers into HCS schema V2 layers +func GetHCSLayers(ctx context.Context, vm *uvm.UtilityVM, paths []string) (layers []hcsschema.Layer, err error) { for _, path := range paths 
{ uvmPath, err := vm.GetVSMBUvmPath(ctx, path, true) if err != nil { @@ -316,3 +326,10 @@ func computeV2Layers(ctx context.Context, vm *uvm.UtilityVM, paths []string) (la } return layers, nil } + +func containerRootfsPath(uvm *uvm.UtilityVM, rootPath string) string { + if uvm.OS() == "windows" { + return ospath.Join(uvm.OS(), rootPath) + } + return ospath.Join(uvm.OS(), rootPath, uvmpkg.RootfsPath) +} diff --git a/internal/lcow/scratch.go b/internal/lcow/scratch.go index b52345ab17..212d8ac3e9 100644 --- a/internal/lcow/scratch.go +++ b/internal/lcow/scratch.go @@ -9,8 +9,8 @@ import ( "time" "github.com/Microsoft/go-winio/vhd" + cmdpkg "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/copyfile" - "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/timeout" "github.com/Microsoft/hcsshim/internal/uvm" @@ -88,12 +88,12 @@ func CreateScratch(ctx context.Context, lcowUVM *uvm.UtilityVM, destFile string, testdCtx, cancel := context.WithTimeout(ctx, timeout.TestDRetryLoop) defer cancel() for { - cmd := hcsoci.CommandContext(testdCtx, lcowUVM, "test", "-d", devicePath) + cmd := cmdpkg.CommandContext(testdCtx, lcowUVM, "test", "-d", devicePath) err := cmd.Run() if err == nil { break } - if _, ok := err.(*hcsoci.ExitError); !ok { + if _, ok := err.(*cmdpkg.ExitError); !ok { return fmt.Errorf("failed to run %+v following hot-add %s to utility VM: %s", cmd.Spec.Args, destFile, err) } time.Sleep(time.Millisecond * 10) @@ -102,7 +102,7 @@ func CreateScratch(ctx context.Context, lcowUVM *uvm.UtilityVM, destFile string, // Get the device from under the block subdirectory by doing a simple ls. 
This will come back as (eg) `sda` lsCtx, cancel := context.WithTimeout(ctx, timeout.ExternalCommandToStart) - cmd := hcsoci.CommandContext(lsCtx, lcowUVM, "ls", devicePath) + cmd := cmdpkg.CommandContext(lsCtx, lcowUVM, "ls", devicePath) lsOutput, err := cmd.Output() cancel() if err != nil { @@ -116,7 +116,7 @@ func CreateScratch(ctx context.Context, lcowUVM *uvm.UtilityVM, destFile string, // Format it ext4 mkfsCtx, cancel := context.WithTimeout(ctx, timeout.ExternalCommandToStart) - cmd = hcsoci.CommandContext(mkfsCtx, lcowUVM, "mkfs.ext4", "-q", "-E", "lazy_itable_init=0,nodiscard", "-O", `^has_journal,sparse_super2,^resize_inode`, device) + cmd = cmdpkg.CommandContext(mkfsCtx, lcowUVM, "mkfs.ext4", "-q", "-E", "lazy_itable_init=0,nodiscard", "-O", `^has_journal,sparse_super2,^resize_inode`, device) var mkfsStderr bytes.Buffer cmd.Stderr = &mkfsStderr err = cmd.Run() diff --git a/internal/hcsoci/resources.go b/internal/resources/resources.go similarity index 70% rename from internal/hcsoci/resources.go rename to internal/resources/resources.go index 08be62bbd0..081984f4c2 100644 --- a/internal/hcsoci/resources.go +++ b/internal/resources/resources.go @@ -1,25 +1,58 @@ -package hcsoci +// Package resources handles creating, updating, and releasing resources +// on a container +package resources import ( "context" "errors" - "os" + "github.com/Microsoft/hcsshim/internal/credentials" + "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" - "github.com/Microsoft/hcsshim/internal/ospath" "github.com/Microsoft/hcsshim/internal/uvm" ) -const ( - scratchPath = "scratch" - rootfsPath = "rootfs" -) - // NetNS returns the network namespace for the container func (r *Resources) NetNS() string { return r.netNS } +// SetNetNS updates the container resource's NetNS +func (r *Resources) SetNetNS(netNS string) { + r.netNS = netNS +} + +// SetCreatedNetNS updates the container resource's CreatedNetNS value +func (r *Resources) 
SetCreatedNetNS(created bool) { + r.createdNetNS = created +} + +// ContainerRootInUVM returns the containerRootInUVM for the container +func (r *Resources) ContainerRootInUVM() string { + return r.containerRootInUVM +} + +// SetContainerRootInUVM updates the container resource's containerRootInUVM value +func (r *Resources) SetContainerRootInUVM(containerRootInUVM string) { + r.containerRootInUVM = containerRootInUVM +} + +// SetAddedNetNSToVM updates the container resource's AddedNetNSToVM value +func (r *Resources) SetAddedNetNSToVM(addedNetNSToVM bool) { + r.addedNetNSToVM = addedNetNSToVM +} + +// SetLayers updates the container resource's image layers +func (r *Resources) SetLayers(l *layers.ImageLayers) { + r.layers = l +} + +// Add adds one or more resource closers to the resources struct to be +// tracked for release later on +func (r *Resources) Add(newResources ...ResourceCloser) { + r.resources = append(r.resources, newResources...) +} + // Resources is the structure returned as part of creating a container. It holds // nothing useful to clients, hence everything is lowercased. A client would use // it in a call to ReleaseResources to ensure everything is cleaned up when a @@ -39,7 +72,7 @@ type Resources struct { // addedNetNSToVM indicates if the network namespace has been added to the containers utility VM addedNetNSToVM bool // layers is a pointer to a struct of the layers paths of a container - layers *ImageLayers + layers *layers.ImageLayers // resources is an array of the resources associated with a container resources []ResourceCloser } @@ -51,17 +84,12 @@ type ResourceCloser interface { Release(context.Context) error } -// AutoManagedVHD struct representing a VHD that will be cleaned up automatically. -type AutoManagedVHD struct { - hostPath string -} - -// Release removes the vhd. 
-func (vhd *AutoManagedVHD) Release(ctx context.Context) error { - if err := os.Remove(vhd.hostPath); err != nil { - log.G(ctx).WithField("hostPath", vhd.hostPath).WithError(err).Error("failed to remove automanage-virtual-disk") +// NewContainerResources returns a new empty container Resources struct with the +// given container id +func NewContainerResources(id string) *Resources { + return &Resources{ + id: id, } - return nil } // ReleaseResources releases/frees all of the resources associated with a container. This includes @@ -92,7 +120,7 @@ func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all } r.createdNetNS = false } - case *CCGInstance: + case *credentials.CCGInstance: if err := r.resources[i].Release(ctx); err != nil { log.G(ctx).WithError(err).Error("failed to release container resource") releaseErr = true @@ -132,10 +160,3 @@ func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all } return nil } - -func containerRootfsPath(uvm *uvm.UtilityVM, rootPath string) string { - if uvm.OS() == "windows" { - return ospath.Join(uvm.OS(), rootPath) - } - return ospath.Join(uvm.OS(), rootPath, rootfsPath) -} diff --git a/internal/tools/uvmboot/lcow.go b/internal/tools/uvmboot/lcow.go index 008234ea66..4beb59f8df 100644 --- a/internal/tools/uvmboot/lcow.go +++ b/internal/tools/uvmboot/lcow.go @@ -6,7 +6,7 @@ import ( "os" "strings" - "github.com/Microsoft/hcsshim/internal/hcsoci" + "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/containerd/console" "github.com/sirupsen/logrus" @@ -168,7 +168,7 @@ func runLCOW(ctx context.Context, options *uvm.OptionsLCOW, c *cli.Context) erro } func execViaGcs(vm *uvm.UtilityVM, c *cli.Context) error { - cmd := hcsoci.Command(vm, "/bin/sh", "-c", c.String(execCommandLineArgName)) + cmd := cmd.Command(vm, "/bin/sh", "-c", c.String(execCommandLineArgName)) cmd.Log = logrus.NewEntry(logrus.StandardLogger()) if lcowUseTerminal { 
cmd.Spec.Terminal = true diff --git a/internal/tools/uvmboot/wcow.go b/internal/tools/uvmboot/wcow.go index 862eccf610..3347e3f8dd 100644 --- a/internal/tools/uvmboot/wcow.go +++ b/internal/tools/uvmboot/wcow.go @@ -10,7 +10,7 @@ import ( "path/filepath" "strings" - "github.com/Microsoft/hcsshim/internal/hcsoci" + "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/containerd/console" "github.com/sirupsen/logrus" @@ -85,7 +85,7 @@ var wcowCommand = cli.Command{ return err } if wcowCommandLine != "" { - cmd := hcsoci.Command(vm, "cmd.exe", "/c", wcowCommandLine) + cmd := cmd.Command(vm, "cmd.exe", "/c", wcowCommandLine) cmd.Spec.User.Username = `NT AUTHORITY\SYSTEM` cmd.Log = logrus.NewEntry(logrus.StandardLogger()) if wcowUseTerminal { diff --git a/internal/uvm/automanagedvhd.go b/internal/uvm/automanagedvhd.go new file mode 100644 index 0000000000..09ff9296a0 --- /dev/null +++ b/internal/uvm/automanagedvhd.go @@ -0,0 +1,27 @@ +package uvm + +import ( + "context" + "os" + + "github.com/Microsoft/hcsshim/internal/log" +) + +// AutoManagedVHD struct representing a VHD that will be cleaned up automatically. +type AutoManagedVHD struct { + hostPath string +} + +func NewAutoManagedVHD(hostPath string) *AutoManagedVHD { + return &AutoManagedVHD{ + hostPath: hostPath, + } +} + +// Release removes the vhd. +func (vhd *AutoManagedVHD) Release(ctx context.Context) error { + if err := os.Remove(vhd.hostPath); err != nil { + log.G(ctx).WithField("hostPath", vhd.hostPath).WithError(err).Error("failed to remove automanage-virtual-disk") + } + return nil +} diff --git a/internal/uvm/constants.go b/internal/uvm/constants.go index 1ddcf903ab..842a8a5e8c 100644 --- a/internal/uvm/constants.go +++ b/internal/uvm/constants.go @@ -16,6 +16,19 @@ const ( // DefaultVPMemSizeBytes is the default size of a VPMem device if the create request // doesn't specify. 
DefaultVPMemSizeBytes = 4 * 1024 * 1024 * 1024 // 4GB + + // LCOWMountPathPrefix is the path format in the LCOW UVM where non-global mounts, such + // as Plan9 mounts, are added + LCOWMountPathPrefix = "/mounts/m%d" + // LCOWGlobalMountPrefix is the path format in the LCOW UVM where global mounts are added + LCOWGlobalMountPrefix = "/run/mounts/m%d" + // LCOWNvidiaMountPath is the path in the LCOW UVM where nvidia tools are mounted; + // keep this value in sync with opengcs + LCOWNvidiaMountPath = "/run/nvidia" + // WCOWGlobalMountPrefix is the path prefix format in the WCOW UVM where mounts are added + WCOWGlobalMountPrefix = "C:\\mounts\\m%d" + // RootfsPath is part of the container's rootfs path + RootfsPath = "rootfs" ) var ( diff --git a/internal/wclayer/wclayer.go b/internal/wclayer/wclayer.go index dc40bf5194..9b1e06d50c 100644 --- a/internal/wclayer/wclayer.go +++ b/internal/wclayer/wclayer.go @@ -1,3 +1,6 @@ +// Package wclayer provides bindings to HCS's legacy layer management API and +// provides a higher level interface around these calls for container layer +// management. 
package wclayer import "github.com/Microsoft/go-winio/pkg/guid" diff --git a/test/functional/lcow_test.go b/test/functional/lcow_test.go index 0abef17f73..1684cdce40 100644 --- a/test/functional/lcow_test.go +++ b/test/functional/lcow_test.go @@ -13,9 +13,11 @@ import ( "testing" "time" + "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/hcsoci" "github.com/Microsoft/hcsshim/internal/lcow" + "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/osversion" testutilities "github.com/Microsoft/hcsshim/test/functional/utilities" @@ -189,12 +191,12 @@ func TestLCOWSimplePodScenario(t *testing.T) { if err := c1hcsSystem.Start(context.Background()); err != nil { t.Fatal(err) } - defer hcsoci.ReleaseResources(context.Background(), c1Resources, lcowUVM, true) + defer resources.ReleaseResources(context.Background(), c1Resources, lcowUVM, true) if err := c2hcsSystem.Start(context.Background()); err != nil { t.Fatal(err) } - defer hcsoci.ReleaseResources(context.Background(), c2Resources, lcowUVM, true) + defer resources.ReleaseResources(context.Background(), c2Resources, lcowUVM, true) // Start the init process in each container and grab it's stdout comparing to expected runInitProcess(t, c1hcsSystem, "hello lcow container one") @@ -208,7 +210,7 @@ func runInitProcess(t *testing.T, s cow.Container, expected string) { var errB bytes.Buffer ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - cmd := &hcsoci.Cmd{ + cmd := &cmd.Cmd{ Host: s, Stderr: &errB, Context: ctx, diff --git a/test/functional/test.go b/test/functional/test.go index 798fa6ab19..20a27561ec 100644 --- a/test/functional/test.go +++ b/test/functional/test.go @@ -9,6 +9,7 @@ import ( "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/hcsoci" + "github.com/Microsoft/hcsshim/internal/resources" 
"github.com/sirupsen/logrus" ) @@ -34,7 +35,7 @@ func init() { } -func CreateContainerTestWrapper(ctx context.Context, options *hcsoci.CreateOptions) (cow.Container, *hcsoci.Resources, error) { +func CreateContainerTestWrapper(ctx context.Context, options *hcsoci.CreateOptions) (cow.Container, *resources.Resources, error) { if pauseDurationOnCreateContainerFailure != 0 { options.DoNotReleaseResourcesOnFailure = true } @@ -42,7 +43,7 @@ func CreateContainerTestWrapper(ctx context.Context, options *hcsoci.CreateOptio if err != nil { logrus.Warnf("Test is pausing for %s for debugging CreateContainer failure", pauseDurationOnCreateContainerFailure) time.Sleep(pauseDurationOnCreateContainerFailure) - hcsoci.ReleaseResources(ctx, r, options.HostingSystem, true) + resources.ReleaseResources(ctx, r, options.HostingSystem, true) } return s, r, err } diff --git a/test/functional/wcow_test.go b/test/functional/wcow_test.go index 6e351e9e4d..a73c4de503 100644 --- a/test/functional/wcow_test.go +++ b/test/functional/wcow_test.go @@ -13,6 +13,8 @@ import ( "github.com/Microsoft/hcsshim" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/hcsoci" + layerspkg "github.com/Microsoft/hcsshim/internal/layers" + "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/schema1" "github.com/Microsoft/hcsshim/internal/schemaversion" "github.com/Microsoft/hcsshim/internal/uvm" @@ -378,12 +380,12 @@ func TestWCOWArgonShim(t *testing.T) { // For cleanup on failure defer func() { if argonShimMounted { - hcsoci.UnmountContainerLayers(context.Background(), append(imageLayers, argonShimScratchDir), "", nil, hcsoci.UnmountOperationAll) + layerspkg.UnmountContainerLayers(context.Background(), append(imageLayers, argonShimScratchDir), "", nil, layerspkg.UnmountOperationAll) } }() // This is a cheat but stops us re-writing exactly the same code just for test - argonShimLocalMountPath, err := 
hcsoci.MountContainerLayers(context.Background(), append(imageLayers, argonShimScratchDir), "", nil) + argonShimLocalMountPath, err := layerspkg.MountContainerLayers(context.Background(), append(imageLayers, argonShimScratchDir), "", nil) if err != nil { t.Fatal(err) } @@ -416,7 +418,7 @@ func TestWCOWArgonShim(t *testing.T) { } runShimCommands(t, argonShim) stopContainer(t, argonShim) - if err := hcsoci.UnmountContainerLayers(context.Background(), append(imageLayers, argonShimScratchDir), "", nil, hcsoci.UnmountOperationAll); err != nil { + if err := layerspkg.UnmountContainerLayers(context.Background(), append(imageLayers, argonShimScratchDir), "", nil, layerspkg.UnmountOperationAll); err != nil { t.Fatal(err) } argonShimMounted = false @@ -508,11 +510,11 @@ func TestWCOWArgonOciV1(t *testing.T) { defer os.RemoveAll(hostROSharedDirectory) // For cleanup on failure - var argonOci1Resources *hcsoci.Resources + var argonOci1Resources *resources.Resources var argonOci1 cow.Container defer func() { if argonOci1Mounted { - hcsoci.ReleaseResources(context.Background(), argonOci1Resources, nil, true) + resources.ReleaseResources(context.Background(), argonOci1Resources, nil, true) } }() @@ -535,7 +537,7 @@ func TestWCOWArgonOciV1(t *testing.T) { } runHcsCommands(t, argonOci1) stopContainer(t, argonOci1) - if err := hcsoci.ReleaseResources(context.Background(), argonOci1Resources, nil, true); err != nil { + if err := resources.ReleaseResources(context.Background(), argonOci1Resources, nil, true); err != nil { t.Fatal(err) } argonOci1Mounted = false @@ -563,11 +565,11 @@ func TestWCOWXenonOciV1(t *testing.T) { // } // For cleanup on failure - var xenonOci1Resources *hcsoci.Resources + var xenonOci1Resources *resources.Resources var xenonOci1 cow.Container defer func() { if xenonOci1Mounted { - hcsoci.ReleaseResources(context.Background(), xenonOci1Resources, nil, true) + resources.ReleaseResources(context.Background(), xenonOci1Resources, nil, true) } }() @@ -591,7 +593,7 
@@ func TestWCOWXenonOciV1(t *testing.T) { } runHcsCommands(t, xenonOci1) stopContainer(t, xenonOci1) - if err := hcsoci.ReleaseResources(context.Background(), xenonOci1Resources, nil, true); err != nil { + if err := resources.ReleaseResources(context.Background(), xenonOci1Resources, nil, true); err != nil { t.Fatal(err) } xenonOci1Mounted = false @@ -614,11 +616,11 @@ func TestWCOWArgonOciV2(t *testing.T) { defer os.RemoveAll(hostROSharedDirectory) // For cleanup on failure - var argonOci2Resources *hcsoci.Resources + var argonOci2Resources *resources.Resources var argonOci2 cow.Container defer func() { if argonOci2Mounted { - hcsoci.ReleaseResources(context.Background(), argonOci2Resources, nil, true) + resources.ReleaseResources(context.Background(), argonOci2Resources, nil, true) } }() @@ -641,7 +643,7 @@ func TestWCOWArgonOciV2(t *testing.T) { } runHcsCommands(t, argonOci2) stopContainer(t, argonOci2) - if err := hcsoci.ReleaseResources(context.Background(), argonOci2Resources, nil, true); err != nil { + if err := resources.ReleaseResources(context.Background(), argonOci2Resources, nil, true); err != nil { t.Fatal(err) } argonOci2Mounted = false @@ -670,12 +672,12 @@ func TestWCOWXenonOciV2(t *testing.T) { t.Fatalf("LocateUVMFolder failed %s", err) } - var xenonOci2Resources *hcsoci.Resources + var xenonOci2Resources *resources.Resources var xenonOci2 cow.Container var xenonOci2UVM *uvm.UtilityVM defer func() { if xenonOci2Mounted { - hcsoci.ReleaseResources(context.Background(), xenonOci2Resources, xenonOci2UVM, true) + resources.ReleaseResources(context.Background(), xenonOci2Resources, xenonOci2UVM, true) } if xenonOci2UVMCreated { xenonOci2UVM.Close() @@ -721,7 +723,7 @@ func TestWCOWXenonOciV2(t *testing.T) { } runHcsCommands(t, xenonOci2) stopContainer(t, xenonOci2) - if err := hcsoci.ReleaseResources(context.Background(), xenonOci2Resources, xenonOci2UVM, true); err != nil { + if err := resources.ReleaseResources(context.Background(), 
xenonOci2Resources, xenonOci2UVM, true); err != nil { t.Fatal(err) } xenonOci2Mounted = false From 94556e86d3db0e9e09390ea36e8aaf608fcbc9e8 Mon Sep 17 00:00:00 2001 From: Daniel Canter Date: Tue, 18 Aug 2020 14:56:22 -0700 Subject: [PATCH 02/20] Fix flaky cri-containerd LCOW events test * Move the context.WithTimeout after the setup/launch of the sandbox/uvm to avoid hitting the timeout and failing the test. Signed-off-by: Daniel Canter --- test/cri-containerd/container_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/cri-containerd/container_test.go b/test/cri-containerd/container_test.go index f1dd71580c..b99336456b 100644 --- a/test/cri-containerd/container_test.go +++ b/test/cri-containerd/container_test.go @@ -148,9 +148,6 @@ func Test_RunContainer_Events_LCOW(t *testing.T) { podctx, podcancel := context.WithCancel(context.Background()) defer podcancel() - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - targetNamespace := "k8s.io" sandboxRequest := &runtime.RunPodSandboxRequest{ @@ -185,6 +182,9 @@ func Test_RunContainer_Events_LCOW(t *testing.T) { SandboxConfig: sandboxRequest.Config, } + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + topicNames, filters := getTargetRunTopics() eventService := newTestEventService(t) stream, errs := eventService.Subscribe(ctx, filters...) From 301c83a30e7cade283b2afd5dc4e4068181662c3 Mon Sep 17 00:00:00 2001 From: Daniel Canter Date: Tue, 25 Aug 2020 20:17:06 -0700 Subject: [PATCH 03/20] Fix LpIndex JSON annotation * Fix the LpIndex JSON annotation in the LogicalProcessor v2 HCS schema from being the wrong value. 
Signed-off-by: Daniel Canter --- internal/schema2/logical_processor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/schema2/logical_processor.go b/internal/schema2/logical_processor.go index 8436df02a0..676ad300dc 100644 --- a/internal/schema2/logical_processor.go +++ b/internal/schema2/logical_processor.go @@ -10,7 +10,7 @@ package hcsschema type LogicalProcessor struct { - LpIndex uint32 `json:"LogicalProcessorCount,omitempty"` + LpIndex uint32 `json:"LpIndex,omitempty"` NodeNumber uint8 `json:"NodeNumber, omitempty"` PackageId uint32 `json:"PackageId, omitempty"` CoreId uint32 `json:"CoreId, omitempty"` From ea4a3ab6d5fe9ecac0b6e6e7d90261c0a378620c Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Tue, 16 Jun 2020 12:40:57 -0700 Subject: [PATCH 04/20] Add ability to query NT obj directories * create new pkg `winapi` that contains the low level syscall bindings for windows dll api calls * add device querying code with CM* api to new package `windevice` * add code to enumerate NT object directories in pkg `winobjdir` Signed-off-by: Kathryn Baldauf --- internal/safefile/safeopen.go | 158 +++++++--------------- internal/safefile/safeopen_admin_test.go | 8 +- internal/safefile/zsyscall_windows.go | 79 ----------- internal/wclayer/baselayer.go | 7 +- internal/wclayer/legacy.go | 17 +-- internal/winapi/devices.go | 13 ++ internal/winapi/errors.go | 12 ++ internal/winapi/heapalloc.go | 4 + internal/winapi/ntfs.go | 61 +++++++++ internal/winapi/utils.go | 60 +++++++++ internal/winapi/winapi.go | 5 + internal/winapi/winapi_test.go | 65 +++++++++ internal/winapi/zsyscall_windows.go | 163 +++++++++++++++++++++++ internal/windevice/devicequery.go | 121 +++++++++++++++++ internal/winobjdir/object_dir.go | 84 ++++++++++++ test/go.sum | 14 +- 16 files changed, 659 insertions(+), 212 deletions(-) delete mode 100644 internal/safefile/zsyscall_windows.go create mode 100644 internal/winapi/devices.go create mode 100644 internal/winapi/errors.go 
create mode 100644 internal/winapi/heapalloc.go create mode 100644 internal/winapi/ntfs.go create mode 100644 internal/winapi/utils.go create mode 100644 internal/winapi/winapi.go create mode 100644 internal/winapi/winapi_test.go create mode 100644 internal/winapi/zsyscall_windows.go create mode 100644 internal/windevice/devicequery.go create mode 100644 internal/winobjdir/object_dir.go diff --git a/internal/safefile/safeopen.go b/internal/safefile/safeopen.go index f31edfaf86..d484c212cd 100644 --- a/internal/safefile/safeopen.go +++ b/internal/safefile/safeopen.go @@ -11,72 +11,11 @@ import ( "unsafe" "github.com/Microsoft/hcsshim/internal/longpath" + "github.com/Microsoft/hcsshim/internal/winapi" winio "github.com/Microsoft/go-winio" ) -//go:generate go run $GOROOT\src\syscall\mksyscall_windows.go -output zsyscall_windows.go safeopen.go - -//sys ntCreateFile(handle *uintptr, accessMask uint32, oa *objectAttributes, iosb *ioStatusBlock, allocationSize *uint64, fileAttributes uint32, shareAccess uint32, createDisposition uint32, createOptions uint32, eaBuffer *byte, eaLength uint32) (status uint32) = ntdll.NtCreateFile -//sys ntSetInformationFile(handle uintptr, iosb *ioStatusBlock, information uintptr, length uint32, class uint32) (status uint32) = ntdll.NtSetInformationFile -//sys rtlNtStatusToDosError(status uint32) (winerr error) = ntdll.RtlNtStatusToDosErrorNoTeb -//sys localAlloc(flags uint32, size int) (ptr uintptr) = kernel32.LocalAlloc -//sys localFree(ptr uintptr) = kernel32.LocalFree - -type ioStatusBlock struct { - Status, Information uintptr -} - -type objectAttributes struct { - Length uintptr - RootDirectory uintptr - ObjectName uintptr - Attributes uintptr - SecurityDescriptor uintptr - SecurityQoS uintptr -} - -type unicodeString struct { - Length uint16 - MaximumLength uint16 - Buffer uintptr -} - -type fileLinkInformation struct { - ReplaceIfExists bool - RootDirectory uintptr - FileNameLength uint32 - FileName [1]uint16 -} - -type 
fileDispositionInformationEx struct { - Flags uintptr -} - -const ( - _FileLinkInformation = 11 - _FileDispositionInformationEx = 64 - - FILE_READ_ATTRIBUTES = 0x0080 - FILE_WRITE_ATTRIBUTES = 0x0100 - DELETE = 0x10000 - - FILE_OPEN = 1 - FILE_CREATE = 2 - - FILE_DIRECTORY_FILE = 0x00000001 - FILE_SYNCHRONOUS_IO_NONALERT = 0x00000020 - FILE_DELETE_ON_CLOSE = 0x00001000 - FILE_OPEN_FOR_BACKUP_INTENT = 0x00004000 - FILE_OPEN_REPARSE_POINT = 0x00200000 - - FILE_DISPOSITION_DELETE = 0x00000001 - - _OBJ_DONT_REPARSE = 0x1000 - - _STATUS_REPARSE_POINT_ENCOUNTERED = 0xC000050B -) - func OpenRoot(path string) (*os.File, error) { longpath, err := longpath.LongAbs(path) if err != nil { @@ -85,16 +24,24 @@ func OpenRoot(path string) (*os.File, error) { return winio.OpenForBackup(longpath, syscall.GENERIC_READ, syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE, syscall.OPEN_EXISTING) } -func ntRelativePath(path string) ([]uint16, error) { +func cleanGoStringRelativePath(path string) (string, error) { path = filepath.Clean(path) if strings.Contains(path, ":") { // Since alternate data streams must follow the file they // are attached to, finding one here (out of order) is invalid. 
- return nil, errors.New("path contains invalid character `:`") + return "", errors.New("path contains invalid character `:`") } fspath := filepath.FromSlash(path) if len(fspath) > 0 && fspath[0] == '\\' { - return nil, errors.New("expected relative path") + return "", errors.New("expected relative path") + } + return fspath, nil +} + +func ntRelativePath(path string) ([]uint16, error) { + fspath, err := cleanGoStringRelativePath(path) + if err != nil { + return nil, err } path16 := utf16.Encode(([]rune)(fspath)) @@ -110,11 +57,11 @@ func ntRelativePath(path string) ([]uint16, error) { func openRelativeInternal(path string, root *os.File, accessMask uint32, shareFlags uint32, createDisposition uint32, flags uint32) (*os.File, error) { var ( h uintptr - iosb ioStatusBlock - oa objectAttributes + iosb winapi.IOStatusBlock + oa winapi.ObjectAttributes ) - path16, err := ntRelativePath(path) + cleanRelativePath, err := cleanGoStringRelativePath(path) if err != nil { return nil, err } @@ -123,20 +70,16 @@ func openRelativeInternal(path string, root *os.File, accessMask uint32, shareFl return nil, errors.New("missing root directory") } - upathBuffer := localAlloc(0, int(unsafe.Sizeof(unicodeString{}))+len(path16)*2) - defer localFree(upathBuffer) - - upath := (*unicodeString)(unsafe.Pointer(upathBuffer)) - upath.Length = uint16(len(path16) * 2) - upath.MaximumLength = upath.Length - upath.Buffer = upathBuffer + unsafe.Sizeof(*upath) - copy((*[32768]uint16)(unsafe.Pointer(upath.Buffer))[:], path16) + pathUnicode, err := winapi.NewUnicodeString(cleanRelativePath) + if err != nil { + return nil, err + } oa.Length = unsafe.Sizeof(oa) - oa.ObjectName = upathBuffer + oa.ObjectName = uintptr(unsafe.Pointer(pathUnicode)) oa.RootDirectory = uintptr(root.Fd()) - oa.Attributes = _OBJ_DONT_REPARSE - status := ntCreateFile( + oa.Attributes = winapi.OBJ_DONT_REPARSE + status := winapi.NtCreateFile( &h, accessMask|syscall.SYNCHRONIZE, &oa, @@ -145,12 +88,12 @@ func 
openRelativeInternal(path string, root *os.File, accessMask uint32, shareFl 0, shareFlags, createDisposition, - FILE_OPEN_FOR_BACKUP_INTENT|FILE_SYNCHRONOUS_IO_NONALERT|flags, + winapi.FILE_OPEN_FOR_BACKUP_INTENT|winapi.FILE_SYNCHRONOUS_IO_NONALERT|flags, nil, 0, ) if status != 0 { - return nil, rtlNtStatusToDosError(status) + return nil, winapi.RtlNtStatusToDosError(status) } fullPath, err := longpath.LongAbs(filepath.Join(root.Name(), path)) @@ -182,7 +125,7 @@ func LinkRelative(oldname string, oldroot *os.File, newname string, newroot *os. oldroot, syscall.FILE_WRITE_ATTRIBUTES, syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE, - FILE_OPEN, + winapi.FILE_OPEN, 0, ) if err != nil { @@ -199,8 +142,8 @@ func LinkRelative(oldname string, oldroot *os.File, newname string, newroot *os. newroot, syscall.GENERIC_READ, syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE, - FILE_OPEN, - FILE_DIRECTORY_FILE) + winapi.FILE_OPEN, + winapi.FILE_DIRECTORY_FILE) if err != nil { return &os.LinkError{Op: "link", Old: oldf.Name(), New: filepath.Join(newroot.Name(), newname), Err: err} } @@ -211,7 +154,7 @@ func LinkRelative(oldname string, oldroot *os.File, newname string, newroot *os. return err } if (fi.FileAttributes & syscall.FILE_ATTRIBUTE_REPARSE_POINT) != 0 { - return &os.LinkError{Op: "link", Old: oldf.Name(), New: filepath.Join(newroot.Name(), newname), Err: rtlNtStatusToDosError(_STATUS_REPARSE_POINT_ENCOUNTERED)} + return &os.LinkError{Op: "link", Old: oldf.Name(), New: filepath.Join(newroot.Name(), newname), Err: winapi.RtlNtStatusToDosError(winapi.STATUS_REPARSE_POINT_ENCOUNTERED)} } } else { @@ -227,24 +170,25 @@ func LinkRelative(oldname string, oldroot *os.File, newname string, newroot *os. 
return err } - size := int(unsafe.Offsetof(fileLinkInformation{}.FileName)) + len(newbase16)*2 - linkinfoBuffer := localAlloc(0, size) - defer localFree(linkinfoBuffer) - linkinfo := (*fileLinkInformation)(unsafe.Pointer(linkinfoBuffer)) + size := int(unsafe.Offsetof(winapi.FileLinkInformation{}.FileName)) + len(newbase16)*2 + linkinfoBuffer := winapi.LocalAlloc(0, size) + defer winapi.LocalFree(linkinfoBuffer) + + linkinfo := (*winapi.FileLinkInformation)(unsafe.Pointer(linkinfoBuffer)) linkinfo.RootDirectory = parent.Fd() linkinfo.FileNameLength = uint32(len(newbase16) * 2) copy((*[32768]uint16)(unsafe.Pointer(&linkinfo.FileName[0]))[:], newbase16) - var iosb ioStatusBlock - status := ntSetInformationFile( + var iosb winapi.IOStatusBlock + status := winapi.NtSetInformationFile( oldf.Fd(), &iosb, linkinfoBuffer, uint32(size), - _FileLinkInformation, + winapi.FileLinkInformationClass, ) if status != 0 { - return &os.LinkError{Op: "link", Old: oldf.Name(), New: filepath.Join(parent.Name(), newbase), Err: rtlNtStatusToDosError(status)} + return &os.LinkError{Op: "link", Old: oldf.Name(), New: filepath.Join(parent.Name(), newbase), Err: winapi.RtlNtStatusToDosError(status)} } return nil @@ -252,17 +196,17 @@ func LinkRelative(oldname string, oldroot *os.File, newname string, newroot *os. // deleteOnClose marks a file to be deleted when the handle is closed. 
func deleteOnClose(f *os.File) error { - disposition := fileDispositionInformationEx{Flags: FILE_DISPOSITION_DELETE} - var iosb ioStatusBlock - status := ntSetInformationFile( + disposition := winapi.FileDispositionInformationEx{Flags: winapi.FILE_DISPOSITION_DELETE} + var iosb winapi.IOStatusBlock + status := winapi.NtSetInformationFile( f.Fd(), &iosb, uintptr(unsafe.Pointer(&disposition)), uint32(unsafe.Sizeof(disposition)), - _FileDispositionInformationEx, + winapi.FileDispositionInformationExClass, ) if status != 0 { - return rtlNtStatusToDosError(status) + return winapi.RtlNtStatusToDosError(status) } return nil } @@ -291,10 +235,10 @@ func RemoveRelative(path string, root *os.File) error { f, err := openRelativeInternal( path, root, - FILE_READ_ATTRIBUTES|FILE_WRITE_ATTRIBUTES|DELETE, + winapi.FILE_READ_ATTRIBUTES|winapi.FILE_WRITE_ATTRIBUTES|winapi.DELETE, syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE, - FILE_OPEN, - FILE_OPEN_REPARSE_POINT) + winapi.FILE_OPEN, + winapi.FILE_OPEN_REPARSE_POINT) if err == nil { defer f.Close() err = deleteOnClose(f) @@ -385,8 +329,8 @@ func MkdirRelative(path string, root *os.File) error { root, 0, syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE, - FILE_CREATE, - FILE_DIRECTORY_FILE) + winapi.FILE_CREATE, + winapi.FILE_DIRECTORY_FILE) if err == nil { f.Close() } else { @@ -401,10 +345,10 @@ func LstatRelative(path string, root *os.File) (os.FileInfo, error) { f, err := openRelativeInternal( path, root, - FILE_READ_ATTRIBUTES, + winapi.FILE_READ_ATTRIBUTES, syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE, - FILE_OPEN, - FILE_OPEN_REPARSE_POINT) + winapi.FILE_OPEN, + winapi.FILE_OPEN_REPARSE_POINT) if err != nil { return nil, &os.PathError{Op: "stat", Path: filepath.Join(root.Name(), path), Err: err} } @@ -421,7 +365,7 @@ func EnsureNotReparsePointRelative(path string, root *os.File) error { root, 0, 
syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE, - FILE_OPEN, + winapi.FILE_OPEN, 0) if err != nil { return err diff --git a/internal/safefile/safeopen_admin_test.go b/internal/safefile/safeopen_admin_test.go index 0896917851..99cee4a8e2 100644 --- a/internal/safefile/safeopen_admin_test.go +++ b/internal/safefile/safeopen_admin_test.go @@ -7,6 +7,8 @@ import ( "path/filepath" "syscall" "testing" + + "github.com/Microsoft/hcsshim/internal/winapi" ) func TestOpenRelative(t *testing.T) { @@ -25,7 +27,7 @@ func TestOpenRelative(t *testing.T) { defer root.Close() // Create a file - f, err := OpenRelative("foo", root, 0, syscall.FILE_SHARE_READ, FILE_CREATE, 0) + f, err := OpenRelative("foo", root, 0, syscall.FILE_SHARE_READ, winapi.FILE_CREATE, 0) if err != nil { t.Fatal(err) } @@ -57,7 +59,7 @@ func TestOpenRelative(t *testing.T) { } // Make sure opens cannot happen through the symlink - f, err = OpenRelative("dsymlink/foo", root, 0, syscall.FILE_SHARE_READ, FILE_CREATE, 0) + f, err = OpenRelative("dsymlink/foo", root, 0, syscall.FILE_SHARE_READ, winapi.FILE_CREATE, 0) if err == nil { f.Close() t.Fatal("created file in wrong tree!") @@ -112,7 +114,7 @@ func TestOpenRelative(t *testing.T) { } // Make sure it's not possible to escape with .. (NT doesn't support .. at the kernel level) - f, err = OpenRelative("..", root, syscall.GENERIC_READ, syscall.FILE_SHARE_READ, FILE_OPEN, 0) + f, err = OpenRelative("..", root, syscall.GENERIC_READ, syscall.FILE_SHARE_READ, winapi.FILE_OPEN, 0) if err == nil { t.Fatal("escaped the directory") } diff --git a/internal/safefile/zsyscall_windows.go b/internal/safefile/zsyscall_windows.go deleted file mode 100644 index 709b9d3475..0000000000 --- a/internal/safefile/zsyscall_windows.go +++ /dev/null @@ -1,79 +0,0 @@ -// Code generated by 'go generate'; DO NOT EDIT. 
- -package safefile - -import ( - "syscall" - "unsafe" - - "golang.org/x/sys/windows" -) - -var _ unsafe.Pointer - -// Do the interface allocations only once for common -// Errno values. -const ( - errnoERROR_IO_PENDING = 997 -) - -var ( - errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING) -) - -// errnoErr returns common boxed Errno values, to prevent -// allocations at runtime. -func errnoErr(e syscall.Errno) error { - switch e { - case 0: - return nil - case errnoERROR_IO_PENDING: - return errERROR_IO_PENDING - } - // TODO: add more here, after collecting data on the common - // error values see on Windows. (perhaps when running - // all.bat?) - return e -} - -var ( - modntdll = windows.NewLazySystemDLL("ntdll.dll") - modkernel32 = windows.NewLazySystemDLL("kernel32.dll") - - procNtCreateFile = modntdll.NewProc("NtCreateFile") - procNtSetInformationFile = modntdll.NewProc("NtSetInformationFile") - procRtlNtStatusToDosErrorNoTeb = modntdll.NewProc("RtlNtStatusToDosErrorNoTeb") - procLocalAlloc = modkernel32.NewProc("LocalAlloc") - procLocalFree = modkernel32.NewProc("LocalFree") -) - -func ntCreateFile(handle *uintptr, accessMask uint32, oa *objectAttributes, iosb *ioStatusBlock, allocationSize *uint64, fileAttributes uint32, shareAccess uint32, createDisposition uint32, createOptions uint32, eaBuffer *byte, eaLength uint32) (status uint32) { - r0, _, _ := syscall.Syscall12(procNtCreateFile.Addr(), 11, uintptr(unsafe.Pointer(handle)), uintptr(accessMask), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(unsafe.Pointer(allocationSize)), uintptr(fileAttributes), uintptr(shareAccess), uintptr(createDisposition), uintptr(createOptions), uintptr(unsafe.Pointer(eaBuffer)), uintptr(eaLength), 0) - status = uint32(r0) - return -} - -func ntSetInformationFile(handle uintptr, iosb *ioStatusBlock, information uintptr, length uint32, class uint32) (status uint32) { - r0, _, _ := syscall.Syscall6(procNtSetInformationFile.Addr(), 5, 
uintptr(handle), uintptr(unsafe.Pointer(iosb)), uintptr(information), uintptr(length), uintptr(class), 0) - status = uint32(r0) - return -} - -func rtlNtStatusToDosError(status uint32) (winerr error) { - r0, _, _ := syscall.Syscall(procRtlNtStatusToDosErrorNoTeb.Addr(), 1, uintptr(status), 0, 0) - if r0 != 0 { - winerr = syscall.Errno(r0) - } - return -} - -func localAlloc(flags uint32, size int) (ptr uintptr) { - r0, _, _ := syscall.Syscall(procLocalAlloc.Addr(), 2, uintptr(flags), uintptr(size), 0) - ptr = uintptr(r0) - return -} - -func localFree(ptr uintptr) { - syscall.Syscall(procLocalFree.Addr(), 1, uintptr(ptr), 0, 0) - return -} diff --git a/internal/wclayer/baselayer.go b/internal/wclayer/baselayer.go index 20cafcf325..3ec708d1ed 100644 --- a/internal/wclayer/baselayer.go +++ b/internal/wclayer/baselayer.go @@ -11,6 +11,7 @@ import ( "github.com/Microsoft/hcsshim/internal/hcserror" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/safefile" + "github.com/Microsoft/hcsshim/internal/winapi" "go.opencensus.io/trace" ) @@ -37,7 +38,7 @@ type dirInfo struct { func reapplyDirectoryTimes(root *os.File, dis []dirInfo) error { for i := range dis { di := &dis[len(dis)-i-1] // reverse order: process child directories first - f, err := safefile.OpenRelative(di.path, root, syscall.GENERIC_READ|syscall.GENERIC_WRITE, syscall.FILE_SHARE_READ, safefile.FILE_OPEN, safefile.FILE_DIRECTORY_FILE|syscall.FILE_FLAG_OPEN_REPARSE_POINT) + f, err := safefile.OpenRelative(di.path, root, syscall.GENERIC_READ|syscall.GENERIC_WRITE, syscall.FILE_SHARE_READ, winapi.FILE_OPEN, winapi.FILE_DIRECTORY_FILE|syscall.FILE_FLAG_OPEN_REPARSE_POINT) if err != nil { return err } @@ -93,12 +94,12 @@ func (w *baseLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo) (err e extraFlags := uint32(0) if fileInfo.FileAttributes&syscall.FILE_ATTRIBUTE_DIRECTORY != 0 { - extraFlags |= safefile.FILE_DIRECTORY_FILE + extraFlags |= winapi.FILE_DIRECTORY_FILE w.dirInfo 
= append(w.dirInfo, dirInfo{name, *fileInfo}) } mode := uint32(syscall.GENERIC_READ | syscall.GENERIC_WRITE | winio.WRITE_DAC | winio.WRITE_OWNER | winio.ACCESS_SYSTEM_SECURITY) - f, err = safefile.OpenRelative(name, w.root, mode, syscall.FILE_SHARE_READ, safefile.FILE_CREATE, extraFlags) + f, err = safefile.OpenRelative(name, w.root, mode, syscall.FILE_SHARE_READ, winapi.FILE_CREATE, extraFlags) if err != nil { return hcserror.New(err, "Failed to safefile.OpenRelative", name) } diff --git a/internal/wclayer/legacy.go b/internal/wclayer/legacy.go index a8a4db6f3f..dc3caf7510 100644 --- a/internal/wclayer/legacy.go +++ b/internal/wclayer/legacy.go @@ -15,6 +15,7 @@ import ( "github.com/Microsoft/go-winio" "github.com/Microsoft/hcsshim/internal/longpath" "github.com/Microsoft/hcsshim/internal/safefile" + "github.com/Microsoft/hcsshim/internal/winapi" ) var errorIterationCanceled = errors.New("") @@ -472,8 +473,8 @@ func copyFileWithMetadata(srcRoot, destRoot *os.File, subPath string, isDir bool srcRoot, syscall.GENERIC_READ|winio.ACCESS_SYSTEM_SECURITY, syscall.FILE_SHARE_READ, - safefile.FILE_OPEN, - safefile.FILE_OPEN_REPARSE_POINT) + winapi.FILE_OPEN, + winapi.FILE_OPEN_REPARSE_POINT) if err != nil { return nil, err } @@ -488,14 +489,14 @@ func copyFileWithMetadata(srcRoot, destRoot *os.File, subPath string, isDir bool extraFlags := uint32(0) if isDir { - extraFlags |= safefile.FILE_DIRECTORY_FILE + extraFlags |= winapi.FILE_DIRECTORY_FILE } dest, err := safefile.OpenRelative( subPath, destRoot, syscall.GENERIC_READ|syscall.GENERIC_WRITE|winio.WRITE_DAC|winio.WRITE_OWNER|winio.ACCESS_SYSTEM_SECURITY, syscall.FILE_SHARE_READ, - safefile.FILE_CREATE, + winapi.FILE_CREATE, extraFlags) if err != nil { return nil, err @@ -595,7 +596,7 @@ func (w *legacyLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo) erro if !hasPathPrefix(name, utilityVMFilesPath) && name != utilityVMFilesPath { return errors.New("invalid UtilityVM layer") } - createDisposition := 
uint32(safefile.FILE_OPEN) + createDisposition := uint32(winapi.FILE_OPEN) if (fileInfo.FileAttributes & syscall.FILE_ATTRIBUTE_DIRECTORY) != 0 { st, err := safefile.LstatRelative(name, w.destRoot) if err != nil && !os.IsNotExist(err) { @@ -622,7 +623,7 @@ func (w *legacyLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo) erro if err != nil && !os.IsNotExist(err) { return err } - createDisposition = safefile.FILE_CREATE + createDisposition = winapi.FILE_CREATE } f, err := safefile.OpenRelative( @@ -631,7 +632,7 @@ func (w *legacyLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo) erro syscall.GENERIC_READ|syscall.GENERIC_WRITE|winio.WRITE_DAC|winio.WRITE_OWNER|winio.ACCESS_SYSTEM_SECURITY, syscall.FILE_SHARE_READ, createDisposition, - safefile.FILE_OPEN_REPARSE_POINT, + winapi.FILE_OPEN_REPARSE_POINT, ) if err != nil { return err @@ -668,7 +669,7 @@ func (w *legacyLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo) erro w.currentIsDir = true } - f, err := safefile.OpenRelative(fname, w.root, syscall.GENERIC_READ|syscall.GENERIC_WRITE, syscall.FILE_SHARE_READ, safefile.FILE_CREATE, 0) + f, err := safefile.OpenRelative(fname, w.root, syscall.GENERIC_READ|syscall.GENERIC_WRITE, syscall.FILE_SHARE_READ, winapi.FILE_CREATE, 0) if err != nil { return err } diff --git a/internal/winapi/devices.go b/internal/winapi/devices.go new file mode 100644 index 0000000000..df28ea2421 --- /dev/null +++ b/internal/winapi/devices.go @@ -0,0 +1,13 @@ +package winapi + +import "github.com/Microsoft/go-winio/pkg/guid" + +//sys CMGetDeviceIDListSize(pulLen *uint32, pszFilter *byte, uFlags uint32) (hr error) = cfgmgr32.CM_Get_Device_ID_List_SizeA +//sys CMGetDeviceIDList(pszFilter *byte, buffer *byte, bufferLen uint32, uFlags uint32) (hr error)= cfgmgr32.CM_Get_Device_ID_ListA +//sys CMLocateDevNode(pdnDevInst *uint32, pDeviceID string, uFlags uint32) (hr error) = cfgmgr32.CM_Locate_DevNodeW +//sys CMGetDevNodeProperty(dnDevInst uint32, propertyKey *DevPropKey, 
propertyType *uint32, propertyBuffer *uint16, propertyBufferSize *uint32, uFlags uint32) (hr error) = cfgmgr32.CM_Get_DevNode_PropertyW + +type DevPropKey struct { + Fmtid guid.GUID + Pid uint32 +} diff --git a/internal/winapi/errors.go b/internal/winapi/errors.go new file mode 100644 index 0000000000..1ca86f4c3c --- /dev/null +++ b/internal/winapi/errors.go @@ -0,0 +1,12 @@ +package winapi + +//sys RtlNtStatusToDosError(status uint32) (winerr error) = ntdll.RtlNtStatusToDosError + +const ( + STATUS_REPARSE_POINT_ENCOUNTERED = 0xC000050B + ERROR_NO_MORE_ITEMS = 0x103 +) + +func NTSuccess(status uint32) bool { + return status == 0 +} diff --git a/internal/winapi/heapalloc.go b/internal/winapi/heapalloc.go new file mode 100644 index 0000000000..53f62948c9 --- /dev/null +++ b/internal/winapi/heapalloc.go @@ -0,0 +1,4 @@ +package winapi + +//sys LocalAlloc(flags uint32, size int) (ptr uintptr) = kernel32.LocalAlloc +//sys LocalFree(ptr uintptr) = kernel32.LocalFree diff --git a/internal/winapi/ntfs.go b/internal/winapi/ntfs.go new file mode 100644 index 0000000000..ab5daea782 --- /dev/null +++ b/internal/winapi/ntfs.go @@ -0,0 +1,61 @@ +package winapi + +//sys NtCreateFile(handle *uintptr, accessMask uint32, oa *ObjectAttributes, iosb *IOStatusBlock, allocationSize *uint64, fileAttributes uint32, shareAccess uint32, createDisposition uint32, createOptions uint32, eaBuffer *byte, eaLength uint32) (status uint32) = ntdll.NtCreateFile +//sys NtSetInformationFile(handle uintptr, iosb *IOStatusBlock, information uintptr, length uint32, class uint32) (status uint32) = ntdll.NtSetInformationFile + +//sys NtOpenDirectoryObject(handle *uintptr, accessMask uint32, oa *ObjectAttributes) (status uint32) = ntdll.NtOpenDirectoryObject +//sys NtQueryDirectoryObject(handle uintptr, buffer *byte, length uint32, singleEntry bool, restartScan bool, context *uint32, returnLength *uint32)(status uint32) = ntdll.NtQueryDirectoryObject + +const ( + FileLinkInformationClass = 11 + 
FileDispositionInformationExClass = 64 + + FILE_READ_ATTRIBUTES = 0x0080 + FILE_WRITE_ATTRIBUTES = 0x0100 + DELETE = 0x10000 + + FILE_OPEN = 1 + FILE_CREATE = 2 + + FILE_LIST_DIRECTORY = 0x00000001 + FILE_DIRECTORY_FILE = 0x00000001 + FILE_SYNCHRONOUS_IO_NONALERT = 0x00000020 + FILE_OPEN_FOR_BACKUP_INTENT = 0x00004000 + FILE_OPEN_REPARSE_POINT = 0x00200000 + + FILE_DISPOSITION_DELETE = 0x00000001 + + OBJ_DONT_REPARSE = 0x1000 + + STATUS_MORE_ENTRIES = 0x105 + STATUS_NO_MORE_ENTRIES = 0x8000001a +) + +type FileDispositionInformationEx struct { + Flags uintptr +} + +type IOStatusBlock struct { + Status, Information uintptr +} + +type ObjectAttributes struct { + Length uintptr + RootDirectory uintptr + ObjectName uintptr + Attributes uintptr + SecurityDescriptor uintptr + SecurityQoS uintptr +} + +type ObjectDirectoryInformation struct { + Name UnicodeString + TypeName UnicodeString +} + +type FileLinkInformation struct { + ReplaceIfExists bool + RootDirectory uintptr + FileNameLength uint32 + FileName [1]uint16 +} diff --git a/internal/winapi/utils.go b/internal/winapi/utils.go new file mode 100644 index 0000000000..f3055d4175 --- /dev/null +++ b/internal/winapi/utils.go @@ -0,0 +1,60 @@ +package winapi + +import ( + "errors" + "syscall" + "unicode/utf16" + "unsafe" +) + +type UnicodeString struct { + Length uint16 + MaximumLength uint16 + Buffer *uint16 +} + +//String converts a UnicodeString to a golang string +func (uni UnicodeString) String() string { + p := (*[0xffff]uint16)(unsafe.Pointer(uni.Buffer)) + + // UnicodeString is not guaranteed to be null terminated, therefore + // use the UnicodeString's Length field + lengthInChars := uni.Length / 2 + return syscall.UTF16ToString(p[:lengthInChars]) +} + +// NewUnicodeString allocates a new UnicodeString and copies `s` into +// the buffer of the new UnicodeString. 
+func NewUnicodeString(s string) (*UnicodeString, error) { + ws := utf16.Encode(([]rune)(s)) + if len(ws) > 32767 { + return nil, syscall.ENAMETOOLONG + } + + uni := &UnicodeString{ + Length: uint16(len(ws) * 2), + MaximumLength: uint16(len(ws) * 2), + Buffer: &make([]uint16, len(ws))[0], + } + copy((*[32768]uint16)(unsafe.Pointer(uni.Buffer))[:], ws) + return uni, nil +} + +// ConvertStringSetToSlice is a helper function used to convert the contents of +// `buf` into a string slice. `buf` contains a set of null terminated strings +// with an additional null at the end to indicate the end of the set. +func ConvertStringSetToSlice(buf []byte) ([]string, error) { + var results []string + prev := 0 + for i := range buf { + if buf[i] == 0 { + if prev == i { + // found two null characters in a row, return result + return results, nil + } + results = append(results, string(buf[prev:i])) + prev = i + 1 + } + } + return nil, errors.New("string set malformed: missing null terminator at end of buffer") +} diff --git a/internal/winapi/winapi.go b/internal/winapi/winapi.go new file mode 100644 index 0000000000..26e89a0b33 --- /dev/null +++ b/internal/winapi/winapi.go @@ -0,0 +1,5 @@ +/*Package winapi contains various low-level bindings to Windows APIs. It can +be thought of as an extension to golang.org/x/sys/windows. 
*/ +package winapi + +//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go devices.go heapalloc.go ntfs.go errors.go diff --git a/internal/winapi/winapi_test.go b/internal/winapi/winapi_test.go new file mode 100644 index 0000000000..e55a740428 --- /dev/null +++ b/internal/winapi/winapi_test.go @@ -0,0 +1,65 @@ +package winapi + +import ( + "testing" + "unicode/utf16" + "unsafe" +) + +func wideStringsEqual(target, actual []uint16, actualLengthInBytes int) bool { + actualLength := actualLengthInBytes / 2 + if len(target) != actualLength { + return false + } + + for i := range target { + if target[i] != actual[i] { + return false + } + } + return true +} + +func TestNewUnicodeString(t *testing.T) { + targetStrings := []string{"abcde", "abcd\n", "C:\\Test", "\\&_Test"} + for _, target := range targetStrings { + targetLength := uint16(len(target) * 2) + targetWideString := utf16.Encode(([]rune)(target)) + + uni, err := NewUnicodeString(target) + if err != nil { + t.Fatalf("failed to convert target string %s to Unicode String with %v", target, err) + } + + if uni.Length != targetLength { + t.Fatalf("Expected new Unicode String length to be %d for target string %s, got %d instead", targetLength, target, uni.Length) + } + if uni.MaximumLength != targetLength { + t.Fatalf("Expected new Unicode String maximum length to be %d for target string %s, got %d instead", targetLength, target, uni.MaximumLength) + } + + uniBufferStringAsSlice := (*[32768]uint16)(unsafe.Pointer(uni.Buffer))[:] + + // since we have to do casting to convert the unicode string's buffer into a uint16 slice + // the length of the actual slice will not be the true length of the contents in the unicode buffer + // therefore we need to use the unicode string's length field when comparing + if !wideStringsEqual(targetWideString, uniBufferStringAsSlice, int(uni.Length)) { + t.Fatalf("Expected wide string %v, got %v instead", targetWideString, uniBufferStringAsSlice[:uni.Length]) + } + } 
+} + +func TestUnicodeToString(t *testing.T) { + targetStrings := []string{"abcde", "abcd\n", "C:\\Test", "\\&_Test"} + for _, target := range targetStrings { + uni, err := NewUnicodeString(target) + if err != nil { + t.Fatalf("failed to convert target string %s to Unicode String with %v", target, err) + } + + actualString := uni.String() + if actualString != target { + t.Fatalf("Expected unicode string function to return %s, instead got %s", target, actualString) + } + } +} diff --git a/internal/winapi/zsyscall_windows.go b/internal/winapi/zsyscall_windows.go new file mode 100644 index 0000000000..1e0ab0ca2e --- /dev/null +++ b/internal/winapi/zsyscall_windows.go @@ -0,0 +1,163 @@ +// Code generated mksyscall_windows.exe DO NOT EDIT + +package winapi + +import ( + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +var _ unsafe.Pointer + +// Do the interface allocations only once for common +// Errno values. +const ( + errnoERROR_IO_PENDING = 997 +) + +var ( + errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING) +) + +// errnoErr returns common boxed Errno values, to prevent +// allocations at runtime. +func errnoErr(e syscall.Errno) error { + switch e { + case 0: + return nil + case errnoERROR_IO_PENDING: + return errERROR_IO_PENDING + } + // TODO: add more here, after collecting data on the common + // error values see on Windows. (perhaps when running + // all.bat?) 
+ return e +} + +var ( + modcfgmgr32 = windows.NewLazySystemDLL("cfgmgr32.dll") + modkernel32 = windows.NewLazySystemDLL("kernel32.dll") + modntdll = windows.NewLazySystemDLL("ntdll.dll") + + procCM_Get_Device_ID_List_SizeA = modcfgmgr32.NewProc("CM_Get_Device_ID_List_SizeA") + procCM_Get_Device_ID_ListA = modcfgmgr32.NewProc("CM_Get_Device_ID_ListA") + procCM_Locate_DevNodeW = modcfgmgr32.NewProc("CM_Locate_DevNodeW") + procCM_Get_DevNode_PropertyW = modcfgmgr32.NewProc("CM_Get_DevNode_PropertyW") + procLocalAlloc = modkernel32.NewProc("LocalAlloc") + procLocalFree = modkernel32.NewProc("LocalFree") + procNtCreateFile = modntdll.NewProc("NtCreateFile") + procNtSetInformationFile = modntdll.NewProc("NtSetInformationFile") + procNtOpenDirectoryObject = modntdll.NewProc("NtOpenDirectoryObject") + procNtQueryDirectoryObject = modntdll.NewProc("NtQueryDirectoryObject") + procRtlNtStatusToDosError = modntdll.NewProc("RtlNtStatusToDosError") +) + +func CMGetDeviceIDListSize(pulLen *uint32, pszFilter *byte, uFlags uint32) (hr error) { + r0, _, _ := syscall.Syscall(procCM_Get_Device_ID_List_SizeA.Addr(), 3, uintptr(unsafe.Pointer(pulLen)), uintptr(unsafe.Pointer(pszFilter)), uintptr(uFlags)) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + +func CMGetDeviceIDList(pszFilter *byte, buffer *byte, bufferLen uint32, uFlags uint32) (hr error) { + r0, _, _ := syscall.Syscall6(procCM_Get_Device_ID_ListA.Addr(), 4, uintptr(unsafe.Pointer(pszFilter)), uintptr(unsafe.Pointer(buffer)), uintptr(bufferLen), uintptr(uFlags), 0, 0) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + +func CMLocateDevNode(pdnDevInst *uint32, pDeviceID string, uFlags uint32) (hr error) { + var _p0 *uint16 + _p0, hr = syscall.UTF16PtrFromString(pDeviceID) + if hr != nil { + return + } + return _CMLocateDevNode(pdnDevInst, _p0, uFlags) +} + +func _CMLocateDevNode(pdnDevInst 
*uint32, pDeviceID *uint16, uFlags uint32) (hr error) { + r0, _, _ := syscall.Syscall(procCM_Locate_DevNodeW.Addr(), 3, uintptr(unsafe.Pointer(pdnDevInst)), uintptr(unsafe.Pointer(pDeviceID)), uintptr(uFlags)) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + +func CMGetDevNodeProperty(dnDevInst uint32, propertyKey *DevPropKey, propertyType *uint32, propertyBuffer *uint16, propertyBufferSize *uint32, uFlags uint32) (hr error) { + r0, _, _ := syscall.Syscall6(procCM_Get_DevNode_PropertyW.Addr(), 6, uintptr(dnDevInst), uintptr(unsafe.Pointer(propertyKey)), uintptr(unsafe.Pointer(propertyType)), uintptr(unsafe.Pointer(propertyBuffer)), uintptr(unsafe.Pointer(propertyBufferSize)), uintptr(uFlags)) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + +func LocalAlloc(flags uint32, size int) (ptr uintptr) { + r0, _, _ := syscall.Syscall(procLocalAlloc.Addr(), 2, uintptr(flags), uintptr(size), 0) + ptr = uintptr(r0) + return +} + +func LocalFree(ptr uintptr) { + syscall.Syscall(procLocalFree.Addr(), 1, uintptr(ptr), 0, 0) + return +} + +func NtCreateFile(handle *uintptr, accessMask uint32, oa *ObjectAttributes, iosb *IOStatusBlock, allocationSize *uint64, fileAttributes uint32, shareAccess uint32, createDisposition uint32, createOptions uint32, eaBuffer *byte, eaLength uint32) (status uint32) { + r0, _, _ := syscall.Syscall12(procNtCreateFile.Addr(), 11, uintptr(unsafe.Pointer(handle)), uintptr(accessMask), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(unsafe.Pointer(allocationSize)), uintptr(fileAttributes), uintptr(shareAccess), uintptr(createDisposition), uintptr(createOptions), uintptr(unsafe.Pointer(eaBuffer)), uintptr(eaLength), 0) + status = uint32(r0) + return +} + +func NtSetInformationFile(handle uintptr, iosb *IOStatusBlock, information uintptr, length uint32, class uint32) (status uint32) { + r0, _, _ := 
syscall.Syscall6(procNtSetInformationFile.Addr(), 5, uintptr(handle), uintptr(unsafe.Pointer(iosb)), uintptr(information), uintptr(length), uintptr(class), 0) + status = uint32(r0) + return +} + +func NtOpenDirectoryObject(handle *uintptr, accessMask uint32, oa *ObjectAttributes) (status uint32) { + r0, _, _ := syscall.Syscall(procNtOpenDirectoryObject.Addr(), 3, uintptr(unsafe.Pointer(handle)), uintptr(accessMask), uintptr(unsafe.Pointer(oa))) + status = uint32(r0) + return +} + +func NtQueryDirectoryObject(handle uintptr, buffer *byte, length uint32, singleEntry bool, restartScan bool, context *uint32, returnLength *uint32) (status uint32) { + var _p0 uint32 + if singleEntry { + _p0 = 1 + } else { + _p0 = 0 + } + var _p1 uint32 + if restartScan { + _p1 = 1 + } else { + _p1 = 0 + } + r0, _, _ := syscall.Syscall9(procNtQueryDirectoryObject.Addr(), 7, uintptr(handle), uintptr(unsafe.Pointer(buffer)), uintptr(length), uintptr(_p0), uintptr(_p1), uintptr(unsafe.Pointer(context)), uintptr(unsafe.Pointer(returnLength)), 0, 0) + status = uint32(r0) + return +} + +func RtlNtStatusToDosError(status uint32) (winerr error) { + r0, _, _ := syscall.Syscall(procRtlNtStatusToDosError.Addr(), 1, uintptr(status), 0, 0) + if r0 != 0 { + winerr = syscall.Errno(r0) + } + return +} diff --git a/internal/windevice/devicequery.go b/internal/windevice/devicequery.go new file mode 100644 index 0000000000..19eddec590 --- /dev/null +++ b/internal/windevice/devicequery.go @@ -0,0 +1,121 @@ +package windevice + +import ( + "fmt" + "strings" + "unicode/utf16" + + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/Microsoft/hcsshim/internal/winapi" + "github.com/pkg/errors" +) + +const ( + _CM_GETIDLIST_FILTER_BUSRELATIONS uint32 = 0x00000020 + + _CM_LOCATE_DEVNODE_NORMAL uint32 = 0x00000000 + + _DEVPROP_TYPE_STRING uint32 = 0x00000012 + _DEVPROP_TYPEMOD_LIST uint32 = 0x00002000 + _DEVPROP_TYPE_STRING_LIST uint32 = (_DEVPROP_TYPE_STRING | _DEVPROP_TYPEMOD_LIST) + + 
_DEVPKEY_LOCATIONPATHS_GUID = "a45c254e-df1c-4efd-8020-67d146a850e0" +) + +// getDevPKeyDeviceLocationPaths creates a DEVPROPKEY struct for the +// DEVPKEY_Device_LocationPaths property as defined in devpkey.h +func getDevPKeyDeviceLocationPaths() (*winapi.DevPropKey, error) { + guid, err := guid.FromString(_DEVPKEY_LOCATIONPATHS_GUID) + if err != nil { + return nil, err + } + return &winapi.DevPropKey{ + Fmtid: guid, + // pid value is defined in devpkey.h + Pid: 37, + }, nil +} + +func GetDeviceLocationPathsFromIDs(ids []string) ([]string, error) { + result := []string{} + devPKeyDeviceLocationPaths, err := getDevPKeyDeviceLocationPaths() + if err != nil { + return nil, err + } + for _, id := range ids { + var devNodeInst uint32 + err = winapi.CMLocateDevNode(&devNodeInst, id, _CM_LOCATE_DEVNODE_NORMAL) + if err != nil { + return nil, errors.Wrapf(err, "failed to locate device node for %s", id) + } + propertyType := uint32(0) + propertyBufferSize := uint32(0) + + // get the size of the property buffer by querying with a nil buffer and zeroed propertyBufferSize + err = winapi.CMGetDevNodeProperty(devNodeInst, devPKeyDeviceLocationPaths, &propertyType, nil, &propertyBufferSize, 0) + if err != nil { + return nil, errors.Wrapf(err, "failed to get property buffer size of devnode query for %s with", id) + } + + // get the property with the resulting propertyBufferSize + propertyBuffer := make([]uint16, propertyBufferSize/2) + err = winapi.CMGetDevNodeProperty(devNodeInst, devPKeyDeviceLocationPaths, &propertyType, &propertyBuffer[0], &propertyBufferSize, 0) + if err != nil { + return nil, errors.Wrapf(err, "failed to get location path property from device node for %s with", id) + } + if propertyType != _DEVPROP_TYPE_STRING_LIST { + return nil, fmt.Errorf("expected to return property type DEVPROP_TYPE_STRING_LIST %d, instead got %d", _DEVPROP_TYPE_STRING_LIST, propertyType) + } + if int(propertyBufferSize/2) > len(propertyBuffer) { + return nil, fmt.Errorf("location path 
is too large for the buffer, size in bytes %d", propertyBufferSize) + } + + formattedResult, err := convertFirstNullTerminatedValueToString(propertyBuffer[:propertyBufferSize/2]) + if err != nil { + return nil, err + } + result = append(result, formattedResult) + } + + return result, nil +} + +// helper function that finds the first \u0000 rune and returns the wide string as a regular go string +func convertFirstNullTerminatedValueToString(buf []uint16) (string, error) { + r := utf16.Decode(buf) + converted := string(r) + zerosIndex := strings.IndexRune(converted, '\u0000') + if zerosIndex == -1 { + return "", errors.New("cannot convert value to string, malformed data passed") + } + return converted[:zerosIndex], nil +} + +func GetChildrenFromInstanceIDs(parentIDs []string) ([]string, error) { + var result []string + for _, id := range parentIDs { + pszFilterParentID := []byte(id) + children, err := getDeviceIDList(&pszFilterParentID[0], _CM_GETIDLIST_FILTER_BUSRELATIONS) + if err != nil { + return nil, err + } + result = append(result, children...) 
+ } + return result, nil +} + +func getDeviceIDList(pszFilter *byte, ulFlags uint32) ([]string, error) { + listLength := uint32(0) + if err := winapi.CMGetDeviceIDListSize(&listLength, pszFilter, ulFlags); err != nil { + return nil, err + } + if listLength == 0 { + return []string{}, nil + } + buf := make([]byte, listLength) + if err := winapi.CMGetDeviceIDList(pszFilter, &buf[0], uint32(listLength), ulFlags); err != nil { + return nil, err + } + + return winapi.ConvertStringSetToSlice(buf) +} diff --git a/internal/winobjdir/object_dir.go b/internal/winobjdir/object_dir.go new file mode 100644 index 0000000000..6d448c896b --- /dev/null +++ b/internal/winobjdir/object_dir.go @@ -0,0 +1,84 @@ +package winobjdir + +import ( + "path/filepath" + "syscall" + "unsafe" + + "github.com/Microsoft/hcsshim/internal/winapi" +) + +const bufferSize = 1024 + +// EnumerateNTObjectDirectory queries for all entries in the object +// directory at `ntObjDirPath`. Returns the resulting entry names as a string slice. +func EnumerateNTObjectDirectory(ntObjDirPath string) ([]string, error) { + var ( + handle uintptr + oa winapi.ObjectAttributes + + context uint32 + returnLength uint32 + buffer [bufferSize]byte + result []string + ) + + path := filepath.Clean(ntObjDirPath) + fsNtPath := filepath.FromSlash(path) + + pathUnicode, err := winapi.NewUnicodeString(fsNtPath) + if err != nil { + return nil, err + } + + oa.Length = unsafe.Sizeof(oa) + oa.ObjectName = uintptr(unsafe.Pointer(pathUnicode)) + + // open `ntObjDirPath` directory + status := winapi.NtOpenDirectoryObject( + &handle, + winapi.FILE_LIST_DIRECTORY, + &oa, + ) + + if !winapi.NTSuccess(status) { + return nil, winapi.RtlNtStatusToDosError(status) + } + + defer syscall.Close(syscall.Handle(handle)) + + for { + // Query opened `ntObjDirPath` for entries. This call takes in a + // set length buffer, so to ensure we find all entries, we make + // successive calls until no more entries exist or an error is seen. 
+ status = winapi.NtQueryDirectoryObject( + handle, + &buffer[0], + bufferSize, + false, + false, + &context, + &returnLength, + ) + + if !winapi.NTSuccess(status) && status != winapi.STATUS_MORE_ENTRIES { + if status == winapi.STATUS_NO_MORE_ENTRIES { + break + } + return nil, winapi.RtlNtStatusToDosError(status) + } + dirInfo := (*winapi.ObjectDirectoryInformation)(unsafe.Pointer(&buffer[0])) + index := 1 + for { + if dirInfo == nil || dirInfo.Name.Length == 0 { + break + } + result = append(result, dirInfo.Name.String()) + size := unsafe.Sizeof(winapi.ObjectDirectoryInformation{}) * uintptr(index) + dirInfo = (*winapi.ObjectDirectoryInformation)(unsafe.Pointer(&buffer[size])) + index++ + } + } + + return result, nil +} diff --git a/test/go.sum b/test/go.sum index 13975bdb09..57105f190b 100644 --- a/test/go.sum +++ b/test/go.sum @@ -6,7 +6,6 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5 h1:ygIc8M6trr62pF5DucadTWGdEB4mEyvzi0e2nbcmcyA= github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw= -github.com/Microsoft/hcsshim/test v0.0.0-20200715222032-5eafd1556990/go.mod h1:DuZrw7H6+NwapnU0Q42ym+GLIlEJ8GuzVJ1/493/oH0= github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ= github.com/aws/aws-sdk-go v1.15.11/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0= github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -20,8 +19,6 @@ github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b/go.mod h1:obH5gd0Bsq github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= github.com/cilium/ebpf 
v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmEg9bt0VpxxWqJlO4iwu3FBdHUzV7wQVg= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/containerd/cgroups v0.0.0-20190919134610-bf292b21730f h1:tSNMc+rJDfmYntojat8lljbt1mgKNpTxUZJsSzJ9Y1s= -github.com/containerd/cgroups v0.0.0-20190919134610-bf292b21730f/go.mod h1:OApqhQ4XNSNC13gXIwDjhOQxjWa/NxkwZXJ1EvqT0ko= github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59 h1:qWj4qVYZ95vLWwqyNJCQg7rDsG5wPdze0UaPolH7DUk= github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59/go.mod h1:pA0z1pT8KYB3TCXK/ocprsh7MAkoW8bZVzPdih9snmM= github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1 h1:uict5mhHFTzKLUCufdSLym7z/J0CbBJT59lYbP9wtbg= @@ -38,9 +35,7 @@ github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de h1:dlfGmNcE3jDAec github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o= github.com/containerd/typeurl v0.0.0-20180627222232-a93fcdb778cd h1:JNn81o/xG+8NEo3bC/vx9pbi/g2WI8mtP2/nXzu297Y= github.com/containerd/typeurl v0.0.0-20180627222232-a93fcdb778cd/go.mod h1:Cm3kwCdlkCfMSHURc+r6fwoGH6/F1hH3S4sg0rLFWPc= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f h1:JOrtw2xFKzlg+cbHpyrpLDmnN1HqhBfnX7WDiW7eG2c= -github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd/v22 v22.0.0 h1:XJIw/+VlJ+87J+doOxznsAWIdmWuViOVhkQamW5YV28= github.com/coreos/go-systemd/v22 v22.0.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= @@ -60,9 +55,7 @@ 
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDD github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE= github.com/garyburd/redigo v0.0.0-20150301180006-535138d7bcd7/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY= github.com/go-ini/ini v1.25.4/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= -github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4= -github.com/godbus/dbus v4.1.0+incompatible h1:WqqLRTsQic3apZUK9qC5sGNfXthmPXzUZ7nQPrNITa4= -github.com/godbus/dbus v4.1.0+incompatible/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= +github.com/godbus/dbus/v5 v5.0.3 h1:ZqHaoEF7TBzh4jzPmqVhE/5A1z9of6orkAe5uHoAeME= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/googleapis v1.2.0 h1:Z0v3OJDotX9ZBpdz2V+AI7F4fITSZhVE5mg6GQppwMM= github.com/gogo/googleapis v1.2.0/go.mod h1:Njal3psf3qN6dwBtQfUmBZh2ybovJ0tlu3o/AC7HYjU= @@ -119,8 +112,6 @@ github.com/opencontainers/image-spec v1.0.0 h1:jcw3cCH887bLKETGYpv8afogdYchbShR0 github.com/opencontainers/image-spec v1.0.0/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/opencontainers/runc v0.0.0-20190115041553-12f6a991201f h1:a969LJ4IQFwRHYqonHtUDMSh9i54WcKggeEkQ3fZMl4= github.com/opencontainers/runc v0.0.0-20190115041553-12f6a991201f/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700 h1:eNUVfm/RFLIi1G7flU5/ZRTHvd4kcVuzfRnL6OFlzCI= -github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-tools 
v0.0.0-20181011054405-1d69bd0f9c39 h1:H7DMc6FAjgwZZi8BRqjrAAHWoqEr5e5L6pS4V0ezet4= @@ -239,4 +230,3 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= k8s.io/cri-api v0.17.3 h1:jvjVvBqgZq3WcaPq07n0h5h9eCnIaR4dhKyHSoZG8Y8= k8s.io/cri-api v0.17.3/go.mod h1:X1sbHmuXhwaHs9xxYffLqJogVsnI+f6cPRcgPel7ywM= -k8s.io/cri-api v0.18.6 h1:dxhb+Ii0qThCgl3ZR+LO3wAy8RVzvppYVtyLOUC0fyI= From 3fde72426aa83126fdb4cc9dbd4b4a8d7f7e9ce5 Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Tue, 16 Jun 2020 13:07:00 -0700 Subject: [PATCH 05/20] Create new utility 'device-util' for querying pnp information of devices * Add a new shim option for the `device-util` path Signed-off-by: Kathryn Baldauf --- appveyor.yml | 2 + .../options/next.pb.txt | 421 +++---- .../options/runhcs.pb.go | 100 +- .../stats/next.pb.txt | 1048 ----------------- cmd/device-util/main.go | 105 ++ 5 files changed, 371 insertions(+), 1305 deletions(-) create mode 100644 cmd/device-util/main.go diff --git a/appveyor.yml b/appveyor.yml index 6617fade0f..bd3f18f075 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -18,6 +18,7 @@ build_script: - go build ./cmd/runhcs - go build ./cmd/tar2ext4 - go build ./cmd/wclayer + - go build ./cmd/device-util - go build ./internal/tools/grantvmgroupaccess - go build ./internal/tools/uvmboot - go build ./internal/tools/zapdir @@ -33,6 +34,7 @@ artifacts: - path: 'containerd-shim-runhcs-v1.exe' - path: 'runhcs.exe' - path: 'tar2ext4.exe' + - path: 'device-util.exe' - path: 'wclayer.exe' - path: 'grantvmgroupaccess.exe' - path: 'uvmboot.exe' diff --git a/cmd/containerd-shim-runhcs-v1/options/next.pb.txt b/cmd/containerd-shim-runhcs-v1/options/next.pb.txt index fee15d6b5c..632290f42f 100755 --- a/cmd/containerd-shim-runhcs-v1/options/next.pb.txt +++ b/cmd/containerd-shim-runhcs-v1/options/next.pb.txt @@ -1,207 +1,214 @@ -file { - name: 
"google/protobuf/timestamp.proto" - package: "google.protobuf" - message_type { - name: "Timestamp" - field { - name: "seconds" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_INT64 - json_name: "seconds" - } - field { - name: "nanos" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_INT32 - json_name: "nanos" - } - } - options { - java_package: "com.google.protobuf" - java_outer_classname: "TimestampProto" - java_multiple_files: true - go_package: "github.com/golang/protobuf/ptypes/timestamp" - cc_enable_arenas: true - objc_class_prefix: "GPB" - csharp_namespace: "Google.Protobuf.WellKnownTypes" - } - syntax: "proto3" -} -file { - name: "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options/runhcs.proto" - package: "containerd.runhcs.v1" - dependency: "gogoproto/gogo.proto" - dependency: "google/protobuf/timestamp.proto" - message_type { - name: "Options" - field { - name: "debug" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_BOOL - json_name: "debug" - } - field { - name: "debug_type" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_ENUM - type_name: ".containerd.runhcs.v1.Options.DebugType" - json_name: "debugType" - } - field { - name: "registry_root" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "registryRoot" - } - field { - name: "sandbox_image" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "sandboxImage" - } - field { - name: "sandbox_platform" - number: 5 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "sandboxPlatform" - } - field { - name: "sandbox_isolation" - number: 6 - label: LABEL_OPTIONAL - type: TYPE_ENUM - type_name: ".containerd.runhcs.v1.Options.SandboxIsolation" - json_name: "sandboxIsolation" - } - field { - name: "boot_files_root_path" - number: 7 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "bootFilesRootPath" - } - field { - name: "vm_processor_count" - number: 8 - label: LABEL_OPTIONAL - type: TYPE_INT32 - json_name: "vmProcessorCount" - } - field { - name: 
"vm_memory_size_in_mb" - number: 9 - label: LABEL_OPTIONAL - type: TYPE_INT32 - json_name: "vmMemorySizeInMb" - } - enum_type { - name: "DebugType" - value { - name: "NPIPE" - number: 0 - } - value { - name: "FILE" - number: 1 - } - value { - name: "ETW" - number: 2 - } - } - enum_type { - name: "SandboxIsolation" - value { - name: "PROCESS" - number: 0 - } - value { - name: "HYPERVISOR" - number: 1 - } - } - } - message_type { - name: "ProcessDetails" - field { - name: "image_name" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "imageName" - } - field { - name: "created_at" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".google.protobuf.Timestamp" - options { - 65001: 0 - 65010: 1 - } - json_name: "createdAt" - } - field { - name: "kernel_time_100_ns" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "kernelTime100Ns" - } - field { - name: "memory_commit_bytes" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "memoryCommitBytes" - } - field { - name: "memory_working_set_private_bytes" - number: 5 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "memoryWorkingSetPrivateBytes" - } - field { - name: "memory_working_set_shared_bytes" - number: 6 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "memoryWorkingSetSharedBytes" - } - field { - name: "process_id" - number: 7 - label: LABEL_OPTIONAL - type: TYPE_UINT32 - json_name: "processId" - } - field { - name: "user_time_100_ns" - number: 8 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "userTime100Ns" - } - field { - name: "exec_id" - number: 9 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "execId" - } - } - options { - go_package: "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options;options" - } - weak_dependency: 0 - syntax: "proto3" -} \ No newline at end of file +file { + name: "google/protobuf/timestamp.proto" + package: "google.protobuf" + message_type { + name: "Timestamp" + 
field { + name: "seconds" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT64 + json_name: "seconds" + } + field { + name: "nanos" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT32 + json_name: "nanos" + } + } + options { + java_package: "com.google.protobuf" + java_outer_classname: "TimestampProto" + java_multiple_files: true + go_package: "github.com/golang/protobuf/ptypes/timestamp" + cc_enable_arenas: true + objc_class_prefix: "GPB" + csharp_namespace: "Google.Protobuf.WellKnownTypes" + } + syntax: "proto3" +} +file { + name: "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options/runhcs.proto" + package: "containerd.runhcs.v1" + dependency: "gogoproto/gogo.proto" + dependency: "google/protobuf/timestamp.proto" + message_type { + name: "Options" + field { + name: "debug" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_BOOL + json_name: "debug" + } + field { + name: "debug_type" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".containerd.runhcs.v1.Options.DebugType" + json_name: "debugType" + } + field { + name: "registry_root" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "registryRoot" + } + field { + name: "sandbox_image" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "sandboxImage" + } + field { + name: "sandbox_platform" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "sandboxPlatform" + } + field { + name: "sandbox_isolation" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".containerd.runhcs.v1.Options.SandboxIsolation" + json_name: "sandboxIsolation" + } + field { + name: "boot_files_root_path" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "bootFilesRootPath" + } + field { + name: "vm_processor_count" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_INT32 + json_name: "vmProcessorCount" + } + field { + name: "vm_memory_size_in_mb" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_INT32 + json_name: 
"vmMemorySizeInMb" + } + field { + name: "GPUVHDPath" + number: 10 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "GPUVHDPath" + } + enum_type { + name: "DebugType" + value { + name: "NPIPE" + number: 0 + } + value { + name: "FILE" + number: 1 + } + value { + name: "ETW" + number: 2 + } + } + enum_type { + name: "SandboxIsolation" + value { + name: "PROCESS" + number: 0 + } + value { + name: "HYPERVISOR" + number: 1 + } + } + } + message_type { + name: "ProcessDetails" + field { + name: "image_name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "imageName" + } + field { + name: "created_at" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".google.protobuf.Timestamp" + options { + 65001: 0 + 65010: 1 + } + json_name: "createdAt" + } + field { + name: "kernel_time_100_ns" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "kernelTime100Ns" + } + field { + name: "memory_commit_bytes" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "memoryCommitBytes" + } + field { + name: "memory_working_set_private_bytes" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "memoryWorkingSetPrivateBytes" + } + field { + name: "memory_working_set_shared_bytes" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "memoryWorkingSetSharedBytes" + } + field { + name: "process_id" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_UINT32 + json_name: "processId" + } + field { + name: "user_time_100_ns" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "userTime100Ns" + } + field { + name: "exec_id" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "execId" + } + } + options { + go_package: "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options;options" + } + weak_dependency: 0 + syntax: "proto3" +} diff --git a/cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go b/cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go 
index 2190e56a79..a9d10fd429 100644 --- a/cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go +++ b/cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go @@ -115,7 +115,7 @@ type Options struct { VmMemorySizeInMb int32 `protobuf:"varint,9,opt,name=vm_memory_size_in_mb,json=vmMemorySizeInMb,proto3" json:"vm_memory_size_in_mb,omitempty"` // GPUVHDPath is the path to the gpu vhd to add to the uvm // when a container requests a gpu - GPUVHDPath string `protobuf:"bytes,10,opt,name=GPUVHDPath,json=gPUVHDPath,proto3" json:"GPUVHDPath,omitempty"` + GPUVHDPath string `protobuf:"bytes,10,opt,name=GPUVHDPath,proto3" json:"GPUVHDPath,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` XXX_sizecache int32 `json:"-"` @@ -214,56 +214,56 @@ func init() { } var fileDescriptor_b643df6839c75082 = []byte{ - // 777 bytes of a gzipped FileDescriptorProto + // 775 bytes of a gzipped FileDescriptorProto 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x54, 0xcd, 0x6f, 0xdb, 0x36, - 0x1c, 0xb5, 0x9a, 0xf8, 0x43, 0xbf, 0x2e, 0xa9, 0xc2, 0xf9, 0x20, 0x64, 0x9b, 0x6d, 0xa4, 0x87, - 0xa6, 0x58, 0x23, 0x25, 0xdd, 0x71, 0xa7, 0x39, 0x76, 0x56, 0x0d, 0x4b, 0x22, 0xc8, 0x59, 0xbb, - 0x8f, 0x03, 0xa1, 0x0f, 0x46, 0x26, 0x6a, 0x8a, 0x02, 0x49, 0x7b, 0x71, 0x4f, 0xfb, 0x13, 0xf6, - 0x47, 0xed, 0x90, 0xe3, 0x8e, 0x03, 0x06, 0x64, 0xab, 0xff, 0x92, 0x81, 0x94, 0x94, 0x62, 0x45, - 0xb1, 0xcb, 0x4e, 0xa6, 0xde, 0x7b, 0x7c, 0xbf, 0x0f, 0x3e, 0x18, 0x2e, 0x73, 0xaa, 0xe6, 0xcb, - 0xc4, 0x4b, 0x39, 0xf3, 0xcf, 0x69, 0x2a, 0xb8, 0xe4, 0xd7, 0xca, 0x9f, 0xa7, 0x52, 0xce, 0x29, - 0xf3, 0x53, 0x96, 0xf9, 0x29, 0x2f, 0x54, 0x4c, 0x0b, 0x22, 0xb2, 0x23, 0x8d, 0x1d, 0x89, 0x65, - 0x31, 0x4f, 0xe5, 0xd1, 0xea, 0xc4, 0xe7, 0xa5, 0xa2, 0xbc, 0x90, 0x7e, 0x85, 0x78, 0xa5, 0xe0, - 0x8a, 0xa3, 0xfe, 0x3b, 0xbd, 0x57, 0x13, 0xab, 0x93, 0xfd, 0x7e, 0xce, 0x73, 0x6e, 0x04, 0xbe, - 0x3e, 0x55, 0xda, 0xfd, 0x61, 0xce, 0x79, 0xbe, 0x20, 0xbe, 0xf9, 0x4a, 0x96, 0xd7, 0xbe, 
0xa2, - 0x8c, 0x48, 0x15, 0xb3, 0xb2, 0x12, 0x1c, 0xfc, 0xb6, 0x0d, 0xdd, 0xcb, 0xaa, 0x0a, 0xea, 0x43, - 0x3b, 0x23, 0xc9, 0x32, 0x77, 0xad, 0x91, 0x75, 0xd8, 0x8b, 0xaa, 0x0f, 0x74, 0x06, 0x60, 0x0e, - 0x58, 0xad, 0x4b, 0xe2, 0x3e, 0x18, 0x59, 0x87, 0xbb, 0xcf, 0x9f, 0x78, 0x1f, 0xea, 0xc1, 0xab, - 0x8d, 0xbc, 0x89, 0xd6, 0x5f, 0xad, 0x4b, 0x12, 0xd9, 0x59, 0x73, 0x44, 0x8f, 0x61, 0x47, 0x90, - 0x9c, 0x4a, 0x25, 0xd6, 0x58, 0x70, 0xae, 0xdc, 0xad, 0x91, 0x75, 0x68, 0x47, 0x1f, 0x35, 0x60, - 0xc4, 0xb9, 0xd2, 0x22, 0x19, 0x17, 0x59, 0xc2, 0x6f, 0x30, 0x65, 0x71, 0x4e, 0xdc, 0xed, 0x4a, - 0x54, 0x83, 0x81, 0xc6, 0xd0, 0x53, 0x70, 0x1a, 0x51, 0xb9, 0x88, 0xd5, 0x35, 0x17, 0xcc, 0x6d, - 0x1b, 0xdd, 0xa3, 0x1a, 0x0f, 0x6b, 0x18, 0xfd, 0x04, 0x7b, 0xf7, 0x7e, 0x92, 0x2f, 0x62, 0xdd, - 0x9f, 0xdb, 0x31, 0x33, 0x78, 0xff, 0x3d, 0xc3, 0xac, 0xae, 0xd8, 0xdc, 0x8a, 0x9a, 0x9a, 0xf7, - 0x08, 0xf2, 0xa1, 0x9f, 0x70, 0xae, 0xf0, 0x35, 0x5d, 0x10, 0x69, 0x66, 0xc2, 0x65, 0xac, 0xe6, - 0x6e, 0xd7, 0xf4, 0xb2, 0xa7, 0xb9, 0x33, 0x4d, 0xe9, 0xc9, 0xc2, 0x58, 0xcd, 0xd1, 0x33, 0x40, - 0x2b, 0x86, 0x4b, 0xc1, 0x53, 0x22, 0x25, 0x17, 0x38, 0xe5, 0xcb, 0x42, 0xb9, 0xbd, 0x91, 0x75, - 0xd8, 0x8e, 0x9c, 0x15, 0x0b, 0x1b, 0xe2, 0x54, 0xe3, 0xc8, 0x83, 0xfe, 0x8a, 0x61, 0x46, 0x18, - 0x17, 0x6b, 0x2c, 0xe9, 0x1b, 0x82, 0x69, 0x81, 0x59, 0xe2, 0xda, 0x8d, 0xfe, 0xdc, 0x50, 0x33, - 0xfa, 0x86, 0x04, 0xc5, 0x79, 0x82, 0x06, 0x00, 0x5f, 0x87, 0xdf, 0xbd, 0x7c, 0x31, 0xd1, 0xb5, - 0x5c, 0x30, 0x4d, 0x40, 0x7e, 0x8f, 0x1c, 0x3c, 0x05, 0xfb, 0xfe, 0x61, 0x90, 0x0d, 0xed, 0x8b, - 0x30, 0x08, 0xa7, 0x4e, 0x0b, 0xf5, 0x60, 0xfb, 0x2c, 0xf8, 0x76, 0xea, 0x58, 0xa8, 0x0b, 0x5b, - 0xd3, 0xab, 0x57, 0xce, 0x83, 0x03, 0x1f, 0x9c, 0xf7, 0xe7, 0x47, 0x0f, 0xa1, 0x1b, 0x46, 0x97, - 0xa7, 0xd3, 0xd9, 0xcc, 0x69, 0xa1, 0x5d, 0x80, 0x17, 0x3f, 0x84, 0xd3, 0xe8, 0x65, 0x30, 0xbb, - 0x8c, 0x1c, 0xeb, 0xe0, 0xcf, 0x2d, 0xd8, 0xad, 0xdb, 0x9f, 0x10, 0x15, 0xd3, 0x85, 0x44, 0x9f, - 0x01, 0x98, 0x27, 0xc4, 0x45, 
0xcc, 0x88, 0x89, 0x94, 0x1d, 0xd9, 0x06, 0xb9, 0x88, 0x19, 0x41, - 0xa7, 0x00, 0xa9, 0x20, 0xb1, 0x22, 0x19, 0x8e, 0x95, 0x89, 0xd5, 0xc3, 0xe7, 0xfb, 0x5e, 0x15, - 0x57, 0xaf, 0x89, 0xab, 0x77, 0xd5, 0xc4, 0x75, 0xdc, 0xbb, 0xbd, 0x1b, 0xb6, 0x7e, 0xfd, 0x6b, - 0x68, 0x45, 0x76, 0x7d, 0xef, 0x2b, 0x85, 0x3e, 0x07, 0xf4, 0x9a, 0x88, 0x82, 0x2c, 0xb0, 0xce, - 0x35, 0x3e, 0x39, 0x3e, 0xc6, 0x85, 0x34, 0xc1, 0xda, 0x8e, 0x1e, 0x55, 0x8c, 0x76, 0x38, 0x39, - 0x3e, 0xbe, 0x90, 0xc8, 0x83, 0x8f, 0xeb, 0x65, 0xa6, 0x9c, 0x31, 0xaa, 0x70, 0xb2, 0x56, 0x44, - 0x9a, 0x84, 0x6d, 0x47, 0x7b, 0x15, 0x75, 0x6a, 0x98, 0xb1, 0x26, 0xd0, 0x19, 0x8c, 0x6a, 0xfd, - 0xcf, 0x5c, 0xbc, 0xa6, 0x45, 0x8e, 0x25, 0x51, 0xb8, 0x14, 0x74, 0x15, 0x2b, 0x52, 0x5f, 0x6e, - 0x9b, 0xcb, 0x9f, 0x56, 0xba, 0x57, 0x95, 0x6c, 0x46, 0x54, 0x58, 0x89, 0x2a, 0x9f, 0x09, 0x0c, - 0x3f, 0xe0, 0x23, 0xe7, 0xb1, 0x20, 0x59, 0x6d, 0xd3, 0x31, 0x36, 0x9f, 0xbc, 0x6f, 0x33, 0x33, - 0x9a, 0xca, 0xe5, 0x19, 0x40, 0x1d, 0x1c, 0x4c, 0x33, 0x13, 0xb1, 0x9d, 0xf1, 0xce, 0xe6, 0x6e, - 0x68, 0xd7, 0x6b, 0x0f, 0x26, 0x91, 0x5d, 0x0b, 0x82, 0x0c, 0x3d, 0x01, 0x67, 0x29, 0x89, 0xf8, - 0xd7, 0x5a, 0x7a, 0xa6, 0xc8, 0x8e, 0xc6, 0xdf, 0x2d, 0xe5, 0x31, 0x74, 0xc9, 0x0d, 0x49, 0xb5, - 0xa7, 0xce, 0x95, 0x3d, 0x86, 0xcd, 0xdd, 0xb0, 0x33, 0xbd, 0x21, 0x69, 0x30, 0x89, 0x3a, 0x9a, - 0x0a, 0xb2, 0x71, 0x76, 0xfb, 0x76, 0xd0, 0xfa, 0xe3, 0xed, 0xa0, 0xf5, 0xcb, 0x66, 0x60, 0xdd, - 0x6e, 0x06, 0xd6, 0xef, 0x9b, 0x81, 0xf5, 0xf7, 0x66, 0x60, 0xfd, 0xf8, 0xcd, 0xff, 0xff, 0x73, - 0xfb, 0xb2, 0xfe, 0xfd, 0xbe, 0x95, 0x74, 0xcc, 0xbb, 0x7f, 0xf1, 0x4f, 0x00, 0x00, 0x00, 0xff, - 0xff, 0xc9, 0xeb, 0xae, 0x6f, 0x33, 0x05, 0x00, 0x00, + 0x1c, 0xb5, 0x1a, 0x7f, 0xe9, 0xd7, 0x25, 0x75, 0x38, 0x1f, 0x84, 0x6c, 0xb3, 0x8d, 0xf4, 0xd0, + 0x14, 0x6b, 0xa4, 0xa4, 0x3b, 0xee, 0x34, 0xc7, 0xce, 0xaa, 0x61, 0x49, 0x04, 0x39, 0x6b, 0xf7, + 0x71, 0x20, 0xf4, 0xc1, 0xc8, 0x44, 0x4d, 0x51, 0x20, 0x69, 0x2f, 0xee, 0x69, 0x7f, 0xc2, 0xfe, + 0xa8, 0x1d, 
0x72, 0xdc, 0x71, 0xc0, 0x80, 0x6c, 0xf5, 0x5f, 0x32, 0x90, 0x92, 0xd2, 0xad, 0x08, + 0x76, 0xe9, 0xc9, 0xd4, 0x7b, 0x8f, 0xef, 0xf7, 0xc1, 0x07, 0xc3, 0x45, 0x46, 0xd5, 0x7c, 0x19, + 0xbb, 0x09, 0x67, 0xde, 0x19, 0x4d, 0x04, 0x97, 0xfc, 0x4a, 0x79, 0xf3, 0x44, 0xca, 0x39, 0x65, + 0x5e, 0xc2, 0x52, 0x2f, 0xe1, 0xb9, 0x8a, 0x68, 0x4e, 0x44, 0x7a, 0xa8, 0xb1, 0x43, 0xb1, 0xcc, + 0xe7, 0x89, 0x3c, 0x5c, 0x1d, 0x7b, 0xbc, 0x50, 0x94, 0xe7, 0xd2, 0x2b, 0x11, 0xb7, 0x10, 0x5c, + 0x71, 0xd4, 0x7f, 0xa7, 0x77, 0x2b, 0x62, 0x75, 0xbc, 0xd7, 0xcf, 0x78, 0xc6, 0x8d, 0xc0, 0xd3, + 0xa7, 0x52, 0xbb, 0x37, 0xcc, 0x38, 0xcf, 0x16, 0xc4, 0x33, 0x5f, 0xf1, 0xf2, 0xca, 0x53, 0x94, + 0x11, 0xa9, 0x22, 0x56, 0x94, 0x82, 0xfd, 0xdf, 0x9a, 0xd0, 0xb9, 0x28, 0xab, 0xa0, 0x3e, 0xb4, + 0x52, 0x12, 0x2f, 0x33, 0xc7, 0x1a, 0x59, 0x07, 0xdd, 0xb0, 0xfc, 0x40, 0xa7, 0x00, 0xe6, 0x80, + 0xd5, 0xba, 0x20, 0xce, 0x83, 0x91, 0x75, 0xb0, 0xf3, 0xfc, 0x89, 0x7b, 0x5f, 0x0f, 0x6e, 0x65, + 0xe4, 0x4e, 0xb4, 0xfe, 0x72, 0x5d, 0x90, 0xd0, 0x4e, 0xeb, 0x23, 0x7a, 0x0c, 0xdb, 0x82, 0x64, + 0x54, 0x2a, 0xb1, 0xc6, 0x82, 0x73, 0xe5, 0x6c, 0x8d, 0xac, 0x03, 0x3b, 0xfc, 0xa8, 0x06, 0x43, + 0xce, 0x95, 0x16, 0xc9, 0x28, 0x4f, 0x63, 0x7e, 0x8d, 0x29, 0x8b, 0x32, 0xe2, 0x34, 0x4b, 0x51, + 0x05, 0xfa, 0x1a, 0x43, 0x4f, 0xa1, 0x57, 0x8b, 0x8a, 0x45, 0xa4, 0xae, 0xb8, 0x60, 0x4e, 0xcb, + 0xe8, 0x1e, 0x55, 0x78, 0x50, 0xc1, 0xe8, 0x27, 0xd8, 0xbd, 0xf3, 0x93, 0x7c, 0x11, 0xe9, 0xfe, + 0x9c, 0xb6, 0x99, 0xc1, 0xfd, 0xff, 0x19, 0x66, 0x55, 0xc5, 0xfa, 0x56, 0x58, 0xd7, 0xbc, 0x43, + 0x90, 0x07, 0xfd, 0x98, 0x73, 0x85, 0xaf, 0xe8, 0x82, 0x48, 0x33, 0x13, 0x2e, 0x22, 0x35, 0x77, + 0x3a, 0xa6, 0x97, 0x5d, 0xcd, 0x9d, 0x6a, 0x4a, 0x4f, 0x16, 0x44, 0x6a, 0x8e, 0x9e, 0x01, 0x5a, + 0x31, 0x5c, 0x08, 0x9e, 0x10, 0x29, 0xb9, 0xc0, 0x09, 0x5f, 0xe6, 0xca, 0xe9, 0x8e, 0xac, 0x83, + 0x56, 0xd8, 0x5b, 0xb1, 0xa0, 0x26, 0x4e, 0x34, 0x8e, 0x5c, 0xe8, 0xaf, 0x18, 0x66, 0x84, 0x71, + 0xb1, 0xc6, 0x92, 0xbe, 0x21, 0x98, 0xe6, 0x98, 
0xc5, 0x8e, 0x5d, 0xeb, 0xcf, 0x0c, 0x35, 0xa3, + 0x6f, 0x88, 0x9f, 0x9f, 0xc5, 0x68, 0x00, 0xf0, 0x75, 0xf0, 0xdd, 0xcb, 0x17, 0x13, 0x5d, 0xcb, + 0x01, 0xd3, 0xc4, 0xbf, 0x90, 0xfd, 0xa7, 0x60, 0xdf, 0x3d, 0x0c, 0xb2, 0xa1, 0x75, 0x1e, 0xf8, + 0xc1, 0xb4, 0xd7, 0x40, 0x5d, 0x68, 0x9e, 0xfa, 0xdf, 0x4e, 0x7b, 0x16, 0xea, 0xc0, 0xd6, 0xf4, + 0xf2, 0x55, 0xef, 0xc1, 0xbe, 0x07, 0xbd, 0xf7, 0xe7, 0x47, 0x0f, 0xa1, 0x13, 0x84, 0x17, 0x27, + 0xd3, 0xd9, 0xac, 0xd7, 0x40, 0x3b, 0x00, 0x2f, 0x7e, 0x08, 0xa6, 0xe1, 0x4b, 0x7f, 0x76, 0x11, + 0xf6, 0xac, 0xfd, 0x3f, 0xb7, 0x60, 0xa7, 0x6a, 0x7f, 0x42, 0x54, 0x44, 0x17, 0x12, 0x7d, 0x06, + 0x60, 0x9e, 0x10, 0xe7, 0x11, 0x23, 0x26, 0x52, 0x76, 0x68, 0x1b, 0xe4, 0x3c, 0x62, 0x04, 0x9d, + 0x00, 0x24, 0x82, 0x44, 0x8a, 0xa4, 0x38, 0x52, 0x26, 0x56, 0x0f, 0x9f, 0xef, 0xb9, 0x65, 0x5c, + 0xdd, 0x3a, 0xae, 0xee, 0x65, 0x1d, 0xd7, 0x71, 0xf7, 0xe6, 0x76, 0xd8, 0xf8, 0xf5, 0xaf, 0xa1, + 0x15, 0xda, 0xd5, 0xbd, 0xaf, 0x14, 0xfa, 0x1c, 0xd0, 0x6b, 0x22, 0x72, 0xb2, 0xc0, 0x3a, 0xd7, + 0xf8, 0xf8, 0xe8, 0x08, 0xe7, 0xd2, 0x04, 0xab, 0x19, 0x3e, 0x2a, 0x19, 0xed, 0x70, 0x7c, 0x74, + 0x74, 0x2e, 0x91, 0x0b, 0x1f, 0x57, 0xcb, 0x4c, 0x38, 0x63, 0x54, 0xe1, 0x78, 0xad, 0x88, 0x34, + 0x09, 0x6b, 0x86, 0xbb, 0x25, 0x75, 0x62, 0x98, 0xb1, 0x26, 0xd0, 0x29, 0x8c, 0x2a, 0xfd, 0xcf, + 0x5c, 0xbc, 0xa6, 0x79, 0x86, 0x25, 0x51, 0xb8, 0x10, 0x74, 0x15, 0x29, 0x52, 0x5d, 0x6e, 0x99, + 0xcb, 0x9f, 0x96, 0xba, 0x57, 0xa5, 0x6c, 0x46, 0x54, 0x50, 0x8a, 0x4a, 0x9f, 0x09, 0x0c, 0xef, + 0xf1, 0x91, 0xf3, 0x48, 0x90, 0xb4, 0xb2, 0x69, 0x1b, 0x9b, 0x4f, 0xde, 0xb7, 0x99, 0x19, 0x4d, + 0xe9, 0xf2, 0x0c, 0xa0, 0x0a, 0x0e, 0xa6, 0xa9, 0x89, 0xd8, 0xf6, 0x78, 0x7b, 0x73, 0x3b, 0xb4, + 0xab, 0xb5, 0xfb, 0x93, 0xd0, 0xae, 0x04, 0x7e, 0x8a, 0x9e, 0x40, 0x6f, 0x29, 0x89, 0xf8, 0xcf, + 0x5a, 0xba, 0xa6, 0xc8, 0xb6, 0xc6, 0xdf, 0x2d, 0xe5, 0x31, 0x74, 0xc8, 0x35, 0x49, 0xb4, 0xa7, + 0xce, 0x95, 0x3d, 0x86, 0xcd, 0xed, 0xb0, 0x3d, 0xbd, 0x26, 0x89, 0x3f, 0x09, 0xdb, 
0x9a, 0xf2, + 0xd3, 0x71, 0x7a, 0xf3, 0x76, 0xd0, 0xf8, 0xe3, 0xed, 0xa0, 0xf1, 0xcb, 0x66, 0x60, 0xdd, 0x6c, + 0x06, 0xd6, 0xef, 0x9b, 0x81, 0xf5, 0xf7, 0x66, 0x60, 0xfd, 0xf8, 0xcd, 0x87, 0xff, 0xb9, 0x7d, + 0x59, 0xfd, 0x7e, 0xdf, 0x88, 0xdb, 0xe6, 0xdd, 0xbf, 0xf8, 0x27, 0x00, 0x00, 0xff, 0xff, 0x75, + 0x1f, 0x14, 0xf4, 0x33, 0x05, 0x00, 0x00, } func (m *Options) Marshal() (dAtA []byte, err error) { diff --git a/cmd/containerd-shim-runhcs-v1/stats/next.pb.txt b/cmd/containerd-shim-runhcs-v1/stats/next.pb.txt index 0b3b84d28e..e69de29bb2 100644 --- a/cmd/containerd-shim-runhcs-v1/stats/next.pb.txt +++ b/cmd/containerd-shim-runhcs-v1/stats/next.pb.txt @@ -1,1048 +0,0 @@ -file { - name: "google/protobuf/timestamp.proto" - package: "google.protobuf" - message_type { - name: "Timestamp" - field { - name: "seconds" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_INT64 - json_name: "seconds" - } - field { - name: "nanos" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_INT32 - json_name: "nanos" - } - } - options { - java_package: "com.google.protobuf" - java_outer_classname: "TimestampProto" - java_multiple_files: true - go_package: "github.com/golang/protobuf/ptypes/timestamp" - cc_enable_arenas: true - objc_class_prefix: "GPB" - csharp_namespace: "Google.Protobuf.WellKnownTypes" - } - syntax: "proto3" -} -file { - name: "github.com/containerd/cgroups/stats/v1/metrics.proto" - package: "io.containerd.cgroups.v1" - dependency: "gogoproto/gogo.proto" - message_type { - name: "Metrics" - field { - name: "hugetlb" - number: 1 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.HugetlbStat" - json_name: "hugetlb" - } - field { - name: "pids" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.PidsStat" - json_name: "pids" - } - field { - name: "cpu" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.CPUStat" - options { - 65004: "CPU" - } - 
json_name: "cpu" - } - field { - name: "memory" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.MemoryStat" - json_name: "memory" - } - field { - name: "blkio" - number: 5 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.BlkIOStat" - json_name: "blkio" - } - field { - name: "rdma" - number: 6 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.RdmaStat" - json_name: "rdma" - } - field { - name: "network" - number: 7 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.NetworkStat" - json_name: "network" - } - } - message_type { - name: "HugetlbStat" - field { - name: "usage" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "usage" - } - field { - name: "max" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "max" - } - field { - name: "failcnt" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "failcnt" - } - field { - name: "pagesize" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "pagesize" - } - } - message_type { - name: "PidsStat" - field { - name: "current" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "current" - } - field { - name: "limit" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "limit" - } - } - message_type { - name: "CPUStat" - field { - name: "usage" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.CPUUsage" - json_name: "usage" - } - field { - name: "throttling" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.Throttle" - json_name: "throttling" - } - } - message_type { - name: "CPUUsage" - field { - name: "total" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "total" - } - field { - name: "kernel" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - 
json_name: "kernel" - } - field { - name: "user" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "user" - } - field { - name: "per_cpu" - number: 4 - label: LABEL_REPEATED - type: TYPE_UINT64 - options { - 65004: "PerCPU" - } - json_name: "perCpu" - } - } - message_type { - name: "Throttle" - field { - name: "periods" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "periods" - } - field { - name: "throttled_periods" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "throttledPeriods" - } - field { - name: "throttled_time" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "throttledTime" - } - } - message_type { - name: "MemoryStat" - field { - name: "cache" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "cache" - } - field { - name: "rss" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - options { - 65004: "RSS" - } - json_name: "rss" - } - field { - name: "rss_huge" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - options { - 65004: "RSSHuge" - } - json_name: "rssHuge" - } - field { - name: "mapped_file" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "mappedFile" - } - field { - name: "dirty" - number: 5 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "dirty" - } - field { - name: "writeback" - number: 6 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "writeback" - } - field { - name: "pg_pg_in" - number: 7 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "pgPgIn" - } - field { - name: "pg_pg_out" - number: 8 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "pgPgOut" - } - field { - name: "pg_fault" - number: 9 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "pgFault" - } - field { - name: "pg_maj_fault" - number: 10 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "pgMajFault" - } - field { - name: "inactive_anon" - number: 11 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - 
json_name: "inactiveAnon" - } - field { - name: "active_anon" - number: 12 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "activeAnon" - } - field { - name: "inactive_file" - number: 13 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "inactiveFile" - } - field { - name: "active_file" - number: 14 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "activeFile" - } - field { - name: "unevictable" - number: 15 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "unevictable" - } - field { - name: "hierarchical_memory_limit" - number: 16 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "hierarchicalMemoryLimit" - } - field { - name: "hierarchical_swap_limit" - number: 17 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "hierarchicalSwapLimit" - } - field { - name: "total_cache" - number: 18 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalCache" - } - field { - name: "total_rss" - number: 19 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - options { - 65004: "TotalRSS" - } - json_name: "totalRss" - } - field { - name: "total_rss_huge" - number: 20 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - options { - 65004: "TotalRSSHuge" - } - json_name: "totalRssHuge" - } - field { - name: "total_mapped_file" - number: 21 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalMappedFile" - } - field { - name: "total_dirty" - number: 22 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalDirty" - } - field { - name: "total_writeback" - number: 23 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalWriteback" - } - field { - name: "total_pg_pg_in" - number: 24 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalPgPgIn" - } - field { - name: "total_pg_pg_out" - number: 25 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalPgPgOut" - } - field { - name: "total_pg_fault" - number: 26 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalPgFault" - } - field { - name: 
"total_pg_maj_fault" - number: 27 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalPgMajFault" - } - field { - name: "total_inactive_anon" - number: 28 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalInactiveAnon" - } - field { - name: "total_active_anon" - number: 29 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalActiveAnon" - } - field { - name: "total_inactive_file" - number: 30 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalInactiveFile" - } - field { - name: "total_active_file" - number: 31 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalActiveFile" - } - field { - name: "total_unevictable" - number: 32 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "totalUnevictable" - } - field { - name: "usage" - number: 33 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.MemoryEntry" - json_name: "usage" - } - field { - name: "swap" - number: 34 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.MemoryEntry" - json_name: "swap" - } - field { - name: "kernel" - number: 35 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.MemoryEntry" - json_name: "kernel" - } - field { - name: "kernel_tcp" - number: 36 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.MemoryEntry" - options { - 65004: "KernelTCP" - } - json_name: "kernelTcp" - } - } - message_type { - name: "MemoryEntry" - field { - name: "limit" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "limit" - } - field { - name: "usage" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "usage" - } - field { - name: "max" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "max" - } - field { - name: "failcnt" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "failcnt" - } - } - message_type { - name: "BlkIOStat" - field { - name: 
"io_service_bytes_recursive" - number: 1 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.BlkIOEntry" - json_name: "ioServiceBytesRecursive" - } - field { - name: "io_serviced_recursive" - number: 2 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.BlkIOEntry" - json_name: "ioServicedRecursive" - } - field { - name: "io_queued_recursive" - number: 3 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.BlkIOEntry" - json_name: "ioQueuedRecursive" - } - field { - name: "io_service_time_recursive" - number: 4 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.BlkIOEntry" - json_name: "ioServiceTimeRecursive" - } - field { - name: "io_wait_time_recursive" - number: 5 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.BlkIOEntry" - json_name: "ioWaitTimeRecursive" - } - field { - name: "io_merged_recursive" - number: 6 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.BlkIOEntry" - json_name: "ioMergedRecursive" - } - field { - name: "io_time_recursive" - number: 7 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.BlkIOEntry" - json_name: "ioTimeRecursive" - } - field { - name: "sectors_recursive" - number: 8 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.BlkIOEntry" - json_name: "sectorsRecursive" - } - } - message_type { - name: "BlkIOEntry" - field { - name: "op" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "op" - } - field { - name: "device" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "device" - } - field { - name: "major" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "major" - } - field { - name: "minor" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "minor" - } - field { - name: "value" - number: 5 
- label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "value" - } - } - message_type { - name: "RdmaStat" - field { - name: "current" - number: 1 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.RdmaEntry" - json_name: "current" - } - field { - name: "limit" - number: 2 - label: LABEL_REPEATED - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.RdmaEntry" - json_name: "limit" - } - } - message_type { - name: "RdmaEntry" - field { - name: "device" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "device" - } - field { - name: "hca_handles" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT32 - json_name: "hcaHandles" - } - field { - name: "hca_objects" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT32 - json_name: "hcaObjects" - } - } - message_type { - name: "NetworkStat" - field { - name: "name" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_STRING - json_name: "name" - } - field { - name: "rx_bytes" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "rxBytes" - } - field { - name: "rx_packets" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "rxPackets" - } - field { - name: "rx_errors" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "rxErrors" - } - field { - name: "rx_dropped" - number: 5 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "rxDropped" - } - field { - name: "tx_bytes" - number: 6 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "txBytes" - } - field { - name: "tx_packets" - number: 7 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "txPackets" - } - field { - name: "tx_errors" - number: 8 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "txErrors" - } - field { - name: "tx_dropped" - number: 9 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "txDropped" - } - } - syntax: "proto3" -} -file { - name: 
"github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats/stats.proto" - package: "containerd.runhcs.stats.v1" - dependency: "gogoproto/gogo.proto" - dependency: "google/protobuf/timestamp.proto" - dependency: "github.com/containerd/cgroups/stats/v1/metrics.proto" - message_type { - name: "Statistics" - field { - name: "windows" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".containerd.runhcs.stats.v1.WindowsContainerStatistics" - oneof_index: 0 - json_name: "windows" - } - field { - name: "linux" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".io.containerd.cgroups.v1.Metrics" - oneof_index: 0 - json_name: "linux" - } - field { - name: "vm" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".containerd.runhcs.stats.v1.VirtualMachineStatistics" - options { - 65004: "VM" - } - json_name: "vm" - } - oneof_decl { - name: "container" - } - } - message_type { - name: "WindowsContainerStatistics" - field { - name: "timestamp" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".google.protobuf.Timestamp" - options { - 65001: 0 - 65010: 1 - } - json_name: "timestamp" - } - field { - name: "container_start_time" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".google.protobuf.Timestamp" - options { - 65001: 0 - 65010: 1 - } - json_name: "containerStartTime" - } - field { - name: "uptime_ns" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - options { - 65004: "UptimeNS" - } - json_name: "uptimeNs" - } - field { - name: "processor" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".containerd.runhcs.stats.v1.WindowsContainerProcessorStatistics" - json_name: "processor" - } - field { - name: "memory" - number: 5 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".containerd.runhcs.stats.v1.WindowsContainerMemoryStatistics" - json_name: "memory" - } - field { - name: "storage" - number: 6 - label: LABEL_OPTIONAL - type: 
TYPE_MESSAGE - type_name: ".containerd.runhcs.stats.v1.WindowsContainerStorageStatistics" - json_name: "storage" - } - } - message_type { - name: "WindowsContainerProcessorStatistics" - field { - name: "total_runtime_ns" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - options { - 65004: "TotalRuntimeNS" - } - json_name: "totalRuntimeNs" - } - field { - name: "runtime_user_ns" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - options { - 65004: "RuntimeUserNS" - } - json_name: "runtimeUserNs" - } - field { - name: "runtime_kernel_ns" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - options { - 65004: "RuntimeKernelNS" - } - json_name: "runtimeKernelNs" - } - } - message_type { - name: "WindowsContainerMemoryStatistics" - field { - name: "memory_usage_commit_bytes" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "memoryUsageCommitBytes" - } - field { - name: "memory_usage_commit_peak_bytes" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "memoryUsageCommitPeakBytes" - } - field { - name: "memory_usage_private_working_set_bytes" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "memoryUsagePrivateWorkingSetBytes" - } - } - message_type { - name: "WindowsContainerStorageStatistics" - field { - name: "read_count_normalized" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "readCountNormalized" - } - field { - name: "read_size_bytes" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "readSizeBytes" - } - field { - name: "write_count_normalized" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "writeCountNormalized" - } - field { - name: "write_size_bytes" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "writeSizeBytes" - } - } - message_type { - name: "VirtualMachineStatistics" - field { - name: "processor" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: 
".containerd.runhcs.stats.v1.VirtualMachineProcessorStatistics" - json_name: "processor" - } - field { - name: "memory" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".containerd.runhcs.stats.v1.VirtualMachineMemoryStatistics" - json_name: "memory" - } - } - message_type { - name: "VirtualMachineProcessorStatistics" - field { - name: "total_runtime_ns" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - options { - 65004: "TotalRuntimeNS" - } - json_name: "totalRuntimeNs" - } - } - message_type { - name: "VirtualMachineMemoryStatistics" - field { - name: "working_set_bytes" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "workingSetBytes" - } - field { - name: "virtual_node_count" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_UINT32 - json_name: "virtualNodeCount" - } - field { - name: "vm_memory" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_MESSAGE - type_name: ".containerd.runhcs.stats.v1.VirtualMachineMemory" - json_name: "vmMemory" - } - } - message_type { - name: "VirtualMachineMemory" - field { - name: "available_memory" - number: 1 - label: LABEL_OPTIONAL - type: TYPE_INT32 - json_name: "availableMemory" - } - field { - name: "available_memory_buffer" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_INT32 - json_name: "availableMemoryBuffer" - } - field { - name: "reserved_memory" - number: 3 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "reservedMemory" - } - field { - name: "assigned_memory" - number: 4 - label: LABEL_OPTIONAL - type: TYPE_UINT64 - json_name: "assignedMemory" - } - field { - name: "slp_active" - number: 5 - label: LABEL_OPTIONAL - type: TYPE_BOOL - json_name: "slpActive" - } - field { - name: "balancing_enabled" - number: 6 - label: LABEL_OPTIONAL - type: TYPE_BOOL - json_name: "balancingEnabled" - } - field { - name: "dm_operation_in_progress" - number: 7 - label: LABEL_OPTIONAL - type: TYPE_BOOL - json_name: "dmOperationInProgress" - } - } - options { - go_package: 
"github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats;stats" - } - weak_dependency: 0 - syntax: "proto3" -} diff --git a/cmd/device-util/main.go b/cmd/device-util/main.go new file mode 100644 index 0000000000..8ca35f3d3a --- /dev/null +++ b/cmd/device-util/main.go @@ -0,0 +1,105 @@ +package main + +import ( + "errors" + "fmt" + "os" + "strings" + + "github.com/Microsoft/hcsshim/internal/windevice" + "github.com/Microsoft/hcsshim/internal/winobjdir" + "github.com/urfave/cli" +) + +const usage = `device-util is a command line tool for querying devices present on Windows` + +func main() { + app := cli.NewApp() + app.Name = "device-util" + app.Commands = []cli.Command{ + queryChildrenCommand, + readObjDirCommand, + } + app.Usage = usage + + if err := app.Run(os.Args); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + +const ( + parentIDFlag = "parentID" + propertyFlag = "property" + objDirFlag = "dir" + + locationProperty = "location" + idProperty = "id" + + globalNTPath = "\\Global??" +) + +var readObjDirCommand = cli.Command{ + Name: "obj-dir", + Usage: "outputs contents of a NT object directory", + Flags: []cli.Flag{ + cli.StringFlag{ + Name: objDirFlag, + Usage: "Optional: Object directory to query. Defaults to the global object directory.", + }, + }, + Action: func(context *cli.Context) error { + dir := globalNTPath + if context.IsSet(objDirFlag) { + dir = context.String(objDirFlag) + } + entries, err := winobjdir.EnumerateNTObjectDirectory(dir) + if err != nil { + return err + } + formatted := strings.Join(entries, ",") + fmt.Fprintln(os.Stdout, formatted) + return nil + }, +} + +var queryChildrenCommand = cli.Command{ + Name: "children", + Usage: "queries for given devices' children on the system", + Flags: []cli.Flag{ + cli.StringFlag{ + Name: parentIDFlag, + Usage: "Required: Parent device's instance IDs. Comma separated string.", + }, + cli.StringFlag{ + Name: propertyFlag, + Usage: "Either 'location' or 'id', default 'id'. 
String indicating a property to query devices for.", + }, + }, + Action: func(context *cli.Context) error { + if !context.IsSet(parentIDFlag) { + return errors.New("`children` command must specify at least one parent instance ID") + } + csParents := context.String(parentIDFlag) + parents := strings.Split(csParents, ",") + + children, err := windevice.GetChildrenFromInstanceIDs(parents) + if err != nil { + return err + } + + property := idProperty + if context.IsSet(propertyFlag) { + property = context.String(propertyFlag) + } + if property == locationProperty { + children, err = windevice.GetDeviceLocationPathsFromIDs(children) + if err != nil { + return err + } + } + formattedChildren := strings.Join(children, ",") + fmt.Fprintln(os.Stdout, formattedChildren) + return nil + }, +} From ae426175f1380a324154bceee6bc9113d05fe51b Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Tue, 16 Jun 2020 13:09:44 -0700 Subject: [PATCH 06/20] Add commands for pnp device querying/management * Add new comma separated annotation "io.microsoft.assigneddevice.kerneldrivers" for path to device drivers * Add commands to install kernel drivers * Add command to query UVM for pnp device information Signed-off-by: Kathryn Baldauf --- internal/cmd/cmd.go | 1 + internal/devices/drivers.go | 40 +++++++++++++++++++++++ internal/devices/pnp.go | 64 +++++++++++++++++++++++++++++++++++++ internal/oci/uvm.go | 8 ++++- 4 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 internal/devices/drivers.go create mode 100644 internal/devices/pnp.go diff --git a/internal/cmd/cmd.go b/internal/cmd/cmd.go index 023e0b34d2..478455429f 100644 --- a/internal/cmd/cmd.go +++ b/internal/cmd/cmd.go @@ -101,6 +101,7 @@ func Command(host cow.ProcessHost, name string, arg ...string) *Cmd { Spec: &specs.Process{ Args: append([]string{name}, arg...), }, + ExitState: &ExitState{}, } if host.OS() == "windows" { cmd.Spec.Cwd = `C:\` diff --git a/internal/devices/drivers.go b/internal/devices/drivers.go 
new file mode 100644 index 0000000000..8b21cbc347 --- /dev/null +++ b/internal/devices/drivers.go @@ -0,0 +1,40 @@ +// +build windows + +package devices + +import ( + "context" + "fmt" + + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/resources" + "github.com/Microsoft/hcsshim/internal/uvm" +) + +// InstallWindowsDriver mounts a specified kernel driver using vsmb, then installs it in the UVM. +// +// `driver` is a directory path on the host that contains driver files for standard installation. +// +// Returns a ResourceCloser for the added vsmb share. On failure, the vsmb share will be released, +// the returned ResourceCloser will be nil, and an error will be returned. +func InstallWindowsDriver(ctx context.Context, vm *uvm.UtilityVM, driver string) (closer resources.ResourceCloser, err error) { + defer func() { + if err != nil && closer != nil { + // best effort clean up allocated resource on failure + if releaseErr := closer.Release(ctx); releaseErr != nil { + log.G(ctx).WithError(releaseErr).Error("failed to release container resource") + } + closer = nil + } + }() + options := vm.DefaultVSMBOptions(true) + closer, err = vm.AddVSMB(ctx, driver, options) + if err != nil { + return closer, fmt.Errorf("failed to add VSMB share to utility VM for path %+v: %s", driver, err) + } + uvmPath, err := vm.GetVSMBUvmPath(ctx, driver, true) + if err != nil { + return closer, err + } + return closer, execPnPInstallDriver(ctx, vm, uvmPath) +} diff --git a/internal/devices/pnp.go b/internal/devices/pnp.go new file mode 100644 index 0000000000..1cb67326f4 --- /dev/null +++ b/internal/devices/pnp.go @@ -0,0 +1,64 @@ +// +build windows + +package devices + +import ( + "context" + "fmt" + + "github.com/Microsoft/hcsshim/internal/cmd" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/shimdiag" + "github.com/Microsoft/hcsshim/internal/uvm" + 
"github.com/Microsoft/hcsshim/internal/winapi" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +const ( + uvmPnpExePath = "C:\\Windows\\System32\\pnputil.exe" + pnputilNoMoreItemsErrorMessage = `driver not ranked higher than existing driver in UVM. + if drivers were not previously present in the UVM, this + is an expected race and can be ignored.` +) + +// createPnPInstallDriverCommand creates a pnputil command to add and install drivers +// present in `driverUVMPath` and all subdirectories. +func createPnPInstallDriverCommand(driverUVMPath string) []string { + dirFormatted := fmt.Sprintf("%s/*.inf", driverUVMPath) + args := []string{ + "cmd", + "/c", + uvmPnpExePath, + "/add-driver", + dirFormatted, + "/subdirs", + "/install", + } + return args +} + +// execPnPInstallDriver makes the calls to exec in the uvm the pnp command +// that installs a driver previously mounted into the uvm. +func execPnPInstallDriver(ctx context.Context, vm *uvm.UtilityVM, driverDir string) error { + args := createPnPInstallDriverCommand(driverDir) + req := &shimdiag.ExecProcessRequest{ + Args: args, + } + exitCode, err := cmd.ExecInUvm(ctx, vm, req) + if err != nil && exitCode != winapi.ERROR_NO_MORE_ITEMS { + return errors.Wrapf(err, "failed to install driver %s in uvm with exit code %d", driverDir, exitCode) + } else if exitCode == winapi.ERROR_NO_MORE_ITEMS { + // As mentioned in `pnputilNoMoreItemsErrorMessage`, this exit code comes from pnputil + // but is not necessarily an error + log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: vm.ID(), + "driver": driverDir, + "error": pnputilNoMoreItemsErrorMessage, + }).Warn("expected version of driver may not have been installed") + } + + log.G(ctx).WithField("added drivers", driverDir).Debug("installed drivers") + return nil +} diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go index c25d94fa43..94658a0444 100644 --- a/internal/oci/uvm.go +++ b/internal/oci/uvm.go @@ -79,7 +79,13 @@ const ( // 
`spec.Windows.Resources.Storage.Iops`. AnnotationContainerStorageQoSIopsMaximum = "io.microsoft.container.storage.qos.iopsmaximum" // AnnotationGPUVHDPath overrides the default path to search for the gpu vhd - AnnotationGPUVHDPath = "io.microsoft.lcow.gpuvhdpath" + AnnotationGPUVHDPath = "io.microsoft.lcow.gpuvhdpath" + // AnnotationAssignedDeviceKernelDrivers indicates what drivers to install in the pod during device + // assignment. This value should contain a list of comma separated directories containing all + // files and information needed to install given driver(s). This may include .sys, + // .inf, .cer, and/or other files used during standard installation with pnputil. + AnnotationAssignedDeviceKernelDrivers = "io.microsoft.assigneddevice.kerneldrivers" + annotationAllowOvercommit = "io.microsoft.virtualmachine.computetopology.memory.allowovercommit" annotationEnableDeferredCommit = "io.microsoft.virtualmachine.computetopology.memory.enabledeferredcommit" annotationEnableColdDiscardHint = "io.microsoft.virtualmachine.computetopology.memory.enablecolddiscardhint" From 1f2256d5d9ed72e74cce82cc450c315867eb576b Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Tue, 16 Jun 2020 13:13:53 -0700 Subject: [PATCH 07/20] Add ability to handle assigned devices for WCOW * Add ability to parse assigned devices * Add function to query UVM for location paths of child devices assigned * Add new prefix for container spec specified devices in vpci * Remove block on assigning devices in WCOW Signed-off-by: Kathryn Baldauf --- internal/devices/assigned_devices.go | 159 +++++++++++++++++++++++++++ internal/hcsoci/create.go | 5 + internal/hcsoci/devices.go | 108 ++++++++++++++++++ internal/uvm/virtual_device.go | 28 ++++- 4 files changed, 295 insertions(+), 5 deletions(-) create mode 100644 internal/devices/assigned_devices.go create mode 100644 internal/hcsoci/devices.go diff --git a/internal/devices/assigned_devices.go b/internal/devices/assigned_devices.go new file mode 
100644 index 0000000000..de1ca247d7 --- /dev/null +++ b/internal/devices/assigned_devices.go @@ -0,0 +1,159 @@ +// +build windows + +package devices + +import ( + "context" + "fmt" + "io/ioutil" + "net" + "strings" + + winio "github.com/Microsoft/go-winio" + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/Microsoft/hcsshim/internal/cmd" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/shimdiag" + "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/pkg/errors" +) + +// AddDevice is the api exposed to hcsoci to handle assigning a device on a UVM +// +// `idType` refers to the specified device's type, supported types here are `VPCIDeviceIDType` +// and `VPCIDeviceIDTypeLegacy`. +// +// `deviceID` refers to the specified device's identifier. This must refer to a device instance id +// for hyper-v isolated device assignment. +// +// `deviceUtilPath` refers to the path in the UVM of the device-util tool used for finding the given +// device's location path(s). +// +// Returns the allocated vpci device in `vpci` to be tracked for release by the caller. On failure in +// this function, `vpci` is released and nil is returned for that value. +// +// Returns a slice of strings representing the resulting location path(s) for the specified device. 
+func AddDevice(ctx context.Context, vm *uvm.UtilityVM, idType, deviceID, deviceUtilPath string) (vpci *uvm.VPCIDevice, locationPaths []string, err error) { + defer func() { + if err != nil && vpci != nil { + // best effort clean up allocated resource on failure + if releaseErr := vpci.Release(ctx); releaseErr != nil { + log.G(ctx).WithError(releaseErr).Error("failed to release container resource") + } + vpci = nil + } + }() + if idType == uvm.VPCIDeviceIDType || idType == uvm.VPCIDeviceIDTypeLegacy { + vpci, err = vm.AssignDevice(ctx, deviceID) + if err != nil { + return vpci, nil, errors.Wrapf(err, "failed to assign device %s of type %s to pod %s", deviceID, idType, vm.ID()) + } + vmBusInstanceID := vm.GetAssignedDeviceVMBUSInstanceID(vpci.VMBusGUID) + log.G(ctx).WithField("vmbus id", vmBusInstanceID).Info("vmbus instance ID") + + locationPaths, err = getChildrenDeviceLocationPaths(ctx, vm, vmBusInstanceID, deviceUtilPath) + return vpci, locationPaths, err + } + + return vpci, nil, fmt.Errorf("device type %s for device %s is not supported in windows", idType, deviceID) +} + +// getChildrenDeviceLocationPaths queries the UVM with the device-util tool with the formatted +// parent bus device for the children devices' location paths from the uvm's view. 
+// Returns a slice of strings representing the resulting children location paths +func getChildrenDeviceLocationPaths(ctx context.Context, vm *uvm.UtilityVM, vmBusInstanceID string, deviceUtilPath string) ([]string, error) { + p, l, err := createNamedPipeListener() + if err != nil { + return nil, err + } + defer l.Close() + + var pipeResults []string + errChan := make(chan error) + + go readCsPipeOutput(l, errChan, &pipeResults) + + args := createDeviceUtilChildrenCommand(deviceUtilPath, vmBusInstanceID) + req := &shimdiag.ExecProcessRequest{ + Args: args, + Stdout: p, + } + exitCode, err := cmd.ExecInUvm(ctx, vm, req) + if err != nil { + return nil, errors.Wrapf(err, "failed to find devices with exit code %d", exitCode) + } + + // wait to finish parsing stdout results + select { + case err := <-errChan: + if err != nil { + return nil, err + } + case <-ctx.Done(): + return nil, ctx.Err() + } + + return pipeResults, nil +} + +// createDeviceUtilChildrenCommand constructs a device-util command to query the UVM for +// device information +// +// `deviceUtilPath` is the UVM path to device-util +// +// `vmBusInstanceID` is a string of the vmbus instance ID already assigned to the UVM +// +// Returns a slice of strings that represent the location paths in the UVM of the +// target devices +func createDeviceUtilChildrenCommand(deviceUtilPath string, vmBusInstanceID string) []string { + parentIDsFlag := fmt.Sprintf("--parentID=%s", vmBusInstanceID) + args := []string{deviceUtilPath, "children", parentIDsFlag, "--property=location"} + return args +} + +// createNamedPipeListener is a helper function to create and return a pipe listener +// and it's created path. 
+func createNamedPipeListener() (string, net.Listener, error) { + g, err := guid.NewV4() + if err != nil { + return "", nil, err + } + p := `\\.\pipe\` + g.String() + l, err := winio.ListenPipe(p, nil) + if err != nil { + return "", nil, err + } + return p, l, nil +} + +// readCsPipeOutput is a helper function that connects to a listener and reads +// the connection's comma separated output until done. resulting comma separated +// values are returned in the `result` param. The `errChan` param is used to +// propagate an errors to the calling function. +func readCsPipeOutput(l net.Listener, errChan chan<- error, result *[]string) { + defer close(errChan) + c, err := l.Accept() + if err != nil { + errChan <- errors.Wrapf(err, "failed to accept named pipe") + return + } + bytes, err := ioutil.ReadAll(c) + if err != nil { + errChan <- err + return + } + + elementsAsString := strings.TrimSuffix(string(bytes), "\n") + elements := strings.Split(elementsAsString, ",") + + for _, elem := range elements { + *result = append(*result, elem) + } + + if len(*result) == 0 { + errChan <- errors.Wrapf(err, "failed to get any pipe output") + return + } + + errChan <- nil +} diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go index 48f58e0cab..fcc9138796 100644 --- a/internal/hcsoci/create.go +++ b/internal/hcsoci/create.go @@ -253,3 +253,8 @@ func (coi *createOptionsInternal) isV2Argon() bool { func (coi *createOptionsInternal) isV1Argon() bool { return schemaversion.IsV10(coi.actualSchemaVersion) && coi.Spec.Windows.HyperV == nil } + +func (coi *createOptionsInternal) hasWindowsAssignedDevices() bool { + return (coi.Spec.Windows != nil) && (coi.Spec.Windows.Devices != nil) && + (len(coi.Spec.Windows.Devices) > 0) +} diff --git a/internal/hcsoci/devices.go b/internal/hcsoci/devices.go new file mode 100644 index 0000000000..1c5e6f9553 --- /dev/null +++ b/internal/hcsoci/devices.go @@ -0,0 +1,108 @@ +package hcsoci + +import ( + "context" + "fmt" + "os" + 
"path/filepath" + "strings" + + "github.com/Microsoft/hcsshim/internal/devices" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oci" + "github.com/Microsoft/hcsshim/internal/resources" + "github.com/Microsoft/hcsshim/internal/uvm" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +const deviceUtilExeName = "device-util.exe" + +// getAssignedDeviceKernelDrivers gets any device drivers specified on the spec. +// Drivers are optional, therefore do not return an error if none are on the spec. +// +// See comment on oci.AnnotationAssignedDeviceKernelDrivers for expected format. +func getAssignedDeviceKernelDrivers(annotations map[string]string) ([]string, error) { + csDrivers, ok := annotations[oci.AnnotationAssignedDeviceKernelDrivers] + if !ok || csDrivers == "" { + return nil, nil + } + drivers := strings.Split(csDrivers, ",") + for _, driver := range drivers { + if _, err := os.Stat(driver); err != nil { + return nil, errors.Wrapf(err, "failed to find path to drivers at %s", driver) + } + } + return drivers, nil +} + +// getDeviceUtilHostPath is a simple helper function to find the host path of the device-util tool +func getDeviceUtilHostPath() string { + return filepath.Join(filepath.Dir(os.Args[0]), deviceUtilExeName) +} + +// handleAssignedDevicesWindows does all of the work to setup the hosting UVM, assign in devices +// specified on the spec, and install any necessary, specified kernel drivers into the UVM. +// +// Drivers must be installed after the target devices are assigned into the UVM. +// This ordering allows us to guarantee that driver installation on a device in the UVM is completed +// before we attempt to create a container. 
+func handleAssignedDevicesWindows(ctx context.Context, vm *uvm.UtilityVM, annotations map[string]string, specDevs []specs.WindowsDevice) (resultDevs []specs.WindowsDevice, closers []resources.ResourceCloser, err error) {
+	defer func() {
+		if err != nil {
+			// best effort clean up allocated resources on failure
+			for _, r := range closers {
+				if releaseErr := r.Release(ctx); releaseErr != nil {
+					log.G(ctx).WithError(releaseErr).Error("failed to release container resource")
+				}
+			}
+			closers = nil
+			resultDevs = nil
+		}
+	}()
+
+	// install the device util tool in the UVM
+	toolHostPath := getDeviceUtilHostPath()
+	options := vm.DefaultVSMBOptions(true)
+	toolsShare, err := vm.AddVSMB(ctx, toolHostPath, options)
+	if err != nil {
+		return nil, closers, fmt.Errorf("failed to add VSMB share to utility VM for path %+v: %s", toolHostPath, err)
+	}
+	closers = append(closers, toolsShare)
+	deviceUtilPath, err := vm.GetVSMBUvmPath(ctx, toolHostPath, true)
+	if err != nil {
+		return nil, closers, err
+	}
+
+	// assign device into UVM and create corresponding spec windows devices
+	for _, d := range specDevs {
+		vpciCloser, locationPaths, err := devices.AddDevice(ctx, vm, d.IDType, d.ID, deviceUtilPath)
+		if err != nil {
+			return nil, closers, err // keep closers so the deferred cleanup releases what was already allocated
+		}
+		closers = append(closers, vpciCloser)
+		for _, value := range locationPaths {
+			specDev := specs.WindowsDevice{
+				ID:     value,
+				IDType: uvm.VPCILocationPathIDType,
+			}
+			log.G(ctx).WithField("parsed devices", specDev).Info("added windows device to spec")
+			resultDevs = append(resultDevs, specDev)
+		}
+	}
+
+	// get the spec specified kernel drivers and install them on the UVM
+	drivers, err := getAssignedDeviceKernelDrivers(annotations)
+	if err != nil {
+		return nil, closers, err
+	}
+	for _, d := range drivers {
+		driverCloser, err := devices.InstallWindowsDriver(ctx, vm, d)
+		if err != nil {
+			return nil, closers, err
+		}
+		closers = append(closers, driverCloser)
+	}
+
+	return resultDevs, closers, nil
+}
diff --git
a/internal/uvm/virtual_device.go b/internal/uvm/virtual_device.go index dfe993d6b5..d06453793f 100644 --- a/internal/uvm/virtual_device.go +++ b/internal/uvm/virtual_device.go @@ -2,7 +2,6 @@ package uvm import ( "context" - "errors" "fmt" "github.com/Microsoft/go-winio/pkg/guid" @@ -16,8 +15,14 @@ const ( VPCILocationPathIDType = "vpci-location-path" VPCIClassGUIDTypeLegacy = "class" VPCIClassGUIDType = "vpci-class-guid" + VPCIDeviceIDTypeLegacy = "vpci" + VPCIDeviceIDType = "vpci-instance-id" ) +// this is the well known channel type GUID defined by VMBUS for all assigned devices +const vmbusChannelTypeGUIDFormatted = "{44c4f61d-4444-4400-9d52-802e27ede19f}" +const assignedDeviceEnumerator = "VMBUS" + // VPCIDevice represents a vpci device. Holds its guid and a handle to the uvm it // belongs to. type VPCIDevice struct { @@ -31,6 +36,23 @@ type VPCIDevice struct { refCount uint32 } +// GetAssignedDeviceVMBUSInstanceID returns the instance ID of the VMBUS channel device node created. +// +// When a device is assigned to a UVM via VPCI support in HCS, a new VMBUS channel device node is +// created in the UVM. The actual device that was assigned in is exposed as a child on this VMBUS +// channel device node. +// +// A device node's instance ID is an identifier that distinguishes that device from other devices +// on the system. The GUID of a VMBUS channel device node refers to that channel's unique +// identifier used internally by VMBUS and can be used to determine the VMBUS channel +// device node's instance ID. 
+// +// A VMBUS channel device node's instance ID is in the form: +// "VMBUS\vmbusChannelTypeGUIDFormatted\{vmBusChannelGUID}" +func (uvm *UtilityVM) GetAssignedDeviceVMBUSInstanceID(vmBusChannelGUID string) string { + return fmt.Sprintf("%s\\%s\\{%s}", assignedDeviceEnumerator, vmbusChannelTypeGUIDFormatted, vmBusChannelGUID) +} + // Release frees the resources of the corresponding vpci device func (vpci *VPCIDevice) Release(ctx context.Context) error { if err := vpci.vm.removeDevice(ctx, vpci.deviceInstanceID); err != nil { @@ -46,10 +68,6 @@ func (vpci *VPCIDevice) Release(ctx context.Context) error { // onto the UVM. A new VPCIDevice entry is made on the UVM and the VPCIDevice is returned // to the caller func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string) (*VPCIDevice, error) { - if uvm.operatingSystem == "windows" { - return nil, errors.New("assigned devices is not currently supported on wcow") - } - guid, err := guid.NewV4() if err != nil { return nil, err From 9ffc90a28f709d40badcf3d76cef7f62f31594fe Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Tue, 16 Jun 2020 13:18:43 -0700 Subject: [PATCH 08/20] Use previously defined functions to allow device assignment * Extend parsing of assigned devices on the hcsv2 doc to include xenon * Use functions that handle assigned devices when allocating windows container resources Signed-off-by: Kathryn Baldauf --- internal/hcsoci/hcsdoc_wcow.go | 26 ++++++++------------------ internal/hcsoci/resources_wcow.go | 10 ++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 5a04d5c98d..6552f897b8 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -4,7 +4,6 @@ package hcsoci import ( "context" - "errors" "fmt" "path/filepath" "regexp" @@ -282,29 +281,19 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter v1.MappedPipes = mpsv1 
v2Container.MappedPipes = mpsv2 - if specHasAssignedDevices(coi) { - // add assigned devices to the container definition - if err := parseAssignedDevices(coi, v2Container); err != nil { - return nil, nil, err - } + // add assigned devices to the container definition + if err := parseAssignedDevices(ctx, coi, v2Container); err != nil { + return nil, nil, err } return v1, v2Container, nil } -func specHasAssignedDevices(coi *createOptionsInternal) bool { - if coi.Spec.Windows != nil && coi.Spec.Windows.Devices != nil && - len(coi.Spec.Windows.Devices) > 0 { - return true - } - return false -} - // parseAssignedDevices parses assigned devices for the container definition -// this is currently supported for HCS schema V2 argon only -func parseAssignedDevices(coi *createOptionsInternal, v2 *hcsschema.Container) error { - if !coi.isV2Argon() { - return errors.New("device assignment is currently only supported for HCS schema V2 argon") +// this is currently supported for v2 argon and xenon only +func parseAssignedDevices(ctx context.Context, coi *createOptionsInternal, v2 *hcsschema.Container) error { + if !coi.isV2Argon() && !coi.isV2Xenon() { + return nil } v2AssignedDevices := []hcsschema.Device{} @@ -321,6 +310,7 @@ func parseAssignedDevices(coi *createOptionsInternal, v2 *hcsschema.Container) e default: return fmt.Errorf("specified device %s has unsupported type %s", d.ID, d.IDType) } + log.G(ctx).WithField("hcsv2 device", v2Dev).Debug("adding assigned device to container doc") v2AssignedDevices = append(v2AssignedDevices, v2Dev) } v2.AssignedDevices = v2AssignedDevices diff --git a/internal/hcsoci/resources_wcow.go b/internal/hcsoci/resources_wcow.go index 44e55c50b1..7b89a25c9f 100644 --- a/internal/hcsoci/resources_wcow.go +++ b/internal/hcsoci/resources_wcow.go @@ -138,5 +138,15 @@ func allocateWindowsResources(ctx context.Context, coi *createOptionsInternal, r //TODO dcantah: If/when dynamic service table entries is supported register the RpcEndpoint with 
hvsocket here } } + + if coi.HostingSystem != nil && coi.hasWindowsAssignedDevices() { + windowsDevices, closers, err := handleAssignedDevicesWindows(ctx, coi.HostingSystem, coi.Spec.Annotations, coi.Spec.Windows.Devices) + if err != nil { + return err + } + r.Add(closers...) + coi.Spec.Windows.Devices = windowsDevices + } + return nil } From 9f824883e0640fc3b5c190b6f67fe64e321dc289 Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Tue, 16 Jun 2020 13:30:55 -0700 Subject: [PATCH 09/20] Add e2e tests for assigned devices in WCOW Signed-off-by: Kathryn Baldauf --- .../container_virtual_device_test.go | 341 ++++++++++++++++-- test/cri-containerd/main.go | 25 +- 2 files changed, 323 insertions(+), 43 deletions(-) diff --git a/test/cri-containerd/container_virtual_device_test.go b/test/cri-containerd/container_virtual_device_test.go index adf28ac091..c1ba40df55 100644 --- a/test/cri-containerd/container_virtual_device_test.go +++ b/test/cri-containerd/container_virtual_device_test.go @@ -14,11 +14,18 @@ import ( runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) +const containerDeviceUtilPath = "C:\\device-util.exe" +const gpuWin32InstanceIDPrefix = "PCI#VEN_10DE" + // makeGPUExecCommand constructs the container command to check for the // existence of a nvidia GPU device and returns the command in an // ExecSyncRequest -func makeGPUExecCommand(containerID string) *runtime.ExecSyncRequest { +func makeGPUExecCommand(os string, containerID string) *runtime.ExecSyncRequest { cmd := []string{"ls", "/dev/nvidia0"} + if os == "windows" { + cmd = []string{containerDeviceUtilPath, "obj-dir"} + } + return &runtime.ExecSyncRequest{ ContainerId: containerID, Cmd: cmd, @@ -28,8 +35,8 @@ func makeGPUExecCommand(containerID string) *runtime.ExecSyncRequest { // verifyGPUIsPresent is a helper function that runs a command in the container // to verify the existence of a GPU and fails the running test is none are found -func verifyGPUIsPresent(t *testing.T, client 
runtime.RuntimeServiceClient, ctx context.Context, containerID string) { - execReq := makeGPUExecCommand(containerID) +func verifyGPUIsPresentLCOW(t *testing.T, client runtime.RuntimeServiceClient, ctx context.Context, containerID string) { + execReq := makeGPUExecCommand("linux", containerID) response := execSync(t, client, ctx, execReq) if len(response.Stderr) != 0 { t.Fatalf("expected to see no error, instead saw %s", string(response.Stderr)) @@ -39,11 +46,30 @@ func verifyGPUIsPresent(t *testing.T, client runtime.RuntimeServiceClient, ctx c } } +func isGPUPresentWCOW(t *testing.T, client runtime.RuntimeServiceClient, ctx context.Context, containerID string) bool { + execReq := makeGPUExecCommand("windows", containerID) + response := execSync(t, client, ctx, execReq) + if len(response.Stderr) != 0 { + t.Fatalf("expected to see no error, instead saw %s", string(response.Stderr)) + } + out := string(response.Stdout) + devices := strings.Split(out, ",") + if len(devices) == 0 { + t.Fatal("expected to see devices on container, none found") + } + for _, d := range devices { + if strings.HasPrefix(d, gpuWin32InstanceIDPrefix) { + return true + } + } + return false +} + // verifyGPUIsNotPresent is a helper function that runs a command in the container // to verify that there are no GPUs present in the container and fails the running test // if any are found -func verifyGPUIsNotPresent(t *testing.T, client runtime.RuntimeServiceClient, ctx context.Context, containerID string) { - execReq := makeGPUExecCommand(containerID) +func verifyGPUIsNotPresentLCOW(t *testing.T, client runtime.RuntimeServiceClient, ctx context.Context, containerID string) { + execReq := makeGPUExecCommand("linux", containerID) response := execSync(t, client, ctx, execReq) if len(response.Stderr) == 0 { t.Fatal("expected to see an error as file /dev/nvidia0 should not exist, instead saw none") @@ -76,18 +102,6 @@ func findTestNvidiaGPULocationPath() (string, error) { return 
strings.TrimSpace(string(out)), nil } -// findTestNvidiaGPUClassGUID returns the class guid of the first pci nvidia device on the host -func findTestNvidiaGPUInterfaceClassGUID() (string, error) { - out, err := exec.Command( - "powershell", - `((Get-PnpDevice -presentOnly | where-object {$_.InstanceID -Match 'PCI\\VEN_10DE.*'})[0] | Get-PnpDeviceProperty DEVPKEY_DeviceInterface_ClassGuid).Data[0]`, - ).Output() - if err != nil { - return "", nil - } - return strings.TrimSpace(string(out)), nil -} - // findTestVirtualDeviceID returns the instance ID of the first generic pcip device on the host func findTestVirtualDeviceID() (string, error) { out, err := exec.Command( @@ -168,6 +182,15 @@ func getGPUContainerRequestWCOW(t *testing.T, podID string, podConfig *runtime.P Devices: []*runtime.Device{ device, }, + Mounts: []*runtime.Mount{ + { + HostPath: testDeviceUtilFilePath, + ContainerPath: containerDeviceUtilPath, + }, + }, + Annotations: map[string]string{ + "io.microsoft.assigneddevice.kerneldrivers": testDriversPath, + }, }, PodSandboxId: podID, SandboxConfig: podConfig, @@ -183,10 +206,10 @@ func Test_RunContainer_VirtualDevice_GPU_LCOW(t *testing.T) { testDeviceInstanceID, err := findTestNvidiaGPUDevice() if err != nil { - t.Skipf("skipping test, failed to find assignable nvidia gpu on host with: %v", err) + t.Fatalf("skipping test, failed to find assignable nvidia gpu on host with: %v", err) } if testDeviceInstanceID == "" { - t.Skipf("skipping test, host has no assignable nvidia gpu devices") + t.Fatalf("skipping test, host has no assignable nvidia gpu devices") } pullRequiredLcowImages(t, []string{imageLcowK8sPause, imageLcowAlpine}) @@ -211,7 +234,7 @@ func Test_RunContainer_VirtualDevice_GPU_LCOW(t *testing.T) { startContainer(t, client, ctx, containerID) defer stopContainer(t, client, ctx, containerID) - verifyGPUIsPresent(t, client, ctx, containerID) + verifyGPUIsPresentLCOW(t, client, ctx, containerID) } func 
Test_RunContainer_VirtualDevice_GPU_Multiple_LCOW(t *testing.T) { @@ -224,10 +247,10 @@ func Test_RunContainer_VirtualDevice_GPU_Multiple_LCOW(t *testing.T) { numContainers := 2 testDeviceInstanceID, err := findTestNvidiaGPUDevice() if err != nil { - t.Skipf("skipping test, failed to find assignable nvidia gpu on host with: %v", err) + t.Fatalf("skipping test, failed to find assignable nvidia gpu on host with: %v", err) } if testDeviceInstanceID == "" { - t.Skipf("skipping test, host has no assignable nvidia gpu devices") + t.Fatalf("skipping test, host has no assignable nvidia gpu devices") } pullRequiredLcowImages(t, []string{imageLcowK8sPause, imageLcowAlpine}) @@ -259,7 +282,7 @@ func Test_RunContainer_VirtualDevice_GPU_Multiple_LCOW(t *testing.T) { startContainer(t, client, ctx, containerID) defer stopContainer(t, client, ctx, containerID) - verifyGPUIsPresent(t, client, ctx, containerID) + verifyGPUIsPresentLCOW(t, client, ctx, containerID) } } @@ -320,7 +343,7 @@ func Test_RunContainer_VirtualDevice_GPU_and_NoGPU_LCOW(t *testing.T) { defer stopContainer(t, client, ctx, gpuContainerID) // verify that we can access the GPU in the GPU-Container - verifyGPUIsPresent(t, client, ctx, gpuContainerID) + verifyGPUIsPresentLCOW(t, client, ctx, gpuContainerID) // create container without a GPU noGPUContainerID := createContainer(t, client, ctx, containerNoGPURequest) @@ -329,8 +352,7 @@ func Test_RunContainer_VirtualDevice_GPU_and_NoGPU_LCOW(t *testing.T) { defer stopContainer(t, client, ctx, noGPUContainerID) // verify that we can't access the GPU in the No-GPU-Container - verifyGPUIsNotPresent(t, client, ctx, noGPUContainerID) - + verifyGPUIsNotPresentLCOW(t, client, ctx, noGPUContainerID) } func Test_RunContainer_VirtualDevice_GPU_Multiple_Removal_LCOW(t *testing.T) { @@ -380,7 +402,7 @@ func Test_RunContainer_VirtualDevice_GPU_Multiple_Removal_LCOW(t *testing.T) { // verify after removing second container that we can still see // the GPU on the first container - 
verifyGPUIsPresent(t, client, ctx, containerOneID) + verifyGPUIsPresentLCOW(t, client, ctx, containerOneID) } func Test_RunContainer_VirtualDevice_LocationPath_WCOW_Process(t *testing.T) { @@ -388,13 +410,13 @@ func Test_RunContainer_VirtualDevice_LocationPath_WCOW_Process(t *testing.T) { testDeviceLocationPath, err := findTestNvidiaGPULocationPath() if err != nil { - t.Skipf("skipping test, failed to retrieve assignable device on host with: %v", err) + t.Fatalf("skipping test, failed to retrieve assignable device on host with: %v", err) } if testDeviceLocationPath == "" { - t.Skipf("skipping test, host has no assignable devices") + t.Fatalf("skipping test, host has no assignable devices") } - pullRequiredLcowImages(t, []string{imageLcowK8sPause, imageLcowAlpine}) + pullRequiredImages(t, []string{imageWindowsNanoserver}) client := newTestRuntimeClient(t) podctx := context.Background() @@ -422,6 +444,10 @@ func Test_RunContainer_VirtualDevice_LocationPath_WCOW_Process(t *testing.T) { containerID := createContainer(t, client, ctx, containerRequest) runContainerLifetime(t, client, ctx, containerID) + + if !isGPUPresentWCOW(t, client, ctx, containerID) { + t.Fatalf("expected to see a GPU device on container %s, none present", containerID) + } } func Test_RunContainer_VirtualDevice_ClassGUID_WCOW_Process(t *testing.T) { @@ -430,16 +456,16 @@ func Test_RunContainer_VirtualDevice_ClassGUID_WCOW_Process(t *testing.T) { // instance ID is only used here to ensure there are devices present on the host instanceID, err := findTestNvidiaGPUDevice() if err != nil { - t.Skipf("skipping test, failed to retrieve assignable device on host with: %v", err) + t.Fatalf("skipping test, failed to retrieve assignable device on host with: %v", err) } if instanceID == "" { - t.Skipf("skipping test, host has no assignable devices") + t.Fatalf("skipping test, host has no assignable devices") } // use fixed GPU class guid testDeviceClassGUID := "5B45201D-F2F2-4F3B-85BB-30FF1F953599" - 
pullRequiredLcowImages(t, []string{imageLcowK8sPause, imageLcowAlpine}) + pullRequiredImages(t, []string{imageWindowsNanoserver}) client := newTestRuntimeClient(t) podctx := context.Background() @@ -467,4 +493,253 @@ func Test_RunContainer_VirtualDevice_ClassGUID_WCOW_Process(t *testing.T) { containerID := createContainer(t, client, ctx, containerRequest) runContainerLifetime(t, client, ctx, containerID) + + if !isGPUPresentWCOW(t, client, ctx, containerID) { + t.Fatalf("expected to see a GPU device on container %s, none present", containerID) + } +} + +func Test_RunContainer_VirtualDevice_GPU_WCOW_Hypervisor(t *testing.T) { + requireFeatures(t, featureWCOWHypervisor, featureGPU) + + if osversion.Get().Build < 19566 { + t.Skip("Requires build +19566") + } + + testDeviceInstanceID, err := findTestNvidiaGPUDevice() + if err != nil { + t.Fatalf("skipping test, failed to retrieve assignable device on host with: %v", err) + } + if testDeviceInstanceID == "" { + t.Fatalf("skipping test, host has no assignable devices") + } + + pullRequiredImages(t, []string{imageWindowsNanoserver}) + client := newTestRuntimeClient(t) + + podctx := context.Background() + sandboxRequest := &runtime.RunPodSandboxRequest{ + Config: &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: t.Name(), + Namespace: testNamespace, + }, + Annotations: map[string]string{ + "io.microsoft.virtualmachine.fullyphysicallybacked": "true", + }, + }, + RuntimeHandler: wcowHypervisorRuntimeHandler, + } + + podID := runPodSandbox(t, client, podctx, sandboxRequest) + defer removePodSandbox(t, client, podctx, podID) + defer stopPodSandbox(t, client, podctx, podID) + + device := &runtime.Device{ + HostPath: "vpci://" + testDeviceInstanceID, + } + containerRequest := getGPUContainerRequestWCOW(t, podID, sandboxRequest.Config, device) + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + containerID := createContainer(t, client, ctx, containerRequest) + 
defer removeContainer(t, client, ctx, containerID) + startContainer(t, client, ctx, containerID) + defer stopContainer(t, client, ctx, containerID) + + if !isGPUPresentWCOW(t, client, ctx, containerID) { + t.Fatalf("expected to see a GPU device on container %s, none present", containerID) + } +} + +func Test_RunContainer_VirtualDevice_GPU_and_NoGPU_WCOW_Hypervisor(t *testing.T) { + requireFeatures(t, featureWCOWHypervisor, featureGPU) + + if osversion.Get().Build < 19566 { + t.Skip("Requires build +19566") + } + + testDeviceInstanceID, err := findTestNvidiaGPUDevice() + if err != nil { + t.Fatalf("skipping test, failed to retrieve assignable device on host with: %v", err) + } + if testDeviceInstanceID == "" { + t.Fatalf("skipping test, host has no assignable devices") + } + + pullRequiredImages(t, []string{imageWindowsNanoserver}) + client := newTestRuntimeClient(t) + + podctx := context.Background() + sandboxRequest := &runtime.RunPodSandboxRequest{ + Config: &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: t.Name(), + Namespace: testNamespace, + }, + Annotations: map[string]string{ + "io.microsoft.virtualmachine.fullyphysicallybacked": "true", + }, + }, + RuntimeHandler: wcowHypervisorRuntimeHandler, + } + + podID := runPodSandbox(t, client, podctx, sandboxRequest) + defer removePodSandbox(t, client, podctx, podID) + defer stopPodSandbox(t, client, podctx, podID) + + device := &runtime.Device{ + HostPath: "vpci://" + testDeviceInstanceID, + } + + containerRequest := getGPUContainerRequestWCOW(t, podID, sandboxRequest.Config, device) + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + // create container with a GPU present + gpuContainerID := createContainer(t, client, ctx, containerRequest) + defer removeContainer(t, client, ctx, gpuContainerID) + startContainer(t, client, ctx, gpuContainerID) + defer stopContainer(t, client, ctx, gpuContainerID) + + if !isGPUPresentWCOW(t, client, ctx, 
gpuContainerID) { + t.Fatalf("expected to see a GPU device on container %s, none present", gpuContainerID) + } + + // create container without a GPU + noGPUName := t.Name() + "-No-GPU-Container" + containerRequest.Config.Metadata.Name = noGPUName + containerRequest.Config.Devices = []*runtime.Device{} + noGPUContainerID := createContainer(t, client, ctx, containerRequest) + defer removeContainer(t, client, ctx, noGPUContainerID) + startContainer(t, client, ctx, noGPUContainerID) + defer stopContainer(t, client, ctx, noGPUContainerID) + + // verify that we can't access the GPU in the No-GPU-Container + if isGPUPresentWCOW(t, client, ctx, noGPUContainerID) { + t.Fatalf("expected to see NO GPU device in container %s", noGPUContainerID) + } +} + +func Test_RunContainer_VirtualDevice_GPU_Multiple_WCOW_Hypervisor(t *testing.T) { + requireFeatures(t, featureWCOWHypervisor, featureGPU) + + if osversion.Get().Build < 19566 { + t.Skip("Requires build +19566") + } + + numContainers := 2 + testDeviceInstanceID, err := findTestNvidiaGPUDevice() + if err != nil { + t.Fatalf("skipping test, failed to retrieve assignable device on host with: %v", err) + } + if testDeviceInstanceID == "" { + t.Fatalf("skipping test, host has no assignable devices") + } + + pullRequiredImages(t, []string{imageWindowsNanoserver}) + client := newTestRuntimeClient(t) + + podctx := context.Background() + sandboxRequest := &runtime.RunPodSandboxRequest{ + Config: &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: t.Name(), + Namespace: testNamespace, + }, + Annotations: map[string]string{ + "io.microsoft.virtualmachine.fullyphysicallybacked": "true", + }, + }, + RuntimeHandler: wcowHypervisorRuntimeHandler, + } + + podID := runPodSandbox(t, client, podctx, sandboxRequest) + defer removePodSandbox(t, client, podctx, podID) + defer stopPodSandbox(t, client, podctx, podID) + + device := &runtime.Device{ + HostPath: "vpci://" + testDeviceInstanceID, + } + + containerRequest := 
getGPUContainerRequestWCOW(t, podID, sandboxRequest.Config, device) + + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + for i := 0; i < numContainers; i++ { + name := t.Name() + "-GPU-Container" + fmt.Sprintf("%d", i) + containerRequest.Config.Metadata.Name = name + + containerID := createContainer(t, client, ctx, containerRequest) + defer removeContainer(t, client, ctx, containerID) + startContainer(t, client, ctx, containerID) + defer stopContainer(t, client, ctx, containerID) + + if !isGPUPresentWCOW(t, client, ctx, containerID) { + t.Fatalf("expected to see a GPU device on container %s, none present", containerID) + } + } +} + +func Test_RunContainer_VirtualDevice_GPU_Multiple_Removal_WCOW_Hypervisor(t *testing.T) { + requireFeatures(t, featureWCOWHypervisor, featureGPU) + + if osversion.Get().Build < 19566 { + t.Skip("Requires build +19566") + } + + testDeviceInstanceID, err := findTestNvidiaGPUDevice() + if err != nil { + t.Fatalf("skipping test, failed to retrieve assignable device on host with: %v", err) + } + if testDeviceInstanceID == "" { + t.Fatalf("skipping test, host has no assignable devices") + } + + pullRequiredImages(t, []string{imageWindowsNanoserver}) + client := newTestRuntimeClient(t) + + podctx := context.Background() + sandboxRequest := &runtime.RunPodSandboxRequest{ + Config: &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: t.Name(), + Namespace: testNamespace, + }, + Annotations: map[string]string{ + "io.microsoft.virtualmachine.fullyphysicallybacked": "true", + }, + }, + RuntimeHandler: wcowHypervisorRuntimeHandler, + } + + podID := runPodSandbox(t, client, podctx, sandboxRequest) + defer removePodSandbox(t, client, podctx, podID) + defer stopPodSandbox(t, client, podctx, podID) + + device := &runtime.Device{ + HostPath: "vpci://" + testDeviceInstanceID, + } + + containerRequest := getGPUContainerRequestWCOW(t, podID, sandboxRequest.Config, device) + ctx, cancel := 
context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + // create container with a GPU present + gpuContainerIDOne := createContainer(t, client, ctx, containerRequest) + defer removeContainer(t, client, ctx, gpuContainerIDOne) + startContainer(t, client, ctx, gpuContainerIDOne) + defer stopContainer(t, client, ctx, gpuContainerIDOne) + + // run full lifetime of second container with GPU + containerRequest.Config.Metadata.Name = t.Name() + "-GPU-Container-2" + gpuContainerIDTwo := createContainer(t, client, ctx, containerRequest) + runContainerLifetime(t, client, ctx, gpuContainerIDTwo) + + // verify after removing second container that we can still see + // the GPU on the first container + if !isGPUPresentWCOW(t, client, ctx, gpuContainerIDOne) { + t.Fatalf("expected to see a GPU device on container %s, none present", gpuContainerIDOne) + } } diff --git a/test/cri-containerd/main.go b/test/cri-containerd/main.go index 436be2b859..08130aca83 100644 --- a/test/cri-containerd/main.go +++ b/test/cri-containerd/main.go @@ -26,21 +26,26 @@ import ( ) const ( - daemonAddress = "tcp://127.0.0.1:2376" - connectTimeout = time.Second * 10 - testNamespace = "cri-containerd-test" + daemonAddress = "tcp://127.0.0.1:2376" + connectTimeout = time.Second * 10 + testNamespace = "cri-containerd-test" + wcowProcessRuntimeHandler = "runhcs-wcow-process" wcowHypervisorRuntimeHandler = "runhcs-wcow-hypervisor" wcowHypervisor17763RuntimeHandler = "runhcs-wcow-hypervisor-17763" wcowHypervisor18362RuntimeHandler = "runhcs-wcow-hypervisor-18362" wcowHypervisor19041RuntimeHandler = "runhcs-wcow-hypervisor-19041" - lcowRuntimeHandler = "runhcs-lcow" - imageLcowK8sPause = "k8s.gcr.io/pause:3.1" - imageLcowAlpine = "docker.io/library/alpine:latest" - imageLcowCosmos = "cosmosarno/spark-master:2.4.1_2019-04-18_8e864ce" - testGPUBootFiles = "C:\\ContainerPlat\\LinuxBootFiles\\nvidiagpu" - alpineAspNet = "mcr.microsoft.com/dotnet/core/aspnet:3.1-alpine3.11" - 
alpineAspnetUpgrade = "mcr.microsoft.com/dotnet/core/aspnet:3.1.2-alpine3.11" + + testDeviceUtilFilePath = "C:\\ContainerPlat\\device-util.exe" + testDriversPath = "C:\\ContainerPlat\\testdrivers" + testGPUBootFiles = "C:\\ContainerPlat\\LinuxBootFiles\\nvidiagpu" + + lcowRuntimeHandler = "runhcs-lcow" + imageLcowK8sPause = "k8s.gcr.io/pause:3.1" + imageLcowAlpine = "docker.io/library/alpine:latest" + imageLcowCosmos = "cosmosarno/spark-master:2.4.1_2019-04-18_8e864ce" + alpineAspNet = "mcr.microsoft.com/dotnet/core/aspnet:3.1-alpine3.11" + alpineAspnetUpgrade = "mcr.microsoft.com/dotnet/core/aspnet:3.1.2-alpine3.11" // Default account name for use with GMSA related tests. This will not be // present/you will not have access to the account on your machine unless // your environment is configured properly. From 23d02c871bb904a25ca6b5001a3d7682dd08d92b Mon Sep 17 00:00:00 2001 From: "Krishnakumar R(KK)" <65895020+kk-src@users.noreply.github.com> Date: Fri, 4 Sep 2020 14:05:58 -0700 Subject: [PATCH 10/20] Add prepare-disk command Prepare-disk command formats a given disk with ext4. This is required for a disk(as passthrough) to be made available within a container. Here is an excerpt from a configuration for such a scenario. "mounts": [ { "host_path": "\\.\PHYSICALDRIVE2", "container_path": "/disk" } ] The gcs/runc combo expects the pass-through disk to be formatted and tries to mount it on a local path within the uvm. Prepare-disk ensures that this restriction is met. Note: Full disk is formatted with ext4 without any partitioning. TODO: add options to allow partitioning and then formatting. 
Signed-off-by: Krishnakumar R(KK) <65895020+kk-src@users.noreply.github.com> --- cmd/runhcs/main.go | 1 + cmd/runhcs/prepare-disk.go | 63 +++++++++++++++++++++++++++++++++++++ internal/lcow/common.go | 64 ++++++++++++++++++++++++++++++++++++++ internal/lcow/disk.go | 52 +++++++++++++++++++++++++++++++ 4 files changed, 180 insertions(+) create mode 100644 cmd/runhcs/prepare-disk.go create mode 100644 internal/lcow/common.go create mode 100644 internal/lcow/disk.go diff --git a/cmd/runhcs/main.go b/cmd/runhcs/main.go index e13dbc097b..4ee443b837 100644 --- a/cmd/runhcs/main.go +++ b/cmd/runhcs/main.go @@ -108,6 +108,7 @@ func main() { killCommand, listCommand, pauseCommand, + prepareDiskCommand, psCommand, resizeTtyCommand, resumeCommand, diff --git a/cmd/runhcs/prepare-disk.go b/cmd/runhcs/prepare-disk.go new file mode 100644 index 0000000000..78dda8dbb9 --- /dev/null +++ b/cmd/runhcs/prepare-disk.go @@ -0,0 +1,63 @@ +package main + +import ( + gcontext "context" + + "github.com/Microsoft/hcsshim/internal/appargs" + "github.com/Microsoft/hcsshim/internal/lcow" + "github.com/Microsoft/hcsshim/internal/oc" + "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/osversion" + "github.com/pkg/errors" + "github.com/urfave/cli" + "go.opencensus.io/trace" +) + +const ( + // prepareDiskStr string used to name the command and identity in the logs + prepareDiskStr = "prepare-disk" +) + +var prepareDiskCommand = cli.Command{ + Name: prepareDiskStr, + Usage: "format a disk with ext4", + Description: "Format a disk with ext4. To be used prior to exposing a pass-through disk. 
Prerequisite is that disk should be offline ('Get-Disk -Number | Set-Disk -IsOffline $true').", + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "destpath", + Usage: "Required: describes the destination disk path", + }, + }, + Before: appargs.Validate(), + Action: func(context *cli.Context) (err error) { + ctx, span := trace.StartSpan(gcontext.Background(), prepareDiskStr) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + dest := context.String("destpath") + if dest == "" { + return errors.New("'destpath' is required") + } + + if osversion.Get().Build < osversion.RS5 { + return errors.New("LCOW is not supported pre-RS5") + } + + opts := uvm.NewDefaultOptionsLCOW("preparedisk-uvm", context.GlobalString("owner")) + + preparediskUVM, err := uvm.CreateLCOW(ctx, opts) + if err != nil { + return errors.Wrapf(err, "failed to create '%s'", opts.ID) + } + defer preparediskUVM.Close() + if err := preparediskUVM.Start(ctx); err != nil { + return errors.Wrapf(err, "failed to start '%s'", opts.ID) + } + + if err := lcow.FormatDisk(ctx, preparediskUVM, dest); err != nil { + return errors.Wrapf(err, "failed to format disk '%s' with ext4", opts.ID) + } + + return nil + }, +} diff --git a/internal/lcow/common.go b/internal/lcow/common.go new file mode 100644 index 0000000000..32938641e8 --- /dev/null +++ b/internal/lcow/common.go @@ -0,0 +1,64 @@ +package lcow + +import ( + "bytes" + "context" + "fmt" + "time" + + cmdpkg "github.com/Microsoft/hcsshim/internal/cmd" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/timeout" + "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/sirupsen/logrus" +) + +// formatDiskUvm waits for the SCSI disk at the given controller and LUN to appear in the utility VM +// and then formats it with ext4. 
+func formatDiskUvm(ctx context.Context, lcowUVM *uvm.UtilityVM, controller int, lun int32, destPath string) error { + // Validate /sys/bus/scsi/devices/C:0:0:L exists as a directory + devicePath := fmt.Sprintf("/sys/bus/scsi/devices/%d:0:0:%d/block", controller, lun) + testdCtx, cancel := context.WithTimeout(ctx, timeout.TestDRetryLoop) + defer cancel() + for { + cmd := cmdpkg.CommandContext(testdCtx, lcowUVM, "test", "-d", devicePath) + err := cmd.Run() + if err == nil { + break + } + if _, ok := err.(*cmdpkg.ExitError); !ok { + return fmt.Errorf("failed to run %+v following hot-add %s to utility VM: %s", cmd.Spec.Args, destPath, err) + } + time.Sleep(time.Millisecond * 10) + } + cancel() + + // Get the device from under the block subdirectory by doing a simple ls. This will come back as (eg) `sda` + lsCtx, cancel := context.WithTimeout(ctx, timeout.ExternalCommandToStart) + cmd := cmdpkg.CommandContext(lsCtx, lcowUVM, "ls", devicePath) + lsOutput, err := cmd.Output() + cancel() + if err != nil { + return fmt.Errorf("failed to `%+v` following hot-add %s to utility VM: %s", cmd.Spec.Args, destPath, err) + } + device := fmt.Sprintf(`/dev/%s`, bytes.TrimSpace(lsOutput)) + log.G(ctx).WithFields(logrus.Fields{ + "dest": destPath, + "device": device, + }).Debug("lcow::FormatDisk device guest location") + + // Format it ext4 + mkfsCtx, cancel := context.WithTimeout(ctx, timeout.ExternalCommandToStart) + cmd = cmdpkg.CommandContext(mkfsCtx, lcowUVM, "mkfs.ext4", "-q", "-E", "lazy_itable_init=0,nodiscard", "-O", `^has_journal,sparse_super2,^resize_inode`, device) + var mkfsStderr bytes.Buffer + cmd.Stderr = &mkfsStderr + err = cmd.Run() + cancel() + if err != nil { + return fmt.Errorf("failed to `%+v` following hot-add %s to utility VM: %s. 
detailed error: %s", cmd.Spec.Args, destPath, err, mkfsStderr.String()) + } + + log.G(ctx).WithField("dest", destPath).Debug("lcow::FormatDisk complete") + + return nil +} diff --git a/internal/lcow/disk.go b/internal/lcow/disk.go new file mode 100644 index 0000000000..ac68dcfcf0 --- /dev/null +++ b/internal/lcow/disk.go @@ -0,0 +1,52 @@ +package lcow + +import ( + "context" + "errors" + "fmt" + + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/sirupsen/logrus" +) + +// FormatDisk attaches the disk at destPath to the given utility VM as a SCSI disk +// and then formats it with ext4. Disk is expected to be made offline before this +// command is run. The following powershell commands: +// 'Get-Disk -Number | Set-Disk -IsOffline $true' +// can be used to offline the disk. +func FormatDisk(ctx context.Context, lcowUVM *uvm.UtilityVM, destPath string) error { + if lcowUVM == nil { + return fmt.Errorf("no uvm") + } + + if lcowUVM.OS() != "linux" { + return errors.New("lcow::FormatDisk requires a linux utility VM to operate") + } + + log.G(ctx).WithFields(logrus.Fields{ + "dest": destPath, + }).Debug("lcow::FormatDisk opts") + + scsi, err := lcowUVM.AddSCSIPhysicalDisk(ctx, destPath, "", false) // No destination as not formatted + if err != nil { + return err + } + + defer func() { + scsi.Release(ctx) + }() + + log.G(ctx).WithFields(logrus.Fields{ + "dest": destPath, + "controller": scsi.Controller, + "lun": scsi.LUN, + }).Debug("lcow::FormatDisk device attached") + + if err := formatDiskUvm(ctx, lcowUVM, scsi.Controller, scsi.LUN, destPath); err != nil { + return err + } + log.G(ctx).WithField("dest", destPath).Debug("lcow::FormatDisk complete") + + return nil +} From 0bbf25b0a739cfff840719a19924f7f7147b6959 Mon Sep 17 00:00:00 2001 From: elweb9858 Date: Fri, 4 Sep 2020 12:16:48 -0700 Subject: [PATCH 11/20] Adding version check for L4WfpProxy endpoint policy Signed-off-by: elweb9858 --- hcn/hcn.go | 9 +++++++++ 
hcn/hcnglobals.go | 3 +++ hcn/hcnsupport.go | 4 +++- hcn/hcnsupport_test.go | 11 +++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/hcn/hcn.go b/hcn/hcn.go index 45b07559d1..54c258ed74 100644 --- a/hcn/hcn.go +++ b/hcn/hcn.go @@ -216,6 +216,15 @@ func L4proxyPolicySupported() error { return platformDoesNotSupportError("L4ProxyPolicy") } +// L4WfpProxyPolicySupported returns an error if the HCN version does not support L4WfpProxy. +func L4WfpProxyPolicySupported() error { + supported := GetSupportedFeatures() + if supported.L4WfpProxy { + return nil + } + return platformDoesNotSupportError("L4WfpProxyPolicy") +} + // SetPolicySupported returns an error if the HCN version does not support SetPolicy. func SetPolicySupported() error { supported := GetSupportedFeatures() diff --git a/hcn/hcnglobals.go b/hcn/hcnglobals.go index d0b9c53b71..aaf94dcaeb 100644 --- a/hcn/hcnglobals.go +++ b/hcn/hcnglobals.go @@ -65,6 +65,9 @@ var ( //HNS 13.1 allows for L4Proxy Policy support L4ProxyPolicyVersion = VersionRanges{VersionRange{MinVersion: Version{Major: 13, Minor: 1}, MaxVersion: Version{Major: math.MaxInt32, Minor: math.MaxInt32}}} + + //HNS 13.2 allows for L4WfpProxy Policy support + L4WfpProxyPolicyVersion = VersionRanges{VersionRange{MinVersion: Version{Major: 13, Minor: 2}, MaxVersion: Version{Major: math.MaxInt32, Minor: math.MaxInt32}}} ) // GetGlobals returns the global properties of the HCN Service. 
diff --git a/hcn/hcnsupport.go b/hcn/hcnsupport.go index 8a86e073d1..1096aebde5 100644 --- a/hcn/hcnsupport.go +++ b/hcn/hcnsupport.go @@ -17,7 +17,8 @@ type SupportedFeatures struct { IPv6DualStack bool `json:"IPv6DualStack"` SetPolicy bool `json:"SetPolicy"` VxlanPort bool `json:"VxlanPort"` - L4Proxy bool `json:"L4Proxy"` + L4Proxy bool `json:"L4Proxy"` // network policy that applies VFP rules to all endpoints on the network to redirect traffic + L4WfpProxy bool `json:"L4WfpProxy"` // endpoint policy that applies WFP filters to redirect traffic to/from that endpoint } // AclFeatures are the supported ACL possibilities. @@ -67,6 +68,7 @@ func GetSupportedFeatures() SupportedFeatures { features.SetPolicy = isFeatureSupported(globals.Version, SetPolicyVersion) features.VxlanPort = isFeatureSupported(globals.Version, VxlanPortVersion) features.L4Proxy = isFeatureSupported(globals.Version, L4ProxyPolicyVersion) + features.L4WfpProxy = isFeatureSupported(globals.Version, L4WfpProxyPolicyVersion) return features } diff --git a/hcn/hcnsupport_test.go b/hcn/hcnsupport_test.go index 9caf57aa86..1d6f90dea2 100644 --- a/hcn/hcnsupport_test.go +++ b/hcn/hcnsupport_test.go @@ -138,6 +138,17 @@ func TestL4ProxyPolicySupport(t *testing.T) { } } +func TestL4WfpProxyPolicySupport(t *testing.T) { + supportedFeatures := GetSupportedFeatures() + err := L4WfpProxyPolicySupported() + if supportedFeatures.L4WfpProxy && err != nil { + t.Fatal(err) + } + if !supportedFeatures.L4WfpProxy && err == nil { + t.Fatal(err) + } +} + func TestIsFeatureSupported(t *testing.T) { // HNSVersion1803 testing (single range tests) if isFeatureSupported(Version{Major: 0, Minor: 0}, HNSVersion1803) { From 0baeb83a97ba502ae6b729a22eee354104f21a67 Mon Sep 17 00:00:00 2001 From: "Paul \"TBBle\" Hampson" Date: Wed, 9 Sep 2020 22:30:16 +1000 Subject: [PATCH 12/20] Revendor Microsoft/go-winio to v0.4.15-0.20200908182639-5b44b70ab3ab This pulls in the migration of go-winio/backuptar from the bundled fork of 
archive/tar from Go 1.6 to using Go's current archive/tar. Currently only affects the wclayer utility, but resolves a problem creating OCI layers containing files larger than 8gB. Signed-off-by: Paul "TBBle" Hampson --- go.mod | 2 +- go.sum | 4 +- internal/ociwclayer/export.go | 2 +- internal/ociwclayer/import.go | 2 +- .../Microsoft/go-winio/archive/tar/LICENSE | 27 - .../Microsoft/go-winio/archive/tar/common.go | 344 ------ .../Microsoft/go-winio/archive/tar/reader.go | 1002 ----------------- .../go-winio/archive/tar/stat_atim.go | 20 - .../go-winio/archive/tar/stat_atimespec.go | 20 - .../go-winio/archive/tar/stat_unix.go | 32 - .../Microsoft/go-winio/archive/tar/writer.go | 444 -------- .../Microsoft/go-winio/backuptar/strconv.go | 68 ++ .../Microsoft/go-winio/backuptar/tar.go | 60 +- vendor/github.com/Microsoft/go-winio/pipe.go | 17 +- vendor/modules.txt | 3 +- 15 files changed, 122 insertions(+), 1925 deletions(-) delete mode 100644 vendor/github.com/Microsoft/go-winio/archive/tar/LICENSE delete mode 100644 vendor/github.com/Microsoft/go-winio/archive/tar/common.go delete mode 100644 vendor/github.com/Microsoft/go-winio/archive/tar/reader.go delete mode 100644 vendor/github.com/Microsoft/go-winio/archive/tar/stat_atim.go delete mode 100644 vendor/github.com/Microsoft/go-winio/archive/tar/stat_atimespec.go delete mode 100644 vendor/github.com/Microsoft/go-winio/archive/tar/stat_unix.go delete mode 100644 vendor/github.com/Microsoft/go-winio/archive/tar/writer.go create mode 100644 vendor/github.com/Microsoft/go-winio/backuptar/strconv.go diff --git a/go.mod b/go.mod index 02d2127059..492607a226 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/Microsoft/hcsshim go 1.13 require ( - github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5 + github.com/Microsoft/go-winio v0.4.15-0.20200908182639-5b44b70ab3ab github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59 github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1 
github.com/containerd/containerd v1.3.2 diff --git a/go.sum b/go.sum index e949461b32..92f129cc31 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5 h1:ygIc8M6trr62pF5DucadTWGdEB4mEyvzi0e2nbcmcyA= -github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw= +github.com/Microsoft/go-winio v0.4.15-0.20200908182639-5b44b70ab3ab h1:9pygWVFqbY9lPxM0peffumuVDyMuIMzNLyO9uFjJuQo= +github.com/Microsoft/go-winio v0.4.15-0.20200908182639-5b44b70ab3ab/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw= github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmEg9bt0VpxxWqJlO4iwu3FBdHUzV7wQVg= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59 h1:qWj4qVYZ95vLWwqyNJCQg7rDsG5wPdze0UaPolH7DUk= diff --git a/internal/ociwclayer/export.go b/internal/ociwclayer/export.go index d8df1ef5ba..a582f0018f 100644 --- a/internal/ociwclayer/export.go +++ b/internal/ociwclayer/export.go @@ -3,10 +3,10 @@ package ociwclayer import ( + "archive/tar" "io" "path/filepath" - "github.com/Microsoft/go-winio/archive/tar" "github.com/Microsoft/go-winio/backuptar" "github.com/Microsoft/hcsshim" ) diff --git a/internal/ociwclayer/import.go b/internal/ociwclayer/import.go index de7a720263..9859b517a7 100644 --- a/internal/ociwclayer/import.go +++ b/internal/ociwclayer/import.go @@ -1,6 +1,7 @@ package ociwclayer import ( + "archive/tar" "bufio" "io" "os" @@ -9,7 +10,6 @@ import ( "strings" winio "github.com/Microsoft/go-winio" - "github.com/Microsoft/go-winio/archive/tar" 
"github.com/Microsoft/go-winio/backuptar" "github.com/Microsoft/hcsshim" ) diff --git a/vendor/github.com/Microsoft/go-winio/archive/tar/LICENSE b/vendor/github.com/Microsoft/go-winio/archive/tar/LICENSE deleted file mode 100644 index 7448756763..0000000000 --- a/vendor/github.com/Microsoft/go-winio/archive/tar/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2012 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/github.com/Microsoft/go-winio/archive/tar/common.go b/vendor/github.com/Microsoft/go-winio/archive/tar/common.go deleted file mode 100644 index 0378401c0d..0000000000 --- a/vendor/github.com/Microsoft/go-winio/archive/tar/common.go +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package tar implements access to tar archives. -// It aims to cover most of the variations, including those produced -// by GNU and BSD tars. -// -// References: -// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 -// http://www.gnu.org/software/tar/manual/html_node/Standard.html -// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html -package tar - -import ( - "bytes" - "errors" - "fmt" - "os" - "path" - "time" -) - -const ( - blockSize = 512 - - // Types - TypeReg = '0' // regular file - TypeRegA = '\x00' // regular file - TypeLink = '1' // hard link - TypeSymlink = '2' // symbolic link - TypeChar = '3' // character device node - TypeBlock = '4' // block device node - TypeDir = '5' // directory - TypeFifo = '6' // fifo node - TypeCont = '7' // reserved - TypeXHeader = 'x' // extended header - TypeXGlobalHeader = 'g' // global extended header - TypeGNULongName = 'L' // Next file has a long name - TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name - TypeGNUSparse = 'S' // sparse file -) - -// A Header represents a single header in a tar archive. -// Some fields may not be populated. 
-type Header struct { - Name string // name of header file entry - Mode int64 // permission and mode bits - Uid int // user id of owner - Gid int // group id of owner - Size int64 // length in bytes - ModTime time.Time // modified time - Typeflag byte // type of header entry - Linkname string // target name of link - Uname string // user name of owner - Gname string // group name of owner - Devmajor int64 // major number of character or block device - Devminor int64 // minor number of character or block device - AccessTime time.Time // access time - ChangeTime time.Time // status change time - CreationTime time.Time // creation time - Xattrs map[string]string - Winheaders map[string]string -} - -// File name constants from the tar spec. -const ( - fileNameSize = 100 // Maximum number of bytes in a standard tar name. - fileNamePrefixSize = 155 // Maximum number of ustar extension bytes. -) - -// FileInfo returns an os.FileInfo for the Header. -func (h *Header) FileInfo() os.FileInfo { - return headerFileInfo{h} -} - -// headerFileInfo implements os.FileInfo. -type headerFileInfo struct { - h *Header -} - -func (fi headerFileInfo) Size() int64 { return fi.h.Size } -func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } -func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime } -func (fi headerFileInfo) Sys() interface{} { return fi.h } - -// Name returns the base name of the file. -func (fi headerFileInfo) Name() string { - if fi.IsDir() { - return path.Base(path.Clean(fi.h.Name)) - } - return path.Base(fi.h.Name) -} - -// Mode returns the permission and mode bits for the headerFileInfo. -func (fi headerFileInfo) Mode() (mode os.FileMode) { - // Set file permission bits. - mode = os.FileMode(fi.h.Mode).Perm() - - // Set setuid, setgid and sticky bits. 
- if fi.h.Mode&c_ISUID != 0 { - // setuid - mode |= os.ModeSetuid - } - if fi.h.Mode&c_ISGID != 0 { - // setgid - mode |= os.ModeSetgid - } - if fi.h.Mode&c_ISVTX != 0 { - // sticky - mode |= os.ModeSticky - } - - // Set file mode bits. - // clear perm, setuid, setgid and sticky bits. - m := os.FileMode(fi.h.Mode) &^ 07777 - if m == c_ISDIR { - // directory - mode |= os.ModeDir - } - if m == c_ISFIFO { - // named pipe (FIFO) - mode |= os.ModeNamedPipe - } - if m == c_ISLNK { - // symbolic link - mode |= os.ModeSymlink - } - if m == c_ISBLK { - // device file - mode |= os.ModeDevice - } - if m == c_ISCHR { - // Unix character device - mode |= os.ModeDevice - mode |= os.ModeCharDevice - } - if m == c_ISSOCK { - // Unix domain socket - mode |= os.ModeSocket - } - - switch fi.h.Typeflag { - case TypeSymlink: - // symbolic link - mode |= os.ModeSymlink - case TypeChar: - // character device node - mode |= os.ModeDevice - mode |= os.ModeCharDevice - case TypeBlock: - // block device node - mode |= os.ModeDevice - case TypeDir: - // directory - mode |= os.ModeDir - case TypeFifo: - // fifo node - mode |= os.ModeNamedPipe - } - - return mode -} - -// sysStat, if non-nil, populates h from system-dependent fields of fi. -var sysStat func(fi os.FileInfo, h *Header) error - -// Mode constants from the tar spec. -const ( - c_ISUID = 04000 // Set uid - c_ISGID = 02000 // Set gid - c_ISVTX = 01000 // Save text (sticky bit) - c_ISDIR = 040000 // Directory - c_ISFIFO = 010000 // FIFO - c_ISREG = 0100000 // Regular file - c_ISLNK = 0120000 // Symbolic link - c_ISBLK = 060000 // Block special file - c_ISCHR = 020000 // Character special file - c_ISSOCK = 0140000 // Socket -) - -// Keywords for the PAX Extended Header -const ( - paxAtime = "atime" - paxCharset = "charset" - paxComment = "comment" - paxCtime = "ctime" // please note that ctime is not a valid pax header. 
- paxCreationTime = "LIBARCHIVE.creationtime" - paxGid = "gid" - paxGname = "gname" - paxLinkpath = "linkpath" - paxMtime = "mtime" - paxPath = "path" - paxSize = "size" - paxUid = "uid" - paxUname = "uname" - paxXattr = "SCHILY.xattr." - paxWindows = "MSWINDOWS." - paxNone = "" -) - -// FileInfoHeader creates a partially-populated Header from fi. -// If fi describes a symlink, FileInfoHeader records link as the link target. -// If fi describes a directory, a slash is appended to the name. -// Because os.FileInfo's Name method returns only the base name of -// the file it describes, it may be necessary to modify the Name field -// of the returned header to provide the full path name of the file. -func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { - if fi == nil { - return nil, errors.New("tar: FileInfo is nil") - } - fm := fi.Mode() - h := &Header{ - Name: fi.Name(), - ModTime: fi.ModTime(), - Mode: int64(fm.Perm()), // or'd with c_IS* constants later - } - switch { - case fm.IsRegular(): - h.Mode |= c_ISREG - h.Typeflag = TypeReg - h.Size = fi.Size() - case fi.IsDir(): - h.Typeflag = TypeDir - h.Mode |= c_ISDIR - h.Name += "/" - case fm&os.ModeSymlink != 0: - h.Typeflag = TypeSymlink - h.Mode |= c_ISLNK - h.Linkname = link - case fm&os.ModeDevice != 0: - if fm&os.ModeCharDevice != 0 { - h.Mode |= c_ISCHR - h.Typeflag = TypeChar - } else { - h.Mode |= c_ISBLK - h.Typeflag = TypeBlock - } - case fm&os.ModeNamedPipe != 0: - h.Typeflag = TypeFifo - h.Mode |= c_ISFIFO - case fm&os.ModeSocket != 0: - h.Mode |= c_ISSOCK - default: - return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) - } - if fm&os.ModeSetuid != 0 { - h.Mode |= c_ISUID - } - if fm&os.ModeSetgid != 0 { - h.Mode |= c_ISGID - } - if fm&os.ModeSticky != 0 { - h.Mode |= c_ISVTX - } - // If possible, populate additional fields from OS-specific - // FileInfo fields. - if sys, ok := fi.Sys().(*Header); ok { - // This FileInfo came from a Header (not the OS). 
Use the - // original Header to populate all remaining fields. - h.Uid = sys.Uid - h.Gid = sys.Gid - h.Uname = sys.Uname - h.Gname = sys.Gname - h.AccessTime = sys.AccessTime - h.ChangeTime = sys.ChangeTime - if sys.Xattrs != nil { - h.Xattrs = make(map[string]string) - for k, v := range sys.Xattrs { - h.Xattrs[k] = v - } - } - if sys.Typeflag == TypeLink { - // hard link - h.Typeflag = TypeLink - h.Size = 0 - h.Linkname = sys.Linkname - } - } - if sysStat != nil { - return h, sysStat(fi, h) - } - return h, nil -} - -var zeroBlock = make([]byte, blockSize) - -// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values. -// We compute and return both. -func checksum(header []byte) (unsigned int64, signed int64) { - for i := 0; i < len(header); i++ { - if i == 148 { - // The chksum field (header[148:156]) is special: it should be treated as space bytes. - unsigned += ' ' * 8 - signed += ' ' * 8 - i += 7 - continue - } - unsigned += int64(header[i]) - signed += int64(int8(header[i])) - } - return -} - -type slicer []byte - -func (sp *slicer) next(n int) (b []byte) { - s := *sp - b, *sp = s[0:n], s[n:] - return -} - -func isASCII(s string) bool { - for _, c := range s { - if c >= 0x80 { - return false - } - } - return true -} - -func toASCII(s string) string { - if isASCII(s) { - return s - } - var buf bytes.Buffer - for _, c := range s { - if c < 0x80 { - buf.WriteByte(byte(c)) - } - } - return buf.String() -} - -// isHeaderOnlyType checks if the given type flag is of the type that has no -// data section even if a size is specified. 
-func isHeaderOnlyType(flag byte) bool { - switch flag { - case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: - return true - default: - return false - } -} diff --git a/vendor/github.com/Microsoft/go-winio/archive/tar/reader.go b/vendor/github.com/Microsoft/go-winio/archive/tar/reader.go deleted file mode 100644 index e210c618a1..0000000000 --- a/vendor/github.com/Microsoft/go-winio/archive/tar/reader.go +++ /dev/null @@ -1,1002 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package tar - -// TODO(dsymonds): -// - pax extensions - -import ( - "bytes" - "errors" - "io" - "io/ioutil" - "math" - "os" - "strconv" - "strings" - "time" -) - -var ( - ErrHeader = errors.New("archive/tar: invalid tar header") -) - -const maxNanoSecondIntSize = 9 - -// A Reader provides sequential access to the contents of a tar archive. -// A tar archive consists of a sequence of files. -// The Next method advances to the next file in the archive (including the first), -// and then it can be treated as an io.Reader to access the file's data. -type Reader struct { - r io.Reader - err error - pad int64 // amount of padding (ignored) after current file entry - curr numBytesReader // reader for current file entry - hdrBuff [blockSize]byte // buffer to use in readHeader -} - -type parser struct { - err error // Last error seen -} - -// A numBytesReader is an io.Reader with a numBytes method, returning the number -// of bytes remaining in the underlying encoded data. -type numBytesReader interface { - io.Reader - numBytes() int64 -} - -// A regFileReader is a numBytesReader for reading file data from a tar archive. -type regFileReader struct { - r io.Reader // underlying reader - nb int64 // number of unread bytes for current file entry -} - -// A sparseFileReader is a numBytesReader for reading sparse file data from a -// tar archive. 
-type sparseFileReader struct { - rfr numBytesReader // Reads the sparse-encoded file data - sp []sparseEntry // The sparse map for the file - pos int64 // Keeps track of file position - total int64 // Total size of the file -} - -// A sparseEntry holds a single entry in a sparse file's sparse map. -// -// Sparse files are represented using a series of sparseEntrys. -// Despite the name, a sparseEntry represents an actual data fragment that -// references data found in the underlying archive stream. All regions not -// covered by a sparseEntry are logically filled with zeros. -// -// For example, if the underlying raw file contains the 10-byte data: -// var compactData = "abcdefgh" -// -// And the sparse map has the following entries: -// var sp = []sparseEntry{ -// {offset: 2, numBytes: 5} // Data fragment for [2..7] -// {offset: 18, numBytes: 3} // Data fragment for [18..21] -// } -// -// Then the content of the resulting sparse file with a "real" size of 25 is: -// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 -type sparseEntry struct { - offset int64 // Starting position of the fragment - numBytes int64 // Length of the fragment -} - -// Keywords for GNU sparse files in a PAX extended header -const ( - paxGNUSparseNumBlocks = "GNU.sparse.numblocks" - paxGNUSparseOffset = "GNU.sparse.offset" - paxGNUSparseNumBytes = "GNU.sparse.numbytes" - paxGNUSparseMap = "GNU.sparse.map" - paxGNUSparseName = "GNU.sparse.name" - paxGNUSparseMajor = "GNU.sparse.major" - paxGNUSparseMinor = "GNU.sparse.minor" - paxGNUSparseSize = "GNU.sparse.size" - paxGNUSparseRealSize = "GNU.sparse.realsize" -) - -// Keywords for old GNU sparse headers -const ( - oldGNUSparseMainHeaderOffset = 386 - oldGNUSparseMainHeaderIsExtendedOffset = 482 - oldGNUSparseMainHeaderNumEntries = 4 - oldGNUSparseExtendedHeaderIsExtendedOffset = 504 - oldGNUSparseExtendedHeaderNumEntries = 21 - oldGNUSparseOffsetSize = 12 - oldGNUSparseNumBytesSize = 12 -) - -// NewReader creates a new 
Reader reading from r. -func NewReader(r io.Reader) *Reader { return &Reader{r: r} } - -// Next advances to the next entry in the tar archive. -// -// io.EOF is returned at the end of the input. -func (tr *Reader) Next() (*Header, error) { - if tr.err != nil { - return nil, tr.err - } - - var hdr *Header - var extHdrs map[string]string - - // Externally, Next iterates through the tar archive as if it is a series of - // files. Internally, the tar format often uses fake "files" to add meta - // data that describes the next file. These meta data "files" should not - // normally be visible to the outside. As such, this loop iterates through - // one or more "header files" until it finds a "normal file". -loop: - for { - tr.err = tr.skipUnread() - if tr.err != nil { - return nil, tr.err - } - - hdr = tr.readHeader() - if tr.err != nil { - return nil, tr.err - } - - // Check for PAX/GNU special headers and files. - switch hdr.Typeflag { - case TypeXHeader: - extHdrs, tr.err = parsePAX(tr) - if tr.err != nil { - return nil, tr.err - } - continue loop // This is a meta header affecting the next header - case TypeGNULongName, TypeGNULongLink: - var realname []byte - realname, tr.err = ioutil.ReadAll(tr) - if tr.err != nil { - return nil, tr.err - } - - // Convert GNU extensions to use PAX headers. - if extHdrs == nil { - extHdrs = make(map[string]string) - } - var p parser - switch hdr.Typeflag { - case TypeGNULongName: - extHdrs[paxPath] = p.parseString(realname) - case TypeGNULongLink: - extHdrs[paxLinkpath] = p.parseString(realname) - } - if p.err != nil { - tr.err = p.err - return nil, tr.err - } - continue loop // This is a meta header affecting the next header - default: - mergePAX(hdr, extHdrs) - - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) - if err != nil { - tr.err = err - return nil, err - } - if sp != nil { - // Current file is a PAX format GNU sparse file. 
- // Set the current file reader to a sparse file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil, tr.err - } - } - break loop // This is a file, so stop - } - } - return hdr, nil -} - -// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then -// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to -// be treated as a regular file. -func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { - var sparseFormat string - - // Check for sparse format indicators - major, majorOk := headers[paxGNUSparseMajor] - minor, minorOk := headers[paxGNUSparseMinor] - sparseName, sparseNameOk := headers[paxGNUSparseName] - _, sparseMapOk := headers[paxGNUSparseMap] - sparseSize, sparseSizeOk := headers[paxGNUSparseSize] - sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] - - // Identify which, if any, sparse format applies from which PAX headers are set - if majorOk && minorOk { - sparseFormat = major + "." + minor - } else if sparseNameOk && sparseMapOk { - sparseFormat = "0.1" - } else if sparseSizeOk { - sparseFormat = "0.0" - } else { - // Not a PAX format GNU sparse file. 
- return nil, nil - } - - // Check for unknown sparse format - if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { - return nil, nil - } - - // Update hdr from GNU sparse PAX headers - if sparseNameOk { - hdr.Name = sparseName - } - if sparseSizeOk { - realSize, err := strconv.ParseInt(sparseSize, 10, 0) - if err != nil { - return nil, ErrHeader - } - hdr.Size = realSize - } else if sparseRealSizeOk { - realSize, err := strconv.ParseInt(sparseRealSize, 10, 0) - if err != nil { - return nil, ErrHeader - } - hdr.Size = realSize - } - - // Set up the sparse map, according to the particular sparse format in use - var sp []sparseEntry - var err error - switch sparseFormat { - case "0.0", "0.1": - sp, err = readGNUSparseMap0x1(headers) - case "1.0": - sp, err = readGNUSparseMap1x0(tr.curr) - } - return sp, err -} - -// mergePAX merges well known headers according to PAX standard. -// In general headers with the same name as those found -// in the header struct overwrite those found in the header -// struct with higher precision or longer values. Esp. useful -// for name and linkname fields. 
-func mergePAX(hdr *Header, headers map[string]string) error { - for k, v := range headers { - switch k { - case paxPath: - hdr.Name = v - case paxLinkpath: - hdr.Linkname = v - case paxGname: - hdr.Gname = v - case paxUname: - hdr.Uname = v - case paxUid: - uid, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Uid = int(uid) - case paxGid: - gid, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Gid = int(gid) - case paxAtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.AccessTime = t - case paxMtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.ModTime = t - case paxCtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.ChangeTime = t - case paxCreationTime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.CreationTime = t - case paxSize: - size, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Size = int64(size) - default: - if strings.HasPrefix(k, paxXattr) { - if hdr.Xattrs == nil { - hdr.Xattrs = make(map[string]string) - } - hdr.Xattrs[k[len(paxXattr):]] = v - } else if strings.HasPrefix(k, paxWindows) { - if hdr.Winheaders == nil { - hdr.Winheaders = make(map[string]string) - } - hdr.Winheaders[k[len(paxWindows):]] = v - } - } - } - return nil -} - -// parsePAXTime takes a string of the form %d.%d as described in -// the PAX specification. -func parsePAXTime(t string) (time.Time, error) { - buf := []byte(t) - pos := bytes.IndexByte(buf, '.') - var seconds, nanoseconds int64 - var err error - if pos == -1 { - seconds, err = strconv.ParseInt(t, 10, 0) - if err != nil { - return time.Time{}, err - } - } else { - seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) - if err != nil { - return time.Time{}, err - } - nano_buf := string(buf[pos+1:]) - // Pad as needed before converting to a decimal. 
- // For example .030 -> .030000000 -> 30000000 nanoseconds - if len(nano_buf) < maxNanoSecondIntSize { - // Right pad - nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf)) - } else if len(nano_buf) > maxNanoSecondIntSize { - // Right truncate - nano_buf = nano_buf[:maxNanoSecondIntSize] - } - nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0) - if err != nil { - return time.Time{}, err - } - } - ts := time.Unix(seconds, nanoseconds) - return ts, nil -} - -// parsePAX parses PAX headers. -// If an extended header (type 'x') is invalid, ErrHeader is returned -func parsePAX(r io.Reader) (map[string]string, error) { - buf, err := ioutil.ReadAll(r) - if err != nil { - return nil, err - } - sbuf := string(buf) - - // For GNU PAX sparse format 0.0 support. - // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. - var sparseMap bytes.Buffer - - headers := make(map[string]string) - // Each record is constructed as - // "%d %s=%s\n", length, keyword, value - for len(sbuf) > 0 { - key, value, residual, err := parsePAXRecord(sbuf) - if err != nil { - return nil, ErrHeader - } - sbuf = residual - - keyStr := string(key) - if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { - // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. - sparseMap.WriteString(value) - sparseMap.Write([]byte{','}) - } else { - // Normal key. Set the value in the headers map. - headers[keyStr] = string(value) - } - } - if sparseMap.Len() != 0 { - // Add sparse info to headers, chopping off the extra comma - sparseMap.Truncate(sparseMap.Len() - 1) - headers[paxGNUSparseMap] = sparseMap.String() - } - return headers, nil -} - -// parsePAXRecord parses the input PAX record string into a key-value pair. -// If parsing is successful, it will slice off the currently read record and -// return the remainder as r. 
-// -// A PAX record is of the following form: -// "%d %s=%s\n" % (size, key, value) -func parsePAXRecord(s string) (k, v, r string, err error) { - // The size field ends at the first space. - sp := strings.IndexByte(s, ' ') - if sp == -1 { - return "", "", s, ErrHeader - } - - // Parse the first token as a decimal integer. - n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int - if perr != nil || n < 5 || int64(len(s)) < n { - return "", "", s, ErrHeader - } - - // Extract everything between the space and the final newline. - rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] - if nl != "\n" { - return "", "", s, ErrHeader - } - - // The first equals separates the key from the value. - eq := strings.IndexByte(rec, '=') - if eq == -1 { - return "", "", s, ErrHeader - } - return rec[:eq], rec[eq+1:], rem, nil -} - -// parseString parses bytes as a NUL-terminated C-style string. -// If a NUL byte is not found then the whole slice is returned as a string. -func (*parser) parseString(b []byte) string { - n := 0 - for n < len(b) && b[n] != 0 { - n++ - } - return string(b[0:n]) -} - -// parseNumeric parses the input as being encoded in either base-256 or octal. -// This function may return negative numbers. -// If parsing fails or an integer overflow occurs, err will be set. -func (p *parser) parseNumeric(b []byte) int64 { - // Check for base-256 (binary) format first. - // If the first bit is set, then all following bits constitute a two's - // complement encoded number in big-endian byte order. - if len(b) > 0 && b[0]&0x80 != 0 { - // Handling negative numbers relies on the following identity: - // -a-1 == ^a - // - // If the number is negative, we use an inversion mask to invert the - // data bytes and treat the value as an unsigned number. 
- var inv byte // 0x00 if positive or zero, 0xff if negative - if b[0]&0x40 != 0 { - inv = 0xff - } - - var x uint64 - for i, c := range b { - c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing - if i == 0 { - c &= 0x7f // Ignore signal bit in first byte - } - if (x >> 56) > 0 { - p.err = ErrHeader // Integer overflow - return 0 - } - x = x<<8 | uint64(c) - } - if (x >> 63) > 0 { - p.err = ErrHeader // Integer overflow - return 0 - } - if inv == 0xff { - return ^int64(x) - } - return int64(x) - } - - // Normal case is base-8 (octal) format. - return p.parseOctal(b) -} - -func (p *parser) parseOctal(b []byte) int64 { - // Because unused fields are filled with NULs, we need - // to skip leading NULs. Fields may also be padded with - // spaces or NULs. - // So we remove leading and trailing NULs and spaces to - // be sure. - b = bytes.Trim(b, " \x00") - - if len(b) == 0 { - return 0 - } - x, perr := strconv.ParseUint(p.parseString(b), 8, 64) - if perr != nil { - p.err = ErrHeader - } - return int64(x) -} - -// skipUnread skips any unread bytes in the existing file entry, as well as any -// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is -// encountered in the data portion; it is okay to hit io.EOF in the padding. -// -// Note that this function still works properly even when sparse files are being -// used since numBytes returns the bytes remaining in the underlying io.Reader. -func (tr *Reader) skipUnread() error { - dataSkip := tr.numBytes() // Number of data bytes to skip - totalSkip := dataSkip + tr.pad // Total number of bytes to skip - tr.curr, tr.pad = nil, 0 - - // If possible, Seek to the last byte before the end of the data section. - // Do this because Seek is often lazy about reporting errors; this will mask - // the fact that the tar stream may be truncated. We can rely on the - // io.CopyN done shortly afterwards to trigger any IO errors. 
- var seekSkipped int64 // Number of bytes skipped via Seek - if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { - // Not all io.Seeker can actually Seek. For example, os.Stdin implements - // io.Seeker, but calling Seek always returns an error and performs - // no action. Thus, we try an innocent seek to the current position - // to see if Seek is really supported. - pos1, err := sr.Seek(0, os.SEEK_CUR) - if err == nil { - // Seek seems supported, so perform the real Seek. - pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) - if err != nil { - tr.err = err - return tr.err - } - seekSkipped = pos2 - pos1 - } - } - - var copySkipped int64 // Number of bytes skipped via CopyN - copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) - if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { - tr.err = io.ErrUnexpectedEOF - } - return tr.err -} - -func (tr *Reader) verifyChecksum(header []byte) bool { - if tr.err != nil { - return false - } - - var p parser - given := p.parseOctal(header[148:156]) - unsigned, signed := checksum(header) - return p.err == nil && (given == unsigned || given == signed) -} - -// readHeader reads the next block header and assumes that the underlying reader -// is already aligned to a block boundary. -// -// The err will be set to io.EOF only when one of the following occurs: -// * Exactly 0 bytes are read and EOF is hit. -// * Exactly 1 block of zeros is read and EOF is hit. -// * At least 2 blocks of zeros are read. -func (tr *Reader) readHeader() *Header { - header := tr.hdrBuff[:] - copy(header, zeroBlock) - - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { - return nil // io.EOF is okay here - } - - // Two blocks of zero bytes marks the end of the archive. 
- if bytes.Equal(header, zeroBlock[0:blockSize]) { - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { - return nil // io.EOF is okay here - } - if bytes.Equal(header, zeroBlock[0:blockSize]) { - tr.err = io.EOF - } else { - tr.err = ErrHeader // zero block and then non-zero block - } - return nil - } - - if !tr.verifyChecksum(header) { - tr.err = ErrHeader - return nil - } - - // Unpack - var p parser - hdr := new(Header) - s := slicer(header) - - hdr.Name = p.parseString(s.next(100)) - hdr.Mode = p.parseNumeric(s.next(8)) - hdr.Uid = int(p.parseNumeric(s.next(8))) - hdr.Gid = int(p.parseNumeric(s.next(8))) - hdr.Size = p.parseNumeric(s.next(12)) - hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) - s.next(8) // chksum - hdr.Typeflag = s.next(1)[0] - hdr.Linkname = p.parseString(s.next(100)) - - // The remainder of the header depends on the value of magic. - // The original (v7) version of tar had no explicit magic field, - // so its magic bytes, like the rest of the block, are NULs. - magic := string(s.next(8)) // contains version field as well. 
- var format string - switch { - case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988) - if string(header[508:512]) == "tar\x00" { - format = "star" - } else { - format = "posix" - } - case magic == "ustar \x00": // old GNU tar - format = "gnu" - } - - switch format { - case "posix", "gnu", "star": - hdr.Uname = p.parseString(s.next(32)) - hdr.Gname = p.parseString(s.next(32)) - devmajor := s.next(8) - devminor := s.next(8) - if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = p.parseNumeric(devmajor) - hdr.Devminor = p.parseNumeric(devminor) - } - var prefix string - switch format { - case "posix", "gnu": - prefix = p.parseString(s.next(155)) - case "star": - prefix = p.parseString(s.next(131)) - hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) - hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) - } - if len(prefix) > 0 { - hdr.Name = prefix + "/" + hdr.Name - } - } - - if p.err != nil { - tr.err = p.err - return nil - } - - nb := hdr.Size - if isHeaderOnlyType(hdr.Typeflag) { - nb = 0 - } - if nb < 0 { - tr.err = ErrHeader - return nil - } - - // Set the current file reader. - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two - tr.curr = ®FileReader{r: tr.r, nb: nb} - - // Check for old GNU sparse format entry. - if hdr.Typeflag == TypeGNUSparse { - // Get the real size of the file. - hdr.Size = p.parseNumeric(header[483:495]) - if p.err != nil { - tr.err = p.err - return nil - } - - // Read the sparse map. - sp := tr.readOldGNUSparseMap(header) - if tr.err != nil { - return nil - } - - // Current file is a GNU sparse file. Update the current file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil - } - } - - return hdr -} - -// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. -// The sparse map is stored in the tar header if it's small enough. 
If it's larger than four entries, -// then one or more extension headers are used to store the rest of the sparse map. -func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { - var p parser - isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 - spCap := oldGNUSparseMainHeaderNumEntries - if isExtended { - spCap += oldGNUSparseExtendedHeaderNumEntries - } - sp := make([]sparseEntry, 0, spCap) - s := slicer(header[oldGNUSparseMainHeaderOffset:]) - - // Read the four entries from the main tar header - for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) - if p.err != nil { - tr.err = p.err - return nil - } - if offset == 0 && numBytes == 0 { - break - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) - } - - for isExtended { - // There are more entries. Read an extension header and parse its entries. - sparseHeader := make([]byte, blockSize) - if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil { - return nil - } - isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 - s = slicer(sparseHeader) - for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) - if p.err != nil { - tr.err = p.err - return nil - } - if offset == 0 && numBytes == 0 { - break - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) - } - } - return sp -} - -// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format -// version 1.0. The format of the sparse map consists of a series of -// newline-terminated numeric fields. The first field is the number of entries -// and is always present. Following this are the entries, consisting of two -// fields (offset, numBytes). 
This function must stop reading at the end -// boundary of the block containing the last newline. -// -// Note that the GNU manual says that numeric values should be encoded in octal -// format. However, the GNU tar utility itself outputs these values in decimal. -// As such, this library treats values as being encoded in decimal. -func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - var cntNewline int64 - var buf bytes.Buffer - var blk = make([]byte, blockSize) - - // feedTokens copies data in numBlock chunks from r into buf until there are - // at least cnt newlines in buf. It will not read more blocks than needed. - var feedTokens = func(cnt int64) error { - for cntNewline < cnt { - if _, err := io.ReadFull(r, blk); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } - return err - } - buf.Write(blk) - for _, c := range blk { - if c == '\n' { - cntNewline++ - } - } - } - return nil - } - - // nextToken gets the next token delimited by a newline. This assumes that - // at least one newline exists in the buffer. - var nextToken = func() string { - cntNewline-- - tok, _ := buf.ReadString('\n') - return tok[:len(tok)-1] // Cut off newline - } - - // Parse for the number of entries. - // Use integer overflow resistant math to check this. - if err := feedTokens(1); err != nil { - return nil, err - } - numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int - if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { - return nil, ErrHeader - } - - // Parse for all member entries. - // numEntries is trusted after this since a potential attacker must have - // committed resources proportional to what this library used. 
- if err := feedTokens(2 * numEntries); err != nil { - return nil, err - } - sp := make([]sparseEntry, 0, numEntries) - for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { - return nil, ErrHeader - } - numBytes, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) - } - return sp, nil -} - -// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format -// version 0.1. The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { - // Get number of entries. - // Use integer overflow resistant math to check this. - numEntriesStr := extHdrs[paxGNUSparseNumBlocks] - numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int - if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { - return nil, ErrHeader - } - - // There should be two numbers in sparseMap for each entry. - sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") - if int64(len(sparseMap)) != 2*numEntries { - return nil, ErrHeader - } - - // Loop through the entries in the sparse map. - // numEntries is trusted now. - sp := make([]sparseEntry, 0, numEntries) - for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) - if err != nil { - return nil, ErrHeader - } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) - } - return sp, nil -} - -// numBytes returns the number of bytes left to read in the current file's entry -// in the tar archive, or 0 if there is no current file. 
-func (tr *Reader) numBytes() int64 { - if tr.curr == nil { - // No current file, so no bytes - return 0 - } - return tr.curr.numBytes() -} - -// Read reads from the current entry in the tar archive. -// It returns 0, io.EOF when it reaches the end of that entry, -// until Next is called to advance to the next entry. -// -// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, -// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what -// the Header.Size claims. -func (tr *Reader) Read(b []byte) (n int, err error) { - if tr.err != nil { - return 0, tr.err - } - if tr.curr == nil { - return 0, io.EOF - } - - n, err = tr.curr.Read(b) - if err != nil && err != io.EOF { - tr.err = err - } - return -} - -func (rfr *regFileReader) Read(b []byte) (n int, err error) { - if rfr.nb == 0 { - // file consumed - return 0, io.EOF - } - if int64(len(b)) > rfr.nb { - b = b[0:rfr.nb] - } - n, err = rfr.r.Read(b) - rfr.nb -= int64(n) - - if err == io.EOF && rfr.nb > 0 { - err = io.ErrUnexpectedEOF - } - return -} - -// numBytes returns the number of bytes left to read in the file's data in the tar archive. -func (rfr *regFileReader) numBytes() int64 { - return rfr.nb -} - -// newSparseFileReader creates a new sparseFileReader, but validates all of the -// sparse entries before doing so. -func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { - if total < 0 { - return nil, ErrHeader // Total size cannot be negative - } - - // Validate all sparse entries. These are the same checks as performed by - // the BSD tar utility. 
- for i, s := range sp { - switch { - case s.offset < 0 || s.numBytes < 0: - return nil, ErrHeader // Negative values are never okay - case s.offset > math.MaxInt64-s.numBytes: - return nil, ErrHeader // Integer overflow with large length - case s.offset+s.numBytes > total: - return nil, ErrHeader // Region extends beyond the "real" size - case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: - return nil, ErrHeader // Regions can't overlap and must be in order - } - } - return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil -} - -// readHole reads a sparse hole ending at endOffset. -func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { - n64 := endOffset - sfr.pos - if n64 > int64(len(b)) { - n64 = int64(len(b)) - } - n := int(n64) - for i := 0; i < n; i++ { - b[i] = 0 - } - sfr.pos += n64 - return n -} - -// Read reads the sparse file data in expanded form. -func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - // Skip past all empty fragments. - for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { - sfr.sp = sfr.sp[1:] - } - - // If there are no more fragments, then it is possible that there - // is one last sparse hole. - if len(sfr.sp) == 0 { - // This behavior matches the BSD tar utility. - // However, GNU tar stops returning data even if sfr.total is unmet. - if sfr.pos < sfr.total { - return sfr.readHole(b, sfr.total), nil - } - return 0, io.EOF - } - - // In front of a data fragment, so read a hole. - if sfr.pos < sfr.sp[0].offset { - return sfr.readHole(b, sfr.sp[0].offset), nil - } - - // In a data fragment, so read from it. - // This math is overflow free since we verify that offset and numBytes can - // be safely added when creating the sparseFileReader. 
- endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment - bytesLeft := endPos - sfr.pos // Bytes left in fragment - if int64(len(b)) > bytesLeft { - b = b[:bytesLeft] - } - - n, err = sfr.rfr.Read(b) - sfr.pos += int64(n) - if err == io.EOF { - if sfr.pos < endPos { - err = io.ErrUnexpectedEOF // There was supposed to be more data - } else if sfr.pos < sfr.total { - err = nil // There is still an implicit sparse hole at the end - } - } - - if sfr.pos == endPos { - sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it - } - return n, err -} - -// numBytes returns the number of bytes left to read in the sparse file's -// sparse-encoded data in the tar archive. -func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.numBytes() -} diff --git a/vendor/github.com/Microsoft/go-winio/archive/tar/stat_atim.go b/vendor/github.com/Microsoft/go-winio/archive/tar/stat_atim.go deleted file mode 100644 index cf9cc79c59..0000000000 --- a/vendor/github.com/Microsoft/go-winio/archive/tar/stat_atim.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build linux dragonfly openbsd solaris - -package tar - -import ( - "syscall" - "time" -) - -func statAtime(st *syscall.Stat_t) time.Time { - return time.Unix(st.Atim.Unix()) -} - -func statCtime(st *syscall.Stat_t) time.Time { - return time.Unix(st.Ctim.Unix()) -} diff --git a/vendor/github.com/Microsoft/go-winio/archive/tar/stat_atimespec.go b/vendor/github.com/Microsoft/go-winio/archive/tar/stat_atimespec.go deleted file mode 100644 index 6f17dbe307..0000000000 --- a/vendor/github.com/Microsoft/go-winio/archive/tar/stat_atimespec.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// +build darwin freebsd netbsd - -package tar - -import ( - "syscall" - "time" -) - -func statAtime(st *syscall.Stat_t) time.Time { - return time.Unix(st.Atimespec.Unix()) -} - -func statCtime(st *syscall.Stat_t) time.Time { - return time.Unix(st.Ctimespec.Unix()) -} diff --git a/vendor/github.com/Microsoft/go-winio/archive/tar/stat_unix.go b/vendor/github.com/Microsoft/go-winio/archive/tar/stat_unix.go deleted file mode 100644 index cb843db4cf..0000000000 --- a/vendor/github.com/Microsoft/go-winio/archive/tar/stat_unix.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build linux darwin dragonfly freebsd openbsd netbsd solaris - -package tar - -import ( - "os" - "syscall" -) - -func init() { - sysStat = statUnix -} - -func statUnix(fi os.FileInfo, h *Header) error { - sys, ok := fi.Sys().(*syscall.Stat_t) - if !ok { - return nil - } - h.Uid = int(sys.Uid) - h.Gid = int(sys.Gid) - // TODO(bradfitz): populate username & group. os/user - // doesn't cache LookupId lookups, and lacks group - // lookup functions. - h.AccessTime = statAtime(sys) - h.ChangeTime = statCtime(sys) - // TODO(bradfitz): major/minor device numbers? - return nil -} diff --git a/vendor/github.com/Microsoft/go-winio/archive/tar/writer.go b/vendor/github.com/Microsoft/go-winio/archive/tar/writer.go deleted file mode 100644 index 30d7e606d6..0000000000 --- a/vendor/github.com/Microsoft/go-winio/archive/tar/writer.go +++ /dev/null @@ -1,444 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package tar - -// TODO(dsymonds): -// - catch more errors (no first header, etc.) 
- -import ( - "bytes" - "errors" - "fmt" - "io" - "path" - "sort" - "strconv" - "strings" - "time" -) - -var ( - ErrWriteTooLong = errors.New("archive/tar: write too long") - ErrFieldTooLong = errors.New("archive/tar: header field too long") - ErrWriteAfterClose = errors.New("archive/tar: write after close") - errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") -) - -// A Writer provides sequential writing of a tar archive in POSIX.1 format. -// A tar archive consists of a sequence of files. -// Call WriteHeader to begin a new file, and then call Write to supply that file's data, -// writing at most hdr.Size bytes in total. -type Writer struct { - w io.Writer - err error - nb int64 // number of unwritten bytes for current file entry - pad int64 // amount of padding to write after current file entry - closed bool - usedBinary bool // whether the binary numeric field extension was used - preferPax bool // use pax header instead of binary numeric header - hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header - paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header -} - -type formatter struct { - err error // Last error seen -} - -// NewWriter creates a new Writer writing to w. -func NewWriter(w io.Writer) *Writer { return &Writer{w: w, preferPax: true} } - -// Flush finishes writing the current file (optional). -func (tw *Writer) Flush() error { - if tw.nb > 0 { - tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) - return tw.err - } - - n := tw.nb + tw.pad - for n > 0 && tw.err == nil { - nr := n - if nr > blockSize { - nr = blockSize - } - var nw int - nw, tw.err = tw.w.Write(zeroBlock[0:nr]) - n -= int64(nw) - } - tw.nb = 0 - tw.pad = 0 - return tw.err -} - -// Write s into b, terminating it with a NUL if there is room. 
-func (f *formatter) formatString(b []byte, s string) { - if len(s) > len(b) { - f.err = ErrFieldTooLong - return - } - ascii := toASCII(s) - copy(b, ascii) - if len(ascii) < len(b) { - b[len(ascii)] = 0 - } -} - -// Encode x as an octal ASCII string and write it into b with leading zeros. -func (f *formatter) formatOctal(b []byte, x int64) { - s := strconv.FormatInt(x, 8) - // leading zeros, but leave room for a NUL. - for len(s)+1 < len(b) { - s = "0" + s - } - f.formatString(b, s) -} - -// fitsInBase256 reports whether x can be encoded into n bytes using base-256 -// encoding. Unlike octal encoding, base-256 encoding does not require that the -// string ends with a NUL character. Thus, all n bytes are available for output. -// -// If operating in binary mode, this assumes strict GNU binary mode; which means -// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is -// equivalent to the sign bit in two's complement form. -func fitsInBase256(n int, x int64) bool { - var binBits = uint(n-1) * 8 - return n >= 9 || (x >= -1<= 0; i-- { - b[i] = byte(x) - x >>= 8 - } - b[0] |= 0x80 // Highest bit indicates binary format - return - } - - f.formatOctal(b, 0) // Last resort, just write zero - f.err = ErrFieldTooLong -} - -var ( - minTime = time.Unix(0, 0) - // There is room for 11 octal digits (33 bits) of mtime. - maxTime = minTime.Add((1<<33 - 1) * time.Second) -) - -// WriteHeader writes hdr and prepares to accept the file's contents. -// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -func (tw *Writer) WriteHeader(hdr *Header) error { - return tw.writeHeader(hdr, true) -} - -// WriteHeader writes hdr and prepares to accept the file's contents. -// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -// As this method is called internally by writePax header to allow it to -// suppress writing the pax header. 
-func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { - if tw.closed { - return ErrWriteAfterClose - } - if tw.err == nil { - tw.Flush() - } - if tw.err != nil { - return tw.err - } - - // a map to hold pax header records, if any are needed - paxHeaders := make(map[string]string) - - // TODO(shanemhansen): we might want to use PAX headers for - // subsecond time resolution, but for now let's just capture - // too long fields or non ascii characters - - var f formatter - var header []byte - - // We need to select which scratch buffer to use carefully, - // since this method is called recursively to write PAX headers. - // If allowPax is true, this is the non-recursive call, and we will use hdrBuff. - // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is - // already being used by the non-recursive call, so we must use paxHdrBuff. - header = tw.hdrBuff[:] - if !allowPax { - header = tw.paxHdrBuff[:] - } - copy(header, zeroBlock) - s := slicer(header) - - // Wrappers around formatter that automatically sets paxHeaders if the - // argument extends beyond the capacity of the input byte slice. - var formatString = func(b []byte, s string, paxKeyword string) { - needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } - f.formatString(b, s) - } - var formatNumeric = func(b []byte, x int64, paxKeyword string) { - // Try octal first. - s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - f.formatOctal(b, x) - return - } - - // If it is too long for octal, and PAX is preferred, use a PAX header. 
- if paxKeyword != paxNone && tw.preferPax { - f.formatOctal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - tw.usedBinary = true - f.formatNumeric(b, x) - } - var formatTime = func(b []byte, t time.Time, paxKeyword string) { - var unixTime int64 - if !t.Before(minTime) && !t.After(maxTime) { - unixTime = t.Unix() - } - formatNumeric(b, unixTime, paxNone) - - // Write a PAX header if the time didn't fit precisely. - if paxKeyword != "" && tw.preferPax && allowPax && (t.Nanosecond() != 0 || !t.Before(minTime) || !t.After(maxTime)) { - paxHeaders[paxKeyword] = formatPAXTime(t) - } - } - - // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - pathHeaderBytes := s.next(fileNameSize) - - formatString(pathHeaderBytes, hdr.Name, paxPath) - - f.formatOctal(s.next(8), hdr.Mode) // 100:108 - formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 - formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 - formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 - formatTime(s.next(12), hdr.ModTime, paxMtime) // 136:148 - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 - - formatString(s.next(100), hdr.Linkname, paxLinkpath) - - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - formatString(s.next(32), hdr.Uname, paxUname) // 265:297 - formatString(s.next(32), hdr.Gname, paxGname) // 297:329 - formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 - formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 - - // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - prefixHeaderBytes := s.next(155) - formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix - - // Use the GNU magic instead of POSIX magic if we used any GNU extensions. 
- if tw.usedBinary { - copy(header[257:265], []byte("ustar \x00")) - } - - _, paxPathUsed := paxHeaders[paxPath] - // try to use a ustar header when only the name is too long - if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - prefix, suffix, ok := splitUSTARPath(hdr.Name) - if ok { - // Since we can encode in USTAR format, disable PAX header. - delete(paxHeaders, paxPath) - - // Update the path fields - formatString(pathHeaderBytes, suffix, paxNone) - formatString(prefixHeaderBytes, prefix, paxNone) - } - } - - // The chksum field is terminated by a NUL and a space. - // This is different from the other octal fields. - chksum, _ := checksum(header) - f.formatOctal(header[148:155], chksum) // Never fails - header[155] = ' ' - - // Check if there were any formatting errors. - if f.err != nil { - tw.err = f.err - return tw.err - } - - if allowPax { - if !hdr.AccessTime.IsZero() { - paxHeaders[paxAtime] = formatPAXTime(hdr.AccessTime) - } - if !hdr.ChangeTime.IsZero() { - paxHeaders[paxCtime] = formatPAXTime(hdr.ChangeTime) - } - if !hdr.CreationTime.IsZero() { - paxHeaders[paxCreationTime] = formatPAXTime(hdr.CreationTime) - } - for k, v := range hdr.Xattrs { - paxHeaders[paxXattr+k] = v - } - for k, v := range hdr.Winheaders { - paxHeaders[paxWindows+k] = v - } - } - - if len(paxHeaders) > 0 { - if !allowPax { - return errInvalidHeader - } - if err := tw.writePAXHeader(hdr, paxHeaders); err != nil { - return err - } - } - tw.nb = int64(hdr.Size) - tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize - - _, tw.err = tw.w.Write(header) - return tw.err -} - -func formatPAXTime(t time.Time) string { - sec := t.Unix() - usec := t.Nanosecond() - s := strconv.FormatInt(sec, 10) - if usec != 0 { - s = fmt.Sprintf("%s.%09d", s, usec) - } - return s -} - -// splitUSTARPath splits a path according to USTAR prefix and suffix rules. -// If the path is not splittable, then it will return ("", "", false). 
-func splitUSTARPath(name string) (prefix, suffix string, ok bool) { - length := len(name) - if length <= fileNameSize || !isASCII(name) { - return "", "", false - } else if length > fileNamePrefixSize+1 { - length = fileNamePrefixSize + 1 - } else if name[length-1] == '/' { - length-- - } - - i := strings.LastIndex(name[:length], "/") - nlen := len(name) - i - 1 // nlen is length of suffix - plen := i // plen is length of prefix - if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { - return "", "", false - } - return name[:i], name[i+1:], true -} - -// writePaxHeader writes an extended pax header to the -// archive. -func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error { - // Prepare extended header - ext := new(Header) - ext.Typeflag = TypeXHeader - // Setting ModTime is required for reader parsing to - // succeed, and seems harmless enough. - ext.ModTime = hdr.ModTime - // The spec asks that we namespace our pseudo files - // with the current pid. However, this results in differing outputs - // for identical inputs. As such, the constant 0 is now used instead. - // golang.org/issue/12358 - dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, "PaxHeaders.0", file) - - ascii := toASCII(fullName) - if len(ascii) > 100 { - ascii = ascii[:100] - } - ext.Name = ascii - // Construct the body - var buf bytes.Buffer - - // Keys are sorted before writing to body to allow deterministic output. 
- var keys []string - for k := range paxHeaders { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) - } - - ext.Size = int64(len(buf.Bytes())) - if err := tw.writeHeader(ext, false); err != nil { - return err - } - if _, err := tw.Write(buf.Bytes()); err != nil { - return err - } - if err := tw.Flush(); err != nil { - return err - } - return nil -} - -// formatPAXRecord formats a single PAX record, prefixing it with the -// appropriate length. -func formatPAXRecord(k, v string) string { - const padding = 3 // Extra padding for ' ', '=', and '\n' - size := len(k) + len(v) + padding - size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s=%s\n", size, k, v) - - // Final adjustment if adding size field increased the record size. - if len(record) != size { - size = len(record) - record = fmt.Sprintf("%d %s=%s\n", size, k, v) - } - return record -} - -// Write writes to the current entry in the tar archive. -// Write returns the error ErrWriteTooLong if more than -// hdr.Size bytes are written after WriteHeader. -func (tw *Writer) Write(b []byte) (n int, err error) { - if tw.closed { - err = ErrWriteAfterClose - return - } - overwrite := false - if int64(len(b)) > tw.nb { - b = b[0:tw.nb] - overwrite = true - } - n, err = tw.w.Write(b) - tw.nb -= int64(n) - if err == nil && overwrite { - err = ErrWriteTooLong - return - } - tw.err = err - return -} - -// Close closes the tar archive, flushing any unwritten -// data to the underlying writer. 
-func (tw *Writer) Close() error { - if tw.err != nil || tw.closed { - return tw.err - } - tw.Flush() - tw.closed = true - if tw.err != nil { - return tw.err - } - - // trailer: two zero blocks - for i := 0; i < 2; i++ { - _, tw.err = tw.w.Write(zeroBlock) - if tw.err != nil { - break - } - } - return tw.err -} diff --git a/vendor/github.com/Microsoft/go-winio/backuptar/strconv.go b/vendor/github.com/Microsoft/go-winio/backuptar/strconv.go new file mode 100644 index 0000000000..3416096639 --- /dev/null +++ b/vendor/github.com/Microsoft/go-winio/backuptar/strconv.go @@ -0,0 +1,68 @@ +package backuptar + +import ( + "archive/tar" + "fmt" + "strconv" + "strings" + "time" +) + +// Functions copied from https://github.com/golang/go/blob/master/src/archive/tar/strconv.go +// as we need to manage the LIBARCHIVE.creationtime PAXRecord manually. +// Idea taken from containerd which did the same thing. + +// parsePAXTime takes a string of the form %d.%d as described in the PAX +// specification. Note that this implementation allows for negative timestamps, +// which is allowed for by the PAX specification, but not always portable. +func parsePAXTime(s string) (time.Time, error) { + const maxNanoSecondDigits = 9 + + // Split string into seconds and sub-seconds parts. + ss, sn := s, "" + if pos := strings.IndexByte(s, '.'); pos >= 0 { + ss, sn = s[:pos], s[pos+1:] + } + + // Parse the seconds. + secs, err := strconv.ParseInt(ss, 10, 64) + if err != nil { + return time.Time{}, tar.ErrHeader + } + if len(sn) == 0 { + return time.Unix(secs, 0), nil // No sub-second values + } + + // Parse the nanoseconds. 
+ if strings.Trim(sn, "0123456789") != "" { + return time.Time{}, tar.ErrHeader + } + if len(sn) < maxNanoSecondDigits { + sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad + } else { + sn = sn[:maxNanoSecondDigits] // Right truncate + } + nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed + if len(ss) > 0 && ss[0] == '-' { + return time.Unix(secs, -1*nsecs), nil // Negative correction + } + return time.Unix(secs, nsecs), nil +} + +// formatPAXTime converts ts into a time of the form %d.%d as described in the +// PAX specification. This function is capable of negative timestamps. +func formatPAXTime(ts time.Time) (s string) { + secs, nsecs := ts.Unix(), ts.Nanosecond() + if nsecs == 0 { + return strconv.FormatInt(secs, 10) + } + + // If seconds is negative, then perform correction. + sign := "" + if secs < 0 { + sign = "-" // Remember sign + secs = -(secs + 1) // Add a second to secs + nsecs = -(nsecs - 1e9) // Take that second away from nsecs + } + return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") +} diff --git a/vendor/github.com/Microsoft/go-winio/backuptar/tar.go b/vendor/github.com/Microsoft/go-winio/backuptar/tar.go index d6566dbf0c..088a43c68e 100644 --- a/vendor/github.com/Microsoft/go-winio/backuptar/tar.go +++ b/vendor/github.com/Microsoft/go-winio/backuptar/tar.go @@ -3,6 +3,7 @@ package backuptar import ( + "archive/tar" "encoding/base64" "errors" "fmt" @@ -15,7 +16,6 @@ import ( "time" "github.com/Microsoft/go-winio" - "github.com/Microsoft/go-winio/archive/tar" // until archive/tar supports pax extensions in its interface ) const ( @@ -32,11 +32,13 @@ const ( ) const ( - hdrFileAttributes = "fileattr" - hdrSecurityDescriptor = "sd" - hdrRawSecurityDescriptor = "rawsd" - hdrMountPoint = "mountpoint" - hdrEaPrefix = "xattr." 
+ hdrFileAttributes = "MSWINDOWS.fileattr" + hdrSecurityDescriptor = "MSWINDOWS.sd" + hdrRawSecurityDescriptor = "MSWINDOWS.rawsd" + hdrMountPoint = "MSWINDOWS.mountpoint" + hdrEaPrefix = "MSWINDOWS.xattr." + + hdrCreationTime = "LIBARCHIVE.creationtime" ) func writeZeroes(w io.Writer, count int64) error { @@ -86,16 +88,17 @@ func copySparse(t *tar.Writer, br *winio.BackupStreamReader) error { // BasicInfoHeader creates a tar header from basic file information. func BasicInfoHeader(name string, size int64, fileInfo *winio.FileBasicInfo) *tar.Header { hdr := &tar.Header{ - Name: filepath.ToSlash(name), - Size: size, - Typeflag: tar.TypeReg, - ModTime: time.Unix(0, fileInfo.LastWriteTime.Nanoseconds()), - ChangeTime: time.Unix(0, fileInfo.ChangeTime.Nanoseconds()), - AccessTime: time.Unix(0, fileInfo.LastAccessTime.Nanoseconds()), - CreationTime: time.Unix(0, fileInfo.CreationTime.Nanoseconds()), - Winheaders: make(map[string]string), + Format: tar.FormatPAX, + Name: filepath.ToSlash(name), + Size: size, + Typeflag: tar.TypeReg, + ModTime: time.Unix(0, fileInfo.LastWriteTime.Nanoseconds()), + ChangeTime: time.Unix(0, fileInfo.ChangeTime.Nanoseconds()), + AccessTime: time.Unix(0, fileInfo.LastAccessTime.Nanoseconds()), + PAXRecords: make(map[string]string), } - hdr.Winheaders[hdrFileAttributes] = fmt.Sprintf("%d", fileInfo.FileAttributes) + hdr.PAXRecords[hdrFileAttributes] = fmt.Sprintf("%d", fileInfo.FileAttributes) + hdr.PAXRecords[hdrCreationTime] = formatPAXTime(time.Unix(0, fileInfo.CreationTime.Nanoseconds())) if (fileInfo.FileAttributes & syscall.FILE_ATTRIBUTE_DIRECTORY) != 0 { hdr.Mode |= c_ISDIR @@ -155,7 +158,7 @@ func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size if err != nil { return err } - hdr.Winheaders[hdrRawSecurityDescriptor] = base64.StdEncoding.EncodeToString(sd) + hdr.PAXRecords[hdrRawSecurityDescriptor] = base64.StdEncoding.EncodeToString(sd) case winio.BackupReparseData: hdr.Mode |= c_ISLNK @@ -166,7 +169,7 @@ 
func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size return err } if rp.IsMountPoint { - hdr.Winheaders[hdrMountPoint] = "1" + hdr.PAXRecords[hdrMountPoint] = "1" } hdr.Linkname = rp.Target @@ -183,7 +186,7 @@ func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size // Use base64 encoding for the binary value. Note that there // is no way to encode the EA's flags, since their use doesn't // make any sense for persisted EAs. - hdr.Winheaders[hdrEaPrefix+ea.Name] = base64.StdEncoding.EncodeToString(ea.Value) + hdr.PAXRecords[hdrEaPrefix+ea.Name] = base64.StdEncoding.EncodeToString(ea.Value) } case winio.BackupAlternateData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData: @@ -254,6 +257,7 @@ func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size } if (bhdr.Attributes & winio.StreamSparseAttributes) == 0 { hdr = &tar.Header{ + Format: hdr.Format, Name: name + altName, Mode: hdr.Mode, Typeflag: tar.TypeReg, @@ -296,9 +300,10 @@ func FileInfoFromHeader(hdr *tar.Header) (name string, size int64, fileInfo *win LastAccessTime: syscall.NsecToFiletime(hdr.AccessTime.UnixNano()), LastWriteTime: syscall.NsecToFiletime(hdr.ModTime.UnixNano()), ChangeTime: syscall.NsecToFiletime(hdr.ChangeTime.UnixNano()), - CreationTime: syscall.NsecToFiletime(hdr.CreationTime.UnixNano()), + // Default to ModTime, we'll pull hdrCreationTime below if present + CreationTime: syscall.NsecToFiletime(hdr.ModTime.UnixNano()), } - if attrStr, ok := hdr.Winheaders[hdrFileAttributes]; ok { + if attrStr, ok := hdr.PAXRecords[hdrFileAttributes]; ok { attr, err := strconv.ParseUint(attrStr, 10, 32) if err != nil { return "", 0, nil, err @@ -309,6 +314,13 @@ func FileInfoFromHeader(hdr *tar.Header) (name string, size int64, fileInfo *win fileInfo.FileAttributes |= syscall.FILE_ATTRIBUTE_DIRECTORY } } + if creationTimeStr, ok := hdr.PAXRecords[hdrCreationTime]; ok { + creationTime, err := 
parsePAXTime(creationTimeStr) + if err != nil { + return "", 0, nil, err + } + fileInfo.CreationTime = syscall.NsecToFiletime(creationTime.UnixNano()) + } return } @@ -321,13 +333,13 @@ func WriteBackupStreamFromTarFile(w io.Writer, t *tar.Reader, hdr *tar.Header) ( var err error // Maintaining old SDDL-based behavior for backward compatibility. All new tar headers written // by this library will have raw binary for the security descriptor. - if sddl, ok := hdr.Winheaders[hdrSecurityDescriptor]; ok { + if sddl, ok := hdr.PAXRecords[hdrSecurityDescriptor]; ok { sd, err = winio.SddlToSecurityDescriptor(sddl) if err != nil { return nil, err } } - if sdraw, ok := hdr.Winheaders[hdrRawSecurityDescriptor]; ok { + if sdraw, ok := hdr.PAXRecords[hdrRawSecurityDescriptor]; ok { sd, err = base64.StdEncoding.DecodeString(sdraw) if err != nil { return nil, err @@ -348,7 +360,7 @@ func WriteBackupStreamFromTarFile(w io.Writer, t *tar.Reader, hdr *tar.Header) ( } } var eas []winio.ExtendedAttribute - for k, v := range hdr.Winheaders { + for k, v := range hdr.PAXRecords { if !strings.HasPrefix(k, hdrEaPrefix) { continue } @@ -380,7 +392,7 @@ func WriteBackupStreamFromTarFile(w io.Writer, t *tar.Reader, hdr *tar.Header) ( } } if hdr.Typeflag == tar.TypeSymlink { - _, isMountPoint := hdr.Winheaders[hdrMountPoint] + _, isMountPoint := hdr.PAXRecords[hdrMountPoint] rp := winio.ReparsePoint{ Target: filepath.FromSlash(hdr.Linkname), IsMountPoint: isMountPoint, diff --git a/vendor/github.com/Microsoft/go-winio/pipe.go b/vendor/github.com/Microsoft/go-winio/pipe.go index d6a46f6a24..ff96dff1c6 100644 --- a/vendor/github.com/Microsoft/go-winio/pipe.go +++ b/vendor/github.com/Microsoft/go-winio/pipe.go @@ -182,13 +182,14 @@ func (s pipeAddress) String() string { } // tryDialPipe attempts to dial the pipe at `path` until `ctx` cancellation or timeout. 
-func tryDialPipe(ctx context.Context, path *string) (syscall.Handle, error) { +func tryDialPipe(ctx context.Context, path *string, access uint32) (syscall.Handle, error) { for { + select { case <-ctx.Done(): return syscall.Handle(0), ctx.Err() default: - h, err := createFile(*path, syscall.GENERIC_READ|syscall.GENERIC_WRITE, 0, nil, syscall.OPEN_EXISTING, syscall.FILE_FLAG_OVERLAPPED|cSECURITY_SQOS_PRESENT|cSECURITY_ANONYMOUS, 0) + h, err := createFile(*path, access, 0, nil, syscall.OPEN_EXISTING, syscall.FILE_FLAG_OVERLAPPED|cSECURITY_SQOS_PRESENT|cSECURITY_ANONYMOUS, 0) if err == nil { return h, nil } @@ -197,7 +198,7 @@ func tryDialPipe(ctx context.Context, path *string) (syscall.Handle, error) { } // Wait 10 msec and try again. This is a rather simplistic // view, as we always try each 10 milliseconds. - time.Sleep(time.Millisecond * 10) + time.Sleep(10 * time.Millisecond) } } } @@ -210,7 +211,7 @@ func DialPipe(path string, timeout *time.Duration) (net.Conn, error) { if timeout != nil { absTimeout = time.Now().Add(*timeout) } else { - absTimeout = time.Now().Add(time.Second * 2) + absTimeout = time.Now().Add(2 * time.Second) } ctx, _ := context.WithDeadline(context.Background(), absTimeout) conn, err := DialPipeContext(ctx, path) @@ -223,9 +224,15 @@ func DialPipe(path string, timeout *time.Duration) (net.Conn, error) { // DialPipeContext attempts to connect to a named pipe by `path` until `ctx` // cancellation or timeout. func DialPipeContext(ctx context.Context, path string) (net.Conn, error) { + return DialPipeAccess(ctx, path, syscall.GENERIC_READ|syscall.GENERIC_WRITE) +} + +// DialPipeAccess attempts to connect to a named pipe by `path` with `access` until `ctx` +// cancellation or timeout. 
+func DialPipeAccess(ctx context.Context, path string, access uint32) (net.Conn, error) { var err error var h syscall.Handle - h, err = tryDialPipe(ctx, &path) + h, err = tryDialPipe(ctx, &path, access) if err != nil { return nil, err } diff --git a/vendor/modules.txt b/vendor/modules.txt index 27a97d50e7..cb1c5597f5 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,6 +1,5 @@ -# github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5 +# github.com/Microsoft/go-winio v0.4.15-0.20200908182639-5b44b70ab3ab github.com/Microsoft/go-winio -github.com/Microsoft/go-winio/archive/tar github.com/Microsoft/go-winio/backuptar github.com/Microsoft/go-winio/pkg/etw github.com/Microsoft/go-winio/pkg/etwlogrus From 12eee6e44ec39e3627c2974b8baf98694ef423cd Mon Sep 17 00:00:00 2001 From: Daniel Canter Date: Fri, 4 Sep 2020 12:54:54 -0700 Subject: [PATCH 13/20] Add new winapi bindings for job containers * Add windows bindings needed for job containers work Signed-off-by: Daniel Canter --- internal/processorinfo/processor_count.go | 8 +- internal/processorinfo/zsyscall_windows.go | 49 ------ internal/winapi/errors.go | 7 +- internal/winapi/{ntfs.go => filesystem.go} | 0 internal/winapi/heapalloc.go | 4 - internal/winapi/jobobject.go | 120 ++++++++++++++ internal/winapi/logon.go | 30 ++++ internal/winapi/memory.go | 11 ++ internal/winapi/path.go | 11 ++ internal/winapi/process.go | 3 + .../syscall.go => winapi/processor.go} | 8 +- internal/winapi/winapi.go | 6 +- internal/winapi/zsyscall_windows.go | 154 +++++++++++++++--- 13 files changed, 323 insertions(+), 88 deletions(-) delete mode 100644 internal/processorinfo/zsyscall_windows.go rename internal/winapi/{ntfs.go => filesystem.go} (100%) delete mode 100644 internal/winapi/heapalloc.go create mode 100644 internal/winapi/jobobject.go create mode 100644 internal/winapi/logon.go create mode 100644 internal/winapi/memory.go create mode 100644 internal/winapi/path.go create mode 100644 internal/winapi/process.go 
rename internal/{processorinfo/syscall.go => winapi/processor.go} (52%) diff --git a/internal/processorinfo/processor_count.go b/internal/processorinfo/processor_count.go index b54dde8bf4..3f6301ed68 100644 --- a/internal/processorinfo/processor_count.go +++ b/internal/processorinfo/processor_count.go @@ -1,12 +1,16 @@ package processorinfo -import "runtime" +import ( + "runtime" + + "github.com/Microsoft/hcsshim/internal/winapi" +) // ProcessorCount calls the win32 API function GetMaximumProcessorCount // to get the total number of logical processors on the system. If this // fails it will fall back to runtime.NumCPU func ProcessorCount() int32 { - if amount := getActiveProcessorCount(ALL_PROCESSOR_GROUPS); amount != 0 { + if amount := winapi.GetActiveProcessorCount(winapi.ALL_PROCESSOR_GROUPS); amount != 0 { return int32(amount) } return int32(runtime.NumCPU()) diff --git a/internal/processorinfo/zsyscall_windows.go b/internal/processorinfo/zsyscall_windows.go deleted file mode 100644 index 8bec758708..0000000000 --- a/internal/processorinfo/zsyscall_windows.go +++ /dev/null @@ -1,49 +0,0 @@ -// Code generated mksyscall_windows.exe DO NOT EDIT - -package processorinfo - -import ( - "syscall" - "unsafe" - - "golang.org/x/sys/windows" -) - -var _ unsafe.Pointer - -// Do the interface allocations only once for common -// Errno values. -const ( - errnoERROR_IO_PENDING = 997 -) - -var ( - errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING) -) - -// errnoErr returns common boxed Errno values, to prevent -// allocations at runtime. -func errnoErr(e syscall.Errno) error { - switch e { - case 0: - return nil - case errnoERROR_IO_PENDING: - return errERROR_IO_PENDING - } - // TODO: add more here, after collecting data on the common - // error values see on Windows. (perhaps when running - // all.bat?) 
- return e -} - -var ( - modkernel32 = windows.NewLazySystemDLL("kernel32.dll") - - procGetActiveProcessorCount = modkernel32.NewProc("GetActiveProcessorCount") -) - -func getActiveProcessorCount(groupNumber uint16) (amount uint32) { - r0, _, _ := syscall.Syscall(procGetActiveProcessorCount.Addr(), 1, uintptr(groupNumber), 0, 0) - amount = uint32(r0) - return -} diff --git a/internal/winapi/errors.go b/internal/winapi/errors.go index 1ca86f4c3c..4e80ef68c9 100644 --- a/internal/winapi/errors.go +++ b/internal/winapi/errors.go @@ -1,10 +1,13 @@ package winapi +import "syscall" + //sys RtlNtStatusToDosError(status uint32) (winerr error) = ntdll.RtlNtStatusToDosError const ( - STATUS_REPARSE_POINT_ENCOUNTERED = 0xC000050B - ERROR_NO_MORE_ITEMS = 0x103 + STATUS_REPARSE_POINT_ENCOUNTERED = 0xC000050B + ERROR_NO_MORE_ITEMS = 0x103 + ERROR_MORE_DATA syscall.Errno = 234 ) func NTSuccess(status uint32) bool { diff --git a/internal/winapi/ntfs.go b/internal/winapi/filesystem.go similarity index 100% rename from internal/winapi/ntfs.go rename to internal/winapi/filesystem.go diff --git a/internal/winapi/heapalloc.go b/internal/winapi/heapalloc.go deleted file mode 100644 index 53f62948c9..0000000000 --- a/internal/winapi/heapalloc.go +++ /dev/null @@ -1,4 +0,0 @@ -package winapi - -//sys LocalAlloc(flags uint32, size int) (ptr uintptr) = kernel32.LocalAlloc -//sys LocalFree(ptr uintptr) = kernel32.LocalFree diff --git a/internal/winapi/jobobject.go b/internal/winapi/jobobject.go new file mode 100644 index 0000000000..1ea5b18a3d --- /dev/null +++ b/internal/winapi/jobobject.go @@ -0,0 +1,120 @@ +package winapi + +import ( + "golang.org/x/sys/windows" +) + +// Messages that can be received from an assigned io completion port. 
+// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_associate_completion_port +const ( + JOB_OBJECT_MSG_END_OF_JOB_TIME = 1 + JOB_OBJECT_MSG_END_OF_PROCESS_TIME = 2 + JOB_OBJECT_MSG_ACTIVE_PROCESS_LIMIT = 3 + JOB_OBJECT_MSG_ACTIVE_PROCESS_ZERO = 4 + JOB_OBJECT_MSG_NEW_PROCESS = 6 + JOB_OBJECT_MSG_EXIT_PROCESS = 7 + JOB_OBJECT_MSG_ABNORMAL_EXIT_PROCESS = 8 + JOB_OBJECT_MSG_PROCESS_MEMORY_LIMIT = 9 + JOB_OBJECT_MSG_JOB_MEMORY_LIMIT = 10 + JOB_OBJECT_MSG_NOTIFICATION_LIMIT = 11 +) + +// IO limit flags +// +// https://docs.microsoft.com/en-us/windows/win32/api/jobapi2/ns-jobapi2-jobobject_io_rate_control_information +const JOB_OBJECT_IO_RATE_CONTROL_ENABLE = 0x1 + +// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_cpu_rate_control_information +const ( + JOB_OBJECT_CPU_RATE_CONTROL_ENABLE = 1 << iota + JOB_OBJECT_CPU_RATE_CONTROL_WEIGHT_BASED + JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP + JOB_OBJECT_CPU_RATE_CONTROL_NOTIFY + JOB_OBJECT_CPU_RATE_CONTROL_MIN_MAX_RATE +) + +// JobObjectInformationClass values. 
Used for a call to QueryInformationJobObject +// +// https://docs.microsoft.com/en-us/windows/win32/api/jobapi2/nf-jobapi2-queryinformationjobobject +const ( + JobObjectBasicAccountingInformation uint32 = 1 + JobObjectBasicProcessIdList uint32 = 3 + JobObjectBasicAndIoAccountingInformation uint32 = 8 + JobObjectLimitViolationInformation uint32 = 13 + JobObjectNotificationLimitInformation2 uint32 = 33 +) + +// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_basic_limit_information +type JOBOBJECT_BASIC_LIMIT_INFORMATION struct { + PerProcessUserTimeLimit int64 + PerJobUserTimeLimit int64 + LimitFlags uint32 + MinimumWorkingSetSize uintptr + MaximumWorkingSetSize uintptr + ActiveProcessLimit uint32 + Affinity uintptr + PriorityClass uint32 + SchedulingClass uint32 +} + +// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_cpu_rate_control_information +type JOBOBJECT_CPU_RATE_CONTROL_INFORMATION struct { + ControlFlags uint32 + Rate uint32 +} + +// https://docs.microsoft.com/en-us/windows/win32/api/jobapi2/ns-jobapi2-jobobject_io_rate_control_information +type JOBOBJECT_IO_RATE_CONTROL_INFORMATION struct { + MaxIops int64 + MaxBandwidth int64 + ReservationIops int64 + BaseIOSize uint32 + VolumeName string + ControlFlags uint32 +} + +// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_basic_process_id_list +type JOBOBJECT_BASIC_PROCESS_ID_LIST struct { + NumberOfAssignedProcesses uint32 + NumberOfProcessIdsInList uint32 + ProcessIdList [1]uintptr +} + +// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-jobobject_associate_completion_port +type JOBOBJECT_ASSOCIATE_COMPLETION_PORT struct { + CompletionKey uintptr + CompletionPort windows.Handle +} + +// BOOL IsProcessInJob( +// HANDLE ProcessHandle, +// HANDLE JobHandle, +// PBOOL Result +// ); +// +//sys IsProcessInJob(procHandle windows.Handle, jobHandle windows.Handle, result *bool) (err error) = kernel32.IsProcessInJob 
+ +// BOOL QueryInformationJobObject( +// HANDLE hJob, +// JOBOBJECTINFOCLASS JobObjectInformationClass, +// LPVOID lpJobObjectInformation, +// DWORD cbJobObjectInformationLength, +// LPDWORD lpReturnLength +// ); +// +//sys QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo uintptr, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) = kernel32.QueryInformationJobObject + +// HANDLE OpenJobObjectW( +// DWORD dwDesiredAccess, +// BOOL bInheritHandle, +// LPCWSTR lpName +// ); +// +//sys OpenJobObject(desiredAccess uint32, inheritHandle bool, lpName *uint16) (handle windows.Handle, err error) = kernel32.OpenJobObjectW + +// DWORD SetIoRateControlInformationJobObject( +// HANDLE hJob, +// JOBOBJECT_IO_RATE_CONTROL_INFORMATION *IoRateControlInfo +// ); +// +//sys SetIoRateControlInformationJobObject(jobHandle windows.Handle, ioRateControlInfo *JOBOBJECT_IO_RATE_CONTROL_INFORMATION) (ret uint32, err error) = kernel32.SetIoRateControlInformationJobObject diff --git a/internal/winapi/logon.go b/internal/winapi/logon.go new file mode 100644 index 0000000000..b6e7cfd460 --- /dev/null +++ b/internal/winapi/logon.go @@ -0,0 +1,30 @@ +package winapi + +// BOOL LogonUserW( +// LPCWSTR lpszUsername, +// LPCWSTR lpszDomain, +// LPCWSTR lpszPassword, +// DWORD dwLogonType, +// DWORD dwLogonProvider, +// PHANDLE phToken +// ); +// +//sys LogonUser(username *uint16, domain *uint16, password *uint16, logonType uint32, logonProvider uint32, token *windows.Token) (err error) = advapi32.LogonUserW + +// Logon types +const ( + LOGON32_LOGON_INTERACTIVE uint32 = 2 + LOGON32_LOGON_NETWORK uint32 = 3 + LOGON32_LOGON_BATCH uint32 = 4 + LOGON32_LOGON_SERVICE uint32 = 5 + LOGON32_LOGON_UNLOCK uint32 = 7 + LOGON32_LOGON_NETWORK_CLEARTEXT uint32 = 8 + LOGON32_LOGON_NEW_CREDENTIALS uint32 = 9 +) + +// Logon providers +const ( + LOGON32_PROVIDER_DEFAULT uint32 = 0 + LOGON32_PROVIDER_WINNT40 uint32 = 2 + LOGON32_PROVIDER_WINNT50 uint32 = 3 +)
diff --git a/internal/winapi/memory.go b/internal/winapi/memory.go new file mode 100644 index 0000000000..ccaf5a624f --- /dev/null +++ b/internal/winapi/memory.go @@ -0,0 +1,11 @@ +package winapi + +// VOID RtlMoveMemory( +// _Out_ VOID UNALIGNED *Destination, +// _In_ const VOID UNALIGNED *Source, +// _In_ SIZE_T Length +// ); +//sys RtlMoveMemory(destination *byte, source *byte, length uintptr) (err error) = kernel32.RtlMoveMemory + +//sys LocalAlloc(flags uint32, size int) (ptr uintptr) = kernel32.LocalAlloc +//sys LocalFree(ptr uintptr) = kernel32.LocalFree diff --git a/internal/winapi/path.go b/internal/winapi/path.go new file mode 100644 index 0000000000..0ae8f33ea6 --- /dev/null +++ b/internal/winapi/path.go @@ -0,0 +1,11 @@ +package winapi + +// DWORD SearchPathW( +// LPCWSTR lpPath, +// LPCWSTR lpFileName, +// LPCWSTR lpExtension, +// DWORD nBufferLength, +// LPWSTR lpBuffer, +// LPWSTR *lpFilePart +// ); +//sys SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath **uint16) (size uint32, err error) = kernel32.SearchPathW diff --git a/internal/winapi/process.go b/internal/winapi/process.go new file mode 100644 index 0000000000..adf0168eae --- /dev/null +++ b/internal/winapi/process.go @@ -0,0 +1,3 @@ +package winapi + +const PROCESS_ALL_ACCESS uint32 = 2097151 diff --git a/internal/processorinfo/syscall.go b/internal/winapi/processor.go similarity index 52% rename from internal/processorinfo/syscall.go rename to internal/winapi/processor.go index b96103a324..ce79ac2cdb 100644 --- a/internal/processorinfo/syscall.go +++ b/internal/winapi/processor.go @@ -1,9 +1,7 @@ -package processorinfo - -//go:generate go run ../../mksyscall_windows.go -output zsyscall_windows.go syscall.go - -//sys getActiveProcessorCount(groupNumber uint16) (amount uint32) = kernel32.GetActiveProcessorCount +package winapi // Get count from all processor groups. 
// https://docs.microsoft.com/en-us/windows/win32/procthread/processor-groups const ALL_PROCESSOR_GROUPS = 0xFFFF + +//sys GetActiveProcessorCount(groupNumber uint16) (amount uint32) = kernel32.GetActiveProcessorCount diff --git a/internal/winapi/winapi.go b/internal/winapi/winapi.go index 26e89a0b33..50bdc01f2b 100644 --- a/internal/winapi/winapi.go +++ b/internal/winapi/winapi.go @@ -1,5 +1,5 @@ -/*Package winapi contains various low-level bindings to Windows APIs. It can -be thought of as an extension to golang.org/x/sys/windows. */ +// Package winapi contains various low-level bindings to Windows APIs. It can +// be thought of as an extension to golang.org/x/sys/windows. package winapi -//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go devices.go heapalloc.go ntfs.go errors.go +//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go jobobject.go path.go logon.go memory.go processor.go devices.go filesystem.go errors.go diff --git a/internal/winapi/zsyscall_windows.go b/internal/winapi/zsyscall_windows.go index 1e0ab0ca2e..af83f5b07e 100644 --- a/internal/winapi/zsyscall_windows.go +++ b/internal/winapi/zsyscall_windows.go @@ -37,23 +37,142 @@ func errnoErr(e syscall.Errno) error { } var ( - modcfgmgr32 = windows.NewLazySystemDLL("cfgmgr32.dll") modkernel32 = windows.NewLazySystemDLL("kernel32.dll") + modadvapi32 = windows.NewLazySystemDLL("advapi32.dll") + modcfgmgr32 = windows.NewLazySystemDLL("cfgmgr32.dll") modntdll = windows.NewLazySystemDLL("ntdll.dll") - procCM_Get_Device_ID_List_SizeA = modcfgmgr32.NewProc("CM_Get_Device_ID_List_SizeA") - procCM_Get_Device_ID_ListA = modcfgmgr32.NewProc("CM_Get_Device_ID_ListA") - procCM_Locate_DevNodeW = modcfgmgr32.NewProc("CM_Locate_DevNodeW") - procCM_Get_DevNode_PropertyW = modcfgmgr32.NewProc("CM_Get_DevNode_PropertyW") - procLocalAlloc = modkernel32.NewProc("LocalAlloc") - procLocalFree = modkernel32.NewProc("LocalFree") - procNtCreateFile = 
modntdll.NewProc("NtCreateFile") - procNtSetInformationFile = modntdll.NewProc("NtSetInformationFile") - procNtOpenDirectoryObject = modntdll.NewProc("NtOpenDirectoryObject") - procNtQueryDirectoryObject = modntdll.NewProc("NtQueryDirectoryObject") - procRtlNtStatusToDosError = modntdll.NewProc("RtlNtStatusToDosError") + procIsProcessInJob = modkernel32.NewProc("IsProcessInJob") + procQueryInformationJobObject = modkernel32.NewProc("QueryInformationJobObject") + procOpenJobObjectW = modkernel32.NewProc("OpenJobObjectW") + procSetIoRateControlInformationJobObject = modkernel32.NewProc("SetIoRateControlInformationJobObject") + procSearchPathW = modkernel32.NewProc("SearchPathW") + procLogonUserW = modadvapi32.NewProc("LogonUserW") + procRtlMoveMemory = modkernel32.NewProc("RtlMoveMemory") + procLocalAlloc = modkernel32.NewProc("LocalAlloc") + procLocalFree = modkernel32.NewProc("LocalFree") + procGetActiveProcessorCount = modkernel32.NewProc("GetActiveProcessorCount") + procCM_Get_Device_ID_List_SizeA = modcfgmgr32.NewProc("CM_Get_Device_ID_List_SizeA") + procCM_Get_Device_ID_ListA = modcfgmgr32.NewProc("CM_Get_Device_ID_ListA") + procCM_Locate_DevNodeW = modcfgmgr32.NewProc("CM_Locate_DevNodeW") + procCM_Get_DevNode_PropertyW = modcfgmgr32.NewProc("CM_Get_DevNode_PropertyW") + procNtCreateFile = modntdll.NewProc("NtCreateFile") + procNtSetInformationFile = modntdll.NewProc("NtSetInformationFile") + procNtOpenDirectoryObject = modntdll.NewProc("NtOpenDirectoryObject") + procNtQueryDirectoryObject = modntdll.NewProc("NtQueryDirectoryObject") + procRtlNtStatusToDosError = modntdll.NewProc("RtlNtStatusToDosError") ) +func IsProcessInJob(procHandle windows.Handle, jobHandle windows.Handle, result *bool) (err error) { + r1, _, e1 := syscall.Syscall(procIsProcessInJob.Addr(), 3, uintptr(procHandle), uintptr(jobHandle), uintptr(unsafe.Pointer(result))) + if r1 == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func 
QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo uintptr, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) { + r1, _, e1 := syscall.Syscall6(procQueryInformationJobObject.Addr(), 5, uintptr(jobHandle), uintptr(infoClass), uintptr(jobObjectInfo), uintptr(jobObjectInformationLength), uintptr(unsafe.Pointer(lpReturnLength)), 0) + if r1 == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func OpenJobObject(desiredAccess uint32, inheritHandle bool, lpName *uint16) (handle windows.Handle, err error) { + var _p0 uint32 + if inheritHandle { + _p0 = 1 + } else { + _p0 = 0 + } + r0, _, e1 := syscall.Syscall(procOpenJobObjectW.Addr(), 3, uintptr(desiredAccess), uintptr(_p0), uintptr(unsafe.Pointer(lpName))) + handle = windows.Handle(r0) + if handle == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func SetIoRateControlInformationJobObject(jobHandle windows.Handle, ioRateControlInfo *JOBOBJECT_IO_RATE_CONTROL_INFORMATION) (ret uint32, err error) { + r0, _, e1 := syscall.Syscall(procSetIoRateControlInformationJobObject.Addr(), 2, uintptr(jobHandle), uintptr(unsafe.Pointer(ioRateControlInfo)), 0) + ret = uint32(r0) + if ret == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath **uint16) (size uint32, err error) { + r0, _, e1 := syscall.Syscall6(procSearchPathW.Addr(), 6, uintptr(unsafe.Pointer(lpPath)), uintptr(unsafe.Pointer(lpFileName)), uintptr(unsafe.Pointer(lpExtension)), uintptr(nBufferLength), uintptr(unsafe.Pointer(lpBuffer)), uintptr(unsafe.Pointer(lpFilePath))) + size = uint32(r0) + if size == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func LogonUser(username *uint16, domain *uint16, password 
*uint16, logonType uint32, logonProvider uint32, token *windows.Token) (err error) { + r1, _, e1 := syscall.Syscall6(procLogonUserW.Addr(), 6, uintptr(unsafe.Pointer(username)), uintptr(unsafe.Pointer(domain)), uintptr(unsafe.Pointer(password)), uintptr(logonType), uintptr(logonProvider), uintptr(unsafe.Pointer(token))) + if r1 == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func RtlMoveMemory(destination *byte, source *byte, length uintptr) (err error) { + r1, _, e1 := syscall.Syscall(procRtlMoveMemory.Addr(), 3, uintptr(unsafe.Pointer(destination)), uintptr(unsafe.Pointer(source)), uintptr(length)) + if r1 == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func LocalAlloc(flags uint32, size int) (ptr uintptr) { + r0, _, _ := syscall.Syscall(procLocalAlloc.Addr(), 2, uintptr(flags), uintptr(size), 0) + ptr = uintptr(r0) + return +} + +func LocalFree(ptr uintptr) { + syscall.Syscall(procLocalFree.Addr(), 1, uintptr(ptr), 0, 0) + return +} + +func GetActiveProcessorCount(groupNumber uint16) (amount uint32) { + r0, _, _ := syscall.Syscall(procGetActiveProcessorCount.Addr(), 1, uintptr(groupNumber), 0, 0) + amount = uint32(r0) + return +} + func CMGetDeviceIDListSize(pulLen *uint32, pszFilter *byte, uFlags uint32) (hr error) { r0, _, _ := syscall.Syscall(procCM_Get_Device_ID_List_SizeA.Addr(), 3, uintptr(unsafe.Pointer(pulLen)), uintptr(unsafe.Pointer(pszFilter)), uintptr(uFlags)) if int32(r0) < 0 { @@ -107,17 +226,6 @@ func CMGetDevNodeProperty(dnDevInst uint32, propertyKey *DevPropKey, propertyTyp return } -func LocalAlloc(flags uint32, size int) (ptr uintptr) { - r0, _, _ := syscall.Syscall(procLocalAlloc.Addr(), 2, uintptr(flags), uintptr(size), 0) - ptr = uintptr(r0) - return -} - -func LocalFree(ptr uintptr) { - syscall.Syscall(procLocalFree.Addr(), 1, uintptr(ptr), 0, 0) - return -} - func NtCreateFile(handle *uintptr, accessMask uint32, oa *ObjectAttributes, 
iosb *IOStatusBlock, allocationSize *uint64, fileAttributes uint32, shareAccess uint32, createDisposition uint32, createOptions uint32, eaBuffer *byte, eaLength uint32) (status uint32) { r0, _, _ := syscall.Syscall12(procNtCreateFile.Addr(), 11, uintptr(unsafe.Pointer(handle)), uintptr(accessMask), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(unsafe.Pointer(allocationSize)), uintptr(fileAttributes), uintptr(shareAccess), uintptr(createDisposition), uintptr(createOptions), uintptr(unsafe.Pointer(eaBuffer)), uintptr(eaLength), 0) status = uint32(r0) From 592d4f8fcbff1a786355cdd88f8cf0bd354e2189 Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Thu, 17 Sep 2020 13:32:27 -0700 Subject: [PATCH 14/20] Fix schema memory size field type Signed-off-by: Kathryn Baldauf --- internal/oci/uvm.go | 6 +++--- internal/schema2/memory.go | 2 +- internal/schema2/memory_2.go | 2 +- internal/tools/uvmboot/main.go | 2 +- internal/uvm/create.go | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go index 94658a0444..f05c52262c 100644 --- a/internal/oci/uvm.go +++ b/internal/oci/uvm.go @@ -246,16 +246,16 @@ func ParseAnnotationsStorageBps(ctx context.Context, s *specs.Spec, annotation s // returns `def`. // // Note: The returned value is in `MB`. 
-func ParseAnnotationsMemory(ctx context.Context, s *specs.Spec, annotation string, def int32) int32 { +func ParseAnnotationsMemory(ctx context.Context, s *specs.Spec, annotation string, def uint64) uint64 { if m := parseAnnotationsUint64(ctx, s.Annotations, annotation, 0); m != 0 { - return int32(m) + return m } if s.Windows != nil && s.Windows.Resources != nil && s.Windows.Resources.Memory != nil && s.Windows.Resources.Memory.Limit != nil && *s.Windows.Resources.Memory.Limit > 0 { - return int32(*s.Windows.Resources.Memory.Limit / 1024 / 1024) + return (*s.Windows.Resources.Memory.Limit / 1024 / 1024) } return def } diff --git a/internal/schema2/memory.go b/internal/schema2/memory.go index ec93d004e1..30749c6724 100644 --- a/internal/schema2/memory.go +++ b/internal/schema2/memory.go @@ -10,5 +10,5 @@ package hcsschema type Memory struct { - SizeInMB int32 `json:"SizeInMB,omitempty"` + SizeInMB uint64 `json:"SizeInMB,omitempty"` } diff --git a/internal/schema2/memory_2.go b/internal/schema2/memory_2.go index 95328ec301..71224c75b9 100644 --- a/internal/schema2/memory_2.go +++ b/internal/schema2/memory_2.go @@ -10,7 +10,7 @@ package hcsschema type Memory2 struct { - SizeInMB int32 `json:"SizeInMB,omitempty"` + SizeInMB uint64 `json:"SizeInMB,omitempty"` AllowOvercommit bool `json:"AllowOvercommit,omitempty"` diff --git a/internal/tools/uvmboot/main.go b/internal/tools/uvmboot/main.go index c63083be10..09973cdf61 100644 --- a/internal/tools/uvmboot/main.go +++ b/internal/tools/uvmboot/main.go @@ -101,7 +101,7 @@ func setGlobalOptions(c *cli.Context, options *uvm.Options) { options.ProcessorCount = int32(c.GlobalUint64(cpusArgName)) } if c.GlobalIsSet(memoryArgName) { - options.MemorySizeInMB = int32(c.GlobalUint64(memoryArgName)) + options.MemorySizeInMB = c.GlobalUint64(memoryArgName) } if c.GlobalIsSet(allowOvercommitArgName) { options.AllowOvercommit = c.GlobalBool(allowOvercommitArgName) diff --git a/internal/uvm/create.go b/internal/uvm/create.go index 
8993e5dd38..d7ff969bfb 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -29,7 +29,7 @@ type Options struct { // MemorySizeInMB sets the UVM memory. If `0` will default to platform // default. - MemorySizeInMB int32 + MemorySizeInMB uint64 LowMMIOGapInMB uint64 HighMMIOBaseInMB uint64 @@ -298,7 +298,7 @@ func (uvm *UtilityVM) PhysicallyBacked() bool { return uvm.physicallyBacked } -func (uvm *UtilityVM) normalizeMemorySize(ctx context.Context, requested int32) int32 { +func (uvm *UtilityVM) normalizeMemorySize(ctx context.Context, requested uint64) uint64 { actual := (requested + 1) &^ 1 // align up to an even number if requested != actual { log.G(ctx).WithFields(logrus.Fields{ From 0d64dfa648b10e4fdf952bbe1483d44a88240c49 Mon Sep 17 00:00:00 2001 From: Kathryn Baldauf Date: Tue, 8 Sep 2020 18:48:04 -0700 Subject: [PATCH 15/20] Add calls to modify UVM memory size and tests Signed-off-by: Kathryn Baldauf --- internal/uvm/memory_update.go | 46 ++++++++++++++++++++++ test/functional/test.go | 5 +++ test/functional/uvm_memory_test.go | 63 ++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 internal/uvm/memory_update.go create mode 100644 test/functional/uvm_memory_test.go diff --git a/internal/uvm/memory_update.go b/internal/uvm/memory_update.go new file mode 100644 index 0000000000..67ab63254f --- /dev/null +++ b/internal/uvm/memory_update.go @@ -0,0 +1,46 @@ +package uvm + +import ( + "context" + "fmt" + + hcsschema "github.com/Microsoft/hcsshim/internal/schema2" +) + +const ( + bytesPerPage = 4096 + bytesPerMB = 1024 * 1024 +) + +// UpdateMemory makes a call to the VM's orchestrator to update the VM's size in MB +// Internally, HCS will get the number of pages this corresponds to and attempt to assign +// pages to numa nodes evenly +func (uvm *UtilityVM) UpdateMemory(ctx context.Context, sizeInBytes uint64) error { + requestedSizeInMB := sizeInBytes / bytesPerMB + actual := uvm.normalizeMemorySize(ctx, 
requestedSizeInMB) + req := &hcsschema.ModifySettingRequest{ + ResourcePath: memoryResourcePath, + Settings: actual, + } + return uvm.modify(ctx, req) +} + +// GetAssignedMemoryInBytes returns the amount of assigned memory for the UVM in bytes +func (uvm *UtilityVM) GetAssignedMemoryInBytes(ctx context.Context) (uint64, error) { + props, err := uvm.hcsSystem.PropertiesV2(ctx, hcsschema.PTMemory) + if err != nil { + return 0, err + } + if props.Memory == nil { + return 0, fmt.Errorf("no memory properties returned for system %s", uvm.id) + } + if props.Memory.VirtualMachineMemory == nil { + return 0, fmt.Errorf("no virtual memory properties returned for system %s", uvm.id) + } + pages := props.Memory.VirtualMachineMemory.AssignedMemory + if pages == 0 { + return 0, fmt.Errorf("assigned memory returned should not be 0 for system %s", uvm.id) + } + memInBytes := pages * bytesPerPage + return memInBytes, nil +} diff --git a/test/functional/test.go b/test/functional/test.go index 20a27561ec..d942dd2898 100644 --- a/test/functional/test.go +++ b/test/functional/test.go @@ -13,6 +13,11 @@ import ( "github.com/sirupsen/logrus" ) +const ( + bytesPerMB = 1024 * 1024 + bytesPerPage = 4096 +) + var pauseDurationOnCreateContainerFailure time.Duration func init() { diff --git a/test/functional/uvm_memory_test.go b/test/functional/uvm_memory_test.go new file mode 100644 index 0000000000..bc1ecd5ea9 --- /dev/null +++ b/test/functional/uvm_memory_test.go @@ -0,0 +1,63 @@ +package functional + +import ( + "context" + "os" + "testing" + "time" + + "github.com/Microsoft/hcsshim/internal/uvm" + "github.com/Microsoft/hcsshim/osversion" + testutilities "github.com/Microsoft/hcsshim/test/functional/utilities" +) + +func TestUVMMemoryUpdateLCOW(t *testing.T) { + testutilities.RequiresBuild(t, osversion.RS5) + + ctx, cancel := context.WithTimeout(context.Background(), 40*time.Second) + defer cancel() + + opts := uvm.NewDefaultOptionsLCOW(t.Name(), "") + opts.MemorySizeInMB = 1024 * 2 + u := 
testutilities.CreateLCOWUVMFromOpts(ctx, t, opts) + defer u.Close() + + newMemorySize := uint64(opts.MemorySizeInMB/2) * bytesPerMB + + if err := u.UpdateMemory(ctx, newMemorySize); err != nil { + t.Fatalf("failed to make call to modify UVM memory size in MB with: %v", err) + } + memInBytes, err := u.GetAssignedMemoryInBytes(ctx) + if err != nil { + t.Fatalf("failed to verified assigned UVM memory size") + } + if memInBytes != newMemorySize { + t.Fatalf("incorrect memory size returned, expected %d but got %d", newMemorySize, memInBytes) + } +} + +func TestUVMMemoryUpdateWCOW(t *testing.T) { + testutilities.RequiresBuild(t, osversion.RS5) + + ctx, cancel := context.WithTimeout(context.Background(), 40*time.Second) + defer cancel() + + opts := uvm.NewDefaultOptionsWCOW(t.Name(), "") + opts.MemorySizeInMB = 1024 * 2 + + u, _, uvmScratchDir := testutilities.CreateWCOWUVMFromOptsWithImage(ctx, t, opts, "mcr.microsoft.com/windows/nanoserver:1909") + defer os.RemoveAll(uvmScratchDir) + defer u.Close() + + newMemoryInBytes := uint64(opts.MemorySizeInMB/2) * bytesPerMB + if err := u.UpdateMemory(ctx, newMemoryInBytes); err != nil { + t.Fatalf("failed to make call to modify UVM memory size in MB with: %v", err) + } + memInBytes, err := u.GetAssignedMemoryInBytes(ctx) + if err != nil { + t.Fatalf("failed to verified assigned UVM memory size") + } + if memInBytes != newMemoryInBytes { + t.Fatalf("incorrect memory size returned, expected %d but got %d", newMemoryInBytes, memInBytes) + } +} From dfb862d4211be24a084ac7b5e8a0ff87917aba87 Mon Sep 17 00:00:00 2001 From: Kevin Parsons Date: Thu, 1 Oct 2020 03:09:55 -0700 Subject: [PATCH 16/20] Add option to scale Windows container CPU limit based on UVM CPUs Previously we would always use the CPU limit given without any change. However, there is an issue with kubelet where it calculates that value based on the number of host CPUs, which causes an incorrect value to be computed when the container runs in a UVM. 
We now provide a config option to enable adjusting the CPU limit value based on the UVM's number of processors, so that the resulting amount of CPU will be what the kubelet expects. As this path is a fix to address specific behavior in the kubelet, and there could be other users who don't want this change, we lock the new behavior behind a config option. In the future, if kubelet becomes more aware of VM sandboxes for containers, we could adjust this behavior, or remove it entirely. Signed-off-by: Kevin Parsons --- .../options/next.pb.txt | 7 + .../options/runhcs.pb.go | 391 +++++++----------- .../options/runhcs.proto | 7 +- cmd/containerd-shim-runhcs-v1/task_hcs.go | 23 +- internal/hcsoci/create.go | 4 + internal/hcsoci/hcsdoc_wcow.go | 66 ++- 6 files changed, 248 insertions(+), 250 deletions(-) diff --git a/cmd/containerd-shim-runhcs-v1/options/next.pb.txt b/cmd/containerd-shim-runhcs-v1/options/next.pb.txt index 632290f42f..bbeee50f43 100755 --- a/cmd/containerd-shim-runhcs-v1/options/next.pb.txt +++ b/cmd/containerd-shim-runhcs-v1/options/next.pb.txt @@ -108,6 +108,13 @@ file { type: TYPE_STRING json_name: "GPUVHDPath" } + field { + name: "scale_cpu_limits_to_sandbox" + number: 11 + label: LABEL_OPTIONAL + type: TYPE_BOOL + json_name: "scaleCpuLimitsToSandbox" + } enum_type { name: "DebugType" value { diff --git a/cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go b/cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go index a9d10fd429..005ed7c5a3 100644 --- a/cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go +++ b/cmd/containerd-shim-runhcs-v1/options/runhcs.pb.go @@ -1,19 +1,33 @@ // Code generated by protoc-gen-gogo. DO NOT EDIT. // source: github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options/runhcs.proto +/* + Package options is a generated protocol buffer package. 
+ + It is generated from these files: + github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options/runhcs.proto + + It has these top-level messages: + Options + ProcessDetails +*/ package options -import ( - fmt "fmt" - proto "github.com/gogo/protobuf/proto" - _ "github.com/gogo/protobuf/types" - github_com_gogo_protobuf_types "github.com/gogo/protobuf/types" - io "io" - math "math" - reflect "reflect" - strings "strings" - time "time" -) +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" + +// skipping weak import gogoproto "github.com/gogo/protobuf/gogoproto" +import _ "github.com/gogo/protobuf/types" + +import time "time" + +import types "github.com/gogo/protobuf/types" + +import strings "strings" +import reflect "reflect" + +import io "io" // Reference imports to suppress errors if they are not otherwise used. var _ = proto.Marshal @@ -40,7 +54,6 @@ var Options_DebugType_name = map[int32]string{ 1: "FILE", 2: "ETW", } - var Options_DebugType_value = map[string]int32{ "NPIPE": 0, "FILE": 1, @@ -50,10 +63,7 @@ var Options_DebugType_value = map[string]int32{ func (x Options_DebugType) String() string { return proto.EnumName(Options_DebugType_name, int32(x)) } - -func (Options_DebugType) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_b643df6839c75082, []int{0, 0} -} +func (Options_DebugType) EnumDescriptor() ([]byte, []int) { return fileDescriptorRunhcs, []int{0, 0} } type Options_SandboxIsolation int32 @@ -66,7 +76,6 @@ var Options_SandboxIsolation_name = map[int32]string{ 0: "PROCESS", 1: "HYPERVISOR", } - var Options_SandboxIsolation_value = map[string]int32{ "PROCESS": 0, "HYPERVISOR": 1, @@ -75,9 +84,8 @@ var Options_SandboxIsolation_value = map[string]int32{ func (x Options_SandboxIsolation) String() string { return proto.EnumName(Options_SandboxIsolation_name, int32(x)) } - func (Options_SandboxIsolation) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_b643df6839c75082, []int{0, 1} + return 
fileDescriptorRunhcs, []int{0, 1} } // Options are the set of customizations that can be passed at Create time. @@ -115,49 +123,22 @@ type Options struct { VmMemorySizeInMb int32 `protobuf:"varint,9,opt,name=vm_memory_size_in_mb,json=vmMemorySizeInMb,proto3" json:"vm_memory_size_in_mb,omitempty"` // GPUVHDPath is the path to the gpu vhd to add to the uvm // when a container requests a gpu - GPUVHDPath string `protobuf:"bytes,10,opt,name=GPUVHDPath,proto3" json:"GPUVHDPath,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` + GPUVHDPath string `protobuf:"bytes,10,opt,name=GPUVHDPath,proto3" json:"GPUVHDPath,omitempty"` + // scale_cpu_limits_to_sandbox indicates that container CPU limits should + // be adjusted to account for the difference in number of cores between the + // host and UVM. + ScaleCpuLimitsToSandbox bool `protobuf:"varint,11,opt,name=scale_cpu_limits_to_sandbox,json=scaleCpuLimitsToSandbox,proto3" json:"scale_cpu_limits_to_sandbox,omitempty"` } -func (m *Options) Reset() { *m = Options{} } -func (*Options) ProtoMessage() {} -func (*Options) Descriptor() ([]byte, []int) { - return fileDescriptor_b643df6839c75082, []int{0} -} -func (m *Options) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *Options) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_Options.Marshal(b, m, deterministic) - } else { - b = b[:cap(b)] - n, err := m.MarshalTo(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *Options) XXX_Merge(src proto.Message) { - xxx_messageInfo_Options.Merge(m, src) -} -func (m *Options) XXX_Size() int { - return m.Size() -} -func (m *Options) XXX_DiscardUnknown() { - xxx_messageInfo_Options.DiscardUnknown(m) -} - -var xxx_messageInfo_Options proto.InternalMessageInfo +func (m *Options) Reset() { *m = Options{} } +func (*Options) ProtoMessage() {} +func (*Options) Descriptor() 
([]byte, []int) { return fileDescriptorRunhcs, []int{0} } // ProcessDetails contains additional information about a process. This is the additional // info returned in the Pids query. type ProcessDetails struct { ImageName string `protobuf:"bytes,1,opt,name=image_name,json=imageName,proto3" json:"image_name,omitempty"` - CreatedAt time.Time `protobuf:"bytes,2,opt,name=created_at,json=createdAt,proto3,stdtime" json:"created_at"` + CreatedAt time.Time `protobuf:"bytes,2,opt,name=created_at,json=createdAt,stdtime" json:"created_at"` KernelTime_100Ns uint64 `protobuf:"varint,3,opt,name=kernel_time_100_ns,json=kernelTime100Ns,proto3" json:"kernel_time_100_ns,omitempty"` MemoryCommitBytes uint64 `protobuf:"varint,4,opt,name=memory_commit_bytes,json=memoryCommitBytes,proto3" json:"memory_commit_bytes,omitempty"` MemoryWorkingSetPrivateBytes uint64 `protobuf:"varint,5,opt,name=memory_working_set_private_bytes,json=memoryWorkingSetPrivateBytes,proto3" json:"memory_working_set_private_bytes,omitempty"` @@ -165,107 +146,18 @@ type ProcessDetails struct { ProcessID uint32 `protobuf:"varint,7,opt,name=process_id,json=processId,proto3" json:"process_id,omitempty"` UserTime_100Ns uint64 `protobuf:"varint,8,opt,name=user_time_100_ns,json=userTime100Ns,proto3" json:"user_time_100_ns,omitempty"` ExecID string `protobuf:"bytes,9,opt,name=exec_id,json=execId,proto3" json:"exec_id,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` -} - -func (m *ProcessDetails) Reset() { *m = ProcessDetails{} } -func (*ProcessDetails) ProtoMessage() {} -func (*ProcessDetails) Descriptor() ([]byte, []int) { - return fileDescriptor_b643df6839c75082, []int{1} -} -func (m *ProcessDetails) XXX_Unmarshal(b []byte) error { - return m.Unmarshal(b) -} -func (m *ProcessDetails) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - if deterministic { - return xxx_messageInfo_ProcessDetails.Marshal(b, m, deterministic) - } else { - b 
= b[:cap(b)] - n, err := m.MarshalTo(b) - if err != nil { - return nil, err - } - return b[:n], nil - } -} -func (m *ProcessDetails) XXX_Merge(src proto.Message) { - xxx_messageInfo_ProcessDetails.Merge(m, src) -} -func (m *ProcessDetails) XXX_Size() int { - return m.Size() -} -func (m *ProcessDetails) XXX_DiscardUnknown() { - xxx_messageInfo_ProcessDetails.DiscardUnknown(m) } -var xxx_messageInfo_ProcessDetails proto.InternalMessageInfo +func (m *ProcessDetails) Reset() { *m = ProcessDetails{} } +func (*ProcessDetails) ProtoMessage() {} +func (*ProcessDetails) Descriptor() ([]byte, []int) { return fileDescriptorRunhcs, []int{1} } func init() { - proto.RegisterEnum("containerd.runhcs.v1.Options_DebugType", Options_DebugType_name, Options_DebugType_value) - proto.RegisterEnum("containerd.runhcs.v1.Options_SandboxIsolation", Options_SandboxIsolation_name, Options_SandboxIsolation_value) proto.RegisterType((*Options)(nil), "containerd.runhcs.v1.Options") proto.RegisterType((*ProcessDetails)(nil), "containerd.runhcs.v1.ProcessDetails") + proto.RegisterEnum("containerd.runhcs.v1.Options_DebugType", Options_DebugType_name, Options_DebugType_value) + proto.RegisterEnum("containerd.runhcs.v1.Options_SandboxIsolation", Options_SandboxIsolation_name, Options_SandboxIsolation_value) } - -func init() { - proto.RegisterFile("github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options/runhcs.proto", fileDescriptor_b643df6839c75082) -} - -var fileDescriptor_b643df6839c75082 = []byte{ - // 775 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x54, 0xcd, 0x6f, 0xdb, 0x36, - 0x1c, 0xb5, 0x1a, 0x7f, 0xe9, 0xd7, 0x25, 0x75, 0x38, 0x1f, 0x84, 0x6c, 0xb3, 0x8d, 0xf4, 0xd0, - 0x14, 0x6b, 0xa4, 0xa4, 0x3b, 0xee, 0x34, 0xc7, 0xce, 0xaa, 0x61, 0x49, 0x04, 0x39, 0x6b, 0xf7, - 0x71, 0x20, 0xf4, 0xc1, 0xc8, 0x44, 0x4d, 0x51, 0x20, 0x69, 0x2f, 0xee, 0x69, 0x7f, 0xc2, 0xfe, - 0xa8, 0x1d, 0x72, 0xdc, 0x71, 0xc0, 0x80, 0x6c, 0xf5, 
0x5f, 0x32, 0x90, 0x92, 0xd2, 0xad, 0x08, - 0x76, 0xe9, 0xc9, 0xd4, 0x7b, 0x8f, 0xef, 0xf7, 0xc1, 0x07, 0xc3, 0x45, 0x46, 0xd5, 0x7c, 0x19, - 0xbb, 0x09, 0x67, 0xde, 0x19, 0x4d, 0x04, 0x97, 0xfc, 0x4a, 0x79, 0xf3, 0x44, 0xca, 0x39, 0x65, - 0x5e, 0xc2, 0x52, 0x2f, 0xe1, 0xb9, 0x8a, 0x68, 0x4e, 0x44, 0x7a, 0xa8, 0xb1, 0x43, 0xb1, 0xcc, - 0xe7, 0x89, 0x3c, 0x5c, 0x1d, 0x7b, 0xbc, 0x50, 0x94, 0xe7, 0xd2, 0x2b, 0x11, 0xb7, 0x10, 0x5c, - 0x71, 0xd4, 0x7f, 0xa7, 0x77, 0x2b, 0x62, 0x75, 0xbc, 0xd7, 0xcf, 0x78, 0xc6, 0x8d, 0xc0, 0xd3, - 0xa7, 0x52, 0xbb, 0x37, 0xcc, 0x38, 0xcf, 0x16, 0xc4, 0x33, 0x5f, 0xf1, 0xf2, 0xca, 0x53, 0x94, - 0x11, 0xa9, 0x22, 0x56, 0x94, 0x82, 0xfd, 0xdf, 0x9a, 0xd0, 0xb9, 0x28, 0xab, 0xa0, 0x3e, 0xb4, - 0x52, 0x12, 0x2f, 0x33, 0xc7, 0x1a, 0x59, 0x07, 0xdd, 0xb0, 0xfc, 0x40, 0xa7, 0x00, 0xe6, 0x80, - 0xd5, 0xba, 0x20, 0xce, 0x83, 0x91, 0x75, 0xb0, 0xf3, 0xfc, 0x89, 0x7b, 0x5f, 0x0f, 0x6e, 0x65, - 0xe4, 0x4e, 0xb4, 0xfe, 0x72, 0x5d, 0x90, 0xd0, 0x4e, 0xeb, 0x23, 0x7a, 0x0c, 0xdb, 0x82, 0x64, - 0x54, 0x2a, 0xb1, 0xc6, 0x82, 0x73, 0xe5, 0x6c, 0x8d, 0xac, 0x03, 0x3b, 0xfc, 0xa8, 0x06, 0x43, - 0xce, 0x95, 0x16, 0xc9, 0x28, 0x4f, 0x63, 0x7e, 0x8d, 0x29, 0x8b, 0x32, 0xe2, 0x34, 0x4b, 0x51, - 0x05, 0xfa, 0x1a, 0x43, 0x4f, 0xa1, 0x57, 0x8b, 0x8a, 0x45, 0xa4, 0xae, 0xb8, 0x60, 0x4e, 0xcb, - 0xe8, 0x1e, 0x55, 0x78, 0x50, 0xc1, 0xe8, 0x27, 0xd8, 0xbd, 0xf3, 0x93, 0x7c, 0x11, 0xe9, 0xfe, - 0x9c, 0xb6, 0x99, 0xc1, 0xfd, 0xff, 0x19, 0x66, 0x55, 0xc5, 0xfa, 0x56, 0x58, 0xd7, 0xbc, 0x43, - 0x90, 0x07, 0xfd, 0x98, 0x73, 0x85, 0xaf, 0xe8, 0x82, 0x48, 0x33, 0x13, 0x2e, 0x22, 0x35, 0x77, - 0x3a, 0xa6, 0x97, 0x5d, 0xcd, 0x9d, 0x6a, 0x4a, 0x4f, 0x16, 0x44, 0x6a, 0x8e, 0x9e, 0x01, 0x5a, - 0x31, 0x5c, 0x08, 0x9e, 0x10, 0x29, 0xb9, 0xc0, 0x09, 0x5f, 0xe6, 0xca, 0xe9, 0x8e, 0xac, 0x83, - 0x56, 0xd8, 0x5b, 0xb1, 0xa0, 0x26, 0x4e, 0x34, 0x8e, 0x5c, 0xe8, 0xaf, 0x18, 0x66, 0x84, 0x71, - 0xb1, 0xc6, 0x92, 0xbe, 0x21, 0x98, 0xe6, 0x98, 0xc5, 0x8e, 0x5d, 0xeb, 0xcf, 0x0c, 0x35, 
0xa3, - 0x6f, 0x88, 0x9f, 0x9f, 0xc5, 0x68, 0x00, 0xf0, 0x75, 0xf0, 0xdd, 0xcb, 0x17, 0x13, 0x5d, 0xcb, - 0x01, 0xd3, 0xc4, 0xbf, 0x90, 0xfd, 0xa7, 0x60, 0xdf, 0x3d, 0x0c, 0xb2, 0xa1, 0x75, 0x1e, 0xf8, - 0xc1, 0xb4, 0xd7, 0x40, 0x5d, 0x68, 0x9e, 0xfa, 0xdf, 0x4e, 0x7b, 0x16, 0xea, 0xc0, 0xd6, 0xf4, - 0xf2, 0x55, 0xef, 0xc1, 0xbe, 0x07, 0xbd, 0xf7, 0xe7, 0x47, 0x0f, 0xa1, 0x13, 0x84, 0x17, 0x27, - 0xd3, 0xd9, 0xac, 0xd7, 0x40, 0x3b, 0x00, 0x2f, 0x7e, 0x08, 0xa6, 0xe1, 0x4b, 0x7f, 0x76, 0x11, - 0xf6, 0xac, 0xfd, 0x3f, 0xb7, 0x60, 0xa7, 0x6a, 0x7f, 0x42, 0x54, 0x44, 0x17, 0x12, 0x7d, 0x06, - 0x60, 0x9e, 0x10, 0xe7, 0x11, 0x23, 0x26, 0x52, 0x76, 0x68, 0x1b, 0xe4, 0x3c, 0x62, 0x04, 0x9d, - 0x00, 0x24, 0x82, 0x44, 0x8a, 0xa4, 0x38, 0x52, 0x26, 0x56, 0x0f, 0x9f, 0xef, 0xb9, 0x65, 0x5c, - 0xdd, 0x3a, 0xae, 0xee, 0x65, 0x1d, 0xd7, 0x71, 0xf7, 0xe6, 0x76, 0xd8, 0xf8, 0xf5, 0xaf, 0xa1, - 0x15, 0xda, 0xd5, 0xbd, 0xaf, 0x14, 0xfa, 0x1c, 0xd0, 0x6b, 0x22, 0x72, 0xb2, 0xc0, 0x3a, 0xd7, - 0xf8, 0xf8, 0xe8, 0x08, 0xe7, 0xd2, 0x04, 0xab, 0x19, 0x3e, 0x2a, 0x19, 0xed, 0x70, 0x7c, 0x74, - 0x74, 0x2e, 0x91, 0x0b, 0x1f, 0x57, 0xcb, 0x4c, 0x38, 0x63, 0x54, 0xe1, 0x78, 0xad, 0x88, 0x34, - 0x09, 0x6b, 0x86, 0xbb, 0x25, 0x75, 0x62, 0x98, 0xb1, 0x26, 0xd0, 0x29, 0x8c, 0x2a, 0xfd, 0xcf, - 0x5c, 0xbc, 0xa6, 0x79, 0x86, 0x25, 0x51, 0xb8, 0x10, 0x74, 0x15, 0x29, 0x52, 0x5d, 0x6e, 0x99, - 0xcb, 0x9f, 0x96, 0xba, 0x57, 0xa5, 0x6c, 0x46, 0x54, 0x50, 0x8a, 0x4a, 0x9f, 0x09, 0x0c, 0xef, - 0xf1, 0x91, 0xf3, 0x48, 0x90, 0xb4, 0xb2, 0x69, 0x1b, 0x9b, 0x4f, 0xde, 0xb7, 0x99, 0x19, 0x4d, - 0xe9, 0xf2, 0x0c, 0xa0, 0x0a, 0x0e, 0xa6, 0xa9, 0x89, 0xd8, 0xf6, 0x78, 0x7b, 0x73, 0x3b, 0xb4, - 0xab, 0xb5, 0xfb, 0x93, 0xd0, 0xae, 0x04, 0x7e, 0x8a, 0x9e, 0x40, 0x6f, 0x29, 0x89, 0xf8, 0xcf, - 0x5a, 0xba, 0xa6, 0xc8, 0xb6, 0xc6, 0xdf, 0x2d, 0xe5, 0x31, 0x74, 0xc8, 0x35, 0x49, 0xb4, 0xa7, - 0xce, 0x95, 0x3d, 0x86, 0xcd, 0xed, 0xb0, 0x3d, 0xbd, 0x26, 0x89, 0x3f, 0x09, 0xdb, 0x9a, 0xf2, - 0xd3, 0x71, 0x7a, 0xf3, 0x76, 
0xd0, 0xf8, 0xe3, 0xed, 0xa0, 0xf1, 0xcb, 0x66, 0x60, 0xdd, 0x6c, - 0x06, 0xd6, 0xef, 0x9b, 0x81, 0xf5, 0xf7, 0x66, 0x60, 0xfd, 0xf8, 0xcd, 0x87, 0xff, 0xb9, 0x7d, - 0x59, 0xfd, 0x7e, 0xdf, 0x88, 0xdb, 0xe6, 0xdd, 0xbf, 0xf8, 0x27, 0x00, 0x00, 0xff, 0xff, 0x75, - 0x1f, 0x14, 0xf4, 0x33, 0x05, 0x00, 0x00, -} - func (m *Options) Marshal() (dAtA []byte, err error) { size := m.Size() dAtA = make([]byte, size) @@ -341,8 +233,15 @@ func (m *Options) MarshalTo(dAtA []byte) (int, error) { i = encodeVarintRunhcs(dAtA, i, uint64(len(m.GPUVHDPath))) i += copy(dAtA[i:], m.GPUVHDPath) } - if m.XXX_unrecognized != nil { - i += copy(dAtA[i:], m.XXX_unrecognized) + if m.ScaleCpuLimitsToSandbox { + dAtA[i] = 0x58 + i++ + if m.ScaleCpuLimitsToSandbox { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ } return i, nil } @@ -370,8 +269,8 @@ func (m *ProcessDetails) MarshalTo(dAtA []byte) (int, error) { } dAtA[i] = 0x12 i++ - i = encodeVarintRunhcs(dAtA, i, uint64(github_com_gogo_protobuf_types.SizeOfStdTime(m.CreatedAt))) - n1, err := github_com_gogo_protobuf_types.StdTimeMarshalTo(m.CreatedAt, dAtA[i:]) + i = encodeVarintRunhcs(dAtA, i, uint64(types.SizeOfStdTime(m.CreatedAt))) + n1, err := types.StdTimeMarshalTo(m.CreatedAt, dAtA[i:]) if err != nil { return 0, err } @@ -412,9 +311,6 @@ func (m *ProcessDetails) MarshalTo(dAtA []byte) (int, error) { i = encodeVarintRunhcs(dAtA, i, uint64(len(m.ExecID))) i += copy(dAtA[i:], m.ExecID) } - if m.XXX_unrecognized != nil { - i += copy(dAtA[i:], m.XXX_unrecognized) - } return i, nil } @@ -428,9 +324,6 @@ func encodeVarintRunhcs(dAtA []byte, offset int, v uint64) int { return offset + 1 } func (m *Options) Size() (n int) { - if m == nil { - return 0 - } var l int _ = l if m.Debug { @@ -468,23 +361,20 @@ func (m *Options) Size() (n int) { if l > 0 { n += 1 + l + sovRunhcs(uint64(l)) } - if m.XXX_unrecognized != nil { - n += len(m.XXX_unrecognized) + if m.ScaleCpuLimitsToSandbox { + n += 2 } return n } func (m *ProcessDetails) Size() (n int) { - 
if m == nil { - return 0 - } var l int _ = l l = len(m.ImageName) if l > 0 { n += 1 + l + sovRunhcs(uint64(l)) } - l = github_com_gogo_protobuf_types.SizeOfStdTime(m.CreatedAt) + l = types.SizeOfStdTime(m.CreatedAt) n += 1 + l + sovRunhcs(uint64(l)) if m.KernelTime_100Ns != 0 { n += 1 + sovRunhcs(uint64(m.KernelTime_100Ns)) @@ -508,9 +398,6 @@ func (m *ProcessDetails) Size() (n int) { if l > 0 { n += 1 + l + sovRunhcs(uint64(l)) } - if m.XXX_unrecognized != nil { - n += len(m.XXX_unrecognized) - } return n } @@ -542,7 +429,7 @@ func (this *Options) String() string { `VmProcessorCount:` + fmt.Sprintf("%v", this.VmProcessorCount) + `,`, `VmMemorySizeInMb:` + fmt.Sprintf("%v", this.VmMemorySizeInMb) + `,`, `GPUVHDPath:` + fmt.Sprintf("%v", this.GPUVHDPath) + `,`, - `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `ScaleCpuLimitsToSandbox:` + fmt.Sprintf("%v", this.ScaleCpuLimitsToSandbox) + `,`, `}`, }, "") return s @@ -553,7 +440,7 @@ func (this *ProcessDetails) String() string { } s := strings.Join([]string{`&ProcessDetails{`, `ImageName:` + fmt.Sprintf("%v", this.ImageName) + `,`, - `CreatedAt:` + strings.Replace(strings.Replace(this.CreatedAt.String(), "Timestamp", "types.Timestamp", 1), `&`, ``, 1) + `,`, + `CreatedAt:` + strings.Replace(strings.Replace(this.CreatedAt.String(), "Timestamp", "google_protobuf1.Timestamp", 1), `&`, ``, 1) + `,`, `KernelTime_100Ns:` + fmt.Sprintf("%v", this.KernelTime_100Ns) + `,`, `MemoryCommitBytes:` + fmt.Sprintf("%v", this.MemoryCommitBytes) + `,`, `MemoryWorkingSetPrivateBytes:` + fmt.Sprintf("%v", this.MemoryWorkingSetPrivateBytes) + `,`, @@ -561,7 +448,6 @@ func (this *ProcessDetails) String() string { `ProcessID:` + fmt.Sprintf("%v", this.ProcessID) + `,`, `UserTime_100Ns:` + fmt.Sprintf("%v", this.UserTime_100Ns) + `,`, `ExecID:` + fmt.Sprintf("%v", this.ExecID) + `,`, - `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, `}`, }, "") return s @@ -589,7 +475,7 @@ func (m *Options) 
Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - wire |= uint64(b&0x7F) << shift + wire |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -617,7 +503,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - v |= int(b&0x7F) << shift + v |= (int(b) & 0x7F) << shift if b < 0x80 { break } @@ -637,7 +523,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.DebugType |= Options_DebugType(b&0x7F) << shift + m.DebugType |= (Options_DebugType(b) & 0x7F) << shift if b < 0x80 { break } @@ -656,7 +542,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + stringLen |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -666,9 +552,6 @@ func (m *Options) Unmarshal(dAtA []byte) error { return ErrInvalidLengthRunhcs } postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthRunhcs - } if postIndex > l { return io.ErrUnexpectedEOF } @@ -688,7 +571,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + stringLen |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -698,9 +581,6 @@ func (m *Options) Unmarshal(dAtA []byte) error { return ErrInvalidLengthRunhcs } postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthRunhcs - } if postIndex > l { return io.ErrUnexpectedEOF } @@ -720,7 +600,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + stringLen |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -730,9 +610,6 @@ func (m *Options) Unmarshal(dAtA []byte) error { return ErrInvalidLengthRunhcs } postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthRunhcs - } if postIndex > l { return io.ErrUnexpectedEOF } @@ -752,7 +629,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.SandboxIsolation |= 
Options_SandboxIsolation(b&0x7F) << shift + m.SandboxIsolation |= (Options_SandboxIsolation(b) & 0x7F) << shift if b < 0x80 { break } @@ -771,7 +648,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + stringLen |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -781,9 +658,6 @@ func (m *Options) Unmarshal(dAtA []byte) error { return ErrInvalidLengthRunhcs } postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthRunhcs - } if postIndex > l { return io.ErrUnexpectedEOF } @@ -803,7 +677,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.VmProcessorCount |= int32(b&0x7F) << shift + m.VmProcessorCount |= (int32(b) & 0x7F) << shift if b < 0x80 { break } @@ -822,7 +696,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.VmMemorySizeInMb |= int32(b&0x7F) << shift + m.VmMemorySizeInMb |= (int32(b) & 0x7F) << shift if b < 0x80 { break } @@ -841,7 +715,7 @@ func (m *Options) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + stringLen |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -851,14 +725,31 @@ func (m *Options) Unmarshal(dAtA []byte) error { return ErrInvalidLengthRunhcs } postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthRunhcs - } if postIndex > l { return io.ErrUnexpectedEOF } m.GPUVHDPath = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex + case 11: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ScaleCpuLimitsToSandbox", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRunhcs + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.ScaleCpuLimitsToSandbox = bool(v != 0) default: iNdEx = preIndex skippy, err := skipRunhcs(dAtA[iNdEx:]) 
@@ -868,13 +759,9 @@ func (m *Options) Unmarshal(dAtA []byte) error { if skippy < 0 { return ErrInvalidLengthRunhcs } - if (iNdEx + skippy) < 0 { - return ErrInvalidLengthRunhcs - } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF } - m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) iNdEx += skippy } } @@ -899,7 +786,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - wire |= uint64(b&0x7F) << shift + wire |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -927,7 +814,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + stringLen |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -937,9 +824,6 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { return ErrInvalidLengthRunhcs } postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthRunhcs - } if postIndex > l { return io.ErrUnexpectedEOF } @@ -959,7 +843,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - msglen |= int(b&0x7F) << shift + msglen |= (int(b) & 0x7F) << shift if b < 0x80 { break } @@ -968,13 +852,10 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { return ErrInvalidLengthRunhcs } postIndex := iNdEx + msglen - if postIndex < 0 { - return ErrInvalidLengthRunhcs - } if postIndex > l { return io.ErrUnexpectedEOF } - if err := github_com_gogo_protobuf_types.StdTimeUnmarshal(&m.CreatedAt, dAtA[iNdEx:postIndex]); err != nil { + if err := types.StdTimeUnmarshal(&m.CreatedAt, dAtA[iNdEx:postIndex]); err != nil { return err } iNdEx = postIndex @@ -992,7 +873,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.KernelTime_100Ns |= uint64(b&0x7F) << shift + m.KernelTime_100Ns |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -1011,7 +892,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - 
m.MemoryCommitBytes |= uint64(b&0x7F) << shift + m.MemoryCommitBytes |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -1030,7 +911,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.MemoryWorkingSetPrivateBytes |= uint64(b&0x7F) << shift + m.MemoryWorkingSetPrivateBytes |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -1049,7 +930,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.MemoryWorkingSetSharedBytes |= uint64(b&0x7F) << shift + m.MemoryWorkingSetSharedBytes |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -1068,7 +949,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.ProcessID |= uint32(b&0x7F) << shift + m.ProcessID |= (uint32(b) & 0x7F) << shift if b < 0x80 { break } @@ -1087,7 +968,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - m.UserTime_100Ns |= uint64(b&0x7F) << shift + m.UserTime_100Ns |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -1106,7 +987,7 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - stringLen |= uint64(b&0x7F) << shift + stringLen |= (uint64(b) & 0x7F) << shift if b < 0x80 { break } @@ -1116,9 +997,6 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { return ErrInvalidLengthRunhcs } postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthRunhcs - } if postIndex > l { return io.ErrUnexpectedEOF } @@ -1133,13 +1011,9 @@ func (m *ProcessDetails) Unmarshal(dAtA []byte) error { if skippy < 0 { return ErrInvalidLengthRunhcs } - if (iNdEx + skippy) < 0 { - return ErrInvalidLengthRunhcs - } if (iNdEx + skippy) > l { return io.ErrUnexpectedEOF } - m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) 
iNdEx += skippy } } @@ -1203,11 +1077,8 @@ func skipRunhcs(dAtA []byte) (n int, err error) { break } } - if length < 0 { - return 0, ErrInvalidLengthRunhcs - } iNdEx += length - if iNdEx < 0 { + if length < 0 { return 0, ErrInvalidLengthRunhcs } return iNdEx, nil @@ -1238,9 +1109,6 @@ func skipRunhcs(dAtA []byte) (n int, err error) { return 0, err } iNdEx = start + next - if iNdEx < 0 { - return 0, ErrInvalidLengthRunhcs - } } return iNdEx, nil case 4: @@ -1259,3 +1127,62 @@ var ( ErrInvalidLengthRunhcs = fmt.Errorf("proto: negative length found during unmarshaling") ErrIntOverflowRunhcs = fmt.Errorf("proto: integer overflow") ) + +func init() { + proto.RegisterFile("github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options/runhcs.proto", fileDescriptorRunhcs) +} + +var fileDescriptorRunhcs = []byte{ + // 813 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x54, 0x4d, 0x6f, 0xdb, 0x36, + 0x18, 0xb6, 0x9a, 0x38, 0xb6, 0xde, 0x2c, 0xa9, 0xc3, 0x19, 0x98, 0x90, 0x6e, 0xb6, 0x91, 0x1e, + 0x9a, 0x62, 0x8d, 0x94, 0x74, 0xc7, 0xed, 0x32, 0x7f, 0x64, 0xd5, 0xd0, 0x24, 0x82, 0x9c, 0xb5, + 0xfb, 0x38, 0x10, 0xfa, 0x60, 0x64, 0xa2, 0xa6, 0x28, 0x88, 0xb4, 0x17, 0xf7, 0x34, 0xec, 0x17, + 0xec, 0x67, 0xe5, 0xb8, 0xe3, 0x80, 0x01, 0xd9, 0xea, 0x5f, 0x32, 0x90, 0xa2, 0xd2, 0xad, 0x28, + 0x76, 0xe9, 0xc9, 0xe4, 0xf3, 0x3c, 0x7c, 0x5e, 0xbe, 0x2f, 0x1f, 0x0b, 0x2e, 0x32, 0x2a, 0x67, + 0x8b, 0xd8, 0x4d, 0x38, 0xf3, 0xce, 0x68, 0x52, 0x72, 0xc1, 0xaf, 0xa4, 0x37, 0x4b, 0x84, 0x98, + 0x51, 0xe6, 0x25, 0x2c, 0xf5, 0x12, 0x9e, 0xcb, 0x88, 0xe6, 0xa4, 0x4c, 0x8f, 0x14, 0x76, 0x54, + 0x2e, 0xf2, 0x59, 0x22, 0x8e, 0x96, 0x27, 0x1e, 0x2f, 0x24, 0xe5, 0xb9, 0xf0, 0x2a, 0xc4, 0x2d, + 0x4a, 0x2e, 0x39, 0xea, 0xbe, 0xd5, 0xbb, 0x86, 0x58, 0x9e, 0xec, 0x77, 0x33, 0x9e, 0x71, 0x2d, + 0xf0, 0xd4, 0xaa, 0xd2, 0xee, 0xf7, 0x33, 0xce, 0xb3, 0x39, 0xf1, 0xf4, 0x2e, 0x5e, 0x5c, 0x79, + 0x92, 0x32, 0x22, 0x64, 0xc4, 0x8a, 0x4a, 0x70, 
0xf0, 0x6b, 0x13, 0x5a, 0x17, 0x55, 0x15, 0xd4, + 0x85, 0x66, 0x4a, 0xe2, 0x45, 0xe6, 0x58, 0x03, 0xeb, 0xb0, 0x1d, 0x56, 0x1b, 0x74, 0x0a, 0xa0, + 0x17, 0x58, 0xae, 0x0a, 0xe2, 0xdc, 0x1b, 0x58, 0x87, 0xbb, 0x4f, 0x1f, 0xb9, 0xef, 0xbb, 0x83, + 0x6b, 0x8c, 0xdc, 0xb1, 0xd2, 0x5f, 0xae, 0x0a, 0x12, 0xda, 0x69, 0xbd, 0x44, 0x0f, 0x61, 0xa7, + 0x24, 0x19, 0x15, 0xb2, 0x5c, 0xe1, 0x92, 0x73, 0xe9, 0x6c, 0x0c, 0xac, 0x43, 0x3b, 0xfc, 0xa8, + 0x06, 0x43, 0xce, 0xa5, 0x12, 0x89, 0x28, 0x4f, 0x63, 0x7e, 0x8d, 0x29, 0x8b, 0x32, 0xe2, 0x6c, + 0x56, 0x22, 0x03, 0xfa, 0x0a, 0x43, 0x8f, 0xa1, 0x53, 0x8b, 0x8a, 0x79, 0x24, 0xaf, 0x78, 0xc9, + 0x9c, 0xa6, 0xd6, 0xdd, 0x37, 0x78, 0x60, 0x60, 0xf4, 0x13, 0xec, 0xdd, 0xf9, 0x09, 0x3e, 0x8f, + 0xd4, 0xfd, 0x9c, 0x2d, 0xdd, 0x83, 0xfb, 0xff, 0x3d, 0x4c, 0x4d, 0xc5, 0xfa, 0x54, 0x58, 0xd7, + 0xbc, 0x43, 0x90, 0x07, 0xdd, 0x98, 0x73, 0x89, 0xaf, 0xe8, 0x9c, 0x08, 0xdd, 0x13, 0x2e, 0x22, + 0x39, 0x73, 0x5a, 0xfa, 0x2e, 0x7b, 0x8a, 0x3b, 0x55, 0x94, 0xea, 0x2c, 0x88, 0xe4, 0x0c, 0x3d, + 0x01, 0xb4, 0x64, 0xb8, 0x28, 0x79, 0x42, 0x84, 0xe0, 0x25, 0x4e, 0xf8, 0x22, 0x97, 0x4e, 0x7b, + 0x60, 0x1d, 0x36, 0xc3, 0xce, 0x92, 0x05, 0x35, 0x31, 0x52, 0x38, 0x72, 0xa1, 0xbb, 0x64, 0x98, + 0x11, 0xc6, 0xcb, 0x15, 0x16, 0xf4, 0x35, 0xc1, 0x34, 0xc7, 0x2c, 0x76, 0xec, 0x5a, 0x7f, 0xa6, + 0xa9, 0x29, 0x7d, 0x4d, 0xfc, 0xfc, 0x2c, 0x46, 0x3d, 0x80, 0x6f, 0x82, 0xef, 0x5e, 0x3c, 0x1b, + 0xab, 0x5a, 0x0e, 0xe8, 0x4b, 0xfc, 0x0b, 0x41, 0x5f, 0xc1, 0x03, 0x91, 0x44, 0x73, 0x82, 0x93, + 0x62, 0x81, 0xe7, 0x94, 0x51, 0x29, 0xb0, 0xe4, 0xd8, 0xb4, 0xe5, 0x6c, 0xeb, 0x47, 0xff, 0x44, + 0x4b, 0x46, 0xc5, 0xe2, 0xb9, 0x16, 0x5c, 0x72, 0x33, 0x87, 0x83, 0xc7, 0x60, 0xdf, 0x3d, 0x2b, + 0xb2, 0xa1, 0x79, 0x1e, 0xf8, 0xc1, 0xa4, 0xd3, 0x40, 0x6d, 0xd8, 0x3c, 0xf5, 0x9f, 0x4f, 0x3a, + 0x16, 0x6a, 0xc1, 0xc6, 0xe4, 0xf2, 0x65, 0xe7, 0xde, 0x81, 0x07, 0x9d, 0x77, 0xa7, 0x87, 0xb6, + 0xa1, 0x15, 0x84, 0x17, 0xa3, 0xc9, 0x74, 0xda, 0x69, 0xa0, 0x5d, 0x80, 0x67, 0x3f, 
0x04, 0x93, + 0xf0, 0x85, 0x3f, 0xbd, 0x08, 0x3b, 0xd6, 0xc1, 0x9f, 0x1b, 0xb0, 0x6b, 0x9a, 0x1f, 0x13, 0x19, + 0xd1, 0xb9, 0x40, 0x9f, 0x01, 0xe8, 0x00, 0xe0, 0x3c, 0x62, 0x44, 0x07, 0xd2, 0x0e, 0x6d, 0x8d, + 0x9c, 0x47, 0x8c, 0xa0, 0x11, 0x40, 0x52, 0x92, 0x48, 0x92, 0x14, 0x47, 0x52, 0x87, 0x72, 0xfb, + 0xe9, 0xbe, 0x5b, 0x85, 0xdd, 0xad, 0xc3, 0xee, 0x5e, 0xd6, 0x61, 0x1f, 0xb6, 0x6f, 0x6e, 0xfb, + 0x8d, 0xdf, 0xfe, 0xea, 0x5b, 0xa1, 0x6d, 0xce, 0x7d, 0x2d, 0xd1, 0xe7, 0x80, 0x5e, 0x91, 0x32, + 0x27, 0x73, 0xac, 0xfe, 0x15, 0xf8, 0xe4, 0xf8, 0x18, 0xe7, 0x42, 0xc7, 0x72, 0x33, 0xbc, 0x5f, + 0x31, 0xca, 0xe1, 0xe4, 0xf8, 0xf8, 0x5c, 0x20, 0x17, 0x3e, 0x36, 0x4f, 0x91, 0x70, 0xc6, 0xa8, + 0xc4, 0xf1, 0x4a, 0x12, 0xa1, 0xf3, 0xb9, 0x19, 0xee, 0x55, 0xd4, 0x48, 0x33, 0x43, 0x45, 0xa0, + 0x53, 0x18, 0x18, 0xfd, 0xcf, 0xbc, 0x7c, 0x45, 0xf3, 0x0c, 0x0b, 0x22, 0x71, 0x51, 0xd2, 0x65, + 0x24, 0x89, 0x39, 0xdc, 0xd4, 0x87, 0x3f, 0xad, 0x74, 0x2f, 0x2b, 0xd9, 0x94, 0xc8, 0xa0, 0x12, + 0x55, 0x3e, 0x63, 0xe8, 0xbf, 0xc7, 0x47, 0xcc, 0xa2, 0x92, 0xa4, 0xc6, 0x66, 0x4b, 0xdb, 0x3c, + 0x78, 0xd7, 0x66, 0xaa, 0x35, 0x95, 0xcb, 0x13, 0x00, 0x13, 0x3b, 0x4c, 0x53, 0x1d, 0xd0, 0x9d, + 0xe1, 0xce, 0xfa, 0xb6, 0x6f, 0x9b, 0xb1, 0xfb, 0xe3, 0xd0, 0x36, 0x02, 0x3f, 0x45, 0x8f, 0xa0, + 0xb3, 0x10, 0xa4, 0xfc, 0xcf, 0x58, 0xda, 0xba, 0xc8, 0x8e, 0xc2, 0xdf, 0x0e, 0xe5, 0x21, 0xb4, + 0xc8, 0x35, 0x49, 0x94, 0xa7, 0x4a, 0xa5, 0x3d, 0x84, 0xf5, 0x6d, 0x7f, 0x6b, 0x72, 0x4d, 0x12, + 0x7f, 0x1c, 0x6e, 0x29, 0xca, 0x4f, 0x87, 0xe9, 0xcd, 0x9b, 0x5e, 0xe3, 0x8f, 0x37, 0xbd, 0xc6, + 0x2f, 0xeb, 0x9e, 0x75, 0xb3, 0xee, 0x59, 0xbf, 0xaf, 0x7b, 0xd6, 0xdf, 0xeb, 0x9e, 0xf5, 0xe3, + 0xb7, 0x1f, 0xfe, 0x69, 0xfc, 0xd2, 0xfc, 0x7e, 0xdf, 0x88, 0xb7, 0xf4, 0xbb, 0x7f, 0xf1, 0x4f, + 0x00, 0x00, 0x00, 0xff, 0xff, 0x21, 0xc9, 0x9f, 0x44, 0x71, 0x05, 0x00, 0x00, +} diff --git a/cmd/containerd-shim-runhcs-v1/options/runhcs.proto b/cmd/containerd-shim-runhcs-v1/options/runhcs.proto index 
ed6132f76a..e184ebe621 100644 --- a/cmd/containerd-shim-runhcs-v1/options/runhcs.proto +++ b/cmd/containerd-shim-runhcs-v1/options/runhcs.proto @@ -62,7 +62,12 @@ message Options { // GPUVHDPath is the path to the gpu vhd to add to the uvm // when a container requests a gpu - string GPUVHDPath = 10; + string GPUVHDPath = 10; + + // scale_cpu_limits_to_sandbox indicates that container CPU limits should + // be adjusted to account for the difference in number of cores between the + // host and UVM. + bool scale_cpu_limits_to_sandbox = 11; } // ProcessDetails contains additional information about a process. This is the additional diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 303f16b403..c3e3810360 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -9,6 +9,7 @@ import ( "time" "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" "github.com/Microsoft/hcsshim/internal/cmd" "github.com/Microsoft/hcsshim/internal/cow" @@ -26,6 +27,7 @@ import ( "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/runtime" "github.com/containerd/containerd/runtime/v2/task" + "github.com/containerd/typeurl" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -132,12 +134,23 @@ func newHcsTask( s.Windows.Network != nil { netNS = s.Windows.Network.NetworkNamespace } + + var shimOpts *runhcsopts.Options + if req.Options != nil { + v, err := typeurl.UnmarshalAny(req.Options) + if err != nil { + return nil, err + } + shimOpts = v.(*runhcsopts.Options) + } + opts := hcsoci.CreateOptions{ - ID: req.ID, - Owner: owner, - Spec: s, - HostingSystem: parent, - NetworkNamespace: netNS, + ID: req.ID, + Owner: owner, + Spec: s, + HostingSystem: 
parent, + NetworkNamespace: netNS, + ScaleCPULimitsToSandbox: shimOpts.ScaleCpuLimitsToSandbox, } system, resources, err := hcsoci.CreateContainer(ctx, &opts) if err != nil { diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go index fcc9138796..70ae8cbba1 100644 --- a/internal/hcsoci/create.go +++ b/internal/hcsoci/create.go @@ -49,6 +49,10 @@ type CreateOptions struct { // must a) not tear down the utility VM on failure (or pause in some way) and b) is responsible for // performing the ReleaseResources() call themselves. DoNotReleaseResourcesOnFailure bool + + // ScaleCPULimitsToSandbox indicates that the container CPU limits should be adjusted to account + // for the difference in CPU count between the host and the UVM. + ScaleCPULimitsToSandbox bool } // createOptionsInternal is the set of user-supplied create options, but includes internal diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 6552f897b8..4e3a06852f 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -80,27 +80,69 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter if cpuNumSet > 1 { return nil, nil, fmt.Errorf("invalid spec - Windows Process Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight) } else if cpuNumSet == 1 { - var hostCPUCount int32 + hostCPUCount := processorinfo.ProcessorCount() + // usableCPUCount is the number of processors present in whatever environment + // the container is running in. It will be either the processor count of the + // host, or the UVM, based on if the container is process or hypervisor isolated. 
+ usableCPUCount := hostCPUCount + var uvmCPUCount int32 if coi.HostingSystem != nil { - // Normalize to UVM size - hostCPUCount = coi.HostingSystem.ProcessorCount() - } else { - // For process isolated case the amount of logical processors reported - // from the HCS apis may be greater than what is available to the host in a - // minroot configuration (host doesn't have access to all available LPs) - // so prefer standard OS level calls here instead. - hostCPUCount = processorinfo.ProcessorCount() + uvmCPUCount = coi.HostingSystem.ProcessorCount() + usableCPUCount = uvmCPUCount } - if cpuCount > hostCPUCount { + if cpuCount > usableCPUCount { l := log.G(ctx).WithField(logfields.ContainerID, coi.ID) if coi.HostingSystem != nil { l.Data[logfields.UVMID] = coi.HostingSystem.ID() } l.WithFields(logrus.Fields{ "requested": cpuCount, - "assigned": hostCPUCount, + "assigned": usableCPUCount, }).Warn("Changing user requested CPUCount to current number of processors") - cpuCount = hostCPUCount + cpuCount = usableCPUCount + } + if coi.ScaleCPULimitsToSandbox && cpuLimit > 0 && coi.HostingSystem != nil { + // When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume + // the CPU limit has been calculated based on the number of processors on the + // host, and instead re-calculate it based on the number of processors in the UVM. + // + // This is needed to work correctly with assumptions kubelet makes when computing + // the CPU limit value: + // - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of + // cores. So if 2000 millicores are assigned, the container can use 2 processors. + // - In Windows, the job object CPU limit is global across all processors on the + // system, and is represented as a fraction out of 10000. In this model, a limit + // of 10000 means the container can use all processors fully, regardless of how + // many processors exist on the system. 
+ // - To convert the millicores value into the job object limit, kubelet divides + // the millicores by the number of CPU cores on the host. This causes problems + // when running inside a UVM, as the UVM may have a different number of processors + // than the host system. + // + // To work around this, we undo the division by the number of host processors, and + // re-do the division based on the number of processors inside the UVM. This will + // give the correct value based on the actual number of millicores that the kubelet + // wants the container to have. + // + // Kubelet formula to compute CPU limit: + // cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000 + newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount + // We only apply bounds here because we are calculating the CPU limit ourselves, + // and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000]. + // In the case where we use the value directly from the user, we don't alter it to fit + // within the bounds, but just let the platform throw an error if it is invalid. 
+ if newCPULimit < 1 { + newCPULimit = 1 + } else if newCPULimit > 10000 { + newCPULimit = 10000 + } + log.G(ctx).WithFields(logrus.Fields{ + "hostCPUCount": hostCPUCount, + "uvmCPUCount": uvmCPUCount, + "oldCPULimit": cpuLimit, + "newCPULimit": newCPULimit, + }).Info("rescaling CPU limit for UVM sandbox") + cpuLimit = newCPULimit } v1.ProcessorCount = uint32(cpuCount) From 127715ce662cd68f3d2e8858e75a300e28d84b7f Mon Sep 17 00:00:00 2001 From: Kevin Parsons Date: Wed, 14 Oct 2020 02:07:37 -0700 Subject: [PATCH 17/20] Improve logging for LCOW layer operations Signed-off-by: Kevin Parsons --- internal/layers/layers.go | 60 ++++++++++++++++++++++++++------------- internal/uvm/scsi.go | 1 + internal/uvm/vpmem.go | 6 ++++ 3 files changed, 47 insertions(+), 20 deletions(-) diff --git a/internal/layers/layers.go b/internal/layers/layers.go index b4ce6e6049..187ca7df62 100644 --- a/internal/layers/layers.go +++ b/internal/layers/layers.go @@ -15,6 +15,7 @@ import ( uvmpkg "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/internal/wclayer" "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // ImageLayers contains all the layers for an image. @@ -111,13 +112,8 @@ func MountContainerLayers(ctx context.Context, layerFolders []string, guestRoot } } else { for _, l := range layersAdded { - // Assume it was added to vPMEM and fall back to SCSI - e := uvm.RemoveVPMEM(ctx, l) - if e == uvmpkg.ErrNotAttached { - e = uvm.RemoveSCSI(ctx, l) - } - if e != nil { - log.G(ctx).WithError(e).Warn("failed to remove lcow layer on cleanup") + if err := removeLCOWLayer(ctx, uvm, l); err != nil { + log.G(ctx).WithError(err).Warn("failed to remove lcow layer on cleanup") } } } @@ -192,9 +188,13 @@ func addLCOWLayer(ctx context.Context, uvm *uvmpkg.UtilityVM, layerPath string) // fall back to SCSI. 
uvmPath, err = uvm.AddVPMEM(ctx, layerPath) if err == nil { - return uvmPath, err + log.G(ctx).WithFields(logrus.Fields{ + "layerPath": layerPath, + "layerType": "vpmem", + }).Debug("Added LCOW layer") + return uvmPath, nil } else if err != uvmpkg.ErrNoAvailableLocation && err != uvmpkg.ErrMaxVPMEMLayerSize { - return "", err + return "", fmt.Errorf("failed to add VPMEM layer: %s", err) } } @@ -203,10 +203,36 @@ func addLCOWLayer(ctx context.Context, uvm *uvmpkg.UtilityVM, layerPath string) if err != nil { return "", fmt.Errorf("failed to add SCSI layer: %s", err) } - + log.G(ctx).WithFields(logrus.Fields{ + "layerPath": layerPath, + "layerType": "scsi", + }).Debug("Added LCOW layer") return sm.UVMPath, nil } +func removeLCOWLayer(ctx context.Context, uvm *uvmpkg.UtilityVM, layerPath string) error { + // Assume it was added to vPMEM and fall back to SCSI + err := uvm.RemoveVPMEM(ctx, layerPath) + if err == nil { + log.G(ctx).WithFields(logrus.Fields{ + "layerPath": layerPath, + "layerType": "vpmem", + }).Debug("Removed LCOW layer") + return nil + } else if err == uvmpkg.ErrNotAttached { + err = uvm.RemoveSCSI(ctx, layerPath) + if err == nil { + log.G(ctx).WithFields(logrus.Fields{ + "layerPath": layerPath, + "layerType": "scsi", + }).Debug("Removed LCOW layer") + return nil + } + return fmt.Errorf("failed to remove SCSI layer: %s", err) + } + return fmt.Errorf("failed to remove VPMEM layer: %s", err) +} + // UnmountOperation is used when calling Unmount() to determine what type of unmount is // required. In V1 schema, this must be unmountOperationAll. 
In V2, client can // be more optimal and only unmount what they need which can be a minor performance @@ -291,18 +317,12 @@ func UnmountContainerLayers(ctx context.Context, layerFolders []string, containe if uvm.OS() == "linux" && (op&UnmountOperationVPMEM) == UnmountOperationVPMEM { for _, layerPath := range layerFolders[:len(layerFolders)-1] { hostPath := filepath.Join(layerPath, "layer.vhd") - - // Assume it was added to vPMEM and fall back to SCSI - e := uvm.RemoveVPMEM(ctx, hostPath) - if e == uvmpkg.ErrNotAttached { - e = uvm.RemoveSCSI(ctx, hostPath) - } - if e != nil { - log.G(ctx).WithError(e).Warn("remove layer failed") + if err := removeLCOWLayer(ctx, uvm, hostPath); err != nil { + log.G(ctx).WithError(err).Warn("remove layer failed") if retError == nil { - retError = e + retError = err } else { - retError = errors.Wrapf(retError, e.Error()) + retError = errors.Wrapf(retError, err.Error()) } } } diff --git a/internal/uvm/scsi.go b/internal/uvm/scsi.go index 2623e28e1f..119c9a1088 100644 --- a/internal/uvm/scsi.go +++ b/internal/uvm/scsi.go @@ -181,6 +181,7 @@ func (uvm *UtilityVM) RemoveSCSI(ctx context.Context, hostPath string) error { if err := uvm.modify(ctx, scsiModification); err != nil { return fmt.Errorf("failed to remove SCSI disk %s from container %s: %s", hostPath, uvm.id, err) } + log.G(ctx).WithFields(sm.logFormat()).Debug("removed SCSI location") uvm.scsiLocations[sm.Controller][sm.LUN] = nil return nil } diff --git a/internal/uvm/vpmem.go b/internal/uvm/vpmem.go index 2c19b3f38f..e50b484848 100644 --- a/internal/uvm/vpmem.go +++ b/internal/uvm/vpmem.go @@ -152,6 +152,12 @@ func (uvm *UtilityVM) RemoveVPMEM(ctx context.Context, hostPath string) (err err if err := uvm.modify(ctx, modification); err != nil { return fmt.Errorf("failed to remove VPMEM %s from utility VM %s: %s", hostPath, uvm.id, err) } + log.G(ctx).WithFields(logrus.Fields{ + "hostPath": device.hostPath, + "uvmPath": device.uvmPath, + "refCount": device.refCount, + 
"deviceNumber": deviceNumber, + }).Debug("removed VPMEM location") uvm.vpmemDevices[deviceNumber] = nil } else { device.refCount-- From 688da9024c6d261ad036d895bbb8745f733d9e3b Mon Sep 17 00:00:00 2001 From: Daniel Canter Date: Mon, 12 Oct 2020 15:09:58 -0700 Subject: [PATCH 18/20] Add SetJobCompartmentId function from iphlpapi.dll * For future work to be able to run a job container in a network namespace that isn't the hosts, added the SetJobCompartmentId function from iphlpapi. Signed-off-by: Daniel Canter --- internal/hns/namespace.go | 7 ++++--- internal/winapi/net.go | 3 +++ internal/winapi/winapi.go | 2 +- internal/winapi/zsyscall_windows.go | 13 +++++++++++++ 4 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 internal/winapi/net.go diff --git a/internal/hns/namespace.go b/internal/hns/namespace.go index 45e2281b07..d3b04eefe0 100644 --- a/internal/hns/namespace.go +++ b/internal/hns/namespace.go @@ -27,9 +27,10 @@ type namespaceResourceRequest struct { } type Namespace struct { - ID string - IsDefault bool `json:",omitempty"` - ResourceList []NamespaceResource `json:",omitempty"` + ID string + IsDefault bool `json:",omitempty"` + ResourceList []NamespaceResource `json:",omitempty"` + CompartmentId uint32 `json:",omitempty"` } func issueNamespaceRequest(id *string, method, subpath string, request interface{}) (*Namespace, error) { diff --git a/internal/winapi/net.go b/internal/winapi/net.go new file mode 100644 index 0000000000..57a2cbafe1 --- /dev/null +++ b/internal/winapi/net.go @@ -0,0 +1,3 @@ +package winapi + +//sys SetJobCompartmentId(handle windows.Handle, compartmentId uint32) (hr error) = iphlpapi.SetJobCompartmentId diff --git a/internal/winapi/winapi.go b/internal/winapi/winapi.go index 50bdc01f2b..545a87dad4 100644 --- a/internal/winapi/winapi.go +++ b/internal/winapi/winapi.go @@ -2,4 +2,4 @@ // be thought of as an extension to golang.org/x/sys/windows. 
package winapi -//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go jobobject.go path.go logon.go memory.go processor.go devices.go filesystem.go errors.go +//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go net.go jobobject.go path.go logon.go memory.go processor.go devices.go filesystem.go errors.go diff --git a/internal/winapi/zsyscall_windows.go b/internal/winapi/zsyscall_windows.go index af83f5b07e..4a11fc71b2 100644 --- a/internal/winapi/zsyscall_windows.go +++ b/internal/winapi/zsyscall_windows.go @@ -37,11 +37,13 @@ func errnoErr(e syscall.Errno) error { } var ( + modiphlpapi = windows.NewLazySystemDLL("iphlpapi.dll") modkernel32 = windows.NewLazySystemDLL("kernel32.dll") modadvapi32 = windows.NewLazySystemDLL("advapi32.dll") modcfgmgr32 = windows.NewLazySystemDLL("cfgmgr32.dll") modntdll = windows.NewLazySystemDLL("ntdll.dll") + procSetJobCompartmentId = modiphlpapi.NewProc("SetJobCompartmentId") procIsProcessInJob = modkernel32.NewProc("IsProcessInJob") procQueryInformationJobObject = modkernel32.NewProc("QueryInformationJobObject") procOpenJobObjectW = modkernel32.NewProc("OpenJobObjectW") @@ -63,6 +65,17 @@ var ( procRtlNtStatusToDosError = modntdll.NewProc("RtlNtStatusToDosError") ) +func SetJobCompartmentId(handle windows.Handle, compartmentId uint32) (hr error) { + r0, _, _ := syscall.Syscall(procSetJobCompartmentId.Addr(), 2, uintptr(handle), uintptr(compartmentId), 0) + if int32(r0) < 0 { + if r0&0x1fff0000 == 0x00070000 { + r0 &= 0xffff + } + hr = syscall.Errno(r0) + } + return +} + func IsProcessInJob(procHandle windows.Handle, jobHandle windows.Handle, result *bool) (err error) { r1, _, e1 := syscall.Syscall(procIsProcessInJob.Addr(), 3, uintptr(procHandle), uintptr(jobHandle), uintptr(unsafe.Pointer(result))) if r1 == 0 { From c91b39de715815e653647cee32f6ab09a664804e Mon Sep 17 00:00:00 2001 From: Daniel Canter Date: Thu, 15 Oct 2020 06:47:49 -0700 Subject: [PATCH 19/20] Change 
SetJobCompartmentId to use win32 error code semantics * Binding currently has the return value checked against HRESULT semantics when this shouldn't be the case. Signed-off-by: Daniel Canter --- internal/winapi/net.go | 2 +- internal/winapi/zsyscall_windows.go | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/internal/winapi/net.go b/internal/winapi/net.go index 57a2cbafe1..f37910024f 100644 --- a/internal/winapi/net.go +++ b/internal/winapi/net.go @@ -1,3 +1,3 @@ package winapi -//sys SetJobCompartmentId(handle windows.Handle, compartmentId uint32) (hr error) = iphlpapi.SetJobCompartmentId +//sys SetJobCompartmentId(handle windows.Handle, compartmentId uint32) (win32Err error) = iphlpapi.SetJobCompartmentId diff --git a/internal/winapi/zsyscall_windows.go b/internal/winapi/zsyscall_windows.go index 4a11fc71b2..fabe19c12b 100644 --- a/internal/winapi/zsyscall_windows.go +++ b/internal/winapi/zsyscall_windows.go @@ -65,13 +65,10 @@ var ( procRtlNtStatusToDosError = modntdll.NewProc("RtlNtStatusToDosError") ) -func SetJobCompartmentId(handle windows.Handle, compartmentId uint32) (hr error) { +func SetJobCompartmentId(handle windows.Handle, compartmentId uint32) (win32Err error) { r0, _, _ := syscall.Syscall(procSetJobCompartmentId.Addr(), 2, uintptr(handle), uintptr(compartmentId), 0) - if int32(r0) < 0 { - if r0&0x1fff0000 == 0x00070000 { - r0 &= 0xffff - } - hr = syscall.Errno(r0) + if r0 != 0 { + win32Err = syscall.Errno(r0) } return } From 56191cc34687edfbf1625049fd0379c1cdbca419 Mon Sep 17 00:00:00 2001 From: Daniel Canter Date: Thu, 8 Oct 2020 03:14:56 -0700 Subject: [PATCH 20/20] Add regkey to WCOW to deal with containment for GNS compartment changes * A change was added recently to GNS that will be backported to Vb and possibly 19H1 and RS5 that changes how network compartments are created to fix an issue with accessing smb shares in hypervisor isolated containers. 
To ease the worries of this breaking anything the change will be put behind a registry key (that is only set by us) so that the change won't impact docker and can be optionally toggled off by us through this annotation. Signed-off-by: Daniel Canter --- internal/oci/uvm.go | 22 ++++++++++++---------- internal/uvm/create.go | 11 +++++++++++ internal/uvm/create_wcow.go | 22 ++++++++++++++++++++++ 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go index f05c52262c..821180d711 100644 --- a/internal/oci/uvm.go +++ b/internal/oci/uvm.go @@ -120,16 +120,17 @@ const ( // // Note: Unlike Windows process isolated container QoS Count/Limt/Weight on // the UVM are not mutually exclusive and can be set together. - annotationProcessorWeight = "io.microsoft.virtualmachine.computetopology.processor.weight" - annotationVPMemCount = "io.microsoft.virtualmachine.devices.virtualpmem.maximumcount" - annotationVPMemSize = "io.microsoft.virtualmachine.devices.virtualpmem.maximumsizebytes" - annotationPreferredRootFSType = "io.microsoft.virtualmachine.lcow.preferredrootfstype" - annotationBootFilesRootPath = "io.microsoft.virtualmachine.lcow.bootfilesrootpath" - annotationKernelDirectBoot = "io.microsoft.virtualmachine.lcow.kerneldirectboot" - annotationVPCIEnabled = "io.microsoft.virtualmachine.lcow.vpcienabled" - annotationStorageQoSBandwidthMaximum = "io.microsoft.virtualmachine.storageqos.bandwidthmaximum" - annotationStorageQoSIopsMaximum = "io.microsoft.virtualmachine.storageqos.iopsmaximum" - annotationFullyPhysicallyBacked = "io.microsoft.virtualmachine.fullyphysicallybacked" + annotationProcessorWeight = "io.microsoft.virtualmachine.computetopology.processor.weight" + annotationVPMemCount = "io.microsoft.virtualmachine.devices.virtualpmem.maximumcount" + annotationVPMemSize = "io.microsoft.virtualmachine.devices.virtualpmem.maximumsizebytes" + annotationPreferredRootFSType = 
"io.microsoft.virtualmachine.lcow.preferredrootfstype" + annotationBootFilesRootPath = "io.microsoft.virtualmachine.lcow.bootfilesrootpath" + annotationKernelDirectBoot = "io.microsoft.virtualmachine.lcow.kerneldirectboot" + annotationVPCIEnabled = "io.microsoft.virtualmachine.lcow.vpcienabled" + annotationStorageQoSBandwidthMaximum = "io.microsoft.virtualmachine.storageqos.bandwidthmaximum" + annotationStorageQoSIopsMaximum = "io.microsoft.virtualmachine.storageqos.iopsmaximum" + annotationFullyPhysicallyBacked = "io.microsoft.virtualmachine.fullyphysicallybacked" + annotationDisableCompartmentNamespace = "io.microsoft.virtualmachine.disablecompartmentnamespace" // A boolean annotation to control whether to use an external bridge or the // HCS-GCS bridge. Default value is true which means external bridge will be used // by default. @@ -411,6 +412,7 @@ func SpecToUVMCreateOpts(ctx context.Context, s *specs.Spec, id, owner string) ( wopts.StorageQoSBandwidthMaximum = ParseAnnotationsStorageBps(ctx, s, annotationStorageQoSBandwidthMaximum, wopts.StorageQoSBandwidthMaximum) wopts.StorageQoSIopsMaximum = ParseAnnotationsStorageIops(ctx, s, annotationStorageQoSIopsMaximum, wopts.StorageQoSIopsMaximum) wopts.ExternalGuestConnection = parseAnnotationsBool(ctx, s.Annotations, annotationUseExternalGCSBridge, wopts.ExternalGuestConnection) + wopts.DisableCompartmentNamespace = parseAnnotationsBool(ctx, s.Annotations, annotationDisableCompartmentNamespace, wopts.DisableCompartmentNamespace) handleAnnotationFullyPhysicallyBacked(ctx, s.Annotations, wopts) return wopts, nil } diff --git a/internal/uvm/create.go b/internal/uvm/create.go index d7ff969bfb..29b50756ba 100644 --- a/internal/uvm/create.go +++ b/internal/uvm/create.go @@ -70,6 +70,17 @@ type Options struct { // ExternalGuestConnection sets whether the guest RPC connection is performed // internally by the OS platform or externally by this package. 
ExternalGuestConnection bool + + // DisableCompartmentNamespace sets whether to disable namespacing the network compartment in the UVM + // for WCOW. Namespacing makes it so the compartment created for a container is essentially no longer + // aware or able to see any of the other compartments on the host (in this case the UVM). + // The compartment that the container is added to now behaves as the default compartment as + // far as the container is concerned and it is only able to view the NICs in the compartment it's assigned to. + // This is the compartment setup (and behavior) that is followed for V1 HCS schema containers (docker) so + // this change brings parity as well. This behavior is gated behind a registry key currently to avoid any + // unnecessary behavior and once this restriction is removed then we can remove the need for this variable + // and the associated annotation as well. + DisableCompartmentNamespace bool } // Verifies that the final UVM options are correct and supported. diff --git a/internal/uvm/create_wcow.go b/internal/uvm/create_wcow.go index d98819575f..6c2b1bf395 100644 --- a/internal/uvm/create_wcow.go +++ b/internal/uvm/create_wcow.go @@ -138,6 +138,27 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error }, } + // Here for a temporary workaround until the need for setting this regkey is no more. To protect + // against any undesired behavior (such as some general networking scenarios ceasing to function) + // with a recent change to fix SMB share access in the UVM, this registry key will be checked to + // enable the change in question inside GNS.dll.
+ var registryChanges hcsschema.RegistryChanges + if !opts.DisableCompartmentNamespace { + registryChanges = hcsschema.RegistryChanges{ + AddValues: []hcsschema.RegistryValue{ + { + Key: &hcsschema.RegistryKey{ + Hive: "System", + Name: "CurrentControlSet\\Services\\gns", + }, + Name: "EnableCompartmentNamespace", + DWordValue: 1, + Type_: "DWord", + }, + }, + } + } + doc := &hcsschema.ComputeSystem{ Owner: uvm.owner, SchemaVersion: schemaversion.SchemaV21(), @@ -152,6 +173,7 @@ func CreateWCOW(ctx context.Context, opts *OptionsWCOW) (_ *UtilityVM, err error }, }, }, + RegistryChanges: &registryChanges, ComputeTopology: &hcsschema.Topology{ Memory: &hcsschema.Memory2{ SizeInMB: memorySizeInMB,