diff --git a/Makefile b/Makefile index 922e4445d..e232e9e9b 100644 --- a/Makefile +++ b/Makefile @@ -53,22 +53,26 @@ CLI_VERSION = $(VERSION) endif CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info -GOOS ?= linux - binaries: cmds ifneq ($(PREFIX),) cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*) endif cmds: $(CMD_TARGETS) + +ifneq ($(shell uname),Darwin) +EXTLDFLAGS = -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files +else +EXTLDFLAGS = -Wl,-undefined,dynamic_lookup +endif $(CMD_TARGETS): cmd-%: - GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*) + go build -ldflags "-s -w '-extldflags=$(EXTLDFLAGS)' -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*) build: - GOOS=$(GOOS) go build ./... + go build ./... examples: $(EXAMPLE_TARGETS) $(EXAMPLE_TARGETS): example-%: - GOOS=$(GOOS) go build ./examples/$(*) + go build ./examples/$(*) all: check test build binary check: $(CHECK_TARGETS) @@ -100,7 +104,7 @@ coverage: test generate: go generate $(MODULE)/... -$(DOCKER_TARGETS): docker-%: +$(DOCKER_TARGETS): docker-%: @echo "Running 'make $(*)' in container image $(BUILDIMAGE)" $(DOCKER) run \ --rm \ diff --git a/cmd/nvidia-ctk/runtime/list/list.go b/cmd/nvidia-ctk/runtime/list/list.go new file mode 100644 index 000000000..2e7284d45 --- /dev/null +++ b/cmd/nvidia-ctk/runtime/list/list.go @@ -0,0 +1,126 @@ +/** +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package list + +import ( + "encoding/json" + "fmt" + + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/urfave/cli/v2" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + "github.com/NVIDIA/nvidia-container-toolkit/internal/runtime" +) + +type command struct { + logger logger.Interface +} + +// NewCommand constructs a list command with the specified logger. +func NewCommand(logger logger.Interface) *cli.Command { + c := command{ + logger: logger, + } + return c.build() +} + +// options holds the values of the command line flags accepted by the list command. +type options struct { + mode string + envvars cli.StringSlice +} + +func (m command) build() *cli.Command { + // Create an options struct to hold the parsed command line flags + cfg := options{} + + // Create the 'list' command + configure := cli.Command{ + Name: "list", + Usage: "List the modifications made to the OCI runtime specification", + Before: func(c *cli.Context) error { + return m.validateFlags(c, &cfg) + }, + Action: func(c *cli.Context) error { + return m.list(c, &cfg) + }, + } + + configure.Flags = []cli.Flag{ + &cli.StringFlag{ + Name: "mode", + Usage: "override the runtime mode as specified in the config file", + Destination: &cfg.mode, + }, + &cli.StringSliceFlag{ + Name: "envvar", + Aliases: []string{"e"}, + Usage: "add an envvar to the container definition", + Destination: &cfg.envvars, + }, 
+ } + + return &configure +} + +func (m command) validateFlags(c *cli.Context, cfg *options) error { + return nil +} + +// list executes the list command: it builds a spec modifier from the config returned by config.GetDefault (with any --mode and --envvar overrides), applies it to an empty OCI spec and prints the resulting spec as indented JSON followed by a newline. +func (m command) list(c *cli.Context, cfg *options) error { + toolkitConfig, err := config.GetDefault() + if err != nil { + return fmt.Errorf("failed to generate default config: %w", err) + } + + if cfg.mode != "" { + toolkitConfig.NVIDIAContainerRuntimeConfig.Mode = cfg.mode + } + + container, err := image.New( + image.WithEnv(cfg.envvars.Value()), + ) + if err != nil { + return fmt.Errorf("failed to construct container image: %w", err) + } + + modifier, err := runtime.NewSpecModifier(m.logger, toolkitConfig, container) + if err != nil { + return fmt.Errorf("failed to construct OCI runtime specification modifier: %w", err) + } + // TODO: We should handle this more cleanly. + if modifier == nil { + return fmt.Errorf("no modifications required") + } + + spec := specs.Spec{} + if err := modifier.Modify(&spec); err != nil { + return fmt.Errorf("failed to apply modification to empty spec: %w", err) + } + + specJSON, err := json.MarshalIndent(spec, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal OCI spec to JSON: %w", err) + } + fmt.Println(string(specJSON)) + + return nil +} diff --git a/cmd/nvidia-ctk/runtime/runtime.go b/cmd/nvidia-ctk/runtime/runtime.go index 1e3e7a546..6ff3b04ec 100644 --- a/cmd/nvidia-ctk/runtime/runtime.go +++ b/cmd/nvidia-ctk/runtime/runtime.go @@ -20,6 +20,7 @@ import ( "github.com/urfave/cli/v2" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure" + "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/list" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" ) @@ -44,6 +45,7 @@ func (m runtimeCommand) build() *cli.Command { runtime.Subcommands = []*cli.Command{ configure.NewCommand(m.logger), + list.NewCommand(m.logger), } return &runtime diff --git a/internal/config/image/builder.go b/internal/config/image/builder.go 
index da9025f05..bb8decc5c 100644 --- a/internal/config/image/builder.go +++ b/internal/config/image/builder.go @@ -24,8 +24,7 @@ import ( ) type builder struct { - env map[string]string - mounts []specs.Mount + CUDA disableRequire bool } @@ -60,6 +59,14 @@ func (b builder) build() (CUDA, error) { // Option is a functional option for creating a CUDA image. type Option func(*builder) error +// WithAnnotations sets the annotations for the image. The supplied map is stored as-is and is not copied. +func WithAnnotations(annotations map[string]string) Option { + return func(b *builder) error { + b.annotations = annotations + return nil + } +} + // WithDisableRequire sets the disable require option. func WithDisableRequire(disableRequire bool) Option { return func(b *builder) error { @@ -93,6 +100,14 @@ func WithEnvMap(env map[string]string) Option { } } +// WithIsPrivileged sets whether the container associated with the image is privileged or not. +func WithIsPrivileged(isPrivileged bool) Option { + return func(b *builder) error { + b.isPrivileged = isPrivileged + return nil + } +} + // WithMounts sets the mounts associated with the CUDA image. func WithMounts(mounts []specs.Mount) Option { return func(b *builder) error { diff --git a/internal/config/image/cuda_image.go b/internal/config/image/cuda_image.go index d285dea24..29e059f98 100644 --- a/internal/config/image/cuda_image.go +++ b/internal/config/image/cuda_image.go @@ -40,8 +40,10 @@ const ( // a map of environment variable to values that can be used to perform lookups // such as requirements. type CUDA struct { - env map[string]string - mounts []specs.Mount + annotations map[string]string + env map[string]string + mounts []specs.Mount + isPrivileged bool } // NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec. 
@@ -54,7 +56,9 @@ func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) { return New( WithEnv(env), + WithAnnotations(spec.Annotations), WithMounts(spec.Mounts), + WithIsPrivileged(IsPrivileged(spec)), ) } @@ -64,6 +68,11 @@ func NewCUDAImageFromEnv(env []string) (CUDA, error) { return New(WithEnv(env)) } +// IsPrivileged indicates whether the container was started with elevated privileges. +func (i CUDA) IsPrivileged() bool { + return i.isPrivileged +} + // Getenv returns the value of the specified environment variable. // If the environment variable is not specified, an empty string is returned. func (i CUDA) Getenv(key string) string { @@ -274,3 +283,38 @@ func (i CUDA) CDIDevicesFromMounts() []string { } return devices } + +// CDIDevicesFromAnnotations returns a list of devices specified in the container annotations. +// Keys starting with the specified prefixes are considered and expected to contain a comma-separated list of +// fully-qualified CDI device names. If any device name is not fully-qualified, an error is returned. If no prefixes are specified, no devices are returned. +// The list of returned devices is deduplicated; its order follows map iteration and is therefore not stable across calls. 
+func (i CUDA) CDIDevicesFromAnnotations(prefixes ...string) ([]string, error) { + if len(prefixes) == 0 { + return nil, nil + } + devicesByKey := make(map[string][]string) + for key, value := range i.annotations { + for _, prefix := range prefixes { + if strings.HasPrefix(key, prefix) { + devicesByKey[key] = strings.Split(value, ",") + } + } + } + + seen := make(map[string]bool) + var annotationDevices []string + for key, devices := range devicesByKey { + for _, device := range devices { + if !parser.IsQualifiedName(device) { + return nil, fmt.Errorf("invalid device name %q in annotation %q", device, key) + } + if seen[device] { + continue + } + annotationDevices = append(annotationDevices, device) + seen[device] = true + } + } + + return annotationDevices, nil +} diff --git a/internal/cuda/cuda.go b/internal/cuda/cuda.go index 2c70a821d..5e8b96c28 100644 --- a/internal/cuda/cuda.go +++ b/internal/cuda/cuda.go @@ -23,7 +23,8 @@ import ( ) /* -#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files +#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files +#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup #ifdef _WIN32 #define CUDAAPI __stdcall diff --git a/internal/dxcore/dxcore.go b/internal/dxcore/dxcore.go index 76cc53f80..cbac01415 100644 --- a/internal/dxcore/dxcore.go +++ b/internal/dxcore/dxcore.go @@ -17,7 +17,9 @@ package dxcore /* -#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files +#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files +#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup + #include */ import "C" diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go index c53cb9960..a062e2488 100644 --- a/internal/modifier/cdi.go +++ b/internal/modifier/cdi.go @@ -34,8 +34,8 @@ import ( // NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the // CDI specifications available on the system. 
The NVIDIA_VISIBLE_DEVICES environment variable is // used to select the devices to include. -func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) { - devices, err := getDevicesFromSpec(logger, ociSpec, cfg) +func NewCDIModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { + devices, err := getDevicesFromContainer(logger, cfg, image) if err != nil { return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err) } @@ -65,32 +65,22 @@ func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spe ) } -func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.Config) ([]string, error) { - rawSpec, err := ociSpec.Load() - if err != nil { - return nil, fmt.Errorf("failed to load OCI spec: %v", err) - } - - annotationDevices, err := getAnnotationDevices(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes, rawSpec.Annotations) +func getDevicesFromContainer(logger logger.Interface, cfg *config.Config, image image.CUDA) ([]string, error) { + annotationDevices, err := image.CDIDevicesFromAnnotations(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes...) 
if err != nil { return nil, fmt.Errorf("failed to parse container annotations: %v", err) } if len(annotationDevices) > 0 { return annotationDevices, nil } - - container, err := image.NewCUDAImageFromSpec(rawSpec) - if err != nil { - return nil, err - } if cfg.AcceptDeviceListAsVolumeMounts { - mountDevices := container.CDIDevicesFromMounts() + mountDevices := image.CDIDevicesFromMounts() if len(mountDevices) > 0 { return mountDevices, nil } } - envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar) + envDevices := image.DevicesFromEnvvars(visibleDevicesEnvvar) var devices []string seen := make(map[string]bool) @@ -109,7 +99,7 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C return nil, nil } - if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) { + if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged() { return devices, nil } diff --git a/internal/runtime/runtime_factory.go b/internal/runtime/runtime_factory.go index a8bdbbf6c..6e9dbd211 100644 --- a/internal/runtime/runtime_factory.go +++ b/internal/runtime/runtime_factory.go @@ -43,8 +43,16 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv if err != nil { return nil, fmt.Errorf("error constructing OCI specification: %v", err) } + rawSpec, err := ociSpec.Load() + if err != nil { + return nil, fmt.Errorf("failed to load OCI spec: %v", err) + } + image, err := image.NewCUDAImageFromSpec(rawSpec) + if err != nil { + return nil, err + } - specModifier, err := newSpecModifier(logger, cfg, ociSpec) + specModifier, err := NewSpecModifier(logger, cfg, image) if err != nil { return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err) } @@ -60,20 +68,10 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv return r, nil } -// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config. 
-func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) { - rawSpec, err := ociSpec.Load() - if err != nil { - return nil, fmt.Errorf("failed to load OCI spec: %v", err) - } - - image, err := image.NewCUDAImageFromSpec(rawSpec) - if err != nil { - return nil, err - } - +// NewSpecModifier is a factory method that constructs an OCI spec modifier for the specified CUDA image based on the provided config. +func NewSpecModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image) - modeModifier, err := newModeModifier(logger, mode, cfg, ociSpec, image) + modeModifier, err := newModeModifier(logger, mode, cfg, image) if err != nil { return nil, err } @@ -100,14 +98,14 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp return modifiers, nil } -func newModeModifier(logger logger.Interface, mode string, cfg *config.Config, ociSpec oci.Spec, image image.CUDA) (oci.SpecModifier, error) { +func newModeModifier(logger logger.Interface, mode string, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { switch mode { case "legacy": return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil case "csv": return modifier.NewCSVModifier(logger, cfg, image) case "cdi": return modifier.NewCDIModifier(logger, cfg, image) } return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)