Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nvidia-ctk runtime apply command to simulate OCI spec modifications #338

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,22 +53,26 @@ CLI_VERSION = $(VERSION)
endif
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info

GOOS ?= linux

binaries: cmds
ifneq ($(PREFIX),)
cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
endif
cmds: $(CMD_TARGETS)

ifneq ($(shell uname),Darwin)
EXTLDFLAGS = -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
else
EXTLDFLAGS = -Wl,-undefined,dynamic_lookup
endif
$(CMD_TARGETS): cmd-%:
GOOS=$(GOOS) go build -ldflags "-extldflags=-Wl,-z,lazy -s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
go build -ldflags "-s -w '-extldflags=$(EXTLDFLAGS)' -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)

build:
GOOS=$(GOOS) go build ./...
go build ./...

examples: $(EXAMPLE_TARGETS)
$(EXAMPLE_TARGETS): example-%:
GOOS=$(GOOS) go build ./examples/$(*)
go build ./examples/$(*)

all: check test build binary
check: $(CHECK_TARGETS)
Expand Down Expand Up @@ -100,7 +104,7 @@ coverage: test
generate:
go generate $(MODULE)/...

$(DOCKER_TARGETS): docker-%:
$(DOCKER_TARGETS): docker-%:
@echo "Running 'make $(*)' in container image $(BUILDIMAGE)"
$(DOCKER) run \
--rm \
Expand Down
126 changes: 126 additions & 0 deletions cmd/nvidia-ctk/runtime/list/list.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/**
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package list

import (
"encoding/json"
"fmt"

"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli/v2"

"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
)

// command implements the list subcommand. It carries the logger that is
// supplied to NewCommand and later passed on when the OCI spec modifier is
// constructed.
type command struct {
	logger logger.Interface
}

// NewCommand returns the CLI definition of the list command. The supplied
// logger is stored on the command and used when the command is executed.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}

// options holds the values parsed from the command line flags of the list
// command.
type options struct {
	// mode is the destination of the "mode" flag. When non-empty it overrides
	// the runtime mode of the toolkit config.
	mode string
	// envvars is the destination of the "envvar" flag. Its values are passed
	// to the container image as environment variables.
	envvars cli.StringSlice
}

func (m command) build() *cli.Command {
// Create a options struct to hold the parsed environment variables or command line flags
cfg := options{}

// Create the 'configure' command
configure := cli.Command{
Name: "list",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Update to apply

Usage: "List the modifications made to the OCI runtime specification",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &cfg)
},
Action: func(c *cli.Context) error {
return m.list(c, &cfg)
},
}

configure.Flags = []cli.Flag{
&cli.StringFlag{
Name: "mode",
Usage: "override the runtime mode a specified in the config file",
Destination: &cfg.mode,
},
&cli.StringSliceFlag{
Name: "envvar",
Aliases: []string{"e"},
Usage: "add an envvar to the container definition",
Destination: &cfg.envvars,
},
}

return &configure
}

// validateFlags is invoked from the command's Before hook. It currently
// performs no validation and always returns nil.
func (m command) validateFlags(c *cli.Context, cfg *options) error {
	return nil
}

// list executes the list command.
func (m command) list(c *cli.Context, cfg *options) error {
toolkitConfig, err := config.GetDefault()
if err != nil {
return fmt.Errorf("failed to generate default config: %w", err)
}

if cfg.mode != "" {
toolkitConfig.NVIDIAContainerRuntimeConfig.Mode = cfg.mode
}

container, err := image.New(
image.WithEnv(cfg.envvars.Value()),
)
if err != nil {
return fmt.Errorf("failed to construct container image: %w", err)
}

modifier, err := runtime.NewSpecModifier(m.logger, toolkitConfig, container)
if err != nil {
return fmt.Errorf("failed to contruct OCI runtime specification modifier: %w", err)

Check failure on line 107 in cmd/nvidia-ctk/runtime/list/list.go

View workflow job for this annotation

GitHub Actions / check

`contruct` is a misspelling of `construct` (misspell)
}
// TODO: We should handle this more cleanly.
if modifier == nil {
return fmt.Errorf("no modifications required")
}

spec := specs.Spec{}
if err := modifier.Modify(&spec); err != nil {
return fmt.Errorf("faile to apply modification to empty spec: %w", err)
}

specJSON, err := json.MarshalIndent(spec, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal OCI spec to JSON: %w", err)
}
fmt.Printf("%v", string(specJSON))

return nil
}
2 changes: 2 additions & 0 deletions cmd/nvidia-ctk/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/urfave/cli/v2"

"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/list"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)

Expand All @@ -44,6 +45,7 @@ func (m runtimeCommand) build() *cli.Command {

runtime.Subcommands = []*cli.Command{
configure.NewCommand(m.logger),
list.NewCommand(m.logger),
}

return &runtime
Expand Down
19 changes: 17 additions & 2 deletions internal/config/image/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ import (
)

type builder struct {
env map[string]string
mounts []specs.Mount
CUDA
disableRequire bool
}

Expand Down Expand Up @@ -60,6 +59,14 @@ func (b builder) build() (CUDA, error) {
// Option is a functional option for creating a CUDA image.
type Option func(*builder) error

// WithAnnotations returns an Option that stores the supplied annotations on
// the image being built. The map is stored as-is, without copying.
func WithAnnotations(annotations map[string]string) Option {
	setAnnotations := func(target *builder) error {
		target.annotations = annotations
		return nil
	}
	return setAnnotations
}

// WithDisableRequire sets the disable require option.
func WithDisableRequire(disableRequire bool) Option {
return func(b *builder) error {
Expand Down Expand Up @@ -93,6 +100,14 @@ func WithEnvMap(env map[string]string) Option {
}
}

// WithIsPrivileged returns an Option that records whether the container is
// privileged on the image being built.
func WithIsPrivileged(isPrivileged bool) Option {
	setPrivileged := func(target *builder) error {
		target.isPrivileged = isPrivileged
		return nil
	}
	return setPrivileged
}

// WithMounts sets the mounts associated with the CUDA image.
func WithMounts(mounts []specs.Mount) Option {
return func(b *builder) error {
Expand Down
48 changes: 46 additions & 2 deletions internal/config/image/cuda_image.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ const (
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type CUDA struct {
env map[string]string
mounts []specs.Mount
annotations map[string]string
env map[string]string
mounts []specs.Mount
isPrivileged bool
}

// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
Expand All @@ -54,7 +56,9 @@ func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {

return New(
WithEnv(env),
WithAnnotations(spec.Annotations),
WithMounts(spec.Mounts),
WithIsPrivileged(IsPrivileged(spec)),
)
}

Expand All @@ -64,6 +68,11 @@ func NewCUDAImageFromEnv(env []string) (CUDA, error) {
return New(WithEnv(env))
}

// IsPrivileged reports whether the container was started with elevated
// privileges, as recorded on the image when it was constructed.
func (i CUDA) IsPrivileged() bool {
	return i.isPrivileged
}

// Getenv returns the value of the specified environment variable.
// If the environment variable is not specified, an empty string is returned.
func (i CUDA) Getenv(key string) string {
Expand Down Expand Up @@ -274,3 +283,38 @@ func (i CUDA) CDIDevicesFromMounts() []string {
}
return devices
}

// CDIDevicesFromAnnotations collects the CDI devices requested through the
// container annotations.
//
// Only annotations whose key starts with one of the specified prefixes are
// considered. The value of each such annotation is split on commas and every
// entry must be a fully-qualified CDI device name; otherwise an error naming
// the offending device and annotation key is returned.
//
// Each device name is returned at most once. Because the annotations are held
// in a map, the order of the returned devices is not guaranteed. If no
// prefixes are specified, no devices are returned.
func (i CUDA) CDIDevicesFromAnnotations(prefixes ...string) ([]string, error) {
	if len(prefixes) == 0 {
		return nil, nil
	}

	// matchesPrefix reports whether the annotation key starts with any of the
	// requested prefixes.
	matchesPrefix := func(key string) bool {
		for _, candidate := range prefixes {
			if strings.HasPrefix(key, candidate) {
				return true
			}
		}
		return false
	}

	var unique []string
	visited := make(map[string]bool)
	for key, value := range i.annotations {
		if !matchesPrefix(key) {
			continue
		}
		for _, name := range strings.Split(value, ",") {
			if !parser.IsQualifiedName(name) {
				return nil, fmt.Errorf("invalid device name %q in annotation %q", name, key)
			}
			if visited[name] {
				continue
			}
			visited[name] = true
			unique = append(unique, name)
		}
	}

	return unique, nil
}
3 changes: 2 additions & 1 deletion internal/cuda/cuda.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ import (
)

/*
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup

#ifdef _WIN32
#define CUDAAPI __stdcall
Expand Down
4 changes: 3 additions & 1 deletion internal/dxcore/dxcore.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
package dxcore

/*
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#cgo linux LDFLAGS: -Wl,--export-dynamic -Wl,--unresolved-symbols=ignore-in-object-files
#cgo darwin LDFLAGS: -Wl,-undefined,dynamic_lookup

#include <dxcore.h>
*/
import "C"
Expand Down
24 changes: 7 additions & 17 deletions internal/modifier/cdi.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ import (
// NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is
// used to select the devices to include.
func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
devices, err := getDevicesFromSpec(logger, ociSpec, cfg)
func NewCDIModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
devices, err := getDevicesFromContainer(logger, cfg, image)
if err != nil {
return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err)
}
Expand Down Expand Up @@ -65,32 +65,22 @@ func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spe
)
}

func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.Config) ([]string, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
}

annotationDevices, err := getAnnotationDevices(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes, rawSpec.Annotations)
func getDevicesFromContainer(logger logger.Interface, cfg *config.Config, image image.CUDA) ([]string, error) {
annotationDevices, err := image.CDIDevicesFromAnnotations(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes...)
if err != nil {
return nil, fmt.Errorf("failed to parse container annotations: %v", err)
}
if len(annotationDevices) > 0 {
return annotationDevices, nil
}

container, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil {
return nil, err
}
if cfg.AcceptDeviceListAsVolumeMounts {
mountDevices := container.CDIDevicesFromMounts()
mountDevices := image.CDIDevicesFromMounts()
if len(mountDevices) > 0 {
return mountDevices, nil
}
}

envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar)
envDevices := image.DevicesFromEnvvars(visibleDevicesEnvvar)

var devices []string
seen := make(map[string]bool)
Expand All @@ -109,7 +99,7 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
return nil, nil
}

if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) {
if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged() {
return devices, nil
}

Expand Down
Loading
Loading