Skip to content

Commit

Permalink
550.40.59
Browse files Browse the repository at this point in the history
  • Loading branch information
russellcnv committed Apr 1, 2024
1 parent 66b6384 commit acebc4b
Show file tree
Hide file tree
Showing 55 changed files with 1,137 additions and 509 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Release 550 Entries

### [550.40.59] 2024-04-01

### [550.40.55] 2024-03-07

### [550.40.53] 2024-02-28
Expand Down
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 550.40.55.
version 550.40.59.


## How to Build
Expand All @@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
550.40.55 driver release. This can be achieved by installing
550.40.59 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

Expand Down Expand Up @@ -188,7 +188,7 @@ encountered specific to them.
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.55/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.59/README/kernel_open.html

For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
Expand Down Expand Up @@ -867,13 +867,15 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
| NVIDIA GeForce RTX 4070 | 2709 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
| NVIDIA GeForce RTX 4070 Ti | 2782 |
| NVIDIA GeForce RTX 4070 SUPER | 2783 |
| NVIDIA GeForce RTX 4070 | 2786 |
| NVIDIA GeForce RTX 4060 Ti | 2788 |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
Expand All @@ -896,6 +898,7 @@ Subsystem Device ID.
| NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
| NVIDIA GeForce RTX 4060 Ti | 2803 |
| NVIDIA GeForce RTX 4060 Ti | 2805 |
| NVIDIA GeForce RTX 4060 | 2808 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
Expand Down
4 changes: 3 additions & 1 deletion kernel-open/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.55\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.59\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
Expand Down Expand Up @@ -170,6 +170,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)

NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
NV_CONFTEST_CFLAGS += -Wno-error

NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
Expand Down
27 changes: 1 addition & 26 deletions kernel-open/common/inc/nv-linux.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2001-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
Expand Down Expand Up @@ -1989,31 +1989,6 @@ static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
return nvl->numa_info.use_auto_online;
}

/*
 * Argument/result bundle consumed by offline_numa_memory_callback().
 */
typedef struct {
NvU64 base;   /* forwarded to offline_and_remove_memory() */
NvU64 size;   /* forwarded to offline_and_remove_memory() */
NvU32 nodeId; /* forwarded only when NV_REMOVE_MEMORY_HAS_NID_ARG is defined */
int ret;      /* receives the offline_and_remove_memory() return value */
} remove_numa_memory_info_t;

/*
 * Offline and remove the memory range described by a
 * remove_numa_memory_info_t, storing the result in its 'ret' field.
 *
 * args: pointer to a remove_numa_memory_info_t, passed as void *.
 *
 * When NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT is not defined the body is empty:
 * 'args' is not read and 'ret' is left unmodified.
 */
static void offline_numa_memory_callback
(
void *args
)
{
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
/* This variant of offline_and_remove_memory() takes the node id first. */
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
pNumaInfo->base,
pNumaInfo->size);
#else
/* This variant of offline_and_remove_memory() takes only base and size. */
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
pNumaInfo->size);
#endif
#endif
}

typedef enum
{
NV_NUMA_STATUS_DISABLED = 0,
Expand Down
8 changes: 6 additions & 2 deletions kernel-open/conftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5168,11 +5168,15 @@ compile_test() {
# commit 49a3f51dfeee ("drm/gem: Use struct dma_buf_map in GEM
# vmap ops and convert GEM backends") in v5.11.
#
# Note that the 'map' argument type is changed from 'struct dma_buf_map'
# to 'struct iosys_map' by commit 7938f4218168 ("dma-buf-map: Rename
# to iosys-map") in v5.18.
#
CODE="
#include <drm/drm_gem.h>
int conftest_drm_gem_object_vmap_has_map_arg(
struct drm_gem_object *obj, struct dma_buf_map *map) {
return obj->funcs->vmap(obj, map);
struct drm_gem_object *obj) {
return obj->funcs->vmap(obj, NULL);
}"

compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"
Expand Down
16 changes: 10 additions & 6 deletions kernel-open/nvidia-uvm/uvm_channel_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -691,12 +691,16 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
if (uvm_test_rng_range_32(&rng, 0, 1) == 0) {
NvU32 random_stream_index = uvm_test_rng_range_32(&rng, 0, num_streams - 1);
uvm_test_stream_t *random_stream = &streams[random_stream_index];
uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
snapshot_counter(&stream->push,
random_stream->counter_mem,
stream->other_stream_counter_snapshots_mem,
i,
random_stream->queued_counter_repeat);

if ((random_stream->push.gpu == gpu) || uvm_push_allow_dependencies_across_gpus()) {
uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);

snapshot_counter(&stream->push,
random_stream->counter_mem,
stream->other_stream_counter_snapshots_mem,
i,
random_stream->queued_counter_repeat);
}
}

uvm_push_end(&stream->push);
Expand Down
6 changes: 4 additions & 2 deletions kernel-open/nvidia-uvm/uvm_fault_buffer_flush_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@ NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params

uvm_va_space_up_read(va_space);

if (uvm_processor_mask_empty(retained_gpus))
return NV_ERR_INVALID_DEVICE;
if (uvm_processor_mask_empty(retained_gpus)) {
status = NV_ERR_INVALID_DEVICE;
goto out;
}

for (i = 0; i < params->iterations; i++) {
if (fatal_signal_pending(current)) {
Expand Down
6 changes: 6 additions & 0 deletions kernel-open/nvidia-uvm/uvm_global.h
Original file line number Diff line number Diff line change
Expand Up @@ -409,4 +409,10 @@ NV_STATUS uvm_service_block_context_init(void);
// Release fault service contexts if any exist.
void uvm_service_block_context_exit(void);

// Allocate a service block context
uvm_service_block_context_t *uvm_service_block_context_alloc(struct mm_struct *mm);

// Free a service block context
void uvm_service_block_context_free(uvm_service_block_context_t *service_context);

#endif // __UVM_GLOBAL_H__
19 changes: 14 additions & 5 deletions kernel-open/nvidia-uvm/uvm_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ struct uvm_service_block_context_struct
// Pages whose permissions need to be revoked from other processors
uvm_page_mask_t revocation_mask;

// Temporary mask used in service_va_block_locked() in
// uvm_gpu_access_counters.c.
uvm_processor_mask_t update_processors;

struct
{
// Per-processor mask with the pages that will be resident after
Expand Down Expand Up @@ -593,16 +597,21 @@ typedef enum
UVM_GPU_LINK_MAX
} uvm_gpu_link_type_t;

// UVM does not support P2P copies on pre-Pascal GPUs. Pascal+ GPUs only
// support virtual addresses in P2P copies. Therefore, a peer identity mapping
// needs to be created.
// Ampere+ GPUs support physical peer copies, too, so identity mappings are not
// needed
typedef enum
{
// Peer copies can be disallowed for a variety of reasons. For example,
// P2P transfers are disabled in pre-Pascal GPUs because there is no
// compelling use case for direct peer migrations.
UVM_GPU_PEER_COPY_MODE_UNSUPPORTED,

// Pascal+ GPUs support virtual addresses in P2P copies. Virtual peer copies
// require the creation of peer identity mappings.
UVM_GPU_PEER_COPY_MODE_VIRTUAL,

// Ampere+ GPUs support virtual and physical peer copies. Physical peer
// copies do not depend on peer identity mappings.
UVM_GPU_PEER_COPY_MODE_PHYSICAL,

// NOTE(review): presumably a sentinel holding the number of modes above
// rather than a valid mode itself; it must then remain the last
// enumerator. Confirm against users of this enum.
UVM_GPU_PEER_COPY_MODE_COUNT
} uvm_gpu_peer_copy_mode_t;

Expand Down
6 changes: 3 additions & 3 deletions kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
Original file line number Diff line number Diff line change
Expand Up @@ -1087,12 +1087,12 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
// pages to be serviced
if (page_count > 0) {
uvm_processor_id_t id;
uvm_processor_mask_t update_processors;
uvm_processor_mask_t *update_processors = &service_context->update_processors;

uvm_processor_mask_and(&update_processors, &va_block->resident, &service_context->resident_processors);
uvm_processor_mask_and(update_processors, &va_block->resident, &service_context->resident_processors);

// Remove pages that are already resident in the destination processors
for_each_id_in_mask(id, &update_processors) {
for_each_id_in_mask(id, update_processors) {
bool migrate_pages;
uvm_page_mask_t *residency_mask = uvm_va_block_resident_mask_get(va_block, id, NUMA_NO_NODE);
UVM_ASSERT(residency_mask);
Expand Down
22 changes: 14 additions & 8 deletions kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,18 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
{
NV_STATUS status;
uvm_push_t push;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;

UVM_ASSERT(tracker != NULL);

status = uvm_tracker_add_tracker_safe(tracker, replay_tracker);
if (status != NV_OK)
return status;

if (global_cancel) {
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&replayable_faults->replay_tracker,
tracker,
&push,
"Cancel targeting instance_ptr {0x%llx:%s}\n",
instance_ptr.address,
Expand All @@ -371,7 +377,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
else {
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&replayable_faults->replay_tracker,
tracker,
&push,
"Cancel targeting instance_ptr {0x%llx:%s} gpc %u client %u\n",
instance_ptr.address,
Expand All @@ -382,17 +388,15 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,

UVM_ASSERT(status == NV_OK);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to create push and acquire replay tracker before pushing cancel: %s, GPU %s\n",
UVM_ERR_PRINT("Failed to create push and acquire trackers before pushing cancel: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
return status;
}

uvm_push_acquire_tracker(&push, tracker);

if (global_cancel)
gpu->parent->host_hal->cancel_faults_global(&push, instance_ptr);
else
else
gpu->parent->host_hal->cancel_faults_targeted(&push, instance_ptr, gpc_id, client_id);

// We don't need to put the cancel in the GPU replay tracker since we wait
Expand All @@ -403,7 +407,9 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
if (status != NV_OK)
UVM_ERR_PRINT("Failed to wait for pushed cancel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));

uvm_tracker_clear(&replayable_faults->replay_tracker);
// The cancellation is complete, so the input trackers must be complete too.
uvm_tracker_clear(tracker);
uvm_tracker_clear(replay_tracker);

return status;
}
Expand Down
Loading

0 comments on commit acebc4b

Please sign in to comment.