550.40.59

NVIDIA · Apr 1, 2024 · acebc4b · acebc4b
1 parent 66b6384
commit acebc4b
Show file tree

Hide file tree

Showing 55 changed files with 1,137 additions and 509 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## Release 550 Entries
 
+### [550.40.59] 2024-04-01
+
 ### [550.40.55] 2024-03-07
 
 ### [550.40.53] 2024-02-28

diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source
 
 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 550.40.55.
+version 550.40.59.
 
 
 ## How to Build
@@ -17,7 +17,7 @@ as root:
 
 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-550.40.55 driver release.  This can be achieved by installing
+550.40.59 driver release.  This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option.  E.g.,
 
@@ -188,7 +188,7 @@ encountered specific to them.
 For details on feature support and limitations, see the NVIDIA GPU driver
 end user README here:
 
-https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.55/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.59/README/kernel_open.html
 
 For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
 Package for more details.
@@ -867,13 +867,15 @@ Subsystem Device ID.
 | NVIDIA GeForce RTX 4080 SUPER                   | 2702           |
 | NVIDIA GeForce RTX 4080                         | 2704           |
 | NVIDIA GeForce RTX 4070 Ti SUPER                | 2705           |
+| NVIDIA GeForce RTX 4070                         | 2709           |
 | NVIDIA GeForce RTX 4090 Laptop GPU              | 2717           |
 | NVIDIA RTX 5000 Ada Generation Laptop GPU       | 2730           |
 | NVIDIA GeForce RTX 4090 Laptop GPU              | 2757           |
 | NVIDIA RTX 5000 Ada Generation Embedded GPU     | 2770           |
 | NVIDIA GeForce RTX 4070 Ti                      | 2782           |
 | NVIDIA GeForce RTX 4070 SUPER                   | 2783           |
 | NVIDIA GeForce RTX 4070                         | 2786           |
+| NVIDIA GeForce RTX 4060 Ti                      | 2788           |
 | NVIDIA GeForce RTX 4080 Laptop GPU              | 27A0           |
 | NVIDIA RTX 4000 SFF Ada Generation              | 27B0 1028 16FA |
 | NVIDIA RTX 4000 SFF Ada Generation              | 27B0 103C 16FA |
@@ -896,6 +898,7 @@ Subsystem Device ID.
 | NVIDIA RTX 3500 Ada Generation Embedded GPU     | 27FB           |
 | NVIDIA GeForce RTX 4060 Ti                      | 2803           |
 | NVIDIA GeForce RTX 4060 Ti                      | 2805           |
+| NVIDIA GeForce RTX 4060                         | 2808           |
 | NVIDIA GeForce RTX 4070 Laptop GPU              | 2820           |
 | NVIDIA RTX 3000 Ada Generation Laptop GPU       | 2838           |
 | NVIDIA GeForce RTX 4070 Laptop GPU              | 2860           |

diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.55\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.59\"
 
 ifneq ($(SYSSRCHOST1X),)
  EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -170,6 +170,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
 NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
 
 NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
+NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
+NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
 NV_CONFTEST_CFLAGS += -Wno-error
 
 NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h

diff --git a/kernel-open/common/inc/nv-linux.h b/kernel-open/common/inc/nv-linux.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2001-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -1989,31 +1989,6 @@ static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
     return nvl->numa_info.use_auto_online;
 }
 
-typedef struct {
-    NvU64 base;
-    NvU64 size;
-    NvU32 nodeId;
-    int ret;
-} remove_numa_memory_info_t;
-
-static void offline_numa_memory_callback
-(
-    void *args
-)
-{
-#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
-    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
-#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
-    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
-                                               pNumaInfo->base,
-                                               pNumaInfo->size);
-#else
-    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
-                                               pNumaInfo->size);
-#endif
-#endif
-}
-
 typedef enum
 {
     NV_NUMA_STATUS_DISABLED             = 0,

diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh
@@ -5168,11 +5168,15 @@ compile_test() {
             # commit 49a3f51dfeee ("drm/gem: Use struct dma_buf_map in GEM
             # vmap ops and convert GEM backends") in v5.11.
             #
+            # Note that the 'map' argument type is changed from 'struct dma_buf_map'
+            # to 'struct iosys_map' by commit 7938f4218168 ("dma-buf-map: Rename
+            # to iosys-map") in v5.18.
+            #
             CODE="
             #include <drm/drm_gem.h>
             int conftest_drm_gem_object_vmap_has_map_arg(
-                    struct drm_gem_object *obj, struct dma_buf_map *map) {
-                return obj->funcs->vmap(obj, map);
+                    struct drm_gem_object *obj) {
+                return obj->funcs->vmap(obj, NULL);
             }"
 
             compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"

diff --git a/kernel-open/nvidia-uvm/uvm_channel_test.c b/kernel-open/nvidia-uvm/uvm_channel_test.c
@@ -691,12 +691,16 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
             if (uvm_test_rng_range_32(&rng, 0, 1) == 0) {
                 NvU32 random_stream_index = uvm_test_rng_range_32(&rng, 0, num_streams - 1);
                 uvm_test_stream_t *random_stream = &streams[random_stream_index];
-                uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
-                snapshot_counter(&stream->push,
-                                 random_stream->counter_mem,
-                                 stream->other_stream_counter_snapshots_mem,
-                                 i,
-                                 random_stream->queued_counter_repeat);
+
+                if ((random_stream->push.gpu == gpu) || uvm_push_allow_dependencies_across_gpus()) {
+                    uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
+
+                    snapshot_counter(&stream->push,
+                                     random_stream->counter_mem,
+                                     stream->other_stream_counter_snapshots_mem,
+                                     i,
+                                     random_stream->queued_counter_repeat);
+                }
             }
 
             uvm_push_end(&stream->push);

diff --git a/kernel-open/nvidia-uvm/uvm_fault_buffer_flush_test.c b/kernel-open/nvidia-uvm/uvm_fault_buffer_flush_test.c
@@ -51,8 +51,10 @@ NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params
 
     uvm_va_space_up_read(va_space);
 
-    if (uvm_processor_mask_empty(retained_gpus))
-        return NV_ERR_INVALID_DEVICE;
+    if (uvm_processor_mask_empty(retained_gpus)) {
+        status = NV_ERR_INVALID_DEVICE;
+        goto out;
+    }
 
     for (i = 0; i < params->iterations; i++) {
         if (fatal_signal_pending(current)) {

diff --git a/kernel-open/nvidia-uvm/uvm_global.h b/kernel-open/nvidia-uvm/uvm_global.h
@@ -409,4 +409,10 @@ NV_STATUS uvm_service_block_context_init(void);
 // Release fault service contexts if any exist.
 void uvm_service_block_context_exit(void);
 
+// Allocate a service block context
+uvm_service_block_context_t *uvm_service_block_context_alloc(struct mm_struct *mm);
+
+// Free a service block context
+void uvm_service_block_context_free(uvm_service_block_context_t *service_context);
+
 #endif // __UVM_GLOBAL_H__
diff --git a/kernel-open/nvidia-uvm/uvm_gpu.h b/kernel-open/nvidia-uvm/uvm_gpu.h
@@ -160,6 +160,10 @@ struct uvm_service_block_context_struct
     // Pages whose permissions need to be revoked from other processors
     uvm_page_mask_t revocation_mask;
 
+    // Temporary mask used in service_va_block_locked() in
+    // uvm_gpu_access_counters.c.
+    uvm_processor_mask_t update_processors;
+
     struct
     {
         // Per-processor mask with the pages that will be resident after
@@ -593,16 +597,21 @@ typedef enum
     UVM_GPU_LINK_MAX
 } uvm_gpu_link_type_t;
 
-// UVM does not support P2P copies on pre-Pascal GPUs. Pascal+ GPUs only
-// support virtual addresses in P2P copies. Therefore, a peer identity mapping
-// needs to be created.
-// Ampere+ GPUs support physical peer copies, too, so identity mappings are not
-// needed
 typedef enum
 {
+    // Peer copies can be disallowed for a variety of reasons. For example,
+    // P2P transfers are disabled in pre-Pascal GPUs because there is no
+    // compelling use case for direct peer migrations.
     UVM_GPU_PEER_COPY_MODE_UNSUPPORTED,
+
+    // Pascal+ GPUs support virtual addresses in P2P copies. Virtual peer copies
+    // require the creation of peer identity mappings.
     UVM_GPU_PEER_COPY_MODE_VIRTUAL,
+
+    // Ampere+ GPUs support virtual and physical peer copies. Physical peer
+    // copies do not depend on peer identity mappings.
     UVM_GPU_PEER_COPY_MODE_PHYSICAL,
+
     UVM_GPU_PEER_COPY_MODE_COUNT
 } uvm_gpu_peer_copy_mode_t;
 

diff --git a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
@@ -1087,12 +1087,12 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
     // pages to be serviced
     if (page_count > 0) {
         uvm_processor_id_t id;
-        uvm_processor_mask_t update_processors;
+        uvm_processor_mask_t *update_processors = &service_context->update_processors;
 
-        uvm_processor_mask_and(&update_processors, &va_block->resident, &service_context->resident_processors);
+        uvm_processor_mask_and(update_processors, &va_block->resident, &service_context->resident_processors);
 
         // Remove pages that are already resident in the destination processors
-        for_each_id_in_mask(id, &update_processors) {
+        for_each_id_in_mask(id, update_processors) {
             bool migrate_pages;
             uvm_page_mask_t *residency_mask = uvm_va_block_resident_mask_get(va_block, id, NUMA_NO_NODE);
             UVM_ASSERT(residency_mask);

diff --git a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
@@ -357,12 +357,18 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
 {
     NV_STATUS status;
     uvm_push_t push;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
+    uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
+
+    UVM_ASSERT(tracker != NULL);
+
+    status = uvm_tracker_add_tracker_safe(tracker, replay_tracker);
+    if (status != NV_OK)
+        return status;
 
     if (global_cancel) {
         status = uvm_push_begin_acquire(gpu->channel_manager,
                                         UVM_CHANNEL_TYPE_MEMOPS,
-                                        &replayable_faults->replay_tracker,
+                                        tracker,
                                         &push,
                                         "Cancel targeting instance_ptr {0x%llx:%s}\n",
                                         instance_ptr.address,
@@ -371,7 +377,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
     else {
         status = uvm_push_begin_acquire(gpu->channel_manager,
                                         UVM_CHANNEL_TYPE_MEMOPS,
-                                        &replayable_faults->replay_tracker,
+                                        tracker,
                                         &push,
                                         "Cancel targeting instance_ptr {0x%llx:%s} gpc %u client %u\n",
                                         instance_ptr.address,
@@ -382,17 +388,15 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
 
     UVM_ASSERT(status == NV_OK);
     if (status != NV_OK) {
-        UVM_ERR_PRINT("Failed to create push and acquire replay tracker before pushing cancel: %s, GPU %s\n",
+        UVM_ERR_PRINT("Failed to create push and acquire trackers before pushing cancel: %s, GPU %s\n",
                       nvstatusToString(status),
                       uvm_gpu_name(gpu));
         return status;
     }
 
-    uvm_push_acquire_tracker(&push, tracker);
-
     if (global_cancel)
         gpu->parent->host_hal->cancel_faults_global(&push, instance_ptr);
-     else
+    else
         gpu->parent->host_hal->cancel_faults_targeted(&push, instance_ptr, gpc_id, client_id);
 
     // We don't need to put the cancel in the GPU replay tracker since we wait
@@ -403,7 +407,9 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
     if (status != NV_OK)
         UVM_ERR_PRINT("Failed to wait for pushed cancel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
 
-    uvm_tracker_clear(&replayable_faults->replay_tracker);
+    // The cancellation is complete, so the input trackers must be complete too.
+    uvm_tracker_clear(tracker);
+    uvm_tracker_clear(replay_tracker);
 
     return status;
 }