Move to JetPack 6.1 for Xavier

- upgrade cross compilation cuda to 12.6 to match JetPack 6.1
- adds support for sm90a (THOR)

Signed-off-by: Janusz Lisiecki <[email protected]>
NVIDIA · Jan 17, 2025 · 6e3f163 · 6e3f163
1 parent 041edf1
commit 6e3f163
Show file tree

Hide file tree

Showing 8 changed files with 41 additions and 11 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -352,7 +352,7 @@ set(CMAKE_C_STANDARD 11)
 set(CMAKE_CUDA_STANDARD 17)
 
 # CXX flags
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-free-nonheap-object -Wno-unused-variable -Wno-unused-function -Wno-strict-overflow -fno-strict-aliasing -fPIC -fvisibility=hidden")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-free-nonheap-object -Wno-unused-variable -Wno-unused-function -Wno-strict-overflow -fno-strict-aliasing -fPIC -fvisibility=hidden -Wno-array-bounds")
 
 if (WERROR)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")

diff --git a/cmake/CUDA_utils.cmake b/cmake/CUDA_utils.cmake
@@ -40,7 +40,7 @@ endfunction()
 # List of currently used arch values
 if (${ARCH} MATCHES "aarch64-")
   # aarch64-linux
-  set(CUDA_known_archs "53" "62" "72" "75" "87")
+  set(CUDA_known_archs "53" "62" "72" "75" "87" "90a")
 elseif (${ARCH} MATCHES "aarch64")
   # aarch64 SBSA, only >=Volta
   # from the whole list: "70" "75" "80" "86"

diff --git a/dali/kernels/test/scatter_gather_test.cc b/dali/kernels/test/scatter_gather_test.cc
@@ -15,6 +15,7 @@
 #include <gtest/gtest.h>
 #include <vector>
 #include <algorithm>
+#include <random>
 
 #include "dali/core/cuda_error.h"
 #include "dali/core/mm/memory.h"
@@ -132,8 +133,10 @@ class ScatterGatherTest : public testing::Test {
       j += l;
     }
 
-    std::random_shuffle(ranges.begin(), ranges.end());
-    std::random_shuffle(back_ranges.begin(), back_ranges.end());
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(ranges.begin(), ranges.end(), g);
+    std::shuffle(back_ranges.begin(), back_ranges.end(), g);
 
     this->template Memcpy<kind>(in_ptr.get(), in.data(), in.size(), cudaMemcpyHostToDevice);
     this->template Memset<kind>(out_ptr.get(), 0, out.size());

diff --git a/dali/operators/image/crop/bbox_crop.cc b/dali/operators/image/crop/bbox_crop.cc
@@ -679,7 +679,9 @@ class RandomBBoxCropImpl : public OpImplBase<CPUBackend> {
 
     std::array<int, ndim> order;
     std::iota(order.begin(), order.end(), 0);
-    std::random_shuffle(order.begin(), order.end());
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(order.begin(), order.end(), g);
 
     float max_extent = 0.0f;
     for (int d = 0; d < ndim; d++) {

diff --git a/dali/pipeline/executor/executor_test.cc b/dali/pipeline/executor/executor_test.cc
@@ -166,6 +166,16 @@ TYPED_TEST(ExecutorTest, DISABLED_TestDataSetup) {
   vector<string> outputs = {"data3_gpu"};
   exe->Build(&graph, outputs);
 
+#pragma GCC diagnostic push
+// most recent gcc seems to incorrectly report this as being null in this test
+#if defined(__has_warning)
+  #if __has_warning("-Wnonnull")
+    #pragma GCC diagnostic ignored "-Wnonnull"
+  #endif
+#else
+  #pragma GCC diagnostic ignored "-Wnonnull"
+#endif
+
   // Verify the data has been setup correctly
   for (int i = 0; i < 2; ++i) {
     auto host_workspaces = this->CPUData(exe.get(), i);
@@ -193,6 +203,7 @@ TYPED_TEST(ExecutorTest, DISABLED_TestDataSetup) {
     ASSERT_EQ(dws.NumOutput(), 1);
     ASSERT_TRUE(dws.OutputIsType<GPUBackend>(0));
   }
+  #pragma GCC diagnostic pop
 }
 
 TYPED_TEST(ExecutorTest, TestRunBasicGraph) {

diff --git a/docker/Dockerfile.build.aarch64-linux b/docker/Dockerfile.build.aarch64-linux
@@ -1,9 +1,9 @@
-ARG AARCH64_BASE_IMAGE=nvidia/cuda:11.8.0-devel-ubuntu20.04
+ARG AARCH64_BASE_IMAGE=nvidia/cuda:12.6.3-devel-ubuntu20.04
 FROM ${AARCH64_BASE_IMAGE}
 
 ENV DEBIAN_FRONTEND=noninteractive \
-    CUDA_CROSS_VERSION=11-8 \
-    CUDA_CROSS_VERSION_DOT=11.8
+    CUDA_CROSS_VERSION=12-6 \
+    CUDA_CROSS_VERSION_DOT=12.6
 
 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
     apt-get update && apt-get install software-properties-common -y --no-install-recommends && \
@@ -33,7 +33,7 @@ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/
     python3.12 python3.12-dev \
     python3.13 python3.13-dev && \
     apt-key adv --fetch-key http://repo.download.nvidia.com/jetson/jetson-ota-public.asc && \
-    add-apt-repository 'deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/cross-linux-aarch64/ /' && \
+    add-apt-repository 'deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/ /' && \
     apt-get update && \
     apt-get install -y cuda-cudart-cross-aarch64-${CUDA_CROSS_VERSION} \
                        libcufft-cross-aarch64-${CUDA_CROSS_VERSION} \

diff --git a/include/dali/core/small_vector.h b/include/dali/core/small_vector.h
@@ -23,6 +23,18 @@
 #include "dali/core/util.h"
 #include "dali/core/cuda_utils.h"
 
+#pragma GCC diagnostic push
+#if __GNUC__ > 11
+  // most recent gcc seems to be confused by some things in small vector raising false warnings
+  #if defined(__has_warning)
+    #if __has_warning("-Wuse-after-free")
+      #pragma GCC diagnostic ignored "-Wuse-after-free"
+    #endif
+  #else
+    #pragma GCC diagnostic ignored "-Wuse-after-free"
+  #endif
+#endif
+
 namespace dali {
 
 template <typename T, typename Allocator, bool Contextless = std::is_empty<Allocator>::value>
@@ -732,4 +744,6 @@ class SmallVector : SmallVectorAlloc<T, allocator>, SmallVectorBase<T> {
 
 }  // namespace dali
 
+#pragma GCC diagnostic pop
+
 #endif  // DALI_CORE_SMALL_VECTOR_H_
diff --git a/qa/setup_test_common.sh b/qa/setup_test_common.sh
@@ -49,10 +49,10 @@ echo "LD_LIBRARY_PATH is $LD_LIBRARY_PATH"
 # /usr/lib/aarch64-linux-gnu/libGLdispatch.so.0: cannot allocate memory in static TLS block
 # Seems there's an issue with libc:
 # https://bugzilla.redhat.com/show_bug.cgi?id=1722181
-# A fix has been proposed here: 
+# A fix has been proposed here:
 # https://sourceware.org/ml/libc-alpha/2020-01/msg00099.html
 preload_static_tls_libs() {
-    if [ "$(uname -m)" = "aarch64" ]; then
+    if [ "$(uname -m)" = "aarch64" ] && [ -f /usr/lib/aarch64-linux-gnu/libGLdispatch.so.0 ] ; then
         if [ -z "$LD_PRELOAD" ]; then
             export LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0"
         else