BUILD: fix building with nvc (openucx#680)

edgargabriel · Dec 2, 2022 · e21e8cd
1 parent 5a2128f
commit e21e8cd
Show file tree

Hide file tree

Showing 16 changed files with 83 additions and 60 deletions.
diff --git a/config/m4/compiler.m4 b/config/m4/compiler.m4
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2001-2014, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2001-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED.
 # Copyright (C) ARM Ltd. 2016-2020.  ALL RIGHTS RESERVED.
 # See file LICENSE for terms.
@@ -445,12 +445,15 @@ AC_LANG_POP
 # --diag_suppress 1215 - Suppress deprecated API warning for PGI18 compiler
 # --diag_suppress 1901 - Use of a const variable in a constant expression is nonstandard in C
 # --diag_suppress 1902 - Use of a const variable in a constant expression is nonstandard in C (same as 1901)
+# --diag_suppress 301  - Suppress "typedef name has already been declared (with same type)" warning [duplicate_typedef]
+
 ADD_COMPILER_FLAGS_IF_SUPPORTED([[--display_error_number],
                                  [--diag_suppress 181],
                                  [--diag_suppress 381],
                                  [--diag_suppress 1215],
                                  [--diag_suppress 1901],
-                                 [--diag_suppress 1902]],
+                                 [--diag_suppress 1902],
+                                 [--diag_suppress 301]],
                                 [AC_LANG_SOURCE([[int main(int argc, char **argv){return 0;}]])])
 
 

diff --git a/src/coll_patterns/recursive_knomial.h b/src/coll_patterns/recursive_knomial.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  * See file LICENSE for terms.
  */
@@ -202,8 +202,7 @@ enum {
 
 #define UCC_KN_CHECK_PHASE(_p)                                                 \
     case _p:                                                                   \
-        goto _p;                                                               \
-        break;
+        goto _p;
 
 #define UCC_KN_REDUCE_GOTO_PHASE(_phase)                                       \
     do {                                                                       \

diff --git a/src/coll_score/ucc_coll_score.c b/src/coll_score/ucc_coll_score.c
@@ -3,6 +3,7 @@
  *
  * See file LICENSE for terms.
  */
+
 #include "ucc_coll_score.h"
 #include "utils/ucc_string.h"
 #include "utils/ucc_log.h"
@@ -1000,7 +1001,7 @@ ucc_status_t ucc_coll_score_update(ucc_coll_score_t  *score,
 
     for (i = 0; i < UCC_COLL_TYPE_NUM; i++) {
         for (j = 0; j < mt_n; j++) {
-            mt = (mtypes == NULL ? j : mtypes[j]);
+            mt = (mtypes == NULL) ? (ucc_memory_type_t)j : mtypes[j];
             status = ucc_coll_score_update_one(
                 &score->scores[i][mt],
                 &update->scores[i][mt], default_score);

diff --git a/src/coll_score/ucc_coll_score_map.c b/src/coll_score/ucc_coll_score_map.c
@@ -3,6 +3,7 @@
  *
  * See file LICENSE for terms.
  */
+
 #include "ucc_coll_score.h"
 #include "utils/ucc_coll_utils.h"
 #include "utils/ucc_string.h"
@@ -179,7 +180,7 @@ void ucc_coll_score_map_print_info(const ucc_score_map_t *map)
         coll_str[0] = '\0';
         left        = sizeof(coll_str);
         STR_APPEND(coll_str, left, 32, "%s:\n",
-                   ucc_coll_type_str(UCC_BIT(i)));
+                   ucc_coll_type_str((ucc_coll_type_t)UCC_BIT(i)));
         for (j = 0; j < UCC_MEMORY_TYPE_LAST; j++) {
             if (ucc_list_is_empty(&map->score->scores[i][j])) {
                 continue;

diff --git a/src/components/ec/cpu/ec_cpu_reduce.c b/src/components/ec/cpu/ec_cpu_reduce.c
@@ -75,8 +75,8 @@
 
 #define DO_DT_REDUCE_INT(type, _srcs, _dst, _op, _count, _n_srcs)              \
     do {                                                                       \
-        const type **restrict s = (const type **restrict)_srcs;                \
-        type *restrict        d = (type * restrict) _dst;                      \
+        const type **restrict s = (const type **)_srcs;                        \
+        type *restrict        d = (type * ) _dst;                              \
         switch (_op) {                                                         \
         case UCC_OP_AVG:                                                       \
         case UCC_OP_SUM:                                                       \
@@ -171,8 +171,8 @@
 
 #define DO_DT_REDUCE_FLOAT(type, _srcs, _dst, _op, _count, _n_srcs)            \
     do {                                                                       \
-        const type **restrict s = (const type **restrict)_srcs;                \
-        type *restrict        d = (type * restrict) _dst;                      \
+        const type **restrict s = (const type **)_srcs;                        \
+        type *restrict        d = (type *) _dst;                               \
         switch (_op) {                                                         \
         case UCC_OP_AVG:                                                       \
         case UCC_OP_SUM:                                                       \
@@ -201,8 +201,8 @@
 
 #define DO_DT_REDUCE_FLOAT_COMPLEX(type, _srcs, _dst, _op, _count, _n_srcs)    \
     do {                                                                       \
-        const type **restrict s = (const type **restrict)_srcs;                \
-        type *restrict        d = (type * restrict) _dst;                      \
+        const type **restrict s = (const type **)_srcs;                        \
+        type *restrict        d = (type *) _dst;                               \
         switch (_op) {                                                         \
         case UCC_OP_AVG:                                                       \
         case UCC_OP_SUM:                                                       \

diff --git a/src/components/tl/cuda/tl_cuda_team_topo.c b/src/components/tl/cuda/tl_cuda_team_topo.c
@@ -7,7 +7,7 @@
 #include "tl_cuda_team_topo.h"
 #include "tl_cuda.h"
 
-#define UCC_TL_CUDA_TEAM_TOPO_SAME_DEVICE -1
+#define UCC_TL_CUDA_TEAM_TOPO_SAME_DEVICE ((ucc_rank_t)(UCC_RANK_MAX))
 
 static ucc_status_t
 ucc_tl_cuda_team_topo_add_ring(const ucc_tl_cuda_team_t *team,
@@ -59,7 +59,7 @@ ucc_tl_cuda_team_topo_add_ring(const ucc_tl_cuda_team_t *team,
 
 static ucc_status_t
 ucc_tl_cuda_team_topo_build_ring(const ucc_tl_cuda_team_t *team,
-                                 const int *graph,
+                                 const ucc_rank_t *graph,
                                  ucc_tl_cuda_ring_t *ring,
                                  ucc_rank_t pos,
                                  int width)
@@ -122,7 +122,7 @@ ucc_tl_cuda_team_topo_init_rings(const ucc_tl_cuda_team_t *team,
     ucc_tl_cuda_ring_t ring;
     int i, width, nr, num_rings, min_width;
     ucc_status_t status;
-    int *graph;
+    ucc_rank_t *graph;
 
     ucc_assert(size > 1);
     topo->num_rings = 0;
@@ -133,14 +133,15 @@ ucc_tl_cuda_team_topo_init_rings(const ucc_tl_cuda_team_t *team,
         return UCC_ERR_NO_MEMORY;
     }
 
-    graph = (int*) ucc_malloc(size * size * sizeof(int), "cuda_topo_graph");
+    graph = (ucc_rank_t*) ucc_malloc(size * size * sizeof(ucc_rank_t),
+                                     "cuda_topo_graph");
     if (!graph) {
         status = UCC_ERR_NO_MEMORY;
         tl_error(UCC_TL_TEAM_LIB(team), "failed to allocate topo graph");
         goto free_ring;
     }
 
-    memcpy(graph, topo->matrix, size * size * sizeof(int));
+    memcpy(graph, topo->matrix, size * size * sizeof(ucc_rank_t));
 
     num_rings = 0;
     min_width = 4;
@@ -332,7 +333,7 @@ ucc_tl_cuda_team_topo_init_proxies(const ucc_tl_cuda_team_t *team,
 
 static ucc_status_t
 ucc_tl_cuda_team_topo_init_matrix(const ucc_tl_cuda_team_t *team,
-                                  int *matrix)
+                                  ucc_rank_t *matrix)
 {
     ucc_tl_cuda_topo_t *topo = UCC_TL_CUDA_TEAM_CTX(team)->topo;
     int                 size = UCC_TL_TEAM_SIZE(team);
@@ -375,8 +376,8 @@ ucc_status_t ucc_tl_cuda_team_topo_create(const ucc_tl_team_t *cuda_team,
         return UCC_ERR_NO_MEMORY;
     }
 
-    topo->matrix = (int*)ucc_malloc(size * size * sizeof(int),
-                                    "cuda_topo_matrix");
+    topo->matrix = (ucc_rank_t*)ucc_malloc(size * size * sizeof(ucc_rank_t),
+                                           "cuda_topo_matrix");
     if (!topo->matrix) {
         tl_error(UCC_TL_TEAM_LIB(team), "failed to alloc cuda team topo matrix");
         status = UCC_ERR_NO_MEMORY;

diff --git a/src/components/tl/cuda/tl_cuda_team_topo.h b/src/components/tl/cuda/tl_cuda_team_topo.h
@@ -22,7 +22,7 @@ typedef struct ucc_tl_cuda_ring {
 } ucc_tl_cuda_ring_t;
 
 typedef struct ucc_tl_cuda_team_topo {
-    int                     *matrix;       /* nvlink adjacency matrix */
+    ucc_rank_t              *matrix;       /* nvlink adjacency matrix */
     int                      proxy_needed; /* is proxy needed for current rank */
     int                      num_proxies;  /* number of entries in proxies list */
     ucc_tl_cuda_proxy_t     *proxies;      /* list of pairs where current rank is proxy */

diff --git a/src/components/tl/cuda/tl_cuda_topo.c b/src/components/tl/cuda/tl_cuda_topo.c
@@ -334,7 +334,7 @@ ucc_status_t ucc_tl_cuda_topo_create(const ucc_base_lib_t *lib,
 ucc_status_t ucc_tl_cuda_topo_num_links(const ucc_tl_cuda_topo_t *topo,
                                         const ucc_tl_cuda_device_pci_id_t *dev1,
                                         const ucc_tl_cuda_device_pci_id_t *dev2,
-                                        int *num_links)
+                                        ucc_rank_t *num_links)
 {
     ucc_status_t status;
     ucc_tl_cuda_topo_node_t *dev1_node, *dev2_node;

diff --git a/src/components/tl/cuda/tl_cuda_topo.h b/src/components/tl/cuda/tl_cuda_topo.h
@@ -65,6 +65,6 @@ ucc_status_t ucc_tl_cuda_topo_destroy(ucc_tl_cuda_topo_t *cuda_topo);
 ucc_status_t ucc_tl_cuda_topo_num_links(const ucc_tl_cuda_topo_t *topo,
                                         const ucc_tl_cuda_device_pci_id_t *dev1,
                                         const ucc_tl_cuda_device_pci_id_t *dev2,
-                                        int *num_links);
+                                        ucc_rank_t *num_links);
 
 #endif
diff --git a/src/components/tl/sharp/tl_sharp_context.c b/src/components/tl/sharp/tl_sharp_context.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  * See file LICENSE for terms.
  */
@@ -288,7 +288,7 @@ ucc_status_t ucc_tl_sharp_context_init(ucc_tl_sharp_context_t *sharp_ctx,
     struct sharp_coll_init_spec  init_spec = {0};
     ucc_tl_sharp_lib_t          *lib       = ucc_derived_of(sharp_ctx->super.super.lib,
                                                             ucc_tl_sharp_lib_t);
-    ucc_status_t status;
+    int ret;
 
     init_spec.progress_func                  = NULL;
     init_spec.world_local_rank               = 0;
@@ -321,15 +321,15 @@ ucc_status_t ucc_tl_sharp_context_init(ucc_tl_sharp_context_t *sharp_ctx,
     }
 
     //TODO: replace with unique context ID?
-    status = init_spec.oob_colls.bcast((void *)oob_ctx,
-                                        &init_spec.job_id,
-                                        sizeof(uint64_t), 0);
-    if (status != UCC_OK) {
-        tl_error(sharp_ctx->super.super.lib, "failed to broadcast SHARP job_id");
-        return status;
+    ret = init_spec.oob_colls.bcast((void *)oob_ctx, &init_spec.job_id,
+                                    sizeof(uint64_t), 0);
+    if (ret < 0) {
+        tl_error(sharp_ctx->super.super.lib,
+                 "failed to broadcast SHARP job_id");
+        return UCC_ERR_NO_MESSAGE;
     }
 
-    int ret = sharp_coll_init(&init_spec, context);
+    ret = sharp_coll_init(&init_spec, context);
     if (ret < 0 ) {
         tl_debug(sharp_ctx->super.super.lib, "Failed to initialize SHARP "
                  "collectives:%s(%d) job ID:%" PRIu64"\n",

diff --git a/src/components/tl/ucp/allgatherv/allgatherv_ring.c b/src/components/tl/ucp/allgatherv/allgatherv_ring.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  * See file LICENSE for terms.
  */

diff --git a/src/components/tl/ucp/tl_ucp.h b/src/components/tl/ucp/tl_ucp.h
@@ -109,7 +109,7 @@ typedef struct ucc_tl_ucp_context {
     ucc_tl_ucp_context_config_t cfg;
     ucc_tl_ucp_worker_t         worker;
     ucc_tl_ucp_worker_t         service_worker;
-    int                         service_worker_throttling_count;
+    uint32_t                    service_worker_throttling_count;
     ucc_mpool_t                 req_mp;
     ucc_tl_ucp_remote_info_t *  remote_info;
     ucp_rkey_h *                rkeys;

diff --git a/src/components/topo/ucc_topo.c b/src/components/topo/ucc_topo.c
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
  * See file LICENSE for terms.
  */
 
@@ -172,7 +173,7 @@ ucc_status_t ucc_topo_init(ucc_subset_t set, ucc_context_topo_t *ctx_topo,
         topo->sbgps[i].status = UCC_SBGP_NOT_INIT;
     }
     topo->n_sockets           = -1;
-    topo->node_leader_rank    = -1;
+    topo->node_leader_rank    = UCC_RANK_INVALID;
     topo->node_leader_rank_id = 0;
     topo->set                 = set;
     topo->min_ppn             = UCC_RANK_MAX;

diff --git a/src/core/ucc_context.h b/src/core/ucc_context.h
@@ -1,5 +1,6 @@
 /**
- * Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
  * See file LICENSE for terms.
  */
 
@@ -53,18 +54,23 @@ typedef struct ucc_context {
     ucc_cl_context_t       **cl_ctx;
     ucc_tl_context_t       **tl_ctx;
     ucc_tl_context_t        *service_ctx;
-    int                      n_cl_ctx;
-    int                      n_tl_ctx;
-    int                      n_addr_packed; /*< Number of LT/CL components whose addresses are packed
-                                              into ucc_context->attr.addr */
+    unsigned                 n_cl_ctx;
+    unsigned                 n_tl_ctx;
+/**
+ *  Number of TL/CL components whose addresses are packed into
+ *  ucc_context->attr.addr
+ */
+    int                      n_addr_packed;
     ucc_config_names_array_t all_tls;
     ucc_list_link_t          progress_list;
     ucc_progress_queue_t    *pq;
     ucc_team_id_pool_t       ids;
     ucc_context_id_t         id;
     ucc_addr_storage_t       addr_storage;
-    ucc_rank_t               rank; /*< rank of a process in the "global" (with
-                                     OOB) context */
+/**
+ *  rank of a process in the "global" (with OOB) context
+ */
+    ucc_rank_t               rank;
     ucc_context_topo_t      *topo;
     uint64_t                 cl_flags;
     ucc_tl_team_t           *service_team;

diff --git a/src/utils/ucc_parser.c b/src/utils/ucc_parser.c
@@ -667,23 +667,23 @@ static ucc_pipeline_params_t ucc_pipeline_params_auto = {
     .n_frags   = 0,
     .frag_size = 0,
     .pdepth    = 0,
-    .order     = 0
+    .order     = UCC_PIPELINE_PARALLEL,
 };
 
 static ucc_pipeline_params_t ucc_pipeline_params_no = {
     .threshold = SIZE_MAX,
     .n_frags   = 0,
     .frag_size = 0,
     .pdepth    = 1,
-    .order     = 0
+    .order     = UCC_PIPELINE_PARALLEL,
 };
 
 static ucc_pipeline_params_t ucc_pipeline_params_default = {
     .threshold = SIZE_MAX,
     .n_frags   = 2,
     .frag_size = SIZE_MAX,
     .pdepth    = 2,
-    .order     = UCC_PIPELINE_SEQUENTIAL
+    .order     = UCC_PIPELINE_SEQUENTIAL,
 };
 
 int ucc_pipeline_params_is_auto(const ucc_pipeline_params_t *p)

diff --git a/src/utils/ucc_proc_info.c b/src/utils/ucc_proc_info.c
@@ -1,7 +1,9 @@
 /**
-* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+*
 * See file LICENSE for terms.
 */
+
 #include "ucc_proc_info.h"
 #include "ucc_log.h"
 #include "utils/ucc_malloc.h"
@@ -208,25 +210,34 @@ static ucc_status_t ucc_get_bound_numa_id(ucc_numa_id_t *numaid)
     char *       error;
     void *       handle, *cpumask;
     int          i, numa_node, n_cfg_cpus, nn;
+    int (*ucc_numa_available)(void);
+    int (*ucc_numa_num_configured_cpus)(void);
+    void *(*ucc_numa_allocate_cpumask)(void);
+    void *(*ucc_numa_sched_getaffinity)(int, void *);
+    int (*ucc_numa_bitmask_isbitset)(void *, int);
+    int (*ucc_numa_node_of_cpu)(int);
+    int (*ucc_numa_bitmask_free)(void *);
 
     handle = dlopen("libnuma.so", RTLD_LAZY);
     if (!handle) {
         ucc_debug("%s", dlerror());
         return UCC_ERR_NOT_FOUND;
     }
 
-    int (*ucc_numa_available)(void) =
-        LOAD_NUMA_SYM("numa_available");
-    int (*ucc_numa_num_configured_cpus)(void) =
-        LOAD_NUMA_SYM("numa_num_configured_cpus");
-    void *(*ucc_numa_allocate_cpumask)(void) =
-        LOAD_NUMA_SYM("numa_allocate_cpumask");
-    void *(*ucc_numa_sched_getaffinity)(int, void *) =
-        LOAD_NUMA_SYM("numa_sched_getaffinity");
-    int (*ucc_numa_bitmask_isbitset)(void *, int) =
-        LOAD_NUMA_SYM("numa_bitmask_isbitset");
-    int (*ucc_numa_node_of_cpu)(int)     = LOAD_NUMA_SYM("numa_node_of_cpu");
-    int (*ucc_numa_bitmask_free)(void *) = LOAD_NUMA_SYM("numa_bitmask_free");
+    ucc_numa_available           =
+        (int(*)(void))LOAD_NUMA_SYM("numa_available");
+    ucc_numa_num_configured_cpus =
+        (int(*)(void))LOAD_NUMA_SYM("numa_num_configured_cpus");
+    ucc_numa_allocate_cpumask    =
+        (void*(*)(void))LOAD_NUMA_SYM("numa_allocate_cpumask");
+    ucc_numa_sched_getaffinity   =
+        (void*(*)(int, void*))LOAD_NUMA_SYM("numa_sched_getaffinity");
+    ucc_numa_bitmask_isbitset    =
+        (int(*)(void*, int))LOAD_NUMA_SYM("numa_bitmask_isbitset");
+    ucc_numa_node_of_cpu         =
+        (int(*)(int))LOAD_NUMA_SYM("numa_node_of_cpu");
+    ucc_numa_bitmask_free        =
+        (int(*)(void*))LOAD_NUMA_SYM("numa_bitmask_free");
 
     if (-1 == ucc_numa_available()) {
         ucc_debug("libnuma is not available");