Skip to content

Commit

Permalink
BUILD: fix building with nvc (openucx#680)
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergei-Lebedev authored Dec 2, 2022
1 parent 5a2128f commit e21e8cd
Show file tree
Hide file tree
Showing 16 changed files with 83 additions and 60 deletions.
7 changes: 5 additions & 2 deletions config/m4/compiler.m4
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2001-2014, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2001-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED.
# Copyright (C) ARM Ltd. 2016-2020. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
Expand Down Expand Up @@ -445,12 +445,15 @@ AC_LANG_POP
# --diag_suppress 1215 - Suppress deprecated API warning for PGI18 compiler
# --diag_suppress 1901 - Use of a const variable in a constant expression is nonstandard in C
# --diag_suppress 1902 - Use of a const variable in a constant expression is nonstandard in C (same as 1901)
# --diag_suppress 301 - Suppress "typedef name has already been declared (with same type)" warning [duplicate_typedef]

ADD_COMPILER_FLAGS_IF_SUPPORTED([[--display_error_number],
[--diag_suppress 181],
[--diag_suppress 381],
[--diag_suppress 1215],
[--diag_suppress 1901],
[--diag_suppress 1902]],
[--diag_suppress 1902],
[--diag_suppress 301]],
[AC_LANG_SOURCE([[int main(int argc, char **argv){return 0;}]])])


Expand Down
5 changes: 2 additions & 3 deletions src/coll_patterns/recursive_knomial.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -202,8 +202,7 @@ enum {

#define UCC_KN_CHECK_PHASE(_p) \
case _p: \
goto _p; \
break;
goto _p;

#define UCC_KN_REDUCE_GOTO_PHASE(_phase) \
do { \
Expand Down
3 changes: 2 additions & 1 deletion src/coll_score/ucc_coll_score.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*
* See file LICENSE for terms.
*/

#include "ucc_coll_score.h"
#include "utils/ucc_string.h"
#include "utils/ucc_log.h"
Expand Down Expand Up @@ -1000,7 +1001,7 @@ ucc_status_t ucc_coll_score_update(ucc_coll_score_t *score,

for (i = 0; i < UCC_COLL_TYPE_NUM; i++) {
for (j = 0; j < mt_n; j++) {
mt = (mtypes == NULL ? j : mtypes[j]);
mt = (mtypes == NULL) ? (ucc_memory_type_t)j : mtypes[j];
status = ucc_coll_score_update_one(
&score->scores[i][mt],
&update->scores[i][mt], default_score);
Expand Down
3 changes: 2 additions & 1 deletion src/coll_score/ucc_coll_score_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*
* See file LICENSE for terms.
*/

#include "ucc_coll_score.h"
#include "utils/ucc_coll_utils.h"
#include "utils/ucc_string.h"
Expand Down Expand Up @@ -179,7 +180,7 @@ void ucc_coll_score_map_print_info(const ucc_score_map_t *map)
coll_str[0] = '\0';
left = sizeof(coll_str);
STR_APPEND(coll_str, left, 32, "%s:\n",
ucc_coll_type_str(UCC_BIT(i)));
ucc_coll_type_str((ucc_coll_type_t)UCC_BIT(i)));
for (j = 0; j < UCC_MEMORY_TYPE_LAST; j++) {
if (ucc_list_is_empty(&map->score->scores[i][j])) {
continue;
Expand Down
12 changes: 6 additions & 6 deletions src/components/ec/cpu/ec_cpu_reduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@

#define DO_DT_REDUCE_INT(type, _srcs, _dst, _op, _count, _n_srcs) \
do { \
const type **restrict s = (const type **restrict)_srcs; \
type *restrict d = (type * restrict) _dst; \
const type **restrict s = (const type **)_srcs; \
type *restrict d = (type * ) _dst; \
switch (_op) { \
case UCC_OP_AVG: \
case UCC_OP_SUM: \
Expand Down Expand Up @@ -171,8 +171,8 @@

#define DO_DT_REDUCE_FLOAT(type, _srcs, _dst, _op, _count, _n_srcs) \
do { \
const type **restrict s = (const type **restrict)_srcs; \
type *restrict d = (type * restrict) _dst; \
const type **restrict s = (const type **)_srcs; \
type *restrict d = (type *) _dst; \
switch (_op) { \
case UCC_OP_AVG: \
case UCC_OP_SUM: \
Expand Down Expand Up @@ -201,8 +201,8 @@

#define DO_DT_REDUCE_FLOAT_COMPLEX(type, _srcs, _dst, _op, _count, _n_srcs) \
do { \
const type **restrict s = (const type **restrict)_srcs; \
type *restrict d = (type * restrict) _dst; \
const type **restrict s = (const type **)_srcs; \
type *restrict d = (type *) _dst; \
switch (_op) { \
case UCC_OP_AVG: \
case UCC_OP_SUM: \
Expand Down
17 changes: 9 additions & 8 deletions src/components/tl/cuda/tl_cuda_team_topo.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "tl_cuda_team_topo.h"
#include "tl_cuda.h"

#define UCC_TL_CUDA_TEAM_TOPO_SAME_DEVICE -1
#define UCC_TL_CUDA_TEAM_TOPO_SAME_DEVICE ((ucc_rank_t)(UCC_RANK_MAX))

static ucc_status_t
ucc_tl_cuda_team_topo_add_ring(const ucc_tl_cuda_team_t *team,
Expand Down Expand Up @@ -59,7 +59,7 @@ ucc_tl_cuda_team_topo_add_ring(const ucc_tl_cuda_team_t *team,

static ucc_status_t
ucc_tl_cuda_team_topo_build_ring(const ucc_tl_cuda_team_t *team,
const int *graph,
const ucc_rank_t *graph,
ucc_tl_cuda_ring_t *ring,
ucc_rank_t pos,
int width)
Expand Down Expand Up @@ -122,7 +122,7 @@ ucc_tl_cuda_team_topo_init_rings(const ucc_tl_cuda_team_t *team,
ucc_tl_cuda_ring_t ring;
int i, width, nr, num_rings, min_width;
ucc_status_t status;
int *graph;
ucc_rank_t *graph;

ucc_assert(size > 1);
topo->num_rings = 0;
Expand All @@ -133,14 +133,15 @@ ucc_tl_cuda_team_topo_init_rings(const ucc_tl_cuda_team_t *team,
return UCC_ERR_NO_MEMORY;
}

graph = (int*) ucc_malloc(size * size * sizeof(int), "cuda_topo_graph");
graph = (ucc_rank_t*) ucc_malloc(size * size * sizeof(ucc_rank_t),
"cuda_topo_graph");
if (!graph) {
status = UCC_ERR_NO_MEMORY;
tl_error(UCC_TL_TEAM_LIB(team), "failed to allocate topo graph");
goto free_ring;
}

memcpy(graph, topo->matrix, size * size * sizeof(int));
memcpy(graph, topo->matrix, size * size * sizeof(ucc_rank_t));

num_rings = 0;
min_width = 4;
Expand Down Expand Up @@ -332,7 +333,7 @@ ucc_tl_cuda_team_topo_init_proxies(const ucc_tl_cuda_team_t *team,

static ucc_status_t
ucc_tl_cuda_team_topo_init_matrix(const ucc_tl_cuda_team_t *team,
int *matrix)
ucc_rank_t *matrix)
{
ucc_tl_cuda_topo_t *topo = UCC_TL_CUDA_TEAM_CTX(team)->topo;
int size = UCC_TL_TEAM_SIZE(team);
Expand Down Expand Up @@ -375,8 +376,8 @@ ucc_status_t ucc_tl_cuda_team_topo_create(const ucc_tl_team_t *cuda_team,
return UCC_ERR_NO_MEMORY;
}

topo->matrix = (int*)ucc_malloc(size * size * sizeof(int),
"cuda_topo_matrix");
topo->matrix = (ucc_rank_t*)ucc_malloc(size * size * sizeof(ucc_rank_t),
"cuda_topo_matrix");
if (!topo->matrix) {
tl_error(UCC_TL_TEAM_LIB(team), "failed to alloc cuda team topo matrix");
status = UCC_ERR_NO_MEMORY;
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/cuda/tl_cuda_team_topo.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ typedef struct ucc_tl_cuda_ring {
} ucc_tl_cuda_ring_t;

typedef struct ucc_tl_cuda_team_topo {
int *matrix; /* nvlink adjacency matrix */
ucc_rank_t *matrix; /* nvlink adjacency matrix */
int proxy_needed; /* is proxy needed for current rank */
int num_proxies; /* number of entries in proxies list */
ucc_tl_cuda_proxy_t *proxies; /* list of pairs where current rank is proxy */
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/cuda/tl_cuda_topo.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ ucc_status_t ucc_tl_cuda_topo_create(const ucc_base_lib_t *lib,
ucc_status_t ucc_tl_cuda_topo_num_links(const ucc_tl_cuda_topo_t *topo,
const ucc_tl_cuda_device_pci_id_t *dev1,
const ucc_tl_cuda_device_pci_id_t *dev2,
int *num_links)
ucc_rank_t *num_links)
{
ucc_status_t status;
ucc_tl_cuda_topo_node_t *dev1_node, *dev2_node;
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/cuda/tl_cuda_topo.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,6 @@ ucc_status_t ucc_tl_cuda_topo_destroy(ucc_tl_cuda_topo_t *cuda_topo);
ucc_status_t ucc_tl_cuda_topo_num_links(const ucc_tl_cuda_topo_t *topo,
const ucc_tl_cuda_device_pci_id_t *dev1,
const ucc_tl_cuda_device_pci_id_t *dev2,
int *num_links);
ucc_rank_t *num_links);

#endif
18 changes: 9 additions & 9 deletions src/components/tl/sharp/tl_sharp_context.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -288,7 +288,7 @@ ucc_status_t ucc_tl_sharp_context_init(ucc_tl_sharp_context_t *sharp_ctx,
struct sharp_coll_init_spec init_spec = {0};
ucc_tl_sharp_lib_t *lib = ucc_derived_of(sharp_ctx->super.super.lib,
ucc_tl_sharp_lib_t);
ucc_status_t status;
int ret;

init_spec.progress_func = NULL;
init_spec.world_local_rank = 0;
Expand Down Expand Up @@ -321,15 +321,15 @@ ucc_status_t ucc_tl_sharp_context_init(ucc_tl_sharp_context_t *sharp_ctx,
}

//TODO: replace with unique context ID?
status = init_spec.oob_colls.bcast((void *)oob_ctx,
&init_spec.job_id,
sizeof(uint64_t), 0);
if (status != UCC_OK) {
tl_error(sharp_ctx->super.super.lib, "failed to broadcast SHARP job_id");
return status;
ret = init_spec.oob_colls.bcast((void *)oob_ctx, &init_spec.job_id,
sizeof(uint64_t), 0);
if (ret < 0) {
tl_error(sharp_ctx->super.super.lib,
"failed to broadcast SHARP job_id");
return UCC_ERR_NO_MESSAGE;
}

int ret = sharp_coll_init(&init_spec, context);
ret = sharp_coll_init(&init_spec, context);
if (ret < 0 ) {
tl_debug(sharp_ctx->super.super.lib, "Failed to initialize SHARP "
"collectives:%s(%d) job ID:%" PRIu64"\n",
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/ucp/allgatherv/allgatherv_ring.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/ucp/tl_ucp.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ typedef struct ucc_tl_ucp_context {
ucc_tl_ucp_context_config_t cfg;
ucc_tl_ucp_worker_t worker;
ucc_tl_ucp_worker_t service_worker;
int service_worker_throttling_count;
uint32_t service_worker_throttling_count;
ucc_mpool_t req_mp;
ucc_tl_ucp_remote_info_t * remote_info;
ucp_rkey_h * rkeys;
Expand Down
5 changes: 3 additions & 2 deletions src/components/topo/ucc_topo.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

Expand Down Expand Up @@ -172,7 +173,7 @@ ucc_status_t ucc_topo_init(ucc_subset_t set, ucc_context_topo_t *ctx_topo,
topo->sbgps[i].status = UCC_SBGP_NOT_INIT;
}
topo->n_sockets = -1;
topo->node_leader_rank = -1;
topo->node_leader_rank = UCC_RANK_INVALID;
topo->node_leader_rank_id = 0;
topo->set = set;
topo->min_ppn = UCC_RANK_MAX;
Expand Down
20 changes: 13 additions & 7 deletions src/core/ucc_context.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

Expand Down Expand Up @@ -53,18 +54,23 @@ typedef struct ucc_context {
ucc_cl_context_t **cl_ctx;
ucc_tl_context_t **tl_ctx;
ucc_tl_context_t *service_ctx;
int n_cl_ctx;
int n_tl_ctx;
int n_addr_packed; /*< Number of LT/CL components whose addresses are packed
into ucc_context->attr.addr */
unsigned n_cl_ctx;
unsigned n_tl_ctx;
/**
* Number of TL/CL components whose addresses are packed into
* ucc_context->attr.addr
*/
int n_addr_packed;
ucc_config_names_array_t all_tls;
ucc_list_link_t progress_list;
ucc_progress_queue_t *pq;
ucc_team_id_pool_t ids;
ucc_context_id_t id;
ucc_addr_storage_t addr_storage;
ucc_rank_t rank; /*< rank of a process in the "global" (with
OOB) context */
/**
* rank of a process in the "global" (with OOB) context
*/
ucc_rank_t rank;
ucc_context_topo_t *topo;
uint64_t cl_flags;
ucc_tl_team_t *service_team;
Expand Down
6 changes: 3 additions & 3 deletions src/utils/ucc_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -667,23 +667,23 @@ static ucc_pipeline_params_t ucc_pipeline_params_auto = {
.n_frags = 0,
.frag_size = 0,
.pdepth = 0,
.order = 0
.order = UCC_PIPELINE_PARALLEL,
};

static ucc_pipeline_params_t ucc_pipeline_params_no = {
.threshold = SIZE_MAX,
.n_frags = 0,
.frag_size = 0,
.pdepth = 1,
.order = 0
.order = UCC_PIPELINE_PARALLEL,
};

static ucc_pipeline_params_t ucc_pipeline_params_default = {
.threshold = SIZE_MAX,
.n_frags = 2,
.frag_size = SIZE_MAX,
.pdepth = 2,
.order = UCC_PIPELINE_SEQUENTIAL
.order = UCC_PIPELINE_SEQUENTIAL,
};

int ucc_pipeline_params_is_auto(const ucc_pipeline_params_t *p)
Expand Down
37 changes: 24 additions & 13 deletions src/utils/ucc_proc_info.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
/**
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* See file LICENSE for terms.
*/

#include "ucc_proc_info.h"
#include "ucc_log.h"
#include "utils/ucc_malloc.h"
Expand Down Expand Up @@ -208,25 +210,34 @@ static ucc_status_t ucc_get_bound_numa_id(ucc_numa_id_t *numaid)
char * error;
void * handle, *cpumask;
int i, numa_node, n_cfg_cpus, nn;
int (*ucc_numa_available)(void);
int (*ucc_numa_num_configured_cpus)(void);
void *(*ucc_numa_allocate_cpumask)(void);
void *(*ucc_numa_sched_getaffinity)(int, void *);
int (*ucc_numa_bitmask_isbitset)(void *, int);
int (*ucc_numa_node_of_cpu)(int);
int (*ucc_numa_bitmask_free)(void *);

handle = dlopen("libnuma.so", RTLD_LAZY);
if (!handle) {
ucc_debug("%s", dlerror());
return UCC_ERR_NOT_FOUND;
}

int (*ucc_numa_available)(void) =
LOAD_NUMA_SYM("numa_available");
int (*ucc_numa_num_configured_cpus)(void) =
LOAD_NUMA_SYM("numa_num_configured_cpus");
void *(*ucc_numa_allocate_cpumask)(void) =
LOAD_NUMA_SYM("numa_allocate_cpumask");
void *(*ucc_numa_sched_getaffinity)(int, void *) =
LOAD_NUMA_SYM("numa_sched_getaffinity");
int (*ucc_numa_bitmask_isbitset)(void *, int) =
LOAD_NUMA_SYM("numa_bitmask_isbitset");
int (*ucc_numa_node_of_cpu)(int) = LOAD_NUMA_SYM("numa_node_of_cpu");
int (*ucc_numa_bitmask_free)(void *) = LOAD_NUMA_SYM("numa_bitmask_free");
ucc_numa_available =
(int(*)(void))LOAD_NUMA_SYM("numa_available");
ucc_numa_num_configured_cpus =
(int(*)(void))LOAD_NUMA_SYM("numa_num_configured_cpus");
ucc_numa_allocate_cpumask =
(void*(*)(void))LOAD_NUMA_SYM("numa_allocate_cpumask");
ucc_numa_sched_getaffinity =
(void*(*)(int, void*))LOAD_NUMA_SYM("numa_sched_getaffinity");
ucc_numa_bitmask_isbitset =
(int(*)(void*, int))LOAD_NUMA_SYM("numa_bitmask_isbitset");
ucc_numa_node_of_cpu =
(int(*)(int))LOAD_NUMA_SYM("numa_node_of_cpu");
ucc_numa_bitmask_free =
(int(*)(void*))LOAD_NUMA_SYM("numa_bitmask_free");

if (-1 == ucc_numa_available()) {
ucc_debug("libnuma is not available");
Expand Down

0 comments on commit e21e8cd

Please sign in to comment.