Skip to content

Commit

Permalink
Add support for Neuron devices
Browse files Browse the repository at this point in the history
Add support for AWS Trainium devices via the AWS Neuron SDK.
AWS Neuron is a software stack for running ML applications using AWS's AI
hardware accelerators. Perftest support for Neuron allows testing of AI
accelerator direct memory usage by IB devices (similar to CUDA).

To support Neuron, configure using --enable-neuron --with-neuron=<neuron/base/path>.
Run bandwidth tests with Neuron direct by specifying the
--use_neuron=<core_id> option.

Signed-off-by: Daniel Kranzdorf <[email protected]>
  • Loading branch information
Daniel Kranzdorf authored and mrgolin committed Aug 10, 2022
1 parent 8bd8b65 commit 6b08ce3
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 5 deletions.
27 changes: 27 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,33 @@ if [test "$CUDA_H_PATH" ]; then
LIBS=$LIBS" -lcuda"
fi

dnl --enable-neuron: opt-in switch for the Neuron benchmarks.
dnl When the option is not given enable_neuron defaults to "no".
AC_ARG_ENABLE([neuron],
[AS_HELP_STRING([--enable-neuron],
[Enable Neuron benchmarks])
],
[],
[enable_neuron=no])

dnl --with-neuron=PATH: add the NRT installation to the header and library
dnl search paths. The values yes and no carry no path and are ignored.
dnl Only the -L search directory goes into LDFLAGS. The nrt library itself
dnl is added to LIBS by the AC_SEARCH_LIBS check that runs when Neuron
dnl support is enabled.
AC_ARG_WITH([neuron],
[AS_HELP_STRING([--with-neuron=@<:@NRT installation path@:>@],
[Provide path to NRT installation])
],
[AS_CASE([$with_neuron],
[yes|no], [],
[CPPFLAGS="-I$with_neuron/include $CPPFLAGS"
LDFLAGS="-L$with_neuron/lib $LDFLAGS"])
])

dnl With --enable-neuron: define HAVE_NEURON and require both the nrt/nrt.h
dnl header and a library providing nrt_tensor_allocate. Configure aborts
dnl with an error if either check fails.
AS_IF([test "x$enable_neuron" = xyes], [
AC_DEFINE([HAVE_NEURON], [1], [Enable Neuron benchmarks])
AC_CHECK_HEADERS([nrt/nrt.h], [],
[AC_MSG_ERROR([cannot include nrt.h])])
AC_SEARCH_LIBS([nrt_tensor_allocate], [nrt], [],
[AC_MSG_ERROR([cannot link with -lnrt])])
])

dnl Automake conditional NEURON is true when enable_neuron is "yes".
dnl The operand is quoted so an empty or multi-word value cannot break test.
AM_CONDITIONAL([NEURON], [test "x$enable_neuron" = xyes])

AC_TRY_LINK([#include <infiniband/verbs.h>],
[struct ibv_qp_attr *attr; int x = attr->rate_limit;],[HAVE_PACKET_PACING=yes], [HAVE_PACKET_PACING=no])
AM_CONDITIONAL([HAVE_PACKET_PACING],[test "x$HAVE_PACKET_PACING" = "xyes"])
Expand Down
35 changes: 35 additions & 0 deletions src/perftest_parameters.c
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,11 @@ static void usage(const char *argv0, VerbType verb, TestType tst, int connection
printf(" Use CUDA specific device, based on its full PCIe address, for GPUDirect RDMA testing\n");
#endif

#ifdef HAVE_NEURON
printf(" --use_neuron=<logical neuron core id>");
printf(" Use selected logical neuron core for NeuronDirect RDMA testing\n");
#endif

#ifdef HAVE_ROCM
printf(" --use_rocm=<rocm device id>");
printf(" Use selected ROCm device for GPUDirect RDMA testing\n");
Expand Down Expand Up @@ -751,6 +756,10 @@ static void init_perftest_params(struct perftest_parameters *user_param)
user_param->use_cuda = 0;
user_param->cuda_device_id = 0;
#endif
#ifdef HAVE_NEURON
user_param->use_neuron = 0;
user_param->neuron_core_id = 0;
#endif
#ifdef HAVE_ROCM
user_param->use_rocm = 0;
user_param->rocm_device_id = 0;
Expand Down Expand Up @@ -1675,6 +1684,14 @@ static void force_dependecies(struct perftest_parameters *user_param)
}
#endif

#ifdef HAVE_NEURON
if (user_param->use_neuron && user_param->mmap_file != NULL) {
printf(RESULT_LINE);
fprintf(stderr,"You cannot use neuron and an mmap'd file at the same time\n");
exit(1);
}
#endif

#ifdef HAVE_ROCM
if (user_param->use_rocm && user_param->mmap_file != NULL) {
printf(RESULT_LINE);
Expand Down Expand Up @@ -2113,6 +2130,9 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
#endif
#ifdef HAVE_ROCM
static int use_rocm_flag = 0;
#endif
#ifdef HAVE_NEURON
static int use_neuron_flag = 0;
#endif
static int disable_pcir_flag = 0;
static int mmap_file_flag = 0;
Expand Down Expand Up @@ -2263,6 +2283,9 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
#ifdef HAVE_ROCM
{ .name = "use_rocm", .has_arg = 1, .flag = &use_rocm_flag, .val = 1},
#endif
#ifdef HAVE_NEURON
{ .name = "use_neuron", .has_arg = 1, .flag = &use_neuron_flag, .val = 1},
#endif
{ .name = "mmap", .has_arg = 1, .flag = &mmap_file_flag, .val = 1},
{ .name = "mmap-offset", .has_arg = 1, .flag = &mmap_offset_flag, .val = 1},
{ .name = "ipv6", .has_arg = 0, .flag = &ipv6_flag, .val = 1},
Expand Down Expand Up @@ -2642,6 +2665,18 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
CHECK_VALUE_NON_NEGATIVE(user_param->rocm_device_id,int,"ROCm device",not_int_ptr);
use_rocm_flag = 0;
}
#endif
#ifdef HAVE_NEURON
if (use_neuron_flag) {
user_param->use_neuron = 1;
user_param->neuron_core_id = strtol(optarg, NULL, 0);
if (user_param->neuron_core_id < 0)
{
fprintf(stderr, "Invalid Neuron Core ID %d\n", user_param->neuron_core_id);
return FAILURE;
}
use_neuron_flag = 0;
}
#endif
if (flow_label_flag) {
CHECK_VALUE_NON_NEGATIVE(user_param->flow_label,int,"flow label",not_int_ptr);
Expand Down
8 changes: 8 additions & 0 deletions src/perftest_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@
#include CUDA_PATH
#endif

#ifdef HAVE_NEURON
#include <nrt/nrt.h>
#endif

#ifdef HAVE_ROCM
#include <hip/hip_runtime_api.h>
#endif
Expand Down Expand Up @@ -537,6 +541,10 @@ struct perftest_parameters {
int cuda_device_id;
char *cuda_device_bus_id;
#endif
#ifdef HAVE_NEURON
int use_neuron;
int neuron_core_id;
#endif
#ifdef HAVE_ROCM
int use_rocm;
int rocm_device_id;
Expand Down
79 changes: 74 additions & 5 deletions src/perftest_resources.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,30 @@ static int pp_free_rocm(struct pingpong_context *ctx)
}
#endif

#ifdef HAVE_NEURON
/*----------------------------------------------------------------------------*/

/*
 * Bring up the Neuron runtime for this test context.
 *
 * Stores neuron_core_id in ctx (this happens whether or not the runtime
 * initialization succeeds) and then calls nrt_init() with
 * NRT_FRAMEWORK_TYPE_NO_FW and empty strings for the remaining arguments.
 *
 * Returns 0 when nrt_init() reports NRT_SUCCESS, 1 otherwise.
 */
static int pp_init_neuron(struct pingpong_context *ctx, int neuron_core_id)
{
	NRT_STATUS status;

	ctx->neuron_core_id = neuron_core_id;

	status = nrt_init(NRT_FRAMEWORK_TYPE_NO_FW, "", "");

	return (status == NRT_SUCCESS) ? 0 : 1;
}

/*
 * Shut down the Neuron runtime by calling nrt_close().
 *
 * Always returns 0.
 */
static int pp_free_neuron(void)
{
	nrt_close();
	return 0;
}
#endif

static int pp_init_mmap(struct pingpong_context *ctx, size_t size,
const char *fname, unsigned long offset)
{
Expand Down Expand Up @@ -1152,6 +1176,9 @@ void alloc_ctx(struct pingpong_context *ctx,struct perftest_parameters *user_par
#endif
ALLOCATE(ctx->mr, struct ibv_mr*, user_param->num_of_qps);
ALLOCATE(ctx->buf, void* , user_param->num_of_qps);
#ifdef HAVE_NEURON
ALLOCATE(ctx->tensors, nrt_tensor_t* , user_param->num_of_qps);
#endif

if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && (user_param->machine == CLIENT || user_param->duplex)) {

Expand Down Expand Up @@ -1402,6 +1429,16 @@ int destroy_ctx(struct pingpong_context *ctx,
}
else
#endif
#ifdef HAVE_NEURON
if (user_param->use_neuron) {
for (i = 0; i < dereg_counter; i++) {
printf("deallocating neuron buffer %p\n", ctx->buf[i]);
nrt_tensor_free(&ctx->tensors[i]);
}
pp_free_neuron();
}
else
#endif
if (user_param->mmap_file != NULL) {
pp_free_mmap(ctx);
} else if (ctx->is_contig_supported == FAILURE) {
Expand Down Expand Up @@ -1550,6 +1587,7 @@ int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *u
int i;
int flags = IBV_ACCESS_LOCAL_WRITE;

int can_init_buff = 1;

#if defined(__FreeBSD__)
ctx->is_contig_supported = FAILURE;
Expand Down Expand Up @@ -1589,6 +1627,7 @@ int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *u
printf("allocated GPU buffer address at %016llx pointer=%p\n",
d_A, (void *)d_A);
ctx->buf[qp_index] = (void *)d_A;
can_init_buff = 0;
} else
#endif

Expand All @@ -1612,6 +1651,31 @@ int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *u
ctx->buf[qp_index] = d_A;
} else
#endif
#ifdef HAVE_NEURON
if (user_param->use_neuron) {
void* d_A = NULL;
NRT_STATUS result;
const size_t neuron_page_size = 4 * 1024;
size_t size = (ctx->buff_size + neuron_page_size - 1) &
~(neuron_page_size - 1);

result = nrt_tensor_allocate(NRT_TENSOR_PLACEMENT_DEVICE, ctx->neuron_core_id, size, NULL, &ctx->tensors[qp_index]);
if (result != NRT_SUCCESS) {
ctx->tensors[qp_index] = NULL;
printf("nrt_tensor_allocate_error =%d\n", (int)result);
return FAILURE;
}

d_A = nrt_tensor_get_va(ctx->tensors[qp_index]);
if (d_A == NULL) {
printf("Failed to get va for the allocated tensor\n");
return FAILURE;
}

ctx->buf[qp_index] = d_A;
can_init_buff = 0;
} else
#endif

if (user_param->mmap_file != NULL) {
#if defined(__FreeBSD__)
Expand Down Expand Up @@ -1683,9 +1747,7 @@ int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *u


/* Initialize buffer with random numbers except in WRITE_LAT test that it 0's */
#ifdef HAVE_CUDA
if (!user_param->use_cuda) {
#endif
if (can_init_buff) {
srand(time(NULL));
if (user_param->verb == WRITE && user_param->tst == LAT) {
memset(ctx->buf[qp_index], 0, ctx->buff_size);
Expand All @@ -1694,9 +1756,7 @@ int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *u
((char*)ctx->buf[qp_index])[i] = (char)rand();
}
}
#ifdef HAVE_CUDA
}
#endif
return SUCCESS;
}

Expand Down Expand Up @@ -1962,6 +2022,15 @@ int ctx_init(struct pingpong_context *ctx, struct perftest_parameters *user_para
}
#endif

#ifdef HAVE_NEURON
if (user_param->use_neuron) {
if (pp_init_neuron(ctx, user_param->neuron_core_id)) {
fprintf(stderr, "Couldn't initialize Neuron device\n");
return FAILURE;
}
}
#endif

if (create_mr(ctx, user_param)) {
fprintf(stderr, "Failed to create MR\n");
return FAILURE;
Expand Down
4 changes: 4 additions & 0 deletions src/perftest_resources.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ struct pingpong_context {
struct ibv_cq *send_cq;
struct ibv_cq *recv_cq;
void **buf;
#ifdef HAVE_NEURON
int neuron_core_id;
nrt_tensor_t **tensors;
#endif
struct ibv_ah **ah;
struct ibv_qp **qp;
#ifdef HAVE_IBV_WR_API
Expand Down

0 comments on commit 6b08ce3

Please sign in to comment.