Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP]enable ECE negotiation for reliable QP #144

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,12 @@ if [test $HAVE_OOO_ATTR = yes]; then
AC_DEFINE([HAVE_OOO_ATTR], [1], [Have Out of order data placement support])
fi

AC_TRY_LINK([#include <rdma/rdma_cma.h>],
[rdma_get_remote_ece(NULL, NULL);], [HAVE_RDMACM_ECE=yes], [HAVE_RDMACM_ECE=no])
if [test $HAVE_RDMACM_ECE = yes]; then
AC_DEFINE([HAVE_RDMACM_ECE], [1], [Enable ECE negotiation through rdmacm])
fi

if [test $HAVE_IBV_WR_API = yes]; then
AC_CHECK_LIB([efa], [efadv_create_qp_ex], [HAVE_SRD=yes], [HAVE_SRD=no])
AC_TRY_LINK([#include <infiniband/efadv.h>],
Expand Down
84 changes: 84 additions & 0 deletions src/perftest_parameters.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ static void usage(const char *argv0, VerbType verb, TestType tst, int connection
#endif
}

#ifdef HAVE_RDMACM_ECE
printf(" --use_ece use ECE for RC & DC if it's supported\n");
#endif

if (tst == LAT) {
printf(" -C, --report-cycles ");
printf(" report times in cpu cycle units (default microseconds)\n");
Expand Down Expand Up @@ -693,6 +697,7 @@ static void init_perftest_params(struct perftest_parameters *user_param)
user_param->test_method = RUN_REGULAR;
user_param->cpu_freq_f = OFF;
user_param->connection_type = (user_param->connection_type == RawEth) ? RawEth : RC;
user_param->use_ece = OFF;
user_param->use_event = OFF;
user_param->eq_num = 0;
user_param->use_eq_num = OFF;
Expand Down Expand Up @@ -2107,6 +2112,9 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
static int flow_label_flag = 0;
static int retry_count_flag = 0;
static int dont_xchg_versions_flag = 0;
#ifdef HAVE_RDMACM_ECE
static int use_ece_flag = 0;
#endif
#ifdef HAVE_CUDA
static int use_cuda_flag = 0;
static int use_cuda_bus_id_flag = 0;
Expand Down Expand Up @@ -2256,6 +2264,9 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
{ .name = "flow_label", .has_arg = 1, .flag = &flow_label_flag, .val = 1},
{ .name = "retry_count", .has_arg = 1, .flag = &retry_count_flag, .val = 1},
{ .name = "dont_xchg_versions", .has_arg = 0, .flag = &dont_xchg_versions_flag, .val = 1},
#ifdef HAVE_RDMACM_ECE
{ .name = "use_ece", .has_arg = 1, .flag = &use_ece_flag, .val = 1},
#endif
#ifdef HAVE_CUDA
{ .name = "use_cuda", .has_arg = 1, .flag = &use_cuda_flag, .val = 1},
{ .name = "use_cuda_bus_id", .has_arg = 1, .flag = &use_cuda_bus_id_flag, .val = 1},
Expand Down Expand Up @@ -2623,6 +2634,11 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
CHECK_VALUE_NON_NEGATIVE(user_param->latency_gap,int,"Latency gap time",not_int_ptr);
latency_gap_flag = 0;
}
#ifdef HAVE_RDMACM_ECE
if (use_ece_flag) {
user_param->use_ece = 1;
}
#endif
#ifdef HAVE_CUDA
if (use_cuda_flag) {
user_param->use_cuda = 1;
Expand Down Expand Up @@ -3121,6 +3137,74 @@ int check_link(struct ibv_context *context,struct perftest_parameters *user_para
return SUCCESS;
}

/******************************************************************************
*
******************************************************************************/
/*
 * check_ece
 *
 * When user_param->use_ece is set, probe the device for ECE support by
 * creating a throw-away PD, CQ and RC QP and calling ibv_query_ece() and
 * ibv_set_ece() on that QP. All temporary resources are released before
 * returning.
 *
 * Returns SUCCESS when ECE was not requested or the probe succeeded, and
 * FAILURE when a resource could not be created, the ECE calls failed, or
 * the build has no HAVE_RDMACM_ECE support.
 */
int check_ece(struct ibv_context *context,struct perftest_parameters *user_param)
{
	int rst = SUCCESS;

	/* Nothing to verify unless the user asked for ECE. */
	if (user_param->use_ece == 0) {
		return SUCCESS;
	}

#ifdef HAVE_RDMACM_ECE
	struct ibv_ece dummy_ece = {0};
	struct ibv_qp_init_attr dummy_qp_init_attr = {0};
	struct ibv_pd *dummy_pd = NULL;
	struct ibv_qp *dummy_qp = NULL;
	struct ibv_cq *dummy_cq = NULL;

	/* A NULL return means the allocation failed; nothing to release yet. */
	dummy_pd = ibv_alloc_pd(context);
	if (dummy_pd == NULL) {
		fprintf(stderr, " failed to create PD\n");
		return FAILURE;
	}

	dummy_cq = ibv_create_cq(context, 1, NULL, NULL, 0);
	if (dummy_cq == NULL) {
		fprintf(stderr, " failed to create CQ\n");
		rst = FAILURE;
		goto free_pd;
	}

	/* Minimal RC QP: one WR and one SGE in each direction, sharing one CQ. */
	dummy_qp_init_attr.send_cq = dummy_cq;
	dummy_qp_init_attr.recv_cq = dummy_cq;
	dummy_qp_init_attr.qp_type = IBV_QPT_RC;

	dummy_qp_init_attr.cap.max_send_wr = 1;
	dummy_qp_init_attr.cap.max_recv_wr = 1;
	dummy_qp_init_attr.cap.max_send_sge = 1;
	dummy_qp_init_attr.cap.max_recv_sge = 1;

	dummy_qp = ibv_create_qp(dummy_pd, &dummy_qp_init_attr);
	if (dummy_qp == NULL) {
		fprintf(stderr, " failed to create RC QP\n");
		rst = FAILURE;
		goto free_cq;
	}

	/* Query the QP's ECE options and set them back; either call failing is
	 * treated as the device not supporting ECE. */
	if ((ibv_query_ece(dummy_qp, &dummy_ece) != 0) ||
	    (ibv_set_ece(dummy_qp, &dummy_ece) != 0)) {
		fprintf(stderr, " device not support ECE\n");
		rst = FAILURE;
	}

	/* Release in reverse order of creation. */
	ibv_destroy_qp(dummy_qp);
free_cq:
	ibv_destroy_cq(dummy_cq);
free_pd:
	ibv_dealloc_pd(dummy_pd);
#else
	/* Built without rdma-core ECE support: the request cannot be honoured. */
	fprintf(stderr, " No support ECE operation\n");
	rst = FAILURE;
#endif

	return rst;
}

/******************************************************************************
*
******************************************************************************/
Expand Down
15 changes: 15 additions & 0 deletions src/perftest_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ struct perftest_parameters {
int is_ethertype;
int cpu_freq_f;
int connection_type;
int use_ece;
int num_of_qps;
int use_event;
int eq_num;
Expand Down Expand Up @@ -716,6 +717,20 @@ int check_link(struct ibv_context *context,struct perftest_parameters *user_para
*/
int check_link_and_mtu(struct ibv_context *context,struct perftest_parameters *user_param);

/* check_ece
*
* Description : Checks that the device supports ECE when the user requested it (use_ece).
*
* Parameters :
*
* context - Context of the device.
* user_param - Perftest parameters.
*
* Return Value : SUCCESS, FAILURE.
*/
int check_ece(struct ibv_context *context, struct perftest_parameters *user_param);

/* ctx_print_test_info
*
* Description : Prints all the parameters selected for this run.
Expand Down
5 changes: 5 additions & 0 deletions src/read_bw.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ int main(int argc, char *argv[])
return FAILURE;
}

/* See if ECE capability is supported if using it. */
if (check_ece(ctx.context, &user_param)) {
return FAILURE;
}

/* copy the relevant user parameters to the comm struct + creating rdma_cm resources. */
if (create_comm_struct(&user_comm,&user_param)) {
fprintf(stderr," Unable to create RDMA_CM resources\n");
Expand Down