Skip to content

Commit

Permalink
ch4/ofi: fix the av table assumptions
Browse files Browse the repository at this point in the history
Because we insert all remote endpoints into all local endpoints at the
same time, and thus follow the exact same insertion order, every local
endpoint will get the same av table index for a given remote endpoint.
The exception is the local root endpoint, because it has already inserted
the other remote root endpoints at init time. As a result, the addresses
from the local root endpoint to remote non-root endpoints will have a
fixed offset from those seen by the local non-root endpoints.
  • Loading branch information
hzhou committed Jan 8, 2025
1 parent 714ccc1 commit b5fa39d
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 6 deletions.
9 changes: 7 additions & 2 deletions src/mpid/ch4/netmod/ofi/ofi_pre.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,9 +311,14 @@ typedef struct {
/* Maximum number of network interfaces CH4 can support. */
#define MPIDI_OFI_MAX_NICS 8

/* Imagine an address table with dimensions [local_vci][local_nic][rank][vci][nic].
* Because of the identical insertion order and the use of FI_AV_TABLE, all local
* endpoints share the same remote address, except for the local root endpoint.
*/
typedef struct {
fi_addr_t root_dest;
fi_addr_t *all_dest; /* to be allocated into an array of [nic * vci] */
fi_addr_t root_dest; /* [0][0][r][0][0] */
fi_addr_t root_offset; /* [0][0][r][vci][nic] - [*][*][r][vci][nic] */
fi_addr_t *all_dest; /* [*][*][r][vci][nic] */
} MPIDI_OFI_addr_t;

#endif /* OFI_PRE_H_INCLUDED */
19 changes: 15 additions & 4 deletions src/mpid/ch4/netmod/ofi/ofi_vci.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,13 @@ static int addr_exchange_all_ctx(MPIR_Comm * comm, int *all_num_vcis)

/* insert and store non-root nic/vci on the root context */
for (int r = 0; r < nprocs; r++) {
fi_addr_t expect_addr = FI_ADDR_NOTAVAIL;
fi_addr_t root_offset = 0;
GET_AV_AND_ADDRNAMES(r);
/* for each remote endpoints */
for (int nic = 0; nic < num_nics; nic++) {
for (int vci = 0; vci < NUM_VCIS_FOR_RANK(r); vci++) {
/* for each local endpoints */
fi_addr_t expect_addr = FI_ADDR_NOTAVAIL;
for (int nic_local = 0; nic_local < num_nics; nic_local++) {
for (int vci_local = 0; vci_local < my_num_vcis; vci_local++) {
/* skip root */
Expand All @@ -245,18 +246,28 @@ static int addr_exchange_all_ctx(MPIR_Comm * comm, int *all_num_vcis)
}
int ctx_idx = MPIDI_OFI_get_ctx_index(vci_local, nic_local);
DO_AV_INSERT(ctx_idx, nic, vci);
/* we expect all resulting addr to be the same */
/* we expect all resulting addr to be the same except for local root endpoint, which
* will have an offset */
if (expect_addr == FI_ADDR_NOTAVAIL) {
expect_addr = addr;
} else if (nic_local == 0 && vci_local == 0) {
if (root_offset == 0) {
root_offset = addr - expect_addr;
} else {
MPIR_Assert(addr == expect_addr + root_offset);
}
} else {
MPIR_Assert(expect_addr == addr);
MPIR_Assert(addr == expect_addr);
}
}
}
MPIR_Assert(expect_addr != FI_ADDR_NOTAVAIL);
MPIDI_OFI_AV_ADDR_NONROOT(av, vci, nic) = expect_addr;
MPIDI_OFI_AV_ADDR_NO_OFFSET(av, vci, nic) = expect_addr;
/* next */
expect_addr++;
}
}
MPIDI_OFI_AV(av).root_offset = root_offset;
}

mpi_errno = MPIR_Barrier_fallback(comm, MPIR_ERR_NONE);
Expand Down

0 comments on commit b5fa39d

Please sign in to comment.