NVMe/NVMeOF: Patch NVMe/NVMeOF driver to support GDS on Linux 5.19 Kernel

With this change, the NVMe and NVMeOF drivers are enabled to
support GPUDirect Storage (GDS).
The change centers on the nvme and nvme-rdma map_data() and
unmap_data() paths: each IO request is first intercepted and
checked for GDS pages. If the request contains GDS pages, it is
served by the GDS driver component nvidia-fs; otherwise it is
served by the standard NVMe driver code.
sourabgupta3 committed Jul 20, 2023
1 parent db596dd commit efcfcc1
Showing 8 changed files with 472 additions and 1 deletion.
4 changes: 3 additions & 1 deletion drivers/nvme/host/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0

ccflags-y += -I$(src)

ccflags-y += -DCONFIG_NVFS
obj-$(CONFIG_NVME_CORE) += nvme-core.o
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
@@ -18,10 +18,12 @@ nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o

nvme-y += pci.o
nvme-y += nvfs-dma.o

nvme-fabrics-y += fabrics.o

nvme-rdma-y += rdma.o
nvme-rdma-y += nvfs-rdma.o

nvme-fc-y += fc.o

52 changes: 52 additions & 0 deletions drivers/nvme/host/nvfs-dma.c
@@ -0,0 +1,52 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/

#ifdef CONFIG_NVFS
#define MODULE_PREFIX nvme_v1
#include "nvfs.h"

struct nvfs_dma_rw_ops *nvfs_ops;

atomic_t nvfs_shutdown = ATOMIC_INIT(1);

DEFINE_PER_CPU(long, nvfs_n_ops);

// must have for compatibility
#define NVIDIA_FS_COMPAT_FT(ops) \
(NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) && NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))

// protected via nvfs_module_mutex
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
{
if (NVIDIA_FS_COMPAT_FT(ops)) {
nvfs_ops = ops;
atomic_set(&nvfs_shutdown, 0);
return 0;
} else
return -EOPNOTSUPP;

}
EXPORT_SYMBOL(REGISTER_FUNC);

// protected via nvfs_module_mutex
void UNREGISTER_FUNC(void)
{
(void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);
do {
msleep(NVFS_HOLD_TIME_MS);
} while (nvfs_count_ops());
nvfs_ops = NULL;
}
EXPORT_SYMBOL(UNREGISTER_FUNC);
#endif
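
The nvfs_get_ops()/nvfs_put_ops()/nvfs_count_ops() helpers, the NVIDIA_FS_CHECK_FT_* feature-check macros, and NVFS_HOLD_TIME_MS used above come from the shared nvfs.h header, which is one of the changed files not expanded in this view. Below is a minimal sketch of how such per-CPU reference counting is commonly implemented; the names follow the symbols used above, but the bodies are an assumption, not the header from this commit.

/* Sketch of the assumed nvfs.h helpers; not the literal header from this commit. */
static inline bool nvfs_get_ops(void)
{
        if (nvfs_ops && !atomic_read(&nvfs_shutdown)) {
                this_cpu_inc(nvfs_n_ops);       /* per-CPU count of in-flight nvidia-fs ops */
                return true;
        }
        return false;
}

static inline void nvfs_put_ops(void)
{
        this_cpu_dec(nvfs_n_ops);
}

static inline unsigned long nvfs_count_ops(void)
{
        unsigned long sum = 0;
        int cpu;

        for_each_possible_cpu(cpu)
                sum += per_cpu(nvfs_n_ops, cpu);
        return sum;     /* UNREGISTER_FUNC() polls until this drains to zero */
}

Read together with UNREGISTER_FUNC() above: unregistration flips nvfs_shutdown so no new references can be taken, then waits until every per-CPU count has drained before clearing nvfs_ops.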
122 changes: 122 additions & 0 deletions drivers/nvme/host/nvfs-dma.h
@@ -0,0 +1,122 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/

#ifndef NVFS_DMA_H
#define NVFS_DMA_H

static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
                struct request *req, struct nvme_rw_command *cmnd);

static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
                struct request *req, struct nvme_rw_command *cmd, int entries);

static bool nvme_nvfs_unmap_data(struct nvme_dev *dev, struct request *req)
{
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        enum dma_data_direction dma_dir = rq_dma_dir(req);

        if (!iod || !iod->nents)
                return false;
        if (iod->sg && !is_pci_p2pdma_page(sg_page(iod->sg)) &&
                        !blk_integrity_rq(req) &&
                        !iod->dma_len &&
                        nvfs_ops != NULL) {
                int count;

                count = nvfs_ops->nvfs_dma_unmap_sg(dev->dev, iod->sg, iod->nents,
                                dma_dir);
                if (!count)
                        return false;

                nvfs_put_ops();
                return true;
        }
        return false;
}

static blk_status_t nvme_nvfs_map_data(struct nvme_dev *dev, struct request *req,
                struct nvme_command *cmnd, bool *is_nvfs_io)
{
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct request_queue *q = req->q;
        enum dma_data_direction dma_dir = rq_dma_dir(req);
        blk_status_t ret = BLK_STS_RESOURCE;
        int nr_mapped;

        nr_mapped = 0;
        *is_nvfs_io = false;

        if (!blk_integrity_rq(req) && nvfs_get_ops()) {
                iod->dma_len = 0;
                iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
                if (!iod->sg) {
                        nvfs_put_ops();
                        return BLK_STS_RESOURCE;
                }

                sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
                // associates bio pages to scatterlist
                iod->nents = nvfs_ops->nvfs_blk_rq_map_sg(q, req, iod->sg);
                if (!iod->nents) {
                        mempool_free(iod->sg, dev->iod_mempool);
                        nvfs_put_ops();
                        return BLK_STS_IOERR; // reset to original ret
                }
                *is_nvfs_io = true;

                if (unlikely((iod->nents == NVFS_IO_ERR))) {
                        pr_err("%s: failed to map sg_nents=:%d\n", __func__, iod->nents);
                        mempool_free(iod->sg, dev->iod_mempool);
                        nvfs_put_ops();
                        return BLK_STS_IOERR;
                }

                nr_mapped = nvfs_ops->nvfs_dma_map_sg_attrs(dev->dev,
                                iod->sg,
                                iod->nents,
                                dma_dir,
                                DMA_ATTR_NO_WARN);

                if (unlikely((nr_mapped == NVFS_IO_ERR))) {
                        mempool_free(iod->sg, dev->iod_mempool);
                        nvfs_put_ops();
                        pr_err("%s: failed to dma map sglist=:%d\n", __func__, iod->nents);
                        return BLK_STS_IOERR;
                }

                if (unlikely(nr_mapped == NVFS_CPU_REQ)) {
                        mempool_free(iod->sg, dev->iod_mempool);
                        nvfs_put_ops();
                        WARN_ON(1);
                }

                iod->use_sgl = nvme_pci_use_sgls(dev, req);
                if (iod->use_sgl) {
                        ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
                } else {
                        // push dma address to hw registers
                        ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
                }

                if (ret != BLK_STS_OK) {
                        nvme_nvfs_unmap_data(dev, req);
                        mempool_free(iod->sg, dev->iod_mempool);
                }
                return ret;
        }
        return ret;
}

#endif /* NVFS_DMA_H */
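
The hunk that wires these helpers into the PCIe path lives in drivers/nvme/host/pci.c and is not expanded in this view. As a hedged sketch of the wiring the commit message describes (the GDS check runs before the standard mapping code), lines of roughly this shape are expected near the top of nvme_map_data(); the placement and surrounding code are assumptions, not the literal hunk:

/*
 * Sketch only (assumption, not the literal pci.c hunk of this commit):
 * GDS-backed requests are handed to nvidia-fs before the stock
 * PRP/SGL mapping code runs.
 */
#ifdef CONFIG_NVFS
        bool is_nvfs_io = false;
        blk_status_t ret;

        ret = nvme_nvfs_map_data(dev, req, cmnd, &is_nvfs_io);
        if (is_nvfs_io)
                return ret;     /* GDS request: served (or failed) by nvidia-fs */
#endif
        /* non-GDS requests fall through to the standard mapping path */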
52 changes: 52 additions & 0 deletions drivers/nvme/host/nvfs-rdma.c
@@ -0,0 +1,52 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/

#ifdef CONFIG_NVFS
#define MODULE_PREFIX nvme_rdma_v1
#include "nvfs.h"

struct nvfs_dma_rw_ops *nvfs_ops;

atomic_t nvfs_shutdown = ATOMIC_INIT(1);

DEFINE_PER_CPU(long, nvfs_n_ops);

// must have for compatibility
#define NVIDIA_FS_COMPAT_FT(ops) \
(NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) && NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))

// protected via nvfs_module_mutex
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
{
if (NVIDIA_FS_COMPAT_FT(ops)) {
nvfs_ops = ops;
atomic_set(&nvfs_shutdown, 0);
return 0;
} else
return -EOPNOTSUPP;

}
EXPORT_SYMBOL(REGISTER_FUNC);

// protected via nvfs_module_mutex
void UNREGISTER_FUNC(void)
{
(void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);
do {
msleep(NVFS_HOLD_TIME_MS);
} while (nvfs_count_ops());
nvfs_ops = NULL;
}
EXPORT_SYMBOL(UNREGISTER_FUNC);
#endif
94 changes: 94 additions & 0 deletions drivers/nvme/host/nvfs-rdma.h
@@ -0,0 +1,94 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/

#ifndef NVFS_RDMA_H
#define NVFS_RDMA_H

static bool nvme_rdma_nvfs_unmap_data(struct ib_device *ibdev,
                struct request *rq)
{
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
        enum dma_data_direction dma_dir = rq_dma_dir(rq);
        int count;

        if (!blk_integrity_rq(rq) && nvfs_ops != NULL) {
                count = nvfs_ops->nvfs_dma_unmap_sg(ibdev->dma_device,
                                req->data_sgl.sg_table.sgl, req->data_sgl.nents,
                                dma_dir);
                if (count) {
                        nvfs_put_ops();
#ifdef HAVE_SG_ALLOC_TABLE_CHAINED_NENTS_FIRST_CHUNK_PARAM
                        sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
#else
                        sg_free_table_chained(&req->data_sgl.sg_table, true);
#endif
                        return true;
                }
        }
        return false;
}

static int nvme_rdma_nvfs_map_data(struct ib_device *ibdev, struct request *rq, bool *is_nvfs_io)
{
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
        enum dma_data_direction dma_dir = rq_dma_dir(rq);
        int count, ret = 0;

        *is_nvfs_io = false;
        count = 0;
        if (!blk_integrity_rq(rq) && nvfs_get_ops()) {
                // associates bio pages to scatterlist
                count = nvfs_ops->nvfs_blk_rq_map_sg(rq->q, rq, req->data_sgl.sg_table.sgl);
                if (!count) {
                        nvfs_put_ops();
                        return 0; // fall to cpu path
                }

                *is_nvfs_io = true;
                if (unlikely((count == NVFS_IO_ERR))) {
                        nvfs_put_ops();
                        pr_err("%s: failed to map sg_nents=:%d\n", __func__, req->data_sgl.nents);
                        return -EIO;
                }
                req->data_sgl.nents = count;

                count = nvfs_ops->nvfs_dma_map_sg_attrs(ibdev->dma_device,
                                req->data_sgl.sg_table.sgl,
                                req->data_sgl.nents,
                                dma_dir,
                                DMA_ATTR_NO_WARN);

                if (unlikely((count == NVFS_IO_ERR))) {
                        nvfs_put_ops();
                        return -EIO;
                }

                if (unlikely(count == NVFS_CPU_REQ)) {
                        nvfs_put_ops();
                        BUG();
                        return -EIO;
                }

                return ret;
        } else {
                // Fall to CPU path
                return 0;
        }

        return ret;
}

#endif
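
As with the PCIe path, the call site that invokes nvme_rdma_nvfs_map_data() from nvme_rdma_map_data() in drivers/nvme/host/rdma.c is not expanded in this view. A hedged sketch of what that early-out is expected to look like (fragment only; the surrounding error handling is an assumption):

/* Sketch (assumption): early-out in nvme_rdma_map_data() before the CPU mapping path. */
#ifdef CONFIG_NVFS
        {
                bool is_nvfs_io = false;
                int ret;

                ret = nvme_rdma_nvfs_map_data(ibdev, rq, &is_nvfs_io);
                if (is_nvfs_io)
                        return ret;     /* GDS pages: mapped via nvidia-fs, or -EIO on failure */
        }
#endif
        /* non-GDS requests continue through the regular ib_dma_map_sg() path */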