From d4c1f92183b24cbc5be12caf016ff4a8b0a34dc7 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 27 Aug 2024 14:43:01 +0200 Subject: [PATCH 1/5] nvme/util: make nvme_set_errno_from_cqe static inline Move nvme_set_errno_from_cqe to header such that rq_test does not need to link with nvme/util.c. Signed-off-by: Klaus Jensen --- include/vfn/nvme/util.h | 7 ++++++- src/nvme/meson.build | 2 +- src/nvme/util.c | 7 ------- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/vfn/nvme/util.h b/include/vfn/nvme/util.h index 6d36876f..0e9b6b0e 100644 --- a/include/vfn/nvme/util.h +++ b/include/vfn/nvme/util.h @@ -55,7 +55,12 @@ static inline bool nvme_cqe_ok(struct nvme_cqe *cqe) * * Return: ``0`` when no error in @cqe, otherwise ``-1`` and set ``errno``. */ -int nvme_set_errno_from_cqe(struct nvme_cqe *cqe); +static inline int nvme_set_errno_from_cqe(struct nvme_cqe *cqe) +{ + errno = le16_to_cpu(cqe->sfp) >> 1 ? EIO : 0; + + return errno ? -1 : 0; +} /** * nvme_aer - Submit an Asynchronous Event Request command diff --git a/src/nvme/meson.build b/src/nvme/meson.build index c30d40b0..fd6f5619 100644 --- a/src/nvme/meson.build +++ b/src/nvme/meson.build @@ -14,7 +14,7 @@ nvme_sources = files( ) # tests -rq_test = executable('rq_test', [gen_sources, support_sources, trace_sources, 'queue.c', 'util.c', 'rq_test.c'], +rq_test = executable('rq_test', [gen_sources, support_sources, trace_sources, 'queue.c', 'rq_test.c'], link_with: [ccan_lib], include_directories: [ccan_inc, core_inc, vfn_inc], ) diff --git a/src/nvme/util.c b/src/nvme/util.c index a509ceb7..c5b25776 100644 --- a/src/nvme/util.c +++ b/src/nvme/util.c @@ -48,13 +48,6 @@ uint64_t nvme_crc64(uint64_t crc, const unsigned char *buffer, size_t len) return crc ^ (uint64_t)~0; } -int nvme_set_errno_from_cqe(struct nvme_cqe *cqe) -{ - errno = le16_to_cpu(cqe->sfp) >> 1 ? EIO : 0; - - return errno ? -1 : 0; -} - int nvme_aer(struct nvme_ctrl *ctrl, void *opaque) { struct nvme_rq *rq; From 6476e8001238878a0fc08993801f0ea664874211 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Wed, 28 Aug 2024 07:22:14 +0200 Subject: [PATCH 2/5] iommu: add dmabuf helper Add iommu/dmabuf.h which helps allocating and mapping DMA buffers. Signed-off-by: Klaus Jensen --- include/vfn/iommu/dmabuf.h | 84 +++++++++++++++++++++++++++++++++++ include/vfn/support/autoptr.h | 40 ++++++++++++++--- src/iommu/dmabuf.c | 51 +++++++++++++++++++++ src/iommu/meson.build | 1 + 4 files changed, 171 insertions(+), 5 deletions(-) create mode 100644 include/vfn/iommu/dmabuf.h create mode 100644 src/iommu/dmabuf.c diff --git a/include/vfn/iommu/dmabuf.h b/include/vfn/iommu/dmabuf.h new file mode 100644 index 00000000..f85f9cf7 --- /dev/null +++ b/include/vfn/iommu/dmabuf.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later or MIT */ + +/* + * This file is part of libvfn. + * + * Copyright (C) 2023 The libvfn Authors. All Rights Reserved. + * + * This library (libvfn) is dual licensed under the GNU Lesser General + * Public License version 2.1 or later or the MIT license. See the + * COPYING and LICENSE files for more information. + */ + +#ifndef LIBVFN_IOMMU_DMABUF_H +#define LIBVFN_IOMMU_DMABUF_H + +/** + * DOC: DMA-buffer helpers + * + * This provides a helper for allocating and mapping DMA buffers. + * + * &struct iommu_dmabuf is also registered as an "autovar" and can be + * automatically unmapped and deallocated when going out of scope. For this + * reason, this header is not included in and must explicitly be + * included. 
+ *
+ * Required includes:
+ * #include 
+ * #include 
+ * #include 
+ */
+
+
+/**
+ * struct iommu_dmabuf - DMA buffer abstraction
+ * @ctx: &struct iommu_ctx
+ * @vaddr: data buffer
+ * @iova: mapped address
+ * @len: length of @vaddr
+ *
+ * Convenience wrapper around a mapped data buffer.
+ */
+struct iommu_dmabuf {
+	struct iommu_ctx *ctx;
+
+	void *vaddr;
+	uint64_t iova;
+	ssize_t len;
+};
+
+/**
+ * iommu_get_dmabuf - Allocate and map a DMA buffer
+ * @ctx: &struct iommu_ctx
+ * @buffer: uninitialized &struct iommu_dmabuf
+ * @len: desired minimum length
+ * @flags: combination of enum iommu_map_flags
+ *
+ * Allocate at least @len bytes and map the buffer within the IOVA address space
+ * described by @ctx. The actual allocated and mapped length may be larger than
+ * requested due to alignment requirements.
+ *
+ * Return: On success, returns ``0``; on error, returns ``-1`` and sets
+ * ``errno``.
+ */
+int iommu_get_dmabuf(struct iommu_ctx *ctx, struct iommu_dmabuf *buffer, size_t len,
+		     unsigned long flags);
+
+/**
+ * iommu_put_dmabuf - Unmap and deallocate a DMA buffer
+ * @buffer: &struct iommu_dmabuf
+ *
+ * Unmap the buffer and deallocate it.
+ */
+void iommu_put_dmabuf(struct iommu_dmabuf *buffer);
+
+static inline void __do_iommu_put_dmabuf(void *p)
+{
+	struct iommu_dmabuf *buffer = (struct iommu_dmabuf *)p;
+
+	iommu_put_dmabuf(buffer);
+}
+
+DEFINE_AUTOVAR_STRUCT(iommu_dmabuf, __do_iommu_put_dmabuf);
+
+#endif /* LIBVFN_IOMMU_DMABUF_H */
diff --git a/include/vfn/support/autoptr.h b/include/vfn/support/autoptr.h
index fd0977db..8993dce1 100644
--- a/include/vfn/support/autoptr.h
+++ b/include/vfn/support/autoptr.h
@@ -14,11 +14,11 @@
 #define LIBVFN_SUPPORT_AUTOPTR_H
 
 /**
- * DOC: glib-style auto pointer
+ * DOC: glib-style automatic cleanup.
  *
- * The __autoptr() provides a general way of "cleaning up" when going out of
- * scope. Inspired by glib, but simplified a lot (at the expence of
- * flexibility).
+ * The __autoptr() and __autovar_s() provide a general way of "cleaning up"
+ * when going out of scope. Inspired by glib, but simplified a lot (at the
+ * expense of flexibility).
  */
 
 #define __AUTOPTR_CLEANUP(t) __autoptr_cleanup_##t
@@ -27,7 +27,7 @@
 /**
  * DEFINE_AUTOPTR - Defines the appropriate cleanup function for a pointer type
  * @t: type name
- * @cleanup: function to be called to cleanup type
+ * @cleanup: function to be called to clean up type
  *
  * Defines a function ``__autoptr_cleanup_##t`` that will call @cleanup when
  * invoked.
@@ -51,4 +51,34 @@
  */
 #define __autoptr(t) __attribute__((cleanup(__AUTOPTR_CLEANUP(t)))) __AUTOPTR_T(t)
 
+#define __AUTOVAR_STRUCT_CLEANUP(t) __autovar_struct_cleanup_##t
+#define __AUTOVAR_STRUCT_T(t) __autovar_struct_##t
+
+/**
+ * DEFINE_AUTOVAR_STRUCT - Defines the appropriate cleanup function for a struct
+ * type
+ * @t: type name
+ * @cleanup: function to be called to clean up type
+ *
+ * Defines a function ``__autovar_struct_cleanup_##t`` that will call @cleanup
+ * when invoked.
+ */
+#define DEFINE_AUTOVAR_STRUCT(t, cleanup) \
+	typedef struct t __AUTOVAR_STRUCT_T(t); \
+	\
+	static inline void __AUTOVAR_STRUCT_CLEANUP(t) (struct t *p) \
+	{ \
+		(cleanup)(p); \
+	}
+
+/**
+ * __autovar_s - Helper to declare a struct variable with automatic cleanup
+ * @t: type name
+ *
+ * Declares a struct-type variable that is cleaned up when the variable goes out
+ * of scope. How to clean up the type must have been previously declared using
+ * DEFINE_AUTOVAR_STRUCT().
+ */ +#define __autovar_s(t) __attribute__((cleanup(__AUTOVAR_STRUCT_CLEANUP(t)))) __AUTOVAR_STRUCT_T(t) + #endif /* LIBVFN_SUPPORT_AUTOPTR_H */ diff --git a/src/iommu/dmabuf.c b/src/iommu/dmabuf.c new file mode 100644 index 00000000..4cf01423 --- /dev/null +++ b/src/iommu/dmabuf.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later or MIT + +/* + * This file is part of libvfn. + * + * Copyright (C) 2024 The libvfn Authors. All Rights Reserved. + * + * This library (libvfn) is dual licensed under the GNU Lesser General + * Public License version 2.1 or later or the MIT license. See the + * COPYING and LICENSE files for more information. + */ + +#include + +#include + +#include + +#include +#include + +#include + +int iommu_get_dmabuf(struct iommu_ctx *ctx, struct iommu_dmabuf *buffer, size_t len, + unsigned long flags) +{ + buffer->ctx = ctx; + + buffer->len = pgmap(&buffer->vaddr, len); + if (buffer->len < 0) + return -1; + + if (iommu_map_vaddr(ctx, buffer->vaddr, buffer->len, &buffer->iova, flags)) { + pgunmap(buffer->vaddr, buffer->len); + return -1; + } + + return 0; +} + +void iommu_put_dmabuf(struct iommu_dmabuf *buffer) +{ + if (!buffer->len) + return; + + log_fatal_if(iommu_unmap_vaddr(buffer->ctx, buffer->vaddr, NULL), "iommu_unmap_vaddr"); + + pgunmap(buffer->vaddr, buffer->len); + + memset(buffer, 0x0, sizeof(*buffer)); +} diff --git a/src/iommu/meson.build b/src/iommu/meson.build index 4ba49d3d..b79965e6 100644 --- a/src/iommu/meson.build +++ b/src/iommu/meson.build @@ -1,6 +1,7 @@ iommu_sources = files( 'context.c', 'dma.c', + 'dmabuf.c', 'vfio.c', ) From 9866a6d679c041771b183ba2d2ea148a96aa5b3d Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 6 Sep 2024 12:21:52 +0200 Subject: [PATCH 3/5] nvme: use iommu_dmabuf api Replace all pgmap/iommu_map_vaddr pairs with iommu_get/put_dmabuf. 
Signed-off-by: Klaus Jensen --- include/vfn/nvme.h | 1 + include/vfn/nvme/ctrl.h | 4 +- include/vfn/nvme/queue.h | 16 ++-- src/nvme/core.c | 174 ++++++++++++++------------------------- src/nvme/queue.c | 9 +- src/nvme/util.c | 1 - 6 files changed, 72 insertions(+), 133 deletions(-) diff --git a/include/vfn/nvme.h b/include/vfn/nvme.h index a1023c4b..63d43493 100644 --- a/include/vfn/nvme.h +++ b/include/vfn/nvme.h @@ -36,6 +36,7 @@ extern "C" { #include #include #include +#include #include #include #include diff --git a/include/vfn/nvme/ctrl.h b/include/vfn/nvme/ctrl.h index 664263b2..82b473dd 100644 --- a/include/vfn/nvme/ctrl.h +++ b/include/vfn/nvme/ctrl.h @@ -82,8 +82,8 @@ struct nvme_ctrl { * @dbbuf: doorbell buffers */ struct { - void *doorbells; - void *eventidxs; + struct iommu_dmabuf doorbells; + struct iommu_dmabuf eventidxs; } dbbuf; /** diff --git a/include/vfn/nvme/queue.h b/include/vfn/nvme/queue.h index b2316826..b7e41ef5 100644 --- a/include/vfn/nvme/queue.h +++ b/include/vfn/nvme/queue.h @@ -27,8 +27,7 @@ struct nvme_dbbuf { */ struct nvme_cq { /* private: */ - void *vaddr; - uint64_t iova; + struct iommu_dmabuf mem; int id; uint16_t head; @@ -51,13 +50,8 @@ struct nvme_sq { /* private: */ struct nvme_cq *cq; - void *vaddr; - uint64_t iova; - - struct { - void *vaddr; - uint64_t iova; - } pages; + struct iommu_dmabuf mem; + struct iommu_dmabuf pages; uint16_t tail, ptail; int qsize; @@ -84,7 +78,7 @@ struct nvme_sq { */ static inline void nvme_sq_post(struct nvme_sq *sq, const union nvme_cmd *sqe) { - memcpy(sq->vaddr + (sq->tail << NVME_SQES), sqe, 1 << NVME_SQES); + memcpy(sq->mem.vaddr + (sq->tail << NVME_SQES), sqe, 1 << NVME_SQES); trace_guard(NVME_SQ_POST) { trace_emit("sqid %d tail %d\n", sq->id, sq->tail); @@ -174,7 +168,7 @@ static inline void nvme_sq_exec(struct nvme_sq *sq, const union nvme_cmd *sqe) */ static inline struct nvme_cqe *nvme_cq_head(struct nvme_cq *cq) { - return (struct nvme_cqe *)(cq->vaddr + (cq->head << NVME_CQES)); + return (struct nvme_cqe *)(cq->mem.vaddr + (cq->head << NVME_CQES)); } /** diff --git a/src/nvme/core.c b/src/nvme/core.c index 24651910..405d27c5 100644 --- a/src/nvme/core.c +++ b/src/nvme/core.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -115,7 +114,6 @@ static int nvme_configure_cq(struct nvme_ctrl *ctrl, int qid, int qsize, int vec struct nvme_cq *cq = &ctrl->cq[qid]; uint64_t cap; uint8_t dstrd; - size_t len; cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); dstrd = NVME_FIELD_GET(cap, CAP_DSTRD); @@ -144,36 +142,25 @@ static int nvme_configure_cq(struct nvme_ctrl *ctrl, int qid, int qsize, int vec .vector = vector, }; - if (ctrl->dbbuf.doorbells) { - cq->dbbuf.doorbell = cqhdbl(ctrl->dbbuf.doorbells, qid, dstrd); - cq->dbbuf.eventidx = cqhdbl(ctrl->dbbuf.eventidxs, qid, dstrd); + if (ctrl->dbbuf.doorbells.vaddr) { + cq->dbbuf.doorbell = cqhdbl(ctrl->dbbuf.doorbells.vaddr, qid, dstrd); + cq->dbbuf.eventidx = cqhdbl(ctrl->dbbuf.eventidxs.vaddr, qid, dstrd); } - len = pgmapn(&cq->vaddr, qsize, 1 << NVME_CQES); - - if (iommu_map_vaddr(__iommu_ctx(ctrl), cq->vaddr, len, &cq->iova, 0x0)) { - log_debug("failed to map vaddr\n"); - - pgunmap(cq->vaddr, len); + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &cq->mem, qsize << NVME_CQES, 0x0)) return -1; - } return 0; } void nvme_discard_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq) { - size_t len; - - if (!cq->vaddr) + if (!cq->mem.vaddr) return; - if (iommu_unmap_vaddr(__iommu_ctx(ctrl), cq->vaddr, &len)) - log_debug("failed to unmap vaddr\n"); + 
iommu_put_dmabuf(&cq->mem); - pgunmap(cq->vaddr, len); - - if (ctrl->dbbuf.doorbells) { + if (ctrl->dbbuf.doorbells.vaddr) { __STORE_PTR(uint32_t *, cq->dbbuf.doorbell, 0); __STORE_PTR(uint32_t *, cq->dbbuf.eventidx, 0); } @@ -187,7 +174,9 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize, struct nvme_sq *sq = &ctrl->sq[qid]; uint64_t cap; uint8_t dstrd; - ssize_t len; + size_t pagesize; + + pagesize = __mps_to_pagesize(ctrl->config.mps); cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); dstrd = NVME_FIELD_GET(cap, CAP_DSTRD); @@ -216,25 +205,18 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize, .cq = cq, }; - if (ctrl->dbbuf.doorbells) { - sq->dbbuf.doorbell = sqtdbl(ctrl->dbbuf.doorbells, qid, dstrd); - sq->dbbuf.eventidx = sqtdbl(ctrl->dbbuf.eventidxs, qid, dstrd); + if (ctrl->dbbuf.doorbells.vaddr) { + sq->dbbuf.doorbell = sqtdbl(ctrl->dbbuf.doorbells.vaddr, qid, dstrd); + sq->dbbuf.eventidx = sqtdbl(ctrl->dbbuf.eventidxs.vaddr, qid, dstrd); } /* * Use ctrl->config.mps instead of host page size, as we have the * opportunity to pack the allocations. */ - len = pgmapn(&sq->pages.vaddr, qsize, __mps_to_pagesize(ctrl->config.mps)); - - if (len < 0) + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &sq->pages, pagesize, 0x0)) return -1; - if (iommu_map_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, len, &sq->pages.iova, 0x0)) { - log_debug("failed to map vaddr\n"); - goto unmap_pages; - } - sq->rqs = znew_t(struct nvme_rq, qsize - 1); sq->rq_top = &sq->rqs[qsize - 2]; @@ -251,50 +233,28 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize, rq->rq_next = &sq->rqs[i - 1]; } - len = pgmapn(&sq->vaddr, qsize, 1 << NVME_SQES); - if (len < 0) - goto free_sq_rqs; + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &sq->mem, qsize << NVME_SQES, 0x0)) { + free(sq->rqs); + iommu_put_dmabuf(&sq->pages); - if (iommu_map_vaddr(__iommu_ctx(ctrl), sq->vaddr, len, &sq->iova, 0x0)) { - log_debug("failed to map vaddr\n"); - goto unmap_sq; + return -1; } return 0; - -unmap_sq: - pgunmap(sq->vaddr, len); -free_sq_rqs: - free(sq->rqs); -unmap_pages: - if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, (size_t *)&len)) - log_debug("failed to unmap vaddr\n"); - - pgunmap(sq->pages.vaddr, len); - - return -1; } void nvme_discard_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq) { - size_t len; - - if (!sq->vaddr) + if (!sq->mem.vaddr) return; - if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->vaddr, &len)) - log_debug("failed to unmap vaddr\n"); - - pgunmap(sq->vaddr, len); + iommu_put_dmabuf(&sq->mem); free(sq->rqs); - if (iommu_unmap_vaddr(__iommu_ctx(ctrl), sq->pages.vaddr, &len)) - log_debug("failed to unmap vaddr\n"); - - pgunmap(sq->pages.vaddr, len); + iommu_put_dmabuf(&sq->pages); - if (ctrl->dbbuf.doorbells) { + if (ctrl->dbbuf.doorbells.vaddr) { __STORE_PTR(uint32_t *, sq->dbbuf.doorbell, 0); __STORE_PTR(uint32_t *, sq->dbbuf.eventidx, 0); } @@ -326,8 +286,8 @@ int nvme_configure_adminq(struct nvme_ctrl *ctrl, unsigned long sq_flags) aqa |= aqa << 16; mmio_write32(ctrl->regs + NVME_REG_AQA, cpu_to_le32(aqa)); - mmio_hl_write64(ctrl->regs + NVME_REG_ASQ, cpu_to_le64(sq->iova)); - mmio_hl_write64(ctrl->regs + NVME_REG_ACQ, cpu_to_le64(cq->iova)); + mmio_hl_write64(ctrl->regs + NVME_REG_ASQ, cpu_to_le64(sq->mem.iova)); + mmio_hl_write64(ctrl->regs + NVME_REG_ACQ, cpu_to_le64(cq->mem.iova)); return 0; @@ -361,7 +321,7 @@ int nvme_create_iocq(struct nvme_ctrl *ctrl, int qid, int qsize, int vector) cmd.create_cq = (struct nvme_cmd_create_cq) { .opcode = 
NVME_ADMIN_CREATE_CQ, - .prp1 = cpu_to_le64(cq->iova), + .prp1 = cpu_to_le64(cq->mem.iova), .qid = cpu_to_le16((uint16_t)qid), .qsize = cpu_to_le16((uint16_t)(qsize - 1)), .qflags = cpu_to_le16(qflags), @@ -398,7 +358,7 @@ int nvme_create_iosq(struct nvme_ctrl *ctrl, int qid, int qsize, struct nvme_cq cmd.create_sq = (struct nvme_cmd_create_sq) { .opcode = NVME_ADMIN_CREATE_SQ, - .prp1 = cpu_to_le64(sq->iova), + .prp1 = cpu_to_le64(sq->mem.iova), .qid = cpu_to_le16((uint16_t)qid), .qsize = cpu_to_le16((uint16_t)(qsize - 1)), .qflags = cpu_to_le16(NVME_Q_PC), @@ -518,29 +478,22 @@ int nvme_reset(struct nvme_ctrl *ctrl) static int nvme_init_dbconfig(struct nvme_ctrl *ctrl) { - uint64_t prp1, prp2; union nvme_cmd cmd; - if (pgmap((void **)&ctrl->dbbuf.doorbells, __VFN_PAGESIZE) < 0) + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &ctrl->dbbuf.doorbells, __VFN_PAGESIZE, 0x0)) return -1; - if (iommu_map_vaddr(__iommu_ctx(ctrl), ctrl->dbbuf.doorbells, __VFN_PAGESIZE, &prp1, 0x0)) - return -1; - - if (pgmap((void **)&ctrl->dbbuf.eventidxs, __VFN_PAGESIZE) < 0) - return -1; - - if (iommu_map_vaddr(__iommu_ctx(ctrl), ctrl->dbbuf.eventidxs, __VFN_PAGESIZE, &prp2, 0x0)) - return -1; + if (iommu_get_dmabuf(__iommu_ctx(ctrl), &ctrl->dbbuf.eventidxs, __VFN_PAGESIZE, 0x0)) + goto put_doorbells; cmd = (union nvme_cmd) { .opcode = NVME_ADMIN_DBCONFIG, - .dptr.prp1 = cpu_to_le64(prp1), - .dptr.prp2 = cpu_to_le64(prp2), + .dptr.prp1 = cpu_to_le64(ctrl->dbbuf.doorbells.iova), + .dptr.prp2 = cpu_to_le64(ctrl->dbbuf.eventidxs.iova), }; if (__admin(ctrl, &cmd)) - return -1; + goto put_eventidxs; if (!(ctrl->opts.quirks & NVME_QUIRK_BROKEN_DBBUF)) { uint64_t cap; @@ -549,14 +502,25 @@ static int nvme_init_dbconfig(struct nvme_ctrl *ctrl) cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); dstrd = NVME_FIELD_GET(cap, CAP_DSTRD); - ctrl->adminq.cq->dbbuf.doorbell = cqhdbl(ctrl->dbbuf.doorbells, NVME_AQ, dstrd); - ctrl->adminq.cq->dbbuf.eventidx = cqhdbl(ctrl->dbbuf.eventidxs, NVME_AQ, dstrd); + ctrl->adminq.cq->dbbuf.doorbell = + cqhdbl(ctrl->dbbuf.doorbells.vaddr, NVME_AQ, dstrd); + ctrl->adminq.cq->dbbuf.eventidx = + cqhdbl(ctrl->dbbuf.eventidxs.vaddr, NVME_AQ, dstrd); - ctrl->adminq.sq->dbbuf.doorbell = sqtdbl(ctrl->dbbuf.doorbells, NVME_AQ, dstrd); - ctrl->adminq.sq->dbbuf.eventidx = sqtdbl(ctrl->dbbuf.eventidxs, NVME_AQ, dstrd); + ctrl->adminq.sq->dbbuf.doorbell = + sqtdbl(ctrl->dbbuf.doorbells.vaddr, NVME_AQ, dstrd); + ctrl->adminq.sq->dbbuf.eventidx = + sqtdbl(ctrl->dbbuf.eventidxs.vaddr, NVME_AQ, dstrd); } return 0; + +put_eventidxs: + iommu_put_dmabuf(&ctrl->dbbuf.eventidxs); +put_doorbells: + iommu_put_dmabuf(&ctrl->dbbuf.doorbells); + + return -1; } static int nvme_init_pci(struct nvme_ctrl *ctrl, const char *bdf) @@ -639,11 +603,12 @@ int nvme_ctrl_init(struct nvme_ctrl *ctrl, const char *bdf, int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_opts *opts) { + struct iommu_ctx *ctx = __iommu_ctx(ctrl); + uint16_t oacs; uint32_t sgls; - ssize_t len; - void *vaddr; - int ret; + + __autovar_s(iommu_dmabuf) buffer; union nvme_cmd cmd = {}; struct nvme_cqe cqe; @@ -688,8 +653,7 @@ int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op ctrl->config.ncqa = min_t(int, ctrl->opts.ncqr, NVME_FIELD_GET(le32_to_cpu(cqe.dw0), FEAT_NRQS_NCQR)); - len = pgmap(&vaddr, NVME_IDENTIFY_DATA_SIZE); - if (len < 0) + if (iommu_get_dmabuf(ctx, &buffer, NVME_IDENTIFY_DATA_SIZE, IOMMU_MAP_EPHEMERAL)) return -1; cmd.identify = (struct nvme_cmd_identify) { @@ -697,17 +661,14 @@ 
int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op .cns = NVME_IDENTIFY_CNS_CTRL, }; - ret = nvme_admin(ctrl, &cmd, vaddr, len, NULL); - if (ret) { - log_debug("could not identify\n"); - goto out; - } + if (nvme_admin(ctrl, &cmd, buffer.vaddr, buffer.len, NULL)) + return -1; - oacs = le16_to_cpu(*(leint16_t *)(vaddr + NVME_IDENTIFY_CTRL_OACS)); - if (oacs & NVME_IDENTIFY_CTRL_OACS_DBCONFIG) - ret = nvme_init_dbconfig(ctrl); + oacs = le16_to_cpu(*(leint16_t *)(buffer.vaddr + NVME_IDENTIFY_CTRL_OACS)); + if (oacs & NVME_IDENTIFY_CTRL_OACS_DBCONFIG && nvme_init_dbconfig(ctrl)) + return -1; - sgls = le32_to_cpu(*(leint32_t *)(vaddr + NVME_IDENTIFY_CTRL_SGLS)); + sgls = le32_to_cpu(*(leint32_t *)(buffer.vaddr + NVME_IDENTIFY_CTRL_SGLS)); if (sgls) { uint32_t alignment = NVME_FIELD_GET(sgls, IDENTIFY_CTRL_SGLS_ALIGNMENT); @@ -717,10 +678,7 @@ int nvme_init(struct nvme_ctrl *ctrl, const char *bdf, const struct nvme_ctrl_op ctrl->flags |= NVME_CTRL_F_SGLS_DWORD_ALIGNMENT; } -out: - pgunmap(vaddr, len); - - return ret; + return 0; } void nvme_close(struct nvme_ctrl *ctrl) @@ -735,17 +693,9 @@ void nvme_close(struct nvme_ctrl *ctrl) free(ctrl->cq); - if (ctrl->dbbuf.doorbells) { - struct iommu_ctx *ctx = __iommu_ctx(ctrl); - size_t len; - - log_fatal_if(iommu_unmap_vaddr(ctx, ctrl->dbbuf.doorbells, &len), - "iommu_unmap_vaddr"); - pgunmap(ctrl->dbbuf.doorbells, len); - - log_fatal_if(iommu_unmap_vaddr(ctx, ctrl->dbbuf.eventidxs, &len), - "iommu_unmap_vaddr"); - pgunmap(ctrl->dbbuf.eventidxs, len); + if (ctrl->dbbuf.doorbells.vaddr) { + iommu_put_dmabuf(&ctrl->dbbuf.doorbells); + iommu_put_dmabuf(&ctrl->dbbuf.eventidxs); } vfio_pci_unmap_bar(&ctrl->pci, 0, ctrl->regs, 0x1000, 0); diff --git a/src/nvme/queue.c b/src/nvme/queue.c index 85be40ca..42875064 100644 --- a/src/nvme/queue.c +++ b/src/nvme/queue.c @@ -26,14 +26,9 @@ #include -#include -#include -#include -#include -#include +#include #include -#include -#include +#include #include "ccan/time/time.h" diff --git a/src/nvme/util.c b/src/nvme/util.c index c5b25776..29221eb2 100644 --- a/src/nvme/util.c +++ b/src/nvme/util.c @@ -29,7 +29,6 @@ #include #include -#include #include #include #include From b0e1209da1974eec80ebae35d8405ace71b81dc8 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Fri, 6 Sep 2024 13:26:57 +0200 Subject: [PATCH 4/5] nvme/core: abstract mmio The mmio helpers assume that we can read and write to a specific address. The proposed DriverKit backend cannot do that and must call specific functions on a base address and an offset. In the core, prepare for DriverKit to provide its own implementation of doing that. This is a simplified version of commit "mmio: convert calls of 'addr + offset' to 'base-addr, offset'" to limit the change to the nvme core. 
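
To make the calling convention concrete: the Linux definitions below are the
ones added by this patch; the #else branch is only a hypothetical sketch of
what a DriverKit (or other non-Linux) backend could plug in, with placeholder
names:

    #ifdef __linux__
    # define __reg32_read(regs, offset)     mmio_read32((regs) + (offset))
    # define __reg32_write(regs, offset, v) mmio_write32((regs) + (offset), (v))
    #else
    /* hypothetical backend hooks; names and signatures are illustrative only */
    # define __reg32_read(regs, offset)     backend_reg32_read((regs), (offset))
    # define __reg32_write(regs, offset, v) backend_reg32_write((regs), (offset), (v))
    #endif
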
Signed-off-by: Mads Ynddal [k.jensen: limit the change to nvme/core for now] Signed-off-by: Klaus Jensen --- src/nvme/core.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/nvme/core.c b/src/nvme/core.c index 405d27c5..9234663a 100644 --- a/src/nvme/core.c +++ b/src/nvme/core.c @@ -49,6 +49,13 @@ #define sqtdbl(doorbells, qid, dstrd) \ (doorbells + (2 * qid) * (4 << dstrd)) +#ifdef __linux__ +# define __reg32_read(regs, offset) mmio_read32((regs) + (offset)) +# define __reg32_write(regs, offset, v) mmio_write32((regs) + (offset), (v)) +# define __reg64_read(regs, offset) mmio_read64((regs) + (offset)) +# define __reg64_write_hl(regs, offset, v) mmio_hl_write64((regs) + (offset), (v)) +#endif + struct nvme_ctrl_handle { struct nvme_ctrl *ctrl; struct list_node list; @@ -115,7 +122,7 @@ static int nvme_configure_cq(struct nvme_ctrl *ctrl, int qid, int qsize, int vec uint64_t cap; uint8_t dstrd; - cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); + cap = le64_to_cpu(__reg64_read(ctrl->regs, NVME_REG_CAP)); dstrd = NVME_FIELD_GET(cap, CAP_DSTRD); if (qid && qid > ctrl->config.ncqa + 1) { @@ -178,7 +185,7 @@ static int nvme_configure_sq(struct nvme_ctrl *ctrl, int qid, int qsize, pagesize = __mps_to_pagesize(ctrl->config.mps); - cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); + cap = le64_to_cpu(__reg64_read(ctrl->regs, NVME_REG_CAP)); dstrd = NVME_FIELD_GET(cap, CAP_DSTRD); if (qid && qid > ctrl->config.nsqa + 1) { @@ -285,9 +292,9 @@ int nvme_configure_adminq(struct nvme_ctrl *ctrl, unsigned long sq_flags) aqa = NVME_AQ_QSIZE - 1; aqa |= aqa << 16; - mmio_write32(ctrl->regs + NVME_REG_AQA, cpu_to_le32(aqa)); - mmio_hl_write64(ctrl->regs + NVME_REG_ASQ, cpu_to_le64(sq->mem.iova)); - mmio_hl_write64(ctrl->regs + NVME_REG_ACQ, cpu_to_le64(cq->mem.iova)); + __reg32_write(ctrl->regs, NVME_REG_AQA, cpu_to_le32(aqa)); + __reg64_write_hl(ctrl->regs, NVME_REG_ASQ, cpu_to_le64(sq->mem.iova)); + __reg64_write_hl(ctrl->regs, NVME_REG_ACQ, cpu_to_le64(cq->mem.iova)); return 0; @@ -419,7 +426,7 @@ static int nvme_wait_rdy(struct nvme_ctrl *ctrl, unsigned short rdy) unsigned long timeout_ms; struct timeabs deadline; - cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); + cap = le64_to_cpu(__reg64_read(ctrl->regs, NVME_REG_CAP)); timeout_ms = 500 * (NVME_FIELD_GET(cap, CAP_TO) + 1); deadline = timeabs_add(time_now(), time_from_msec(timeout_ms)); @@ -431,7 +438,7 @@ static int nvme_wait_rdy(struct nvme_ctrl *ctrl, unsigned short rdy) return -1; } - csts = le32_to_cpu(mmio_read32(ctrl->regs + NVME_REG_CSTS)); + csts = le32_to_cpu(__reg32_read(ctrl->regs, NVME_REG_CSTS)); } while (NVME_FIELD_GET(csts, CSTS_RDY) != rdy); return 0; @@ -443,7 +450,7 @@ int nvme_enable(struct nvme_ctrl *ctrl) uint32_t cc; uint64_t cap; - cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); + cap = le64_to_cpu(__reg64_read(ctrl->regs, NVME_REG_CAP)); css = NVME_FIELD_GET(cap, CAP_CSS); cc = @@ -461,7 +468,7 @@ int nvme_enable(struct nvme_ctrl *ctrl) else cc |= NVME_FIELD_SET(NVME_CC_CSS_NVM, CC_CSS); - mmio_write32(ctrl->regs + NVME_REG_CC, cpu_to_le32(cc)); + __reg32_write(ctrl->regs, NVME_REG_CC, cpu_to_le32(cc)); return nvme_wait_rdy(ctrl, 1); } @@ -470,8 +477,8 @@ int nvme_reset(struct nvme_ctrl *ctrl) { uint32_t cc; - cc = le32_to_cpu(mmio_read32(ctrl->regs + NVME_REG_CC)); - mmio_write32(ctrl->regs + NVME_REG_CC, cpu_to_le32(cc & 0xfe)); + cc = le32_to_cpu(__reg32_read(ctrl->regs, NVME_REG_CC)); + __reg32_write(ctrl->regs, NVME_REG_CC, 
cpu_to_le32(cc & 0xfe)); return nvme_wait_rdy(ctrl, 0); } @@ -499,7 +506,7 @@ static int nvme_init_dbconfig(struct nvme_ctrl *ctrl) uint64_t cap; uint8_t dstrd; - cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); + cap = le64_to_cpu(__reg64_read(ctrl->regs, NVME_REG_CAP)); dstrd = NVME_FIELD_GET(cap, CAP_DSTRD); ctrl->adminq.cq->dbbuf.doorbell = @@ -577,7 +584,7 @@ int nvme_ctrl_init(struct nvme_ctrl *ctrl, const char *bdf, if ((ctrl->pci.classcode & 0xff) == 0x03) ctrl->flags = NVME_CTRL_F_ADMINISTRATIVE; - cap = le64_to_cpu(mmio_read64(ctrl->regs + NVME_REG_CAP)); + cap = le64_to_cpu(__reg64_read(ctrl->regs, NVME_REG_CAP)); mpsmin = NVME_FIELD_GET(cap, CAP_MPSMIN); mpsmax = NVME_FIELD_GET(cap, CAP_MPSMAX); From 4068cfe52d3908a6921bda6a1b0cf7bd48c29b14 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Wed, 13 Sep 2023 11:06:19 +0200 Subject: [PATCH 5/5] support/mem: add driverkit operations Within DriverKit we cannot use malloc/free but have to use the DriverKit specific IOMallocZero and IOFree. Signed-off-by: Mads Ynddal [k.jensen: allowed more generic functions to be retained] Signed-off-by: Klaus Jensen --- docs/api/support/mem.rst | 2 +- include/vfn/support/mem.h | 51 ++------------- include/vfn/support/platform/driverkit/mem.h | 65 ++++++++++++++++++++ include/vfn/support/platform/posix/mem.h | 63 +++++++++++++++++++ 4 files changed, 134 insertions(+), 47 deletions(-) create mode 100644 include/vfn/support/platform/driverkit/mem.h create mode 100644 include/vfn/support/platform/posix/mem.h diff --git a/docs/api/support/mem.rst b/docs/api/support/mem.rst index 9a629f06..61432080 100644 --- a/docs/api/support/mem.rst +++ b/docs/api/support/mem.rst @@ -3,4 +3,4 @@ Memory Allocation Helpers ========================= -.. kernel-doc:: include/vfn/support/mem.h +.. kernel-doc:: include/vfn/support/platform/posix/mem.h diff --git a/include/vfn/support/mem.h b/include/vfn/support/mem.h index 6773c4b1..e98e8fb1 100644 --- a/include/vfn/support/mem.h +++ b/include/vfn/support/mem.h @@ -40,41 +40,11 @@ static inline size_t __abort_on_overflow(unsigned int n, size_t sz) return n * sz; } -/** - * xmalloc - version of malloc that cannot fail - * @sz: number of bytes to allocate - * - * Call malloc, but only return NULL when @sz is zero. Otherwise, abort. 
- * - * Return: pointer to allocated memory - */ -static inline void *xmalloc(size_t sz) -{ - void *mem; - - if (unlikely(!sz)) - return NULL; - - mem = malloc(sz); - if (unlikely(!mem)) - backtrace_abort(); - - return mem; -} - -static inline void *zmalloc(size_t sz) -{ - void *mem; - - if (unlikely(!sz)) - return NULL; - - mem = calloc(1, sz); - if (unlikely(!mem)) - backtrace_abort(); - - return mem; -} +#ifdef __APPLE__ +# include "vfn/support/platform/driverkit/mem.h" +#else +# include "vfn/support/platform/posix/mem.h" +#endif static inline void *mallocn(unsigned int n, size_t sz) { @@ -98,17 +68,6 @@ static inline void *zmallocn(unsigned int n, size_t sz) return zmalloc(n * sz); } -static inline void *reallocn(void *mem, unsigned int n, size_t sz) -{ - if (would_overflow(n, sz)) { - fprintf(stderr, "allocation of %d * %zu bytes would overflow\n", n, sz); - - backtrace_abort(); - } - - return realloc(mem, n * sz); -} - #define _new_t(t, n, f) \ ((t *) f(n, sizeof(t))) diff --git a/include/vfn/support/platform/driverkit/mem.h b/include/vfn/support/platform/driverkit/mem.h new file mode 100644 index 00000000..9e8830ec --- /dev/null +++ b/include/vfn/support/platform/driverkit/mem.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later or MIT */ + +/* + * This file is part of libvfn. + * + * Copyright (C) 2024 The libvfn Authors. All Rights Reserved. + * + * This library (libvfn) is dual licensed under the GNU Lesser General + * Public License version 2.1 or later or the MIT license. See the + * COPYING and LICENSE files for more information. + */ + +#ifndef LIBVFN_SUPPORT_PLATFORM_DRIVERKIT_MEM_H +#define LIBVFN_SUPPORT_PLATFORM_DRIVERKIT_MEM_H + +#include +#include + +static inline void *xmalloc(size_t sz) +{ + void *mem; + + if (unlikely(!sz)) + return NULL; + + sz += sizeof(uint64_t); + + mem = IOMalloc(sz); + if (unlikely(!mem)) + backtrace_abort(); + + /* prepend length of allocationl (we need it on free) */ + *(uint64_t *)mem = sz; + + return (void *)(((char *)mem) + sizeof(uint64_t)); +} + +static inline void *zmalloc(size_t sz) +{ + void *mem; + + if (unlikely(!sz)) + return NULL; + + sz += sizeof(uint64_t); + + mem = IOMallocZero(sz); + if (unlikely(!mem)) + backtrace_abort(); + + /* prepend length of allocationl (we need it on free) */ + *(uint64_t *)mem = sz; + + return (void *)(((char *)mem) + sizeof(uint64_t)); +} + +static inline void free(void *ptr) +{ + void *real_ptr = (void *)(((uint8_t *)ptr) - sizeof(uint64_t)); + uint64_t len = *(uint64_t *)real_ptr; + + IOFree(real_ptr, len); +} + +#endif /* LIBVFN_SUPPORT_PLATFORM_DRIVERKIT_MEM_H */ diff --git a/include/vfn/support/platform/posix/mem.h b/include/vfn/support/platform/posix/mem.h new file mode 100644 index 00000000..ca362a6d --- /dev/null +++ b/include/vfn/support/platform/posix/mem.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later or MIT */ + +/* + * This file is part of libvfn. + * + * Copyright (C) 2022 The libvfn Authors. All Rights Reserved. + * + * This library (libvfn) is dual licensed under the GNU Lesser General + * Public License version 2.1 or later or the MIT license. See the + * COPYING and LICENSE files for more information. + */ + +#ifndef LIBVFN_SUPPORT_PLATFORM_POSIX_MEM_H +#define LIBVFN_SUPPORT_PLATFORM_POSIX_MEM_H + +/** + * xmalloc - version of malloc that cannot fail + * @sz: number of bytes to allocate + * + * Call malloc, but only return NULL when @sz is zero. Otherwise, abort. 
+ * + * Return: pointer to allocated memory + */ +static inline void *xmalloc(size_t sz) +{ + void *mem; + + if (unlikely(!sz)) + return NULL; + + mem = malloc(sz); + if (unlikely(!mem)) + backtrace_abort(); + + return mem; +} + +static inline void *zmalloc(size_t sz) +{ + void *mem; + + if (unlikely(!sz)) + return NULL; + + mem = calloc(1, sz); + if (unlikely(!mem)) + backtrace_abort(); + + return mem; +} + +static inline void *reallocn(void *mem, unsigned int n, size_t sz) +{ + if (would_overflow(n, sz)) { + fprintf(stderr, "allocation of %d * %zu bytes would overflow\n", n, sz); + + backtrace_abort(); + } + + return realloc(mem, n * sz); +} + +#endif /* LIBVFN_SUPPORT_PLATFORM_POSIX_MEM_H */