From 4549bf2f137a8e76f22fcff618bdc341e1fc9b23 Mon Sep 17 00:00:00 2001
From: Klaus Birkelund Jensen
Date: Fri, 5 Jul 2019 09:23:32 +0200
Subject: [PATCH] nvme: support scatter gather lists

For now, support the Data Block, Segment and Last Segment descriptor
types.

See NVM Express 1.3d, Section 4.4 ("Scatter Gather List (SGL)").

Signed-off-by: Klaus Birkelund Jensen
---
 block/nvme.c          |  18 +-
 hw/block/nvme.c       | 390 +++++++++++++++++++++++++++++++++++-------
 hw/block/nvme.h       |   6 +
 hw/block/trace-events |   3 +
 include/block/nvme.h  |  64 ++++++-
 5 files changed, 410 insertions(+), 71 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 5be3a39b63..8825c19c72 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -440,7 +440,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
         error_setg(errp, "Cannot map buffer for DMA");
         goto out;
     }
-    cmd.prp1 = cpu_to_le64(iova);
+    cmd.dptr.prp.prp1 = cpu_to_le64(iova);
 
     if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
         error_setg(errp, "Failed to identify controller");
@@ -529,7 +529,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
     }
     cmd = (NvmeCmd) {
         .opcode = NVME_ADM_CMD_CREATE_CQ,
-        .prp1 = cpu_to_le64(q->cq.iova),
+        .dptr.prp.prp1 = cpu_to_le64(q->cq.iova),
         .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
         .cdw11 = cpu_to_le32(0x3),
     };
@@ -540,7 +540,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
     }
     cmd = (NvmeCmd) {
         .opcode = NVME_ADM_CMD_CREATE_SQ,
-        .prp1 = cpu_to_le64(q->sq.iova),
+        .dptr.prp.prp1 = cpu_to_le64(q->sq.iova),
         .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
         .cdw11 = cpu_to_le32(0x1 | (n << 16)),
     };
@@ -889,16 +889,16 @@ try_map:
     case 0:
         abort();
     case 1:
-        cmd->prp1 = pagelist[0];
-        cmd->prp2 = 0;
+        cmd->dptr.prp.prp1 = pagelist[0];
+        cmd->dptr.prp.prp2 = 0;
         break;
     case 2:
-        cmd->prp1 = pagelist[0];
-        cmd->prp2 = pagelist[1];
+        cmd->dptr.prp.prp1 = pagelist[0];
+        cmd->dptr.prp.prp2 = pagelist[1];
         break;
     default:
-        cmd->prp1 = pagelist[0];
-        cmd->prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t));
+        cmd->dptr.prp.prp1 = pagelist[0];
+        cmd->dptr.prp.prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t));
         break;
     }
     trace_nvme_cmd_map_qiov(s, cmd, req, qiov, entries);
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index bdbb1dc311..8c1c7a2adf 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -275,6 +275,198 @@ unmap:
     return status;
 }
 
+static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
+    NvmeSglDescriptor *segment, uint64_t nsgld, uint32_t *len,
+    NvmeRequest *req)
+{
+    dma_addr_t addr, trans_len;
+
+    for (int i = 0; i < nsgld; i++) {
+        if (NVME_SGL_TYPE(segment[i].type) != SGL_DESCR_TYPE_DATA_BLOCK) {
+            trace_nvme_err_invalid_sgl_descriptor(req->cqe.cid,
+                NVME_SGL_TYPE(segment[i].type));
+            return NVME_SGL_DESCRIPTOR_TYPE_INVALID | NVME_DNR;
+        }
+
+        if (*len == 0) {
+            if (!NVME_CTRL_SGLS_EXCESS_LENGTH(n->id_ctrl.sgls)) {
+                trace_nvme_err_invalid_sgl_excess_length(req->cqe.cid);
+                return NVME_DATA_SGL_LENGTH_INVALID | NVME_DNR;
+            }
+
+            break;
+        }
+
+        addr = le64_to_cpu(segment[i].addr);
+        trans_len = MIN(*len, le64_to_cpu(segment[i].len));
+
+        if (nvme_addr_is_cmb(n, addr)) {
+            /*
+             * All data and metadata, if any, associated with a particular
+             * command shall be located in either the CMB or host memory. Thus,
+             * if an address is found to be in the CMB and we have already
+             * mapped data that is in host memory, the use is invalid.
+             */
+            if (!req->is_cmb && qsg->size) {
+                return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+            }
+
+            req->is_cmb = true;
+        } else {
+            /*
+             * Similarly, if the address does not reference the CMB, but we
+             * have already established that the request has data or metadata
+             * in the CMB, the use is invalid.
+             */
+            if (req->is_cmb) {
+                return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+            }
+        }
+
+        qemu_sglist_add(qsg, addr, trans_len);
+
+        *len -= trans_len;
+    }
+
+    return NVME_SUCCESS;
+}
+
+static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg,
+    NvmeSglDescriptor sgl, uint32_t len, NvmeRequest *req)
+{
+    const int MAX_NSGLD = 256;
+
+    NvmeSglDescriptor segment[MAX_NSGLD];
+    uint64_t nsgld;
+    uint16_t status;
+    bool sgl_in_cmb = false;
+    hwaddr addr = le64_to_cpu(sgl.addr);
+
+    trace_nvme_map_sgl(req->cqe.cid, NVME_SGL_TYPE(sgl.type), req->nlb, len);
+
+    pci_dma_sglist_init(qsg, &n->parent_obj, 1);
+
+    /*
+     * If the entire transfer can be described with a single data block it can
+     * be mapped directly.
+     */
+    if (NVME_SGL_TYPE(sgl.type) == SGL_DESCR_TYPE_DATA_BLOCK) {
+        status = nvme_map_sgl_data(n, qsg, &sgl, 1, &len, req);
+        if (status) {
+            goto unmap;
+        }
+
+        goto out;
+    }
+
+    /*
+     * If the segment is located in the CMB, the submission queue of the
+     * request must also reside there.
+     */
+    if (nvme_addr_is_cmb(n, addr)) {
+        if (!nvme_addr_is_cmb(n, req->sq->dma_addr)) {
+            return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+        }
+
+        sgl_in_cmb = true;
+    }
+
+    while (NVME_SGL_TYPE(sgl.type) == SGL_DESCR_TYPE_SEGMENT) {
+        bool addr_is_cmb;
+
+        nsgld = le64_to_cpu(sgl.len) / sizeof(NvmeSglDescriptor);
+
+        /* read the segment in chunks of 256 descriptors (4k) */
+        while (nsgld > MAX_NSGLD) {
+            nvme_addr_read(n, addr, segment, sizeof(segment));
+
+            status = nvme_map_sgl_data(n, qsg, segment, MAX_NSGLD, &len, req);
+            if (status) {
+                goto unmap;
+            }
+
+            nsgld -= MAX_NSGLD;
+            addr += MAX_NSGLD * sizeof(NvmeSglDescriptor);
+        }
+
+        nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor));
+
+        sgl = segment[nsgld - 1];
+        addr = le64_to_cpu(sgl.addr);
+
+        /* an SGL is allowed to end with a Data Block in a regular Segment */
+        if (NVME_SGL_TYPE(sgl.type) == SGL_DESCR_TYPE_DATA_BLOCK) {
+            status = nvme_map_sgl_data(n, qsg, segment, nsgld, &len, req);
+            if (status) {
+                goto unmap;
+            }
+
+            goto out;
+        }
+
+        /* do not map last descriptor */
+        status = nvme_map_sgl_data(n, qsg, segment, nsgld - 1, &len, req);
+        if (status) {
+            goto unmap;
+        }
+
+        /*
+         * If the next segment is in the CMB, make sure that the sgl was
+         * already located there.
+         */
+        addr_is_cmb = nvme_addr_is_cmb(n, addr);
+        if ((sgl_in_cmb && !addr_is_cmb) || (!sgl_in_cmb && addr_is_cmb)) {
+            status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
+            goto unmap;
+        }
+    }
+
+    /*
+     * If the segment did not end with a Data Block or a Segment descriptor, it
+     * must be a Last Segment descriptor.
+     */
+    if (NVME_SGL_TYPE(sgl.type) != SGL_DESCR_TYPE_LAST_SEGMENT) {
+        trace_nvme_err_invalid_sgl_descriptor(req->cqe.cid,
+            NVME_SGL_TYPE(sgl.type));
+        return NVME_SGL_DESCRIPTOR_TYPE_INVALID | NVME_DNR;
+    }
+
+    nsgld = le64_to_cpu(sgl.len) / sizeof(NvmeSglDescriptor);
+
+    while (nsgld > MAX_NSGLD) {
+        nvme_addr_read(n, addr, segment, sizeof(segment));
+
+        status = nvme_map_sgl_data(n, qsg, segment, MAX_NSGLD, &len, req);
+        if (status) {
+            goto unmap;
+        }
+
+        nsgld -= MAX_NSGLD;
+        addr += MAX_NSGLD * sizeof(NvmeSglDescriptor);
+    }
+
+    nvme_addr_read(n, addr, segment, nsgld * sizeof(NvmeSglDescriptor));
+
+    status = nvme_map_sgl_data(n, qsg, segment, nsgld, &len, req);
+    if (status) {
+        goto unmap;
+    }
+
+out:
+    /* if there is any residual left in len, the SGL was too short */
+    if (len) {
+        status = NVME_DATA_SGL_LENGTH_INVALID | NVME_DNR;
+        goto unmap;
+    }
+
+    return NVME_SUCCESS;
+
+unmap:
+    qemu_sglist_destroy(qsg);
+
+    return status;
+}
+
 static void dma_to_cmb(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov)
 {
     for (int i = 0; i < qsg->nsg; i++) {
@@ -320,6 +512,56 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     return err;
 }
 
+static uint16_t nvme_dma_write_sgl(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+    NvmeSglDescriptor sgl, NvmeRequest *req)
+{
+    QEMUSGList qsg;
+    uint16_t err = NVME_SUCCESS;
+
+    err = nvme_map_sgl(n, &qsg, sgl, len, req);
+    if (err) {
+        return err;
+    }
+
+    if (req->is_cmb) {
+        QEMUIOVector iov;
+
+        qemu_iovec_init(&iov, qsg.nsg);
+        dma_to_cmb(n, &qsg, &iov);
+
+        if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
+            trace_nvme_err_invalid_dma();
+            err = NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        qemu_iovec_destroy(&iov);
+
+        return err;
+    }
+
+    if (unlikely(dma_buf_write(ptr, len, &qsg))) {
+        trace_nvme_err_invalid_dma();
+        err = NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    qemu_sglist_destroy(&qsg);
+
+    return err;
+}
+
+static uint16_t nvme_dma_write(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+    NvmeCmd *cmd, NvmeRequest *req)
+{
+    if (NVME_CMD_FLAGS_PSDT(cmd->flags)) {
+        return nvme_dma_write_sgl(n, ptr, len, cmd->dptr.sgl, req);
+    }
+
+    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2);
+
+    return nvme_dma_write_prp(n, ptr, len, prp1, prp2, req);
+}
+
 static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     uint64_t prp1, uint64_t prp2, NvmeRequest *req)
 {
@@ -357,6 +599,57 @@ out:
     return err;
 }
 
+static uint16_t nvme_dma_read_sgl(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+    NvmeSglDescriptor sgl, NvmeCmd *cmd, NvmeRequest *req)
+{
+    QEMUSGList qsg;
+    uint16_t err = NVME_SUCCESS;
+
+    err = nvme_map_sgl(n, &qsg, sgl, len, req);
+    if (err) {
+        return err;
+    }
+
+    if (req->is_cmb) {
+        QEMUIOVector iov;
+
+        qemu_iovec_init(&iov, qsg.nsg);
+        dma_to_cmb(n, &qsg, &iov);
+
+        if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) {
+            trace_nvme_err_invalid_dma();
+            err = NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        qemu_iovec_destroy(&iov);
+
+        goto out;
+    }
+
+    if (unlikely(dma_buf_read(ptr, len, &qsg))) {
+        trace_nvme_err_invalid_dma();
+        err = NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+out:
+    qemu_sglist_destroy(&qsg);
+
+    return err;
+}
+
+static uint16_t nvme_dma_read(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+    NvmeCmd *cmd, NvmeRequest *req)
+{
+    if (NVME_CMD_FLAGS_PSDT(cmd->flags)) {
+        return nvme_dma_read_sgl(n, ptr, len, cmd->dptr.sgl, cmd, req);
+    }
+
+    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2);
+
+    return nvme_dma_read_prp(n, ptr, len, prp1, prp2, req);
+}
+
 static void nvme_blk_req_destroy(NvmeBlockBackendRequest *blk_req)
 {
     if (blk_req->iov.nalloc) {
@@ -410,20 +703,25 @@ static uint16_t nvme_blk_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     uint16_t err;
 
     uint32_t len = req->nlb * nvme_ns_lbads_bytes(ns);
-    uint64_t prp1 = le64_to_cpu(cmd->prp1);
-    uint64_t prp2 = le64_to_cpu(cmd->prp2);
 
-    err = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req);
-    if (err) {
-        return err;
+    if (NVME_CMD_FLAGS_PSDT(cmd->flags)) {
+        err = nvme_map_sgl(n, &req->qsg, cmd->dptr.sgl, len, req);
+        if (err) {
+            return err;
+        }
+    } else {
+        uint64_t prp1 = le64_to_cpu(cmd->dptr.prp.prp1);
+        uint64_t prp2 = le64_to_cpu(cmd->dptr.prp.prp2);
+
+        err = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req);
+        if (err) {
+            return err;
+        }
     }
 
     err = nvme_blk_setup(n, ns, &req->qsg, req);
-    if (err) {
-        return err;
-    }
 
-    return NVME_SUCCESS;
+    return err;
 }
 
 static void nvme_post_cqes(void *opaque)
@@ -986,25 +1284,18 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c,
-    NvmeRequest *req)
+static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
-    uint64_t prp1 = le64_to_cpu(c->prp1);
-    uint64_t prp2 = le64_to_cpu(c->prp2);
-
     trace_nvme_identify_ctrl();
 
-    return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
-        prp1, prp2, req);
+    return nvme_dma_read(n, (uint8_t *) &n->id_ctrl, sizeof(n->id_ctrl), cmd,
+        req);
 }
 
-static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c,
-    NvmeRequest *req)
+static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     NvmeNamespace *ns;
-    uint32_t nsid = le32_to_cpu(c->nsid);
-    uint64_t prp1 = le64_to_cpu(c->prp1);
-    uint64_t prp2 = le64_to_cpu(c->prp2);
+    uint32_t nsid = le32_to_cpu(cmd->nsid);
 
     trace_nvme_identify_ns(nsid);
 
@@ -1015,17 +1306,15 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c,
 
     ns = &n->namespace;
 
-    return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
-        prp1, prp2, req);
+    return nvme_dma_read(n, (uint8_t *) &ns->id_ns, sizeof(ns->id_ns), cmd,
+        req);
 }
 
-static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c,
+static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeCmd *cmd,
     NvmeRequest *req)
 {
     static const int data_len = 4 * KiB;
-    uint32_t min_nsid = le32_to_cpu(c->nsid);
-    uint64_t prp1 = le64_to_cpu(c->prp1);
-    uint64_t prp2 = le64_to_cpu(c->prp2);
+    uint32_t min_nsid = le32_to_cpu(cmd->nsid);
     uint32_t *list;
     uint16_t ret;
     int i, j = 0;
@@ -1042,12 +1331,12 @@ static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c,
             break;
         }
     }
-    ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2, req);
+    ret = nvme_dma_read(n, (uint8_t *) list, data_len, cmd, req);
     g_free(list);
     return ret;
 }
 
-static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c,
+static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *cmd,
     NvmeRequest *req)
 {
     static const int data_len = 4 * KiB;
@@ -1065,9 +1354,7 @@ static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c,
         uint32_t nid;
     };
 
-    uint32_t nsid = le32_to_cpu(c->nsid);
-    uint64_t prp1 = le64_to_cpu(c->prp1);
-    uint64_t prp2 = le64_to_cpu(c->prp2);
+    uint32_t nsid = le32_to_cpu(cmd->nsid);
 
     struct ns_descr *list;
     uint16_t ret;
@@ -1084,7 +1371,7 @@ static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c,
     list->nidl = 0x10;
     list->nid = cpu_to_be32(nsid);
 
-    ret = nvme_dma_read_prp(n, (uint8_t *) list, data_len, prp1, prp2, req);
+    ret = nvme_dma_read(n, (uint8_t *) list, data_len, cmd, req);
     g_free(list);
     return ret;
 }
@@ -1095,11 +1382,11 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
     switch (le32_to_cpu(c->cns)) {
     case 0x00:
-        return nvme_identify_ns(n, c, req);
+        return nvme_identify_ns(n, cmd, req);
     case 0x01:
-        return nvme_identify_ctrl(n, c, req);
+        return nvme_identify_ctrl(n, cmd, req);
     case 0x02:
-        return nvme_identify_ns_list(n, c, req);
+        return nvme_identify_ns_list(n, cmd, req);
     case 0x03:
         return nvme_identify_ns_descriptor_list(n, cmd, req);
     default:
@@ -1151,13 +1438,10 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
 static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
     NvmeRequest *req)
 {
-    uint64_t prp1 = le64_to_cpu(cmd->prp1);
-    uint64_t prp2 = le64_to_cpu(cmd->prp2);
-
     uint64_t timestamp = nvme_get_timestamp(n);
 
-    return nvme_dma_read_prp(n, (uint8_t *)&timestamp, sizeof(timestamp),
-        prp1, prp2, req);
+    return nvme_dma_read(n, (uint8_t *)&timestamp, sizeof(timestamp), cmd,
+        req);
 }
 
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
@@ -1222,11 +1506,9 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
 {
     uint16_t ret;
     uint64_t timestamp;
-    uint64_t prp1 = le64_to_cpu(cmd->prp1);
-    uint64_t prp2 = le64_to_cpu(cmd->prp2);
 
-    ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp,
-        sizeof(timestamp), prp1, prp2, req);
+    ret = nvme_dma_write(n, (uint8_t *)&timestamp, sizeof(timestamp), cmd,
+        req);
     if (ret != NVME_SUCCESS) {
         return ret;
     }
@@ -1303,8 +1585,6 @@ static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
     uint32_t buf_len, uint64_t off, NvmeRequest *req)
 {
     uint32_t trans_len;
-    uint64_t prp1 = le64_to_cpu(cmd->prp1);
-    uint64_t prp2 = le64_to_cpu(cmd->prp2);
     if (off > sizeof(*n->elpes) * (NVME_ELPE + 1)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
@@ -1316,16 +1596,12 @@ static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
         nvme_clear_events(n, NVME_AER_TYPE_ERROR);
     }
 
-    return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1,
-        prp2, req);
+    return nvme_dma_read(n, (uint8_t *) n->elpes + off, trans_len, cmd, req);
 }
 
 static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
     uint32_t buf_len, uint64_t off, NvmeRequest *req)
 {
-    uint64_t prp1 = le64_to_cpu(cmd->prp1);
-    uint64_t prp2 = le64_to_cpu(cmd->prp2);
-
     uint32_t trans_len;
     time_t current_ms;
     NvmeSmartLog smart;
@@ -1359,16 +1635,13 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
         nvme_clear_events(n, NVME_AER_TYPE_SMART);
    }
 
-    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
-        prp2, req);
+    return nvme_dma_read(n, (uint8_t *) &smart + off, trans_len, cmd, req);
 }
 
 static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
     uint64_t off, NvmeRequest *req)
 {
     uint32_t trans_len;
-    uint64_t prp1 = le64_to_cpu(cmd->prp1);
-    uint64_t prp2 = le64_to_cpu(cmd->prp2);
     NvmeFwSlotInfoLog fw_log;
 
     if (off > sizeof(fw_log)) {
@@ -1379,8 +1652,7 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
 
     trans_len = MIN(sizeof(fw_log) - off, buf_len);
 
-    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
-        prp2, req);
+    return nvme_dma_read(n, (uint8_t *) &fw_log + off, trans_len, cmd, req);
 }
 
@@ -2139,6 +2411,8 @@ static void nvme_init_ctrl(NvmeCtrl *n)
         id->vwc = 1;
     }
 
+    id->sgls = cpu_to_le32(0x1);
+
     strcpy((char *) id->subnqn, "nqn.2014-08.org.nvmexpress:uuid:");
     qemu_uuid_unparse(&qemu_uuid,
         (char *) id->subnqn + strlen((char *) id->subnqn));
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 832094f778..1d52b183d2 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -142,6 +142,12 @@ static inline bool nvme_rw_is_write(NvmeRequest *req)
     return req->cmd.opcode == NVME_CMD_WRITE;
 }
 
+static inline bool nvme_is_error(uint16_t status, uint16_t err)
+{
+    /* strip DNR and MORE */
+    return (status & 0xfff) == err;
+}
+
 static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
 {
     NvmeIdNs *id = &ns->id_ns;
diff --git a/hw/block/trace-events b/hw/block/trace-events
index b324751ad9..b239e92294 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -34,6 +34,7 @@ nvme_irq_pin(void) "pulsing IRQ pin"
 nvme_irq_masked(void) "IRQ is masked"
 nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
 nvme_map_prp(uint8_t cmd_opcode, uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "cmd_opcode=0x%"PRIx8", trans_len=%"PRIu64", len=%"PRIu32", prp1=0x%"PRIx64", prp2=0x%"PRIx64", num_prps=%d"
+nvme_map_sgl(uint16_t cid, uint8_t typ, uint16_t nlb, uint64_t len) "cid %"PRIu16" type 0x%"PRIx8" nlb %"PRIu16" len %"PRIu64""
 nvme_io_cmd(uint16_t cid, uint32_t nsid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" opc %"PRIu8""
 nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
 nvme_rw_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
@@ -76,6 +77,8 @@ nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
 
 # nvme traces for error conditions
 nvme_err(uint16_t cid, const char *s, uint16_t status) "cid %"PRIu16" \"%s\" status 0x%"PRIx16""
+nvme_err_invalid_sgl_descriptor(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8""
+nvme_err_invalid_sgl_excess_length(uint16_t cid) "cid %"PRIu16""
 nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
 nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
 nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index a6ef8d8ff2..f79c71ba3f 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -205,15 +205,53 @@ enum NvmeCmbszMask {
 #define NVME_CMBSZ_GETSIZE(cmbsz) \
     (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz))))
 
+enum NvmeSglDescriptorType {
+    SGL_DESCR_TYPE_DATA_BLOCK = 0x0,
+    SGL_DESCR_TYPE_BIT_BUCKET = 0x1,
+    SGL_DESCR_TYPE_SEGMENT = 0x2,
+    SGL_DESCR_TYPE_LAST_SEGMENT = 0x3,
+    SGL_DESCR_TYPE_KEYED_DATA_BLOCK = 0x4,
+
+    SGL_DESCR_TYPE_VENDOR_SPECIFIC = 0xf,
+};
+
+enum NvmeSglDescriptorSubtype {
+    SGL_DESCR_SUBTYPE_ADDRESS = 0x0,
+    SGL_DESCR_SUBTYPE_OFFSET = 0x1,
+};
+
+typedef struct NvmeSglDescriptor {
+    uint64_t addr;
+    uint32_t len;
+    uint8_t rsvd[3];
+    uint8_t type;
+} NvmeSglDescriptor;
+
+#define NVME_SGL_TYPE(type) (type >> 4)
+
+typedef union NvmeCmdDptr {
+    struct {
+        uint64_t prp1;
+        uint64_t prp2;
+    } prp;
+
+    NvmeSglDescriptor sgl;
+} NvmeCmdDptr;
+
+enum NvmePsdt {
+    PSDT_PRP = 0x0,
+    PSDT_SGL_MPTR_CONTIGUOUS = 0x1,
+    PSDT_SGL_MPTR_SGL = 0x2,
+};
+
 typedef struct NvmeCmd {
     uint8_t opcode;
-    uint8_t fuse;
+    uint8_t flags;
     uint16_t cid;
     uint32_t nsid;
-    uint64_t res1;
+    uint64_t rsvd2;
     uint64_t mptr;
-    uint64_t prp1;
-    uint64_t prp2;
+    NvmeCmdDptr dptr;
     uint32_t cdw10;
     uint32_t cdw11;
     uint32_t cdw12;
@@ -222,6 +260,9 @@ typedef struct NvmeCmd {
     uint32_t cdw15;
 } NvmeCmd;
 
+#define NVME_CMD_FLAGS_FUSE(flags) (flags & 0x3)
+#define NVME_CMD_FLAGS_PSDT(flags) ((flags >> 6) & 0x3)
+
 enum NvmeAdminCommands {
     NVME_ADM_CMD_DELETE_SQ = 0x00,
     NVME_ADM_CMD_CREATE_SQ = 0x01,
@@ -427,6 +468,11 @@ enum NvmeStatusCodes {
     NVME_CMD_ABORT_MISSING_FUSE = 0x000a,
     NVME_INVALID_NSID = 0x000b,
     NVME_CMD_SEQ_ERROR = 0x000c,
+    NVME_INVALID_SGL_SEG_DESCRIPTOR = 0x000d,
+    NVME_INVALID_NUM_SGL_DESCRIPTORS = 0x000e,
+    NVME_DATA_SGL_LENGTH_INVALID = 0x000f,
+    NVME_METADATA_SGL_LENGTH_INVALID = 0x0010,
+    NVME_SGL_DESCRIPTOR_TYPE_INVALID = 0x0011,
     NVME_INVALID_USE_OF_CMB = 0x0012,
     NVME_LBA_RANGE = 0x0080,
     NVME_CAP_EXCEEDED = 0x0081,
@@ -623,6 +669,16 @@ enum NvmeIdCtrlOncs {
 #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
 #define NVME_CTRL_CQES_MAX(cqes) (((cqes) >> 4) & 0xf)
 
+#define NVME_CTRL_SGLS_SUPPORTED(sgls) ((sgls) & 0x3)
+#define NVME_CTRL_SGLS_SUPPORTED_NO_ALIGNMENT(sgls) ((sgls) & (0x1 << 0))
+#define NVME_CTRL_SGLS_SUPPORTED_DWORD_ALIGNMENT(sgls) ((sgls) & (0x1 << 1))
+#define NVME_CTRL_SGLS_KEYED(sgls) ((sgls) & (0x1 << 2))
+#define NVME_CTRL_SGLS_BITBUCKET(sgls) ((sgls) & (0x1 << 16))
+#define NVME_CTRL_SGLS_MPTR_CONTIGUOUS(sgls) ((sgls) & (0x1 << 17))
+#define NVME_CTRL_SGLS_EXCESS_LENGTH(sgls) ((sgls) & (0x1 << 18))
+#define NVME_CTRL_SGLS_MPTR_SGL(sgls) ((sgls) & (0x1 << 19))
+#define NVME_CTRL_SGLS_ADDR_OFFSET(sgls) ((sgls) & (0x1 << 20))
+
 typedef struct NvmeFeatureVal {
     uint32_t arbitration;
     uint32_t power_mgmt;
-- 
2.39.5
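
As an illustration of the descriptor layout that nvme_map_sgl() consumes, here
is a minimal stand-alone sketch (not part of the patch) of how a host might
describe a two-buffer transfer: two Data Block descriptors gathered into one
segment, referenced from the command's DPTR by a Last Segment descriptor, with
PSDT set to 01b. The struct and type values mirror the include/block/nvme.h
hunk above; the buffer and segment bus addresses are made up, and the
cpu_to_le*() byte swapping is omitted by assuming a little-endian host.

    #include <stdint.h>
    #include <stdio.h>

    /* mirrors the NvmeSglDescriptor added to include/block/nvme.h */
    typedef struct NvmeSglDescriptor {
        uint64_t addr;
        uint32_t len;
        uint8_t rsvd[3];
        uint8_t type;   /* descriptor type in bits 7:4, subtype in bits 3:0 */
    } NvmeSglDescriptor;

    enum {
        SGL_DESCR_TYPE_DATA_BLOCK = 0x0,
        SGL_DESCR_TYPE_LAST_SEGMENT = 0x3,
    };

    static NvmeSglDescriptor data_block(uint64_t addr, uint32_t len)
    {
        NvmeSglDescriptor d = {
            .addr = addr,
            .len = len,
            .type = SGL_DESCR_TYPE_DATA_BLOCK << 4,
        };
        return d;
    }

    int main(void)
    {
        /* one segment holding two Data Block descriptors (addresses are
         * hypothetical) */
        NvmeSglDescriptor seg[2] = {
            data_block(0x70000000, 4096),
            data_block(0x70002000, 4096),
        };

        /*
         * The descriptor placed in the SQE DPTR: a Last Segment descriptor
         * whose length is the segment size in bytes (2 * 16 = 32), so the
         * controller computes nsgld = 32 / sizeof(NvmeSglDescriptor) = 2.
         */
        NvmeSglDescriptor dptr = {
            .addr = 0x71000000,     /* where seg[] sits in DMA space */
            .len = sizeof(seg),
            .type = SGL_DESCR_TYPE_LAST_SEGMENT << 4,
        };

        /* PSDT = 01b in command byte 1 (bits 7:6) selects SGL addressing */
        uint8_t flags = 0x1 << 6;

        printf("dptr type 0x%x, nsgld %zu, flags 0x%x\n",
               dptr.type >> 4, dptr.len / sizeof(NvmeSglDescriptor), flags);
        return 0;
    }

With this layout, nvme_map_sgl() skips the Segment loop (the DPTR descriptor
is already a Last Segment), reads the two descriptors with a single
nvme_addr_read(), and hands both to nvme_map_sgl_data(), which appends each
(addr, len) pair to the request's QEMUSGList.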