diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index 2b9e6cfaf2a80a..ff6e965b372d39 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -19,6 +19,7 @@ static const char * const nvme_ops[] = {
 	[nvme_cmd_resv_report] = "Reservation Report",
 	[nvme_cmd_resv_acquire] = "Reservation Acquire",
 	[nvme_cmd_resv_release] = "Reservation Release",
+	[nvme_cmd_cancel] = "Cancel",
 	[nvme_cmd_zone_mgmt_send] = "Zone Management Send",
 	[nvme_cmd_zone_mgmt_recv] = "Zone Management Receive",
 	[nvme_cmd_zone_append] = "Zone Append",
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 870314c521077c..ad141028bcffe6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1210,6 +1210,20 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
 }
 EXPORT_SYMBOL_NS_GPL(nvme_command_effects, "NVME_TARGET_PASSTHRU");
 
+/*
+ * Check the Commands Supported and Effects log for support of the given
+ * I/O command opcode.  The effects log is optional, so ctrl->effects may
+ * be NULL (nvme_command_effects() above guards the same way); treat a
+ * missing log as "not supported" instead of dereferencing NULL.
+ */
+bool nvme_io_command_supported(struct nvme_ctrl *ctrl, u8 opcode)
+{
+	u32 effects;
+
+	if (!ctrl->effects)
+		return false;
+
+	effects = le32_to_cpu(ctrl->effects->iocs[opcode]);
+	return effects & NVME_CMD_EFFECTS_CSUPP;
+}
+EXPORT_SYMBOL_GPL(nvme_io_command_supported);
+
+/* True if @cmd is an NVMe Cancel (opcode 0x18) I/O command. */
+bool nvme_is_cancel(struct nvme_command *cmd)
+{
+	return cmd->common.opcode == nvme_cmd_cancel;
+}
+EXPORT_SYMBOL_GPL(nvme_is_cancel);
+
 u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
 {
 	u32 effects = nvme_command_effects(ctrl, ns, opcode);
@@ -3105,6 +3119,67 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
 	return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
 }
 
+/*
+ * Completion handler for an asynchronously-submitted Cancel command.
+ * On success the 32-bit result packs the deferred (upper 16 bits) and
+ * immediate (lower 16 bits) abort counts.
+ */
+static enum rq_end_io_ret nvme_cancel_endio(struct request *req, blk_status_t error)
+{
+	struct nvme_ctrl *ctrl = req->end_io_data;
+	u32 result;
+	u16 imm_abrts, def_abrts;
+
+	if (!error) {
+		result = le32_to_cpu(nvme_req(req)->result.u32);
+		def_abrts = upper_16_bits(result);
+		imm_abrts = lower_16_bits(result);
+
+		dev_warn(ctrl->device,
+			 "Cancel status: 0x0 imm abrts = %u def abrts = %u",
+			 imm_abrts,
def_abrts); + } else { + dev_warn(ctrl->device, "Cancel status: 0x%x", + nvme_req(req)->status); + } + + blk_mq_free_request(req); + return RQ_END_IO_NONE; +} + +int nvme_submit_cancel_req(struct nvme_ctrl *ctrl, struct request *rq, + unsigned int sqid, int action) +{ + struct nvme_command c = { }; + struct request *cancel_req; + + if (sqid == 0) + return -EINVAL; + + c.cancel.opcode = nvme_cmd_cancel; + c.cancel.sqid = cpu_to_le32(sqid); + c.cancel.nsid = NVME_NSID_ALL; + c.cancel.action = action; + if (action == NVME_CANCEL_ACTION_SINGLE_CMD) + c.cancel.cid = nvme_cid(rq); + else + c.cancel.cid = 0xFFFF; + + cancel_req = blk_mq_alloc_request_hctx(rq->q, nvme_req_op(&c), + BLK_MQ_REQ_NOWAIT | + BLK_MQ_REQ_RESERVED, + sqid - 1); + if (IS_ERR(cancel_req)) { + dev_warn(ctrl->device, "%s: Could not allocate the Cancel " + "command", __func__); + return PTR_ERR(cancel_req); + } + + nvme_init_request(cancel_req, &c); + cancel_req->end_io = nvme_cancel_endio; + cancel_req->end_io_data = ctrl; + + blk_execute_rq_nowait(cancel_req, false); + + return 0; +} +EXPORT_SYMBOL_GPL(nvme_submit_cancel_req); + static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, struct nvme_effects_log **log) { @@ -4675,9 +4750,14 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, */ if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS) set->reserved_tags = NVME_AQ_DEPTH; - else if (ctrl->ops->flags & NVME_F_FABRICS) + else if (ctrl->ops->flags & NVME_F_FABRICS) { /* Reserved for fabric connect */ set->reserved_tags = 1; + if (nvme_io_command_supported(ctrl, nvme_cmd_cancel)) { + /* Reserved for cancel commands */ + set->reserved_tags += NVME_RSV_CANCEL_MAX; + } + } set->numa_node = ctrl->numa_node; if (ctrl->ops->flags & NVME_F_BLOCKING) set->flags |= BLK_MQ_F_BLOCKING; @@ -5087,6 +5167,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64); BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64); BUILD_BUG_ON(sizeof(struct 
nvme_abort_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_cancel_cmd) != 64); BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 51e07864212710..1225a6ff4093e8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -908,6 +908,8 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl); void nvme_queue_scan(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, void *log, size_t size, u64 offset); +int nvme_submit_cancel_req(struct nvme_ctrl *ctrl, struct request *rq, + unsigned int sqid, int action); bool nvme_tryget_ns_head(struct nvme_ns_head *head); void nvme_put_ns_head(struct nvme_ns_head *head); int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, @@ -1187,6 +1189,8 @@ static inline void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) {}; u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); +bool nvme_io_command_supported(struct nvme_ctrl *ctrl, u8 opcode); +bool nvme_is_cancel(struct nvme_command *cmd); u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); int nvme_execute_rq(struct request *rq, bool at_head); void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b5a0295b5bf457..3de37d116e4aa9 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -74,12 +74,15 @@ struct nvme_rdma_request { struct nvme_rdma_sgl data_sgl; struct nvme_rdma_sgl *metadata_sgl; bool use_sig_mr; + bool aborted; }; enum nvme_rdma_queue_flags { NVME_RDMA_Q_ALLOCATED = 0, NVME_RDMA_Q_LIVE = 1, NVME_RDMA_Q_TR_READY = 2, + NVME_RDMA_Q_CANCEL_ONE = 3, + NVME_RDMA_Q_CANCEL_ALL = 4, }; struct nvme_rdma_queue { @@ -619,6 +622,8 @@ static int 
nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, } set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags); + clear_bit(NVME_RDMA_Q_CANCEL_ONE, &queue->flags); + clear_bit(NVME_RDMA_Q_CANCEL_ALL, &queue->flags); return 0; @@ -1954,16 +1959,18 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; - struct nvme_rdma_ctrl *ctrl = queue->ctrl; + struct nvme_rdma_ctrl *rdma_ctrl = queue->ctrl; + struct nvme_ctrl *ctrl = &rdma_ctrl->ctrl; struct nvme_command *cmd = req->req.cmd; int qid = nvme_rdma_queue_idx(queue); + int error, action; - dev_warn(ctrl->ctrl.device, + dev_warn(ctrl->device, "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n", rq->tag, nvme_cid(rq), cmd->common.opcode, nvme_fabrics_opcode_str(qid, cmd), qid); - if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) { + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { /* * If we are resetting, connecting or deleting we should * complete immediately because we may block controller @@ -1981,11 +1988,40 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) return BLK_EH_DONE; } + if (!req->aborted) { + if (!nvme_io_command_supported(ctrl, nvme_cmd_cancel) || !qid) + goto err_recovery; + + if (!test_and_set_bit(NVME_RDMA_Q_CANCEL_ONE, &queue->flags)) { + action = NVME_CANCEL_ACTION_SINGLE_CMD; + } else if (!test_and_set_bit(NVME_RDMA_Q_CANCEL_ALL, + &queue->flags)) { + action = NVME_CANCEL_ACTION_MUL_CMD; + } else { + /* No free reserved commands. + * this means a "multiple commands" cancel + * is currently under execution and this request + * is likely to be canceled. Mark this + * request as aborted and reset the timer. 
+ */ + goto abort; + } + + error = nvme_submit_cancel_req(ctrl, rq, qid, action); + if (error) + goto err_recovery; + +abort: + req->aborted = true; + return BLK_EH_RESET_TIMER; + } + /* * LIVE state should trigger the normal error recovery which will * handle completing this request. */ - nvme_rdma_error_recovery(ctrl); +err_recovery: + nvme_rdma_error_recovery(rdma_ctrl); return BLK_EH_RESET_TIMER; } @@ -2009,6 +2045,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); dev = queue->device->dev; + req->aborted = false; req->sqe.dma = ib_dma_map_single(dev, req->sqe.data, sizeof(struct nvme_command), @@ -2113,6 +2150,7 @@ static void nvme_rdma_complete_rq(struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct ib_device *ibdev = queue->device->dev; + bool is_cancel = nvme_is_cancel(req->req.cmd); if (req->use_sig_mr) nvme_rdma_check_pi_status(req); @@ -2121,6 +2159,10 @@ static void nvme_rdma_complete_rq(struct request *rq) ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command), DMA_TO_DEVICE); nvme_complete_rq(rq); + if (is_cancel) { + if (!test_and_clear_bit(NVME_RDMA_Q_CANCEL_ALL, &queue->flags)) + clear_bit(NVME_RDMA_Q_CANCEL_ONE, &queue->flags); + } } static void nvme_rdma_map_queues(struct blk_mq_tag_set *set) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 542ffc921a3ff8..39de42ae3a7e96 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -122,6 +122,7 @@ struct nvme_tcp_request { size_t offset; size_t data_sent; enum nvme_tcp_send_state state; + bool aborted; }; enum nvme_tcp_queue_flags { @@ -129,6 +130,8 @@ enum nvme_tcp_queue_flags { NVME_TCP_Q_LIVE = 1, NVME_TCP_Q_POLLING = 2, NVME_TCP_Q_IO_CPU_SET = 3, + NVME_TCP_Q_CANCEL_ONE = 4, + NVME_TCP_Q_CANCEL_ALL = 5, }; enum nvme_tcp_recv_state { @@ -205,6 +208,7 @@ static struct workqueue_struct *nvme_tcp_wq; 
static const struct blk_mq_ops nvme_tcp_mq_ops; static const struct blk_mq_ops nvme_tcp_admin_mq_ops; static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); +static void nvme_tcp_complete_rq(struct request *rq); static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) { @@ -617,7 +621,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, req->status = cqe->status; if (!nvme_try_complete_req(rq, req->status, cqe->result)) - nvme_complete_rq(rq); + nvme_tcp_complete_rq(rq); queue->nr_cqe++; return 0; @@ -817,7 +821,7 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status) union nvme_result res = {}; if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res)) - nvme_complete_rq(rq); + nvme_tcp_complete_rq(rq); } static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, @@ -1833,6 +1837,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, goto err_init_connect; set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags); + clear_bit(NVME_TCP_Q_CANCEL_ONE, &queue->flags); + clear_bit(NVME_TCP_Q_CANCEL_ALL, &queue->flags); return 0; @@ -2551,10 +2557,12 @@ static void nvme_tcp_complete_timed_out(struct request *rq) static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) { struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); struct nvme_command *cmd = &pdu->cmd; - int qid = nvme_tcp_queue_id(req->queue); + struct nvme_tcp_queue *queue = req->queue; + struct nvme_ctrl *ctrl = &queue->ctrl->ctrl; + int qid = nvme_tcp_queue_id(queue); + int error, action; dev_warn(ctrl->device, "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", @@ -2579,10 +2587,39 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) return BLK_EH_DONE; } + if (!req->aborted) { + if (!nvme_io_command_supported(ctrl, nvme_cmd_cancel) || !qid) + goto err_recovery; + + if 
(!test_and_set_bit(NVME_TCP_Q_CANCEL_ONE, &queue->flags)) { + action = NVME_CANCEL_ACTION_SINGLE_CMD; + } else if (!test_and_set_bit(NVME_TCP_Q_CANCEL_ALL, + &queue->flags)) { + action = NVME_CANCEL_ACTION_MUL_CMD; + } else { + /* No free reserved commands. + * this means a "multiple commands" cancel + * is currently under execution and this request + * is likely to be canceled. Mark this + * request as aborted and reset the timer. + */ + goto abort; + } + + error = nvme_submit_cancel_req(ctrl, rq, qid, action); + if (error) + goto err_recovery; + +abort: + req->aborted = true; + return BLK_EH_RESET_TIMER; + } + /* * LIVE state should trigger the normal error recovery which will * handle completing this request. */ +err_recovery: nvme_tcp_error_recovery(ctrl); return BLK_EH_RESET_TIMER; } @@ -2627,6 +2664,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, req->pdu_len = 0; req->pdu_sent = 0; req->h2cdata_left = 0; + req->aborted = false; req->data_len = blk_rq_nr_phys_segments(rq) ? 
blk_rq_payload_bytes(rq) : 0; req->curr_bio = rq->bio; @@ -2742,10 +2780,24 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) return len; } +static void nvme_tcp_complete_rq(struct request *rq) +{ + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_tcp_queue *queue = req->queue; + bool is_cancel = nvme_is_cancel(req->req.cmd); + + nvme_complete_rq(rq); + + if (is_cancel) { + if (!test_and_clear_bit(NVME_TCP_Q_CANCEL_ALL, &queue->flags)) + clear_bit(NVME_TCP_Q_CANCEL_ONE, &queue->flags); + } +} + static const struct blk_mq_ops nvme_tcp_mq_ops = { .queue_rq = nvme_tcp_queue_rq, .commit_rqs = nvme_tcp_commit_rqs, - .complete = nvme_complete_rq, + .complete = nvme_tcp_complete_rq, .init_request = nvme_tcp_init_request, .exit_request = nvme_tcp_exit_request, .init_hctx = nvme_tcp_init_hctx, diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index fb7446d6d6829b..e0a1ccb93da68d 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -126,3 +126,12 @@ config NVME_TARGET_PCI_EPF capable PCI controller. If unsure, say N. + +config NVME_TARGET_DELAY_REQUESTS + bool "NVMe over Fabrics target request delay" + depends on NVME_TARGET && NVME_TARGET_DEBUGFS + help + This is a testing feature to allow delaying request completion in an + NVMe over Fabrics target, which allows for support of the cancel command. + + If unsure, say N. 
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile index ed8522911d1f73..5d9848605285bc 100644 --- a/drivers/nvme/target/Makefile +++ b/drivers/nvme/target/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_NVME_TARGET_PCI_EPF) += nvmet-pci-epf.o nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \ discovery.o io-cmd-file.o io-cmd-bdev.o pr.o +nvmet-$(CONFIG_NVME_TARGET_DELAY_REQUESTS) += io-cmd-cancel.o nvmet-$(CONFIG_NVME_TARGET_DEBUGFS) += debugfs.o nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index acc138bbf8f23d..149c72d0b42958 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -398,6 +398,9 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log) log->iocs[nvme_cmd_resv_release] = log->iocs[nvme_cmd_resv_report] = cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); +#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS) + log->iocs[nvme_cmd_cancel] = cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); +#endif log->iocs[nvme_cmd_write] = log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a2b5319c37f350..31efd45ac9cb9d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -6,7 +6,9 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include #include +#include #include #include @@ -803,10 +805,23 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status) nvmet_put_namespace(ns); } +#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS) +static void nvmet_delayed_execute_req(struct work_struct *work); +#endif + void nvmet_req_complete(struct nvmet_req *req, u16 status) { struct nvmet_sq *sq = req->sq; +#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS) + unsigned long flags; + /* only need to update the xarray if this was a delayed request */ + if 
(sq->qid) {
+		/*
+		 * req_work is only initialized on the delayed path, so do not
+		 * inspect its callback pointer here -- for normal requests it
+		 * is uninitialized memory.  Erasing an id that was never
+		 * inserted is a harmless no-op, and only I/O-queue requests
+		 * can ever be delayed, so gate on the queue id alone.
+		 */
+		xa_lock_irqsave(&sq->outstanding_requests, flags);
+		__xa_erase(&sq->outstanding_requests,
+			   req->cmd->common.command_id);
+		xa_unlock_irqrestore(&sq->outstanding_requests, flags);
+	}
+#endif
 	__nvmet_req_complete(req, status);
 	percpu_ref_put(&sq->ref);
 }
@@ -978,7 +993,9 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 	init_completion(&sq->free_done);
 	init_completion(&sq->confirm_done);
 	nvmet_auth_sq_init(sq);
-
+#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS)
+	xa_init_flags(&sq->outstanding_requests, XA_FLAGS_LOCK_IRQ);
+#endif
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
@@ -1057,6 +1074,14 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 	if (nvmet_is_passthru_req(req))
 		return nvmet_parse_passthru_io_cmd(req);
 
+#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS)
+	if (req->cmd->common.opcode == nvme_cmd_cancel) {
+		req->execute = nvmet_execute_cancel;
+		if (req->cmd->cancel.nsid == NVME_NSID_ALL)
+			return 0;
+	}
+#endif
+
 	ret = nvmet_req_find_ns(req);
 	if (unlikely(ret))
 		return ret;
@@ -1742,6 +1767,47 @@ ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
 	return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
 }
 
+#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS)
+/* Deferred-work callback: run the delayed request's normal executor. */
+static void nvmet_delayed_execute_req(struct work_struct *work)
+{
+	struct nvmet_req *req =
+		container_of(to_delayed_work(work), struct nvmet_req, req_work);
+
+	req->execute(req);
+}
+
+/*
+ * Execute @req, optionally delaying it so a Cancel command has a window
+ * in which to abort it.  Admin-queue commands and Cancel itself are never
+ * delayed.  Delayed requests are tracked in the queue's xarray, keyed by
+ * command id, until they complete or are cancelled.
+ */
+void nvmet_execute_request(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	int delay_count = 0;
+	u32 delay_msec = 0;
+	unsigned long flags;
+	int ret;
+
+	if (unlikely(req->sq->qid == 0) ||
+	    req->cmd->common.opcode == nvme_cmd_cancel)
+		return req->execute(req);
+
+	if (ctrl) {
+		delay_count = atomic_dec_if_positive(&ctrl->delay_count) + 1;
+		delay_msec = ctrl->delay_msec;
+	}
+	if (!(ctrl && delay_count && delay_msec))
+		return req->execute(req);
+
+	/*
+	 * GFP_ATOMIC: the xarray lock is a spinlock held with interrupts
+	 * disabled, so a sleeping (GFP_KERNEL) allocation is not allowed.
+	 */
+	xa_lock_irqsave(&req->sq->outstanding_requests, flags);
+	ret = __xa_insert(&req->sq->outstanding_requests,
+			  req->cmd->common.command_id, req, GFP_ATOMIC);
+	xa_unlock_irqrestore(&req->sq->outstanding_requests, flags);
+
+	if (ret) {
+		pr_err("nvmet: failure to delay command %d",
+		       req->cmd->common.command_id);
+		return req->execute(req);
+	}
+
+	INIT_DELAYED_WORK(&req->req_work, nvmet_delayed_execute_req);
+	queue_delayed_work(nvmet_wq, &req->req_work,
+			   msecs_to_jiffies(delay_msec));
+}
+EXPORT_SYMBOL_GPL(nvmet_execute_request);
+#endif
+
 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
 		const char *subsysnqn)
 {
diff --git a/drivers/nvme/target/debugfs.c b/drivers/nvme/target/debugfs.c
index e4300eb95101a7..4a80cfceab3460 100644
--- a/drivers/nvme/target/debugfs.c
+++ b/drivers/nvme/target/debugfs.c
@@ -153,6 +153,42 @@ static int nvmet_ctrl_tls_concat_show(struct seq_file *m, void *p)
 NVMET_DEBUGFS_ATTR(nvmet_ctrl_tls_concat);
 #endif
 
+#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS)
+/* "delay" read handler: report remaining delay count and delay in msec. */
+static int nvmet_ctrl_delay_show(struct seq_file *m, void *p)
+{
+	struct nvmet_ctrl *ctrl = m->private;
+	int delay_count = atomic_read(&ctrl->delay_count);
+
+	seq_printf(m, "%u %u\n", delay_count, ctrl->delay_msec);
+	return 0;
+}
+
+/* "delay" write handler: "<count> [<msec>]" sets how many requests to delay. */
+static ssize_t nvmet_ctrl_delay_write(struct file *file, const char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct nvmet_ctrl *ctrl = m->private;
+	char delay_buf[22] = {};
+	unsigned int delay_count;
+	unsigned int delay_msec;
+	int n;
+
+	if (count >= sizeof(delay_buf))
+		return -EINVAL;
+	if (copy_from_user(delay_buf, buf, count))
+		return -EFAULT;
+
+	/* %u requires unsigned int operands; int here was a format mismatch. */
+	n = sscanf(delay_buf, "%u %u", &delay_count, &delay_msec);
+	if (n < 1 || n > 2)
+		return -EINVAL;
+	if (n == 2)
+		ctrl->delay_msec = delay_msec;
+	atomic_set(&ctrl->delay_count, delay_count);
+	return count;
+}
+NVMET_DEBUGFS_RW_ATTR(nvmet_ctrl_delay);
+#endif /* CONFIG_NVME_TARGET_DELAY_REQUESTS */
+
 int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl)
 {
 	char name[32];
@@ -183,6 +219,10 @@ int
nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl) &nvmet_ctrl_tls_concat_fops); debugfs_create_file("tls_key", S_IRUSR, ctrl->debugfs_dir, ctrl, &nvmet_ctrl_tls_key_fops); +#endif +#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS) + debugfs_create_file("delay", S_IWUSR, ctrl->debugfs_dir, ctrl, + &nvmet_ctrl_delay_fops); #endif return 0; } diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 7318b736d41417..f86b628b1efef7 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2378,7 +2378,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) } /* data transfer complete, resume with nvmet layer */ - fod->req.execute(&fod->req); + nvmet_execute_request(&fod->req); break; case NVMET_FCOP_READDATA: @@ -2589,7 +2589,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, * can invoke the nvmet_layer now. If read data, cmd completion will * push the data */ - fod->req.execute(&fod->req); + nvmet_execute_request(&fod->req); return; transport_error: diff --git a/drivers/nvme/target/io-cmd-cancel.c b/drivers/nvme/target/io-cmd-cancel.c new file mode 100644 index 00000000000000..8d7f363c916376 --- /dev/null +++ b/drivers/nvme/target/io-cmd-cancel.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe I/O cancel command implementation. 
* Copyright (c) 2023 Red Hat
+ */
+
+#include "nvmet.h"
+
+/*
+ * Handle an NVMe Cancel command: abort one delayed command (action
+ * "single command") or every delayed command on this queue (action
+ * "multiple commands").  The number of successfully cancelled commands
+ * is returned in the completion's result field.
+ */
+void nvmet_execute_cancel(struct nvmet_req *req)
+{
+	u16 cid;
+	u16 sqid;
+	bool mult_cmds;
+	u16 ret = 0;
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmet_sq *sq = req->sq;
+	struct nvmet_req *treq;
+	u32 canceled = 0;
+
+	if (!nvmet_check_transfer_len(req, 0))
+		return;
+
+	/* sqid holds a cpu-native value after le16_to_cpu(): plain u16. */
+	sqid = le16_to_cpu(req->cmd->cancel.sqid);
+	if (sqid > ctrl->subsys->max_qid) {
+		ret = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+		goto exit;
+	}
+
+	mult_cmds = req->cmd->cancel.action & NVME_CANCEL_ACTION_MUL_CMD;
+	cid = req->cmd->cancel.cid;
+
+	if (cid == req->cmd->cancel.command_id && !mult_cmds) {
+		/*
+		 * If action is "single command" and cid is the cid of this
+		 * Cancel command itself, fail it with "invalid cid".  Do not
+		 * fall through to the cancellation logic below.
+		 */
+		ret = NVME_SC_INVALID_CID | NVME_STATUS_DNR;
+		goto exit;
+	}
+	if ((cid != 0xFFFF && mult_cmds) || sqid != sq->qid) {
+		/*
+		 * "Multiple commands" requires cid == 0xFFFF, and the sqid
+		 * field must match the queue this Cancel was submitted on;
+		 * otherwise fail with "invalid field" without cancelling
+		 * anything.
+		 */
+		ret = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+		goto exit;
+	}
+
+	if (!mult_cmds) {
+		/*
+		 * NOTE(review): the lookup races with normal completion of
+		 * the target request (which erases and completes it); the
+		 * lookup + cancel should eventually happen under the xarray
+		 * lock -- confirm request lifetime guarantees here.
+		 */
+		treq = xa_load(&sq->outstanding_requests, cid);
+		if (treq) {
+			if (cancel_delayed_work(&treq->req_work)) {
+				pr_info("nvmet: CANCEL success: %d", cid);
+				nvmet_req_complete(treq, NVME_SC_ABORT_REQ);
+				canceled += 1;
+			} else {
+				pr_info("nvmet: CANCEL failed: %d", cid);
+			}
+		} else {
+			pr_info("nvmet: CANCEL request not found: %d", cid);
+		}
+	} else {
+		unsigned long ucid;
+
+		xa_for_each(&sq->outstanding_requests, ucid, treq) {
+			if (cancel_delayed_work(&treq->req_work)) {
+				nvmet_req_complete(treq, NVME_SC_ABORT_REQ);
+				canceled += 1;
+			}
+		}
+		pr_info("nvmet: CANCEL removed %d requests", canceled);
+	}
+exit:
+	nvmet_set_result(req, canceled);
+	nvmet_req_complete(req, ret);
+}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index a5c41144667c68..11d5f6dd9ece51 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -125,7 +125,7 @@ static void nvme_loop_execute_work(struct work_struct *work)
 	struct nvme_loop_iod *iod =
 		container_of(work, struct nvme_loop_iod, work);
 
-	iod->req.execute(&iod->req);
+	nvmet_execute_request(&iod->req);
 }
 
 static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 33fac9151b5b14..52580377418392 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -170,6 +170,9 @@ struct nvmet_sq {
 #endif
 	struct completion	free_done;
 	struct completion	confirm_done;
+#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS)
+	struct xarray		outstanding_requests;
+#endif
 };
 
 struct nvmet_ana_group {
@@ -304,6 +307,10 @@ struct nvmet_ctrl {
 #endif
 #ifdef CONFIG_NVME_TARGET_TCP_TLS
 	struct key		*tls_key;
+#endif
+#ifdef CONFIG_NVME_TARGET_DELAY_REQUESTS
+	atomic_t		delay_count;
+	u32			delay_msec;
 #endif
 	struct nvmet_pr_log_mgr pr_log_mgr;
 };
@@ -490,6 +497,9 @@ struct nvmet_req {
 	u16			error_loc;
 	u64			error_slba;
 	struct
nvmet_pr_per_ctrl_ref *pc_ref; +#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS) + struct delayed_work req_work; +#endif }; #define NVMET_MAX_MPOOL_BVEC 16 @@ -564,6 +574,7 @@ size_t nvmet_req_transfer_len(struct nvmet_req *req); bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); +void nvmet_req_complete_delayed(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgls(struct nvmet_req *req); void nvmet_req_free_sgls(struct nvmet_req *req); @@ -714,6 +725,8 @@ void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req); void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req); void nvmet_bdev_execute_zone_append(struct nvmet_req *req); +void nvmet_execute_cancel(struct nvmet_req *req); + static inline u32 nvmet_rw_data_len(struct nvmet_req *req) { return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << @@ -963,4 +976,10 @@ struct nvmet_feat_arbitration { u8 ab; }; +#if IS_ENABLED(CONFIG_NVME_TARGET_DELAY_REQUESTS) +void nvmet_execute_request(struct nvmet_req *req); +#else +static inline void nvmet_execute_request(struct nvmet_req *req) { req->execute(req); } +#endif + #endif /* _NVMET_H */ diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 1afd93026f9bf0..f9ffb5feedb5c2 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -773,7 +773,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(status)) nvmet_req_complete(&rsp->req, status); else - rsp->req.execute(&rsp->req); + nvmet_execute_request(&rsp->req); } static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) @@ -958,7 +958,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp) queue->cm_id->port_num, &rsp->read_cqe, NULL)) nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR); } else { - rsp->req.execute(&rsp->req); + 
nvmet_execute_request(&rsp->req); } return true; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 9cf97433b0b6d5..96851e226bc4ed 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -597,7 +597,7 @@ static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd) if (unlikely(cmd->flags & NVMET_TCP_F_INIT_FAILED)) nvmet_tcp_queue_response(&cmd->req); else - cmd->req.execute(&cmd->req); + nvmet_execute_request(&cmd->req); } static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) @@ -1104,7 +1104,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) goto out; } - queue->cmd->req.execute(&queue->cmd->req); + nvmet_execute_request(&queue->cmd->req); out: nvmet_prepare_receive_pdu(queue); return ret; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bfb5688363b0dc..cdf22a859f9b8f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -28,6 +28,9 @@ /* Special NSSR value, 'NVMe' */ #define NVME_SUBSYS_RESET 0x4E564D65 +/* Maximum number of reserved commands for Cancel */ +#define NVME_RSV_CANCEL_MAX 2 + enum nvme_subsys_type { /* Referral to another discovery type target subsystem */ NVME_NQN_DISC = 1, @@ -890,6 +893,7 @@ enum nvme_opcode { nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_acquire = 0x11, nvme_cmd_resv_release = 0x15, + nvme_cmd_cancel = 0x18, nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, nvme_cmd_zone_append = 0x7d, @@ -1425,6 +1429,22 @@ struct nvme_abort_cmd { __u32 rsvd11[5]; }; +struct nvme_cancel_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u32 rsvd1[8]; + __le16 sqid; + __u16 cid; + __u8 action; + __u8 rsvd11[3]; + __u32 rsvd12[4]; +}; + +#define NVME_CANCEL_ACTION_MUL_CMD 1 +#define NVME_CANCEL_ACTION_SINGLE_CMD 0 + struct nvme_download_firmware { __u8 opcode; __u8 flags; @@ -1887,6 +1907,7 @@ struct nvme_command { struct nvme_zone_mgmt_send_cmd zms; struct nvme_zone_mgmt_recv_cmd zmr; struct nvme_abort_cmd abort; + 
struct nvme_cancel_cmd cancel; struct nvme_get_log_page_command get_log_page; struct nvmf_common_command fabrics; struct nvmf_connect_command connect; @@ -2069,6 +2090,7 @@ enum { NVME_SC_INVALID_PI = 0x181, NVME_SC_READ_ONLY = 0x182, NVME_SC_ONCS_NOT_SUPPORTED = 0x183, + NVME_SC_INVALID_CID = 0x184, /* * I/O Command Set Specific - Fabrics commands: