From 9d09dd8d7626b9124ce4bc081aabcb0590173b27 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:02 -0700 Subject: [PATCH 01/26] nvmet: add transport discovery change op Some transports, such as FC-NVME, support discovery controller change events without the use of a persistent discovery controller. FC receives events via RSCN from the FC Fabric Controller or subsystem FC port. This patch adds a nvmet transport op that is called whenever a discovery change event occurs in the nvmet layer. To facilitate the callback without adding another layer to cross into core.c to reference the transport ops, the port structure snapshots the transport ops when the port is enabled and clears them when disabled. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 2 ++ drivers/nvme/target/discovery.c | 4 ++++ drivers/nvme/target/nvmet.h | 2 ++ 3 files changed, 8 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 7734a6acff85..43e8c4adc1f4 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -311,6 +311,7 @@ int nvmet_enable_port(struct nvmet_port *port) port->inline_data_size = 0; port->enabled = true; + port->tr_ops = ops; return 0; } @@ -321,6 +322,7 @@ void nvmet_disable_port(struct nvmet_port *port) lockdep_assert_held(&nvmet_config_sem); port->enabled = false; + port->tr_ops = NULL; ops = nvmet_transports[port->disc_addr.trtype]; ops->remove_port(port); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 5baf269f3f8a..8efca26b4776 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -41,6 +41,10 @@ void nvmet_port_disc_changed(struct nvmet_port *port, __nvmet_disc_changed(port, ctrl); } mutex_unlock(&nvmet_disc_subsys->lock); + + /* If transport can signal change, notify transport */ + if (port->tr_ops && port->tr_ops->discovery_chg) + port->tr_ops->discovery_chg(port); } static void __nvmet_subsys_disc_changed(struct nvmet_port *port, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index c25d88fc9dec..dc270944bb25 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -140,6 +140,7 @@ struct nvmet_port { void *priv; bool enabled; int inline_data_size; + const struct nvmet_fabrics_ops *tr_ops; }; static inline struct nvmet_port *to_nvmet_port(struct config_item *item) @@ -277,6 +278,7 @@ struct nvmet_fabrics_ops { void (*disc_traddr)(struct nvmet_req *req, struct nvmet_port *port, char *traddr); u16 (*install_queue)(struct nvmet_sq *nvme_sq); + void (*discovery_chg)(struct nvmet_port *port); }; #define NVMET_MAX_INLINE_BIOVEC 8 From 150d71f725fd2f5a0015b7fa8df0816a207d4e4b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:03 -0700 Subject: [PATCH 02/26] nvmet-fc: add transport discovery change event callback support This patch adds support for the nvmet discovery_change transport op. In turn, the transport adds it's own LLDD api callback discovery_event op to request the LLDD to generate an RSCN for the discovery change. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 11 +++++++++++ include/linux/nvme-fc-driver.h | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 508661af0f50..1f252c9a953a 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2549,6 +2549,16 @@ nvmet_fc_remove_port(struct nvmet_port *port) kfree(pe); } +static void +nvmet_fc_discovery_chg(struct nvmet_port *port) +{ + struct nvmet_fc_port_entry *pe = port->priv; + struct nvmet_fc_tgtport *tgtport = pe->tgtport; + + if (tgtport && tgtport->ops->discovery_event) + tgtport->ops->discovery_event(&tgtport->fc_target_port); +} + static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_FC, @@ -2557,6 +2567,7 @@ static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { .remove_port = nvmet_fc_remove_port, .queue_response = nvmet_fc_fcp_nvme_cmd_done, .delete_ctrl = nvmet_fc_delete_ctrl, + .discovery_chg = nvmet_fc_discovery_chg, }; static int __init nvmet_fc_init_module(void) diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index c48e96436f56..98d904961b33 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -791,6 +791,11 @@ struct nvmet_fc_target_port { * nvmefc_tgt_fcp_req. * Entrypoint is Optional. * + * @discovery_event: Called by the transport to generate an RSCN + * change notifications to NVME initiators. The RSCN notifications + * should cause the initiator to rescan the discovery controller + * on the targetport. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -832,6 +837,7 @@ struct nvmet_fc_target_template { struct nvmefc_tgt_fcp_req *fcpreq); void (*defer_rcv)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); + void (*discovery_event)(struct nvmet_fc_target_port *tgtport); u32 max_hw_queues; u16 max_sgl_segments; From 4cf7c363b41552d76331fcf1e7ce600c8deeddc3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:04 -0700 Subject: [PATCH 03/26] nvme-fcloop: add support for nvmet discovery_event op Update fcloop to support the discovery_event operation and invoke a nvme rescan. In a real fc adapter, this would generate an RSCN, which the host would receive and convert into a nvme rescan on the remote port specified in the rscn payload. Signed-off-by: James Smart [kbuild-bot: fcloop_tgt_discovery_evt can be static] Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 381b5a90c48b..b8c1cc54a0db 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -231,6 +231,11 @@ struct fcloop_lsreq { int status; }; +struct fcloop_rscn { + struct fcloop_tport *tport; + struct work_struct work; +}; + enum { INI_IO_START = 0, INI_IO_ACTIVE = 1, @@ -348,6 +353,37 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, return 0; } +/* + * Simulate reception of RSCN and converting it to a initiator transport + * call to rescan a remote port. + */ +static void +fcloop_tgt_rscn_work(struct work_struct *work) +{ + struct fcloop_rscn *tgt_rscn = + container_of(work, struct fcloop_rscn, work); + struct fcloop_tport *tport = tgt_rscn->tport; + + if (tport->remoteport) + nvme_fc_rescan_remoteport(tport->remoteport); + kfree(tgt_rscn); +} + +static void +fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport) +{ + struct fcloop_rscn *tgt_rscn; + + tgt_rscn = kzalloc(sizeof(*tgt_rscn), GFP_KERNEL); + if (!tgt_rscn) + return; + + tgt_rscn->tport = tgtport->private; + INIT_WORK(&tgt_rscn->work, fcloop_tgt_rscn_work); + + schedule_work(&tgt_rscn->work); +} + static void fcloop_tfcp_req_free(struct kref *ref) { @@ -839,6 +875,7 @@ static struct nvmet_fc_target_template tgttemplate = { .fcp_op = fcloop_fcp_op, .fcp_abort = fcloop_tgt_fcp_abort, .fcp_req_release = fcloop_fcp_req_release, + .discovery_event = fcloop_tgt_discovery_evt, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, From f60cb93bbfecf1ad13713af285c3793e861fc9b2 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:05 -0700 Subject: [PATCH 04/26] lpfc: add support to generate RSCN events for nport This patch adds general RSCN support: - The ability to transmit an RSCN to the port on the other end of the link (regular port if pt2pt, or fabric controller if fabric). - And general recognition of an RSCN ELS when an ELS is received. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_crtn.h | 2 + drivers/scsi/lpfc/lpfc_els.c | 122 +++++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_hbadisc.c | 35 +++++++++ drivers/scsi/lpfc/lpfc_hw.h | 2 + drivers/scsi/lpfc/lpfc_sli.c | 1 + 6 files changed, 163 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index aafcffaa25f7..14293c546772 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -274,6 +274,7 @@ struct lpfc_stats { uint32_t elsXmitADISC; uint32_t elsXmitLOGO; uint32_t elsXmitSCR; + uint32_t elsXmitRSCN; uint32_t elsXmitRNID; uint32_t elsXmitFARP; uint32_t elsXmitFARPR; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index e0b14d791b8c..4b8eb9107b85 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -141,6 +141,7 @@ int lpfc_issue_els_adisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); int lpfc_issue_els_logo(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); int lpfc_issue_els_npiv_logo(struct lpfc_vport *, struct lpfc_nodelist *); int lpfc_issue_els_scr(struct lpfc_vport *, uint32_t, uint8_t); +int lpfc_issue_els_rscn(struct lpfc_vport *vport, uint8_t retry); int lpfc_issue_fabric_reglogin(struct lpfc_vport *); int lpfc_els_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *); int lpfc_ct_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *); @@ -355,6 +356,7 @@ void lpfc_mbox_timeout_handler(struct lpfc_hba *); struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_vport *, uint32_t); struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_vport *, struct lpfc_name *); +struct lpfc_nodelist *lpfc_findnode_mapped(struct lpfc_vport *vport); int lpfc_sli_issue_mbox_wait(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 5ac4f8d76b91..00f5d9d547f9 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -3078,6 +3080,116 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) return 0; } +/** + * lpfc_issue_els_rscn - Issue an RSCN to the Fabric Controller (Fabric) + * or the other nport (pt2pt). + * @vport: pointer to a host virtual N_Port data structure. + * @retry: number of retries to the command IOCB. + * + * This routine issues a RSCN to the Fabric Controller (DID 0xFFFFFD) + * when connected to a fabric, or to the remote port when connected + * in point-to-point mode. When sent to the Fabric Controller, it will + * replay the RSCN to registered recipients. + * + * Note that, in lpfc_prep_els_iocb() routine, the reference count of ndlp + * will be incremented by 1 for holding the ndlp and the reference to ndlp + * will be stored into the context1 field of the IOCB for the completion + * callback function to the RSCN ELS command. + * + * Return code + * 0 - Successfully issued RSCN command + * 1 - Failed to issue RSCN command + **/ +int +lpfc_issue_els_rscn(struct lpfc_vport *vport, uint8_t retry) +{ + struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *elsiocb; + struct lpfc_nodelist *ndlp; + struct { + struct fc_els_rscn rscn; + struct fc_els_rscn_page portid; + } *event; + uint32_t nportid; + uint16_t cmdsize = sizeof(*event); + + /* Not supported for private loop */ + if (phba->fc_topology == LPFC_TOPOLOGY_LOOP && + !(vport->fc_flag & FC_PUBLIC_LOOP)) + return 1; + + if (vport->fc_flag & FC_PT2PT) { + /* find any mapped nport - that would be the other nport */ + ndlp = lpfc_findnode_mapped(vport); + if (!ndlp) + return 1; + } else { + nportid = FC_FID_FCTRL; + /* find the fabric controller node */ + ndlp = lpfc_findnode_did(vport, nportid); + if (!ndlp) { + /* if one didn't exist, make one */ + ndlp = lpfc_nlp_init(vport, nportid); + if (!ndlp) + return 1; + lpfc_enqueue_node(vport, ndlp); + } else if (!NLP_CHK_NODE_ACT(ndlp)) { + ndlp = lpfc_enable_node(vport, ndlp, + NLP_STE_UNUSED_NODE); + if (!ndlp) + return 1; + } + } + + elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, + ndlp->nlp_DID, ELS_CMD_RSCN_XMT); + + if (!elsiocb) { + /* This will trigger the release of the node just + * allocated + */ + lpfc_nlp_put(ndlp); + return 1; + } + + event = ((struct lpfc_dmabuf *)elsiocb->context2)->virt; + + event->rscn.rscn_cmd = ELS_RSCN; + event->rscn.rscn_page_len = sizeof(struct fc_els_rscn_page); + event->rscn.rscn_plen = cpu_to_be16(cmdsize); + + nportid = vport->fc_myDID; + /* appears that page flags must be 0 for fabric to broadcast RSCN */ + event->portid.rscn_page_flags = 0; + event->portid.rscn_fid[0] = (nportid & 0x00FF0000) >> 16; + event->portid.rscn_fid[1] = (nportid & 0x0000FF00) >> 8; + event->portid.rscn_fid[2] = nportid & 0x000000FF; + + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, + "Issue RSCN: did:x%x", + ndlp->nlp_DID, 0, 0); + + phba->fc_stat.elsXmitRSCN++; + elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd; + if (lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0) == + IOCB_ERROR) { + /* The additional lpfc_nlp_put will cause the following + * lpfc_els_free_iocb routine to trigger the rlease of + * the node. + */ + lpfc_nlp_put(ndlp); + lpfc_els_free_iocb(phba, elsiocb); + return 1; + } + /* This will cause the callback-function lpfc_cmpl_els_cmd to + * trigger the release of node. + */ + if (!(vport->fc_flag & FC_PT2PT)) + lpfc_nlp_put(ndlp); + + return 0; +} + /** * lpfc_issue_els_farpr - Issue a farp to an node on a vport * @vport: pointer to a host virtual N_Port data structure. @@ -6318,6 +6430,16 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_RSCN, lp[i]); + /* Check if RSCN is coming from a direct-connected remote NPort */ + if (vport->fc_flag & FC_PT2PT) { + /* If so, just ACC it, no other action needed for now */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "2024 pt2pt RSCN %08x Data: x%x x%x\n", + *lp, vport->fc_flag, payload_len); + lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL); + return 0; + } + /* If we are about to begin discovery, just ACC the RSCN. * Discovery processing will satisfy it. */ diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index c43852f97f25..28ecaa7fc715 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -5276,6 +5276,41 @@ lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did) return ndlp; } +struct lpfc_nodelist * +lpfc_findnode_mapped(struct lpfc_vport *vport) +{ + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp; + uint32_t data1; + unsigned long iflags; + + spin_lock_irqsave(shost->host_lock, iflags); + + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE || + ndlp->nlp_state == NLP_STE_MAPPED_NODE) { + data1 = (((uint32_t)ndlp->nlp_state << 24) | + ((uint32_t)ndlp->nlp_xri << 16) | + ((uint32_t)ndlp->nlp_type << 8) | + ((uint32_t)ndlp->nlp_rpi & 0xff)); + spin_unlock_irqrestore(shost->host_lock, iflags); + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, + "2025 FIND node DID " + "Data: x%p x%x x%x x%x %p\n", + ndlp, ndlp->nlp_DID, + ndlp->nlp_flag, data1, + ndlp->active_rrqs_xri_bitmap); + return ndlp; + } + } + spin_unlock_irqrestore(shost->host_lock, iflags); + + /* FIND node did NOT FOUND */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, + "2026 FIND mapped did NOT FOUND.\n"); + return NULL; +} + struct lpfc_nodelist * lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) { diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index edd8f3982023..5b439a6dcde1 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -601,6 +601,7 @@ struct fc_vft_header { #define ELS_CMD_RPL 0x57000000 #define ELS_CMD_FAN 0x60000000 #define ELS_CMD_RSCN 0x61040000 +#define ELS_CMD_RSCN_XMT 0x61040008 #define ELS_CMD_SCR 0x62000000 #define ELS_CMD_RNID 0x78000000 #define ELS_CMD_LIRR 0x7A000000 @@ -642,6 +643,7 @@ struct fc_vft_header { #define ELS_CMD_RPL 0x57 #define ELS_CMD_FAN 0x60 #define ELS_CMD_RSCN 0x0461 +#define ELS_CMD_RSCN_XMT 0x08000461 #define ELS_CMD_SCR 0x62 #define ELS_CMD_RNID 0x78 #define ELS_CMD_LIRR 0x7A diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d1512e4f9791..4329cc44bb55 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -9398,6 +9398,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, if (if_type >= LPFC_SLI_INTF_IF_TYPE_2) { if (pcmd && (*pcmd == ELS_CMD_FLOGI || *pcmd == ELS_CMD_SCR || + *pcmd == ELS_CMD_RSCN_XMT || *pcmd == ELS_CMD_FDISC || *pcmd == ELS_CMD_LOGO || *pcmd == ELS_CMD_PLOGI)) { From ab723121a8eade04ecc6bd7116924c359336f4eb Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:06 -0700 Subject: [PATCH 05/26] lpfc: add nvmet discovery_event op support This patch adds support for the nvmet discovery op. When the callback routine is called, the driver will call the routine to generate an RSCN to the port on the other end of the link. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/scsi/lpfc/lpfc_nvmet.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index d74bfd264495..06170824a69b 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1139,6 +1139,22 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, spin_unlock_irqrestore(&ctxp->ctxlock, iflag); } +static void +lpfc_nvmet_discovery_event(struct nvmet_fc_target_port *tgtport) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_hba *phba; + uint32_t rc; + + tgtp = tgtport->private; + phba = tgtp->phba; + + rc = lpfc_issue_els_rscn(phba->pport, 0); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6420 NVMET subsystem change: Notification %s\n", + (rc) ? "Failed" : "Sent"); +} + static struct nvmet_fc_target_template lpfc_tgttemplate = { .targetport_delete = lpfc_nvmet_targetport_delete, .xmt_ls_rsp = lpfc_nvmet_xmt_ls_rsp, @@ -1146,6 +1162,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .fcp_abort = lpfc_nvmet_xmt_fcp_abort, .fcp_req_release = lpfc_nvmet_xmt_fcp_release, .defer_rcv = lpfc_nvmet_defer_rcv, + .discovery_event = lpfc_nvmet_discovery_event, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, From 6f2589f478795c46a61696d7d7c2f47a0bc6cfe3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:07 -0700 Subject: [PATCH 06/26] lpfc: add support for translating an RSCN rcv into a discovery rescan This patch updates RSCN receive processing to check for the remote port being an NVME port, and if so, invoke the nvme_fc callback to rescan the remote port. The rescan will generate a discovery udev event. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/scsi/lpfc/lpfc_crtn.h | 2 ++ drivers/scsi/lpfc/lpfc_els.c | 5 ++++ drivers/scsi/lpfc/lpfc_nvme.c | 44 +++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 4b8eb9107b85..866374801140 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -557,6 +557,8 @@ void lpfc_ras_stop_fwlog(struct lpfc_hba *phba); int lpfc_check_fwlog_support(struct lpfc_hba *phba); /* NVME interfaces. */ +void lpfc_nvme_rescan_port(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp); void lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp); int lpfc_nvme_register_port(struct lpfc_vport *vport, diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 00f5d9d547f9..968ed0fd37f7 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -6326,6 +6326,8 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport) continue; } + if (ndlp->nlp_fc4_type & NLP_FC4_NVME) + lpfc_nvme_rescan_port(vport, ndlp); lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); @@ -6437,6 +6439,9 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, "2024 pt2pt RSCN %08x Data: x%x x%x\n", *lp, vport->fc_flag, payload_len); lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL); + + if (ndlp->nlp_fc4_type & NLP_FC4_NVME) + lpfc_nvme_rescan_port(vport, ndlp); return 0; } diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 9d99cb915390..fdd16d9f55a1 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2402,6 +2402,50 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) #endif } +/** + * lpfc_nvme_rescan_port - Check to see if we should rescan this remoteport + * + * If the ndlp represents an NVME Target, that we are logged into, + * ping the NVME FC Transport layer to initiate a device rescan + * on this remote NPort. + */ +void +lpfc_nvme_rescan_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +{ +#if (IS_ENABLED(CONFIG_NVME_FC)) + struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *remoteport; + + rport = ndlp->nrport; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6170 Rescan NPort DID x%06x type x%x " + "state x%x rport %p\n", + ndlp->nlp_DID, ndlp->nlp_type, ndlp->nlp_state, rport); + if (!rport) + goto input_err; + remoteport = rport->remoteport; + if (!remoteport) + goto input_err; + + /* Only rescan if we are an NVME target in the MAPPED state */ + if (remoteport->port_role & FC_PORT_ROLE_NVME_DISCOVERY && + ndlp->nlp_state == NLP_STE_MAPPED_NODE) { + nvme_fc_rescan_remoteport(remoteport); + + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6172 NVME rescanned DID x%06x " + "port_state x%x\n", + ndlp->nlp_DID, remoteport->port_state); + } + return; +input_err: + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6169 State error: lport %p, rport%p FCID x%06x\n", + vport->localport, ndlp->rport, ndlp->nlp_DID); +#endif +} + /* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport. * * There is no notion of Devloss or rport recovery from the current From 41b194b843a255d5a6e9468edd3ab1d71a24abb3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:08 -0700 Subject: [PATCH 07/26] lpfc: add sysfs interface to post NVME RSCN To support scenarios which aren't bound to nvmetcli add port scenarios, which is currently where the nvmet_fc transport invokes the discovery event callbacks, a syfs attribute is added to lpfc which can be written to cause an RSCN to be generated for the nport. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_attr.c | 60 +++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 14293c546772..2c3bb8a966e5 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -820,6 +820,7 @@ struct lpfc_hba { uint32_t cfg_use_msi; uint32_t cfg_auto_imax; uint32_t cfg_fcp_imax; + uint32_t cfg_force_rscn; uint32_t cfg_cq_poll_threshold; uint32_t cfg_cq_max_proc_limit; uint32_t cfg_fcp_cpu_map; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index d4c65e2109e2..2bd1e014103b 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4958,6 +4958,64 @@ static DEVICE_ATTR(lpfc_req_fw_upgrade, S_IRUGO | S_IWUSR, lpfc_request_firmware_upgrade_show, lpfc_request_firmware_upgrade_store); +/** + * lpfc_force_rscn_store + * + * @dev: class device that is converted into a Scsi_host. + * @attr: device attribute, not used. + * @buf: unused string + * @count: unused variable. + * + * Description: + * Force the switch to send a RSCN to all other NPorts in our zone + * If we are direct connect pt2pt, build the RSCN command ourself + * and send to the other NPort. Not supported for private loop. + * + * Returns: + * 0 - on success + * -EIO - if command is not sent + **/ +static ssize_t +lpfc_force_rscn_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata; + int i; + + i = lpfc_issue_els_rscn(vport, 0); + if (i) + return -EIO; + return strlen(buf); +} + +/* + * lpfc_force_rscn: Force an RSCN to be sent to all remote NPorts + * connected to the HBA. + * + * Value range is any ascii value + */ +static int lpfc_force_rscn; +module_param(lpfc_force_rscn, int, 0644); +MODULE_PARM_DESC(lpfc_force_rscn, + "Force an RSCN to be sent to all remote NPorts"); +lpfc_param_show(force_rscn) + +/** + * lpfc_force_rscn_init - Force an RSCN to be sent to all remote NPorts + * @phba: lpfc_hba pointer. + * @val: unused value. + * + * Returns: + * zero if val saved. + **/ +static int +lpfc_force_rscn_init(struct lpfc_hba *phba, int val) +{ + return 0; +} +static DEVICE_ATTR_RW(lpfc_force_rscn); + /** * lpfc_fcp_imax_store * @@ -5958,6 +6016,7 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_nvme_oas, &dev_attr_lpfc_nvme_embed_cmd, &dev_attr_lpfc_fcp_imax, + &dev_attr_lpfc_force_rscn, &dev_attr_lpfc_cq_poll_threshold, &dev_attr_lpfc_cq_max_proc_limit, &dev_attr_lpfc_fcp_cpu_map, @@ -7005,6 +7064,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_nvme_oas_init(phba, lpfc_nvme_oas); lpfc_nvme_embed_cmd_init(phba, lpfc_nvme_embed_cmd); lpfc_fcp_imax_init(phba, lpfc_fcp_imax); + lpfc_force_rscn_init(phba, lpfc_force_rscn); lpfc_cq_poll_threshold_init(phba, lpfc_cq_poll_threshold); lpfc_cq_max_proc_limit_init(phba, lpfc_cq_max_proc_limit); lpfc_fcp_cpu_map_init(phba, lpfc_fcp_cpu_map); From 4bea364f161810523032f37a8ae0b7d92cf28eea Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 May 2019 15:25:26 -0700 Subject: [PATCH 08/26] nvme-fc: add message when creating new association When looking at console messages to troubleshoot, there are one maybe two messages before creation of the controller is complete. However, a lot of io takes place to reach that point. It's unclear when things have started. Add a message when the controller is attempting to create a new association. Thus we know what controller, between what host and remote port, and what NQN is being put into place for any subsequent success or failure messages. Signed-off-by: James Smart Reviewed-by: Chaitanya Kulkarni Reviewed-by: Giridhar Malavali Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index dd8169bbf0d2..9b497d785ed7 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2607,6 +2607,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (nvme_fc_ctlr_active_on_rport(ctrl)) return -ENOTUNIQ; + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: create association : host wwpn 0x%016llx " + " rport wwpn 0x%016llx: NQN \"%s\"\n", + ctrl->cnum, ctrl->lport->localport.port_name, + ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn); + /* * Create the admin queue */ From 2181e455612a8db2761eabbf126640552a451e96 Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Thu, 20 Jun 2019 08:48:10 +0200 Subject: [PATCH 09/26] nvme: fix possible io failures when removing multipathed ns When a shared namespace is removed, we call blk_cleanup_queue() when the device can still be accessed as the current path and this can result in submission to a dying queue. Hence, direct_make_request() called by our mpath device may fail (propagating the failure to userspace). Instead, we want to failover this I/O to a different path if one exists. Thus, before we cleanup the request queue, we make sure that the device is cleared from the current path nor it can be selected again as such. Fix this by: - clear the ns from the head->list and synchronize rcu to make sure there is no concurrent path search that restores it as the current path - clear the mpath current path in order to trigger a subsequent path search and sync srcu to wait for any ongoing request submissions - safely continue to namespace removal and blk_cleanup_queue Signed-off-by: Anton Eidelman Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 120fb593d1da..22c68e3b71d5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3344,6 +3344,14 @@ static void nvme_ns_remove(struct nvme_ns *ns) return; nvme_fault_inject_fini(ns); + + mutex_lock(&ns->ctrl->subsys->lock); + list_del_rcu(&ns->siblings); + mutex_unlock(&ns->ctrl->subsys->lock); + synchronize_rcu(); /* guarantee not available in head->list */ + nvme_mpath_clear_current_path(ns); + synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */ + if (ns->disk && ns->disk->flags & GENHD_FL_UP) { del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); @@ -3351,16 +3359,10 @@ static void nvme_ns_remove(struct nvme_ns *ns) blk_integrity_unregister(ns->disk); } - mutex_lock(&ns->ctrl->subsys->lock); - list_del_rcu(&ns->siblings); - nvme_mpath_clear_current_path(ns); - mutex_unlock(&ns->ctrl->subsys->lock); - down_write(&ns->ctrl->namespaces_rwsem); list_del_init(&ns->list); up_write(&ns->ctrl->namespaces_rwsem); - synchronize_srcu(&ns->head->srcu); nvme_mpath_check_last_path(ns); nvme_put_ns(ns); } From 1a87ee657c530bb2f3e39e4ac184d48f5f959cda Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 27 May 2019 01:29:01 +0900 Subject: [PATCH 10/26] nvme: export get and set features Future use intends to make use of both, so export these functions. And since their implementation is identical except for the opcode, provide a new function that implement both. [akinobu.mita@gmail.com>: fix line over 80 characters] Signed-off-by: Keith Busch Signed-off-by: Akinobu Mita Reviewed-by: Chaitanya Kulkarni Reviewed-by: Minwoo Im Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 24 +++++++++++++++++++++--- drivers/nvme/host/nvme.h | 6 ++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 22c68e3b71d5..3b3960e0c31f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1113,15 +1113,15 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, return id; } -static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, - void *buffer, size_t buflen, u32 *result) +static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, u32 *result) { struct nvme_command c; union nvme_result res; int ret; memset(&c, 0, sizeof(c)); - c.features.opcode = nvme_admin_set_features; + c.features.opcode = op; c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); @@ -1132,6 +1132,24 @@ static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword return ret; } +int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, + u32 *result) +{ + return nvme_features(dev, nvme_admin_set_features, fid, dword11, buffer, + buflen, result); +} +EXPORT_SYMBOL_GPL(nvme_set_features); + +int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, + u32 *result) +{ + return nvme_features(dev, nvme_admin_get_features, fid, dword11, buffer, + buflen, result); +} +EXPORT_SYMBOL_GPL(nvme_get_features); + int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) { u32 q_count = (*count - 1) | ((*count - 1) << 16); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 55553d293a98..038b8931d9e5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -459,6 +459,12 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, blk_mq_req_flags_t flags, bool poll); +int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, + u32 *result); +int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, + unsigned int dword11, void *buffer, size_t buflen, + u32 *result); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); From 7a1f46e3f75cff5042dfa1bb80c9929a0e412abc Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 6 Jun 2019 14:30:14 +0900 Subject: [PATCH 11/26] nvme: introduce nvme_is_fabrics to check fabrics cmd This patch introduces a nvme_is_fabrics() inline function to check whether or not the given command structure is for fabrics. Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- drivers/nvme/target/core.c | 2 +- drivers/nvme/target/fabrics-cmd.c | 2 +- drivers/nvme/target/fc.c | 2 +- include/linux/nvme.h | 7 ++++++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5838f7cd53ac..1994d5b42f94 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -578,7 +578,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, switch (ctrl->state) { case NVME_CTRL_NEW: case NVME_CTRL_CONNECTING: - if (req->cmd->common.opcode == nvme_fabrics_command && + if (nvme_is_fabrics(req->cmd) && req->cmd->fabrics.fctype == nvme_fabrics_type_connect) return true; break; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 43e8c4adc1f4..0587707b1a25 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -873,7 +873,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, status = nvmet_parse_connect_cmd(req); else if (likely(req->sq->qid != 0)) status = nvmet_parse_io_cmd(req); - else if (req->cmd->common.opcode == nvme_fabrics_command) + else if (nvme_is_fabrics(req->cmd)) status = nvmet_parse_fabrics_cmd(req); else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC) status = nvmet_parse_discovery_cmd(req); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 3b9f79aba98f..d16b55ffe79f 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -268,7 +268,7 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - if (cmd->common.opcode != nvme_fabrics_command) { + if (!nvme_is_fabrics(cmd)) { pr_err("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); req->error_loc = offsetof(struct nvme_common_command, opcode); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1f252c9a953a..ce8d819f86cc 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1806,7 +1806,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, */ rspcnt = atomic_inc_return(&fod->queue->zrspcnt); if (!(rspcnt % fod->queue->ersp_ratio) || - sqe->opcode == nvme_fabrics_command || + nvme_is_fabrics((struct nvme_command *) sqe) || xfr_length != fod->req.transfer_len || (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] || (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) || diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8028adacaff3..7080923e78d1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1165,6 +1165,11 @@ struct nvme_command { }; }; +static inline bool nvme_is_fabrics(struct nvme_command *cmd) +{ + return cmd->common.opcode == nvme_fabrics_command; +} + struct nvme_error_slot { __le64 error_count; __le16 sqid; @@ -1186,7 +1191,7 @@ static inline bool nvme_is_write(struct nvme_command *cmd) * * Why can't we simply have a Fabrics In and Fabrics out command? */ - if (unlikely(cmd->common.opcode == nvme_fabrics_command)) + if (unlikely(nvme_is_fabrics(cmd))) return cmd->fabrics.fctype & 1; return cmd->common.opcode & 1; } From d916b1be94b6dc8d293abed2451f3062f6af7551 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 23 May 2019 09:27:35 -0600 Subject: [PATCH 12/26] nvme-pci: use host managed power state for suspend The nvme pci driver prepares its devices for power loss during suspend by shutting down the controllers. The power setting is deferred to pci driver's power management before the platform removes power. The suspend-to-idle mode, however, does not remove power. NVMe devices that implement host managed power settings can achieve lower power and better transition latencies than using generic PCI power settings. Try to use this feature if the platform is not involved with the suspend. If successful, restore the previous power state on resume. Tested-by: Kai-Heng Feng Tested-by: Mario Limonciello Reviewed-by: Rafael J. Wysocki Signed-off-by: Keith Busch Signed-off-by: Sagi Grimberg [hch: fixed the compilation for the !CONFIG_PM_SLEEP case] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 95 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 524d6bd6d095..eeae5789303a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -116,6 +117,7 @@ struct nvme_dev { u32 cmbsz; u32 cmbloc; struct nvme_ctrl ctrl; + u32 last_ps; mempool_t *iod_mempool; @@ -2835,16 +2837,94 @@ static void nvme_remove(struct pci_dev *pdev) } #ifdef CONFIG_PM_SLEEP +static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps) +{ + return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps); +} + +static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps) +{ + return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL); +} + +static int nvme_resume(struct device *dev) +{ + struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); + struct nvme_ctrl *ctrl = &ndev->ctrl; + + if (pm_resume_via_firmware() || !ctrl->npss || + nvme_set_power_state(ctrl, ndev->last_ps) != 0) + nvme_reset_ctrl(ctrl); + return 0; +} + static int nvme_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); + struct nvme_ctrl *ctrl = &ndev->ctrl; + int ret = -EBUSY; + + /* + * The platform does not remove power for a kernel managed suspend so + * use host managed nvme power settings for lowest idle power if + * possible. This should have quicker resume latency than a full device + * shutdown. But if the firmware is involved after the suspend or the + * device does not support any non-default power states, shut down the + * device fully. + */ + if (pm_suspend_via_firmware() || !ctrl->npss) { + nvme_dev_disable(ndev, true); + return 0; + } + + nvme_start_freeze(ctrl); + nvme_wait_freeze(ctrl); + nvme_sync_queues(ctrl); + + if (ctrl->state != NVME_CTRL_LIVE && + ctrl->state != NVME_CTRL_ADMIN_ONLY) + goto unfreeze; + + ndev->last_ps = 0; + ret = nvme_get_power_state(ctrl, &ndev->last_ps); + if (ret < 0) + goto unfreeze; + + ret = nvme_set_power_state(ctrl, ctrl->npss); + if (ret < 0) + goto unfreeze; + + if (ret) { + /* + * Clearing npss forces a controller reset on resume. The + * correct value will be resdicovered then. + */ + nvme_dev_disable(ndev, true); + ctrl->npss = 0; + ret = 0; + goto unfreeze; + } + /* + * A saved state prevents pci pm from generically controlling the + * device's power. If we're using protocol specific settings, we don't + * want pci interfering. + */ + pci_save_state(pdev); +unfreeze: + nvme_unfreeze(ctrl); + return ret; +} + +static int nvme_simple_suspend(struct device *dev) +{ + struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); nvme_dev_disable(ndev, true); return 0; } -static int nvme_resume(struct device *dev) +static int nvme_simple_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); @@ -2852,9 +2932,16 @@ static int nvme_resume(struct device *dev) nvme_reset_ctrl(&ndev->ctrl); return 0; } -#endif -static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); +const struct dev_pm_ops nvme_dev_pm_ops = { + .suspend = nvme_suspend, + .resume = nvme_resume, + .freeze = nvme_simple_suspend, + .thaw = nvme_simple_resume, + .poweroff = nvme_simple_suspend, + .restore = nvme_simple_resume, +}; +#endif /* CONFIG_PM_SLEEP */ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev, pci_channel_state_t state) @@ -2959,9 +3046,11 @@ static struct pci_driver nvme_driver = { .probe = nvme_probe, .remove = nvme_remove, .shutdown = nvme_shutdown, +#ifdef CONFIG_PM_SLEEP .driver = { .pm = &nvme_dev_pm_ops, }, +#endif .sriov_configure = pci_sriov_configure_simple, .err_handler = &nvme_err_handler, }; From a232ea0ebffeaab48ec24cf795dcb07280a55ea1 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 9 Jun 2019 03:02:17 +0900 Subject: [PATCH 13/26] nvme-pci: remove unnecessary zero for static var poll_queues will be zero even without zero initialization here. Signed-off-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index eeae5789303a..02216b45613d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -80,7 +80,7 @@ MODULE_PARM_DESC(write_queues, "Number of queues to use for writes. If not set, reads and writes " "will share a queue set."); -static int poll_queues = 0; +static int poll_queues; module_param_cb(poll_queues, &queue_count_ops, &poll_queues, 0644); MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); From 483178f38cbe55a0b1854a93ceef715a0fc2ef9f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 9 Jun 2019 03:02:18 +0900 Subject: [PATCH 14/26] nvme-pci: remove queue_count_ops for write_queues and poll_queues queue_count_set() seems like that it has been provided to limit the number of queue entries for write/poll queues. But, the queue_count_set() has been doing nothing but a parameter check even it has num_possible_cpus() which is nop. This patch removes entire queue_count_ops from the write_queues and poll_queues. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 02216b45613d..007f8becde4a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -68,20 +68,14 @@ static int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); -static int queue_count_set(const char *val, const struct kernel_param *kp); -static const struct kernel_param_ops queue_count_ops = { - .set = queue_count_set, - .get = param_get_int, -}; - static int write_queues; -module_param_cb(write_queues, &queue_count_ops, &write_queues, 0644); +module_param(write_queues, int, 0644); MODULE_PARM_DESC(write_queues, "Number of queues to use for writes. If not set, reads and writes " "will share a queue set."); static int poll_queues; -module_param_cb(poll_queues, &queue_count_ops, &poll_queues, 0644); +module_param(poll_queues, int, 0644); MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); struct nvme_dev; @@ -146,19 +140,6 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp) return param_set_int(val, kp); } -static int queue_count_set(const char *val, const struct kernel_param *kp) -{ - int n, ret; - - ret = kstrtoint(val, 10, &n); - if (ret) - return ret; - if (n > num_possible_cpus()) - n = num_possible_cpus(); - - return param_set_int(val, kp); -} - static inline unsigned int sq_idx(unsigned int qid, u32 stride) { return qid * 2 * stride; From dad77d63903e91a2e97a0c984cabe5d36e91ba60 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 9 Jun 2019 03:02:19 +0900 Subject: [PATCH 15/26] nvme-pci: adjust irq max_vector using num_possible_cpus() If the "irq_queues" are greater than num_possible_cpus(), nvme_calc_irq_sets() can have irq set_size for HCTX_TYPE_DEFAULT greater than it can be afforded. 2039 affd->set_size[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; It might cause a WARN() from the irq_build_affinity_masks() like [1]: 220 if (nr_present < numvecs) 221 WARN_ON(nr_present + nr_others < numvecs); This patch prevents it from the WARN() by adjusting the max_vector value from the nvme_setup_irqs(). [1] WARN messages when modprobe nvme write_queues=32 poll_queues=0: root@target:~/nvme# nproc 8 root@target:~/nvme# modprobe nvme write_queues=32 poll_queues=0 [ 17.925326] nvme nvme0: pci function 0000:00:04.0 [ 17.940601] WARNING: CPU: 3 PID: 1030 at kernel/irq/affinity.c:221 irq_create_affinity_masks+0x222/0x330 [ 17.940602] Modules linked in: nvme nvme_core [last unloaded: nvme] [ 17.940605] CPU: 3 PID: 1030 Comm: kworker/u17:4 Tainted: G W 5.1.0+ #156 [ 17.940605] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 17.940608] Workqueue: nvme-reset-wq nvme_reset_work [nvme] [ 17.940609] RIP: 0010:irq_create_affinity_masks+0x222/0x330 [ 17.940611] Code: 4c 8d 4c 24 28 4c 8d 44 24 30 e8 c9 fa ff ff 89 44 24 18 e8 c0 38 fa ff 8b 44 24 18 44 8b 54 24 1c 5a 44 01 d0 41 39 c4 76 02 <0f> 0b 48 89 df 44 01 e5 e8 f1 ce 10 00 48 8b 34 24 44 89 f0 44 01 [ 17.940611] RSP: 0018:ffffc90002277c50 EFLAGS: 00010216 [ 17.940612] RAX: 0000000000000008 RBX: ffff88807ca48860 RCX: 0000000000000000 [ 17.940612] RDX: ffff88807bc03800 RSI: 0000000000000020 RDI: 0000000000000000 [ 17.940613] RBP: 0000000000000001 R08: ffffc90002277c78 R09: ffffc90002277c70 [ 17.940613] R10: 0000000000000008 R11: 0000000000000001 R12: 0000000000000020 [ 17.940614] R13: 0000000000025d08 R14: 0000000000000001 R15: ffff88807bc03800 [ 17.940614] FS: 0000000000000000(0000) GS:ffff88807db80000(0000) knlGS:0000000000000000 [ 17.940616] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 17.940617] CR2: 00005635e583f790 CR3: 000000000240a000 CR4: 00000000000006e0 [ 17.940617] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 17.940618] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 17.940618] Call Trace: [ 17.940622] __pci_enable_msix_range+0x215/0x540 [ 17.940623] ? kernfs_put+0x117/0x160 [ 17.940625] pci_alloc_irq_vectors_affinity+0x74/0x110 [ 17.940626] nvme_reset_work+0xc30/0x1397 [nvme] [ 17.940628] ? __switch_to_asm+0x34/0x70 [ 17.940628] ? __switch_to_asm+0x40/0x70 [ 17.940629] ? __switch_to_asm+0x34/0x70 [ 17.940630] ? __switch_to_asm+0x40/0x70 [ 17.940630] ? __switch_to_asm+0x34/0x70 [ 17.940631] ? __switch_to_asm+0x40/0x70 [ 17.940632] ? nvme_irq_check+0x30/0x30 [nvme] [ 17.940633] process_one_work+0x20b/0x3e0 [ 17.940634] worker_thread+0x1f9/0x3d0 [ 17.940635] ? cancel_delayed_work+0xa0/0xa0 [ 17.940636] kthread+0x117/0x120 [ 17.940637] ? kthread_stop+0xf0/0xf0 [ 17.940638] ret_from_fork+0x3a/0x50 [ 17.940639] ---[ end trace aca8a131361cd42a ]--- [ 17.942124] nvme nvme0: 7/1/0 default/read/poll queues Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 007f8becde4a..c98b73da38e2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2051,6 +2051,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) .priv = dev, }; unsigned int irq_queues, this_p_queues; + unsigned int nr_cpus = num_possible_cpus(); /* * Poll queues don't need interrupts, but we need at least one IO @@ -2061,7 +2062,10 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) this_p_queues = nr_io_queues - 1; irq_queues = 1; } else { - irq_queues = nr_io_queues - this_p_queues + 1; + if (nr_cpus < nr_io_queues - this_p_queues) + irq_queues = nr_cpus + 1; + else + irq_queues = nr_io_queues - this_p_queues + 1; } dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; From e71afda49335620e3d9adf56015676db33a3bd86 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sat, 8 Jun 2019 13:01:02 -0700 Subject: [PATCH 16/26] nvme-pci: set the errno on ctrl state change error This patch removes the confusing assignment of the variable result at the time of declaration and sets the value in error cases next to the places where the actual error is happening. Here we also set the result value to -ENODEV when we fail at the final ctrl state transition in nvme_reset_work(). Without this assignment result will hold 0 from nvme_setup_io_queue() and on failure 0 will be passed to he nvme_remove_dead_ctrl() from final state transition. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c98b73da38e2..092c8403b306 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2467,11 +2467,13 @@ static void nvme_reset_work(struct work_struct *work) struct nvme_dev *dev = container_of(work, struct nvme_dev, ctrl.reset_work); bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); - int result = -ENODEV; + int result; enum nvme_ctrl_state new_state = NVME_CTRL_LIVE; - if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) + if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) { + result = -ENODEV; goto out; + } /* * If we're called to reset a live controller first shut it down before @@ -2575,6 +2577,7 @@ static void nvme_reset_work(struct work_struct *work) if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) { dev_warn(dev->ctrl.device, "failed to mark controller state %d\n", new_state); + result = -ENODEV; goto out; } From cee6c269b016ba89c62e34d6bccb103ee2c7de4f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 9 Jun 2019 03:35:20 +0900 Subject: [PATCH 17/26] nvme-pci: properly report state change failure in nvme_reset_work If the state change to NVME_CTRL_CONNECTING fails, the dmesg is going to be like: [ 293.689160] nvme nvme0: failed to mark controller CONNECTING [ 293.689160] nvme nvme0: Removing after probe failure status: 0 Even it prints the first line to indicate the situation, the second line is not proper because the status is 0 which means normally success of the previous operation. This patch makes it indicate the proper error value when it fails. [ 25.932367] nvme nvme0: failed to mark controller CONNECTING [ 25.932369] nvme nvme0: Removing after probe failure status: -16 This situation is able to be easily reproduced by: root@target:~# rmmod nvme && modprobe nvme && rmmod nvme Signed-off-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 092c8403b306..d308ae7e2e11 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2517,6 +2517,7 @@ static void nvme_reset_work(struct work_struct *work) if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { dev_warn(dev->ctrl.device, "failed to mark controller CONNECTING\n"); + result = -EBUSY; goto out; } From 7c1ce408eb320b3d4051570d167852ffbd7778ce Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sat, 8 Jun 2019 13:16:32 -0700 Subject: [PATCH 18/26] nvme-pci: clean up nvme_remove_dead_ctrl a bit Remove the status parameter o nvme_remove_dead_ctrl(), which is only used for printing it. We move the print message to the same function where actual error is occurring. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d308ae7e2e11..189352081994 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2451,10 +2451,8 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) kfree(dev); } -static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) +static void nvme_remove_dead_ctrl(struct nvme_dev *dev) { - dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status); - nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); nvme_kill_queues(&dev->ctrl); @@ -2588,7 +2586,10 @@ static void nvme_reset_work(struct work_struct *work) out_unlock: mutex_unlock(&dev->shutdown_lock); out: - nvme_remove_dead_ctrl(dev, result); + if (result) + dev_warn(dev->ctrl.device, + "Removing after probe failure status: %d\n", result); + nvme_remove_dead_ctrl(dev); } static void nvme_remove_dead_ctrl_work(struct work_struct *work) From 7183a46a4879b1640ed428334a8468f3f9b0a4bb Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:29 +0900 Subject: [PATCH 19/26] nvme-trace: do not export nvme_trace_disk_name nvme_trace_disk_name() is now already being invoked with the function prototype in trace.h. We don't need to export this symbol at all. The following patches are going to provide target-side trace feature with the exactly same function with this so that this patch removes the EXPORT_SYMBOL() for this function. Signed-off-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 5f24ea7a28eb..14b0d2993cbe 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -145,6 +145,5 @@ const char *nvme_trace_disk_name(struct trace_seq *p, char *name) return ret; } -EXPORT_SYMBOL_GPL(nvme_trace_disk_name); EXPORT_TRACEPOINT_SYMBOL_GPL(nvme_sq); From 26f2990d85838caa650744a0ded9e38988a2bd7f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:30 +0900 Subject: [PATCH 20/26] nvme-trace: move opcode symbol print to nvme.h The following patches are going to provide the target-side trace which might need these kind of macros. It would be great if it can be shared between host and target side both. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.h | 44 -------------------------------------- include/linux/nvme.h | 45 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 44 deletions(-) diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index e71502d141ed..62ee29107c32 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -16,50 +16,6 @@ #include "nvme.h" -#define nvme_admin_opcode_name(opcode) { opcode, #opcode } -#define show_admin_opcode_name(val) \ - __print_symbolic(val, \ - nvme_admin_opcode_name(nvme_admin_delete_sq), \ - nvme_admin_opcode_name(nvme_admin_create_sq), \ - nvme_admin_opcode_name(nvme_admin_get_log_page), \ - nvme_admin_opcode_name(nvme_admin_delete_cq), \ - nvme_admin_opcode_name(nvme_admin_create_cq), \ - nvme_admin_opcode_name(nvme_admin_identify), \ - nvme_admin_opcode_name(nvme_admin_abort_cmd), \ - nvme_admin_opcode_name(nvme_admin_set_features), \ - nvme_admin_opcode_name(nvme_admin_get_features), \ - nvme_admin_opcode_name(nvme_admin_async_event), \ - nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ - nvme_admin_opcode_name(nvme_admin_activate_fw), \ - nvme_admin_opcode_name(nvme_admin_download_fw), \ - nvme_admin_opcode_name(nvme_admin_ns_attach), \ - nvme_admin_opcode_name(nvme_admin_keep_alive), \ - nvme_admin_opcode_name(nvme_admin_directive_send), \ - nvme_admin_opcode_name(nvme_admin_directive_recv), \ - nvme_admin_opcode_name(nvme_admin_dbbuf), \ - nvme_admin_opcode_name(nvme_admin_format_nvm), \ - nvme_admin_opcode_name(nvme_admin_security_send), \ - nvme_admin_opcode_name(nvme_admin_security_recv), \ - nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) - -#define nvme_opcode_name(opcode) { opcode, #opcode } -#define show_nvm_opcode_name(val) \ - __print_symbolic(val, \ - nvme_opcode_name(nvme_cmd_flush), \ - nvme_opcode_name(nvme_cmd_write), \ - nvme_opcode_name(nvme_cmd_read), \ - nvme_opcode_name(nvme_cmd_write_uncor), \ - nvme_opcode_name(nvme_cmd_compare), \ - nvme_opcode_name(nvme_cmd_write_zeroes), \ - nvme_opcode_name(nvme_cmd_dsm), \ - nvme_opcode_name(nvme_cmd_resv_register), \ - nvme_opcode_name(nvme_cmd_resv_report), \ - nvme_opcode_name(nvme_cmd_resv_acquire), \ - nvme_opcode_name(nvme_cmd_resv_release)) - -#define show_opcode_name(qid, opcode) \ - (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) - const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10); const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7080923e78d1..86b3d04baf20 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -562,6 +562,22 @@ enum nvme_opcode { nvme_cmd_resv_release = 0x15, }; +#define nvme_opcode_name(opcode) { opcode, #opcode } +#define show_nvm_opcode_name(val) \ + __print_symbolic(val, \ + nvme_opcode_name(nvme_cmd_flush), \ + nvme_opcode_name(nvme_cmd_write), \ + nvme_opcode_name(nvme_cmd_read), \ + nvme_opcode_name(nvme_cmd_write_uncor), \ + nvme_opcode_name(nvme_cmd_compare), \ + nvme_opcode_name(nvme_cmd_write_zeroes), \ + nvme_opcode_name(nvme_cmd_dsm), \ + nvme_opcode_name(nvme_cmd_resv_register), \ + nvme_opcode_name(nvme_cmd_resv_report), \ + nvme_opcode_name(nvme_cmd_resv_acquire), \ + nvme_opcode_name(nvme_cmd_resv_release)) + + /* * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier * @@ -794,6 +810,35 @@ enum nvme_admin_opcode { nvme_admin_sanitize_nvm = 0x84, }; +#define nvme_admin_opcode_name(opcode) { opcode, #opcode } +#define show_admin_opcode_name(val) \ + __print_symbolic(val, \ + nvme_admin_opcode_name(nvme_admin_delete_sq), \ + nvme_admin_opcode_name(nvme_admin_create_sq), \ + nvme_admin_opcode_name(nvme_admin_get_log_page), \ + nvme_admin_opcode_name(nvme_admin_delete_cq), \ + nvme_admin_opcode_name(nvme_admin_create_cq), \ + nvme_admin_opcode_name(nvme_admin_identify), \ + nvme_admin_opcode_name(nvme_admin_abort_cmd), \ + nvme_admin_opcode_name(nvme_admin_set_features), \ + nvme_admin_opcode_name(nvme_admin_get_features), \ + nvme_admin_opcode_name(nvme_admin_async_event), \ + nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ + nvme_admin_opcode_name(nvme_admin_activate_fw), \ + nvme_admin_opcode_name(nvme_admin_download_fw), \ + nvme_admin_opcode_name(nvme_admin_ns_attach), \ + nvme_admin_opcode_name(nvme_admin_keep_alive), \ + nvme_admin_opcode_name(nvme_admin_directive_send), \ + nvme_admin_opcode_name(nvme_admin_directive_recv), \ + nvme_admin_opcode_name(nvme_admin_dbbuf), \ + nvme_admin_opcode_name(nvme_admin_format_nvm), \ + nvme_admin_opcode_name(nvme_admin_security_send), \ + nvme_admin_opcode_name(nvme_admin_security_recv), \ + nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) + +#define show_opcode_name(qid, opcode) \ + (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) + enum { NVME_QUEUE_PHYS_CONTIG = (1 << 0), NVME_CQ_IRQ_ENABLED = (1 << 1), From ad795e47cdef078bfd9e48745040d12104005aab Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:31 +0900 Subject: [PATCH 21/26] nvme-trace: support for fabrics commands in host-side This patch introduces fabrics commands tracing feature from host-side. This patch does not include any changes for the previous host-side tracing, but just add fabrics commands parsing in cmd=() format. Signed-off-by: Minwoo Im [hch: fixed some whitespace damage] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.c | 63 +++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/trace.h | 20 +++++++++---- include/linux/nvme.h | 20 +++++++++++-- 3 files changed, 94 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 14b0d2993cbe..f01ad0fd60bb 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -135,6 +135,69 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, } } +static const char *nvme_trace_fabrics_property_set(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 attrib = spc[0]; + u32 ofst = get_unaligned_le32(spc + 4); + u64 value = get_unaligned_le64(spc + 8); + + trace_seq_printf(p, "attrib=%u, ofst=0x%x, value=0x%llx", + attrib, ofst, value); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvme_trace_fabrics_connect(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u16 recfmt = get_unaligned_le16(spc); + u16 qid = get_unaligned_le16(spc + 2); + u16 sqsize = get_unaligned_le16(spc + 4); + u8 cattr = spc[6]; + u32 kato = get_unaligned_le32(spc + 8); + + trace_seq_printf(p, "recfmt=%u, qid=%u, sqsize=%u, cattr=%u, kato=%u", + recfmt, qid, sqsize, cattr, kato); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvme_trace_fabrics_property_get(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 attrib = spc[0]; + u32 ofst = get_unaligned_le32(spc + 4); + + trace_seq_printf(p, "attrib=%u, ofst=0x%x", attrib, ofst); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "spcecific=%*ph", 24, spc); + trace_seq_putc(p, 0); + return ret; +} + +const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p, + u8 fctype, u8 *spc) +{ + switch (fctype) { + case nvme_fabrics_type_property_set: + return nvme_trace_fabrics_property_set(p, spc); + case nvme_fabrics_type_connect: + return nvme_trace_fabrics_connect(p, spc); + case nvme_fabrics_type_property_get: + return nvme_trace_fabrics_property_get(p, spc); + default: + return nvme_trace_fabrics_common(p, spc); + } +} + const char *nvme_trace_disk_name(struct trace_seq *p, char *name) { const char *ret = trace_seq_buffer_ptr(p); diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 62ee29107c32..19a18c87fb7b 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -20,11 +20,15 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10); const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10); +const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype, + u8 *spc); -#define parse_nvme_cmd(qid, opcode, cdw10) \ - (qid ? \ - nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \ - nvme_trace_parse_admin_cmd(p, opcode, cdw10)) +#define parse_nvme_cmd(qid, opcode, fctype, cdw10) \ + ((opcode) == nvme_fabrics_command ? \ + nvme_trace_parse_fabrics_cmd(p, fctype, cdw10) : \ + ((qid) ? \ + nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \ + nvme_trace_parse_admin_cmd(p, opcode, cdw10))) const char *nvme_trace_disk_name(struct trace_seq *p, char *name); #define __print_disk_name(name) \ @@ -49,6 +53,7 @@ TRACE_EVENT(nvme_setup_cmd, __field(int, qid) __field(u8, opcode) __field(u8, flags) + __field(u8, fctype) __field(u16, cid) __field(u32, nsid) __field(u64, metadata) @@ -62,6 +67,7 @@ TRACE_EVENT(nvme_setup_cmd, __entry->cid = cmd->common.command_id; __entry->nsid = le32_to_cpu(cmd->common.nsid); __entry->metadata = le64_to_cpu(cmd->common.metadata); + __entry->fctype = cmd->fabrics.fctype; __assign_disk_name(__entry->disk, req->rq_disk); memcpy(__entry->cdw10, &cmd->common.cdw10, sizeof(__entry->cdw10)); @@ -70,8 +76,10 @@ TRACE_EVENT(nvme_setup_cmd, __entry->ctrl_id, __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->nsid, __entry->flags, __entry->metadata, - show_opcode_name(__entry->qid, __entry->opcode), - parse_nvme_cmd(__entry->qid, __entry->opcode, __entry->cdw10)) + show_opcode_name(__entry->qid, __entry->opcode, + __entry->fctype), + parse_nvme_cmd(__entry->qid, __entry->opcode, + __entry->fctype, __entry->cdw10)) ); TRACE_EVENT(nvme_complete_rq, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 86b3d04baf20..d98b2d8baf4e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -836,9 +836,6 @@ enum nvme_admin_opcode { nvme_admin_opcode_name(nvme_admin_security_recv), \ nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) -#define show_opcode_name(qid, opcode) \ - (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) - enum { NVME_QUEUE_PHYS_CONTIG = (1 << 0), NVME_CQ_IRQ_ENABLED = (1 << 1), @@ -1053,6 +1050,23 @@ enum nvmf_capsule_command { nvme_fabrics_type_property_get = 0x04, }; +#define nvme_fabrics_type_name(type) { type, #type } +#define show_fabrics_type_name(type) \ + __print_symbolic(type, \ + nvme_fabrics_type_name(nvme_fabrics_type_property_set), \ + nvme_fabrics_type_name(nvme_fabrics_type_connect), \ + nvme_fabrics_type_name(nvme_fabrics_type_property_get)) + +/* + * If not fabrics command, fctype will be ignored. + */ +#define show_opcode_name(qid, opcode, fctype) \ + ((opcode) == nvme_fabrics_command ? \ + show_fabrics_type_name(fctype) : \ + ((qid) ? \ + show_nvm_opcode_name(opcode) : \ + show_admin_opcode_name(opcode))) + struct nvmf_common_command { __u8 opcode; __u8 resv1; From 5f965f4fd92303066b319db4b4bbdb53cb924582 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:32 +0900 Subject: [PATCH 22/26] nvme-trace: print result and status in hex format The "result" field is in 64bit to be printed out which means it could be like: nvme_complete_rq: nvme0: qid=0, cmdid=0, res=18446612684158962624, etries=0, flags=0x0, status=0 Switch both the result and status field to be printed in hexadecimal format to be easier to read. Signed-off-by: Minwoo Im Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 19a18c87fb7b..daaf700eae79 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -105,7 +105,7 @@ TRACE_EVENT(nvme_complete_rq, __entry->status = nvme_req(req)->status; __assign_disk_name(__entry->disk, req->rq_disk); ), - TP_printk("nvme%d: %sqid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u", + TP_printk("nvme%d: %sqid=%d, cmdid=%u, res=%#llx, retries=%u, flags=0x%x, status=%#x", __entry->ctrl_id, __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->result, __entry->retries, __entry->flags, __entry->status) From a5448fdc469d67da99728a132229aba5fce8f67a Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:33 +0900 Subject: [PATCH 23/26] nvmet: introduce target-side trace This patch introduces target-side request tracing. As Christoph suggested, the trace would not be in a core or module to avoid disadvantages like cache miss: http://lists.infradead.org/pipermail/linux-nvme/2019-June/024721.html The target-side trace code is entirely based on the Johannes's trace code from the host side. It has lots of codes duplicated, but it would be better than having advantages mentioned above. It also traces not only fabrics commands, but also nvme normal commands. Once the codes to be shared gets bigger, then we can make it common as suggsted. This also removed the create_sq and create_cq trace parsing functions because it will be done by the connect fabrics command. Example: echo 1 > /sys/kernel/debug/tracing/event/nvmet/nvmet_req_init/enable echo 1 > /sys/kernel/debug/tracing/event/nvmet/nvmet_req_complete/enable cat /sys/kernel/debug/tracing/trace Signed-off-by: Minwoo Im [hch: fixed the symbol namespace and a an endianess conversion] Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Makefile | 3 + drivers/nvme/target/core.c | 8 ++ drivers/nvme/target/trace.c | 201 +++++++++++++++++++++++++++++++++++ drivers/nvme/target/trace.h | 141 ++++++++++++++++++++++++ 4 files changed, 353 insertions(+) create mode 100644 drivers/nvme/target/trace.c create mode 100644 drivers/nvme/target/trace.h diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile index 8c3ad0fb6860..2b33836f3d3e 100644 --- a/drivers/nvme/target/Makefile +++ b/drivers/nvme/target/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +ccflags-y += -I$(src) + obj-$(CONFIG_NVME_TARGET) += nvmet.o obj-$(CONFIG_NVME_TARGET_LOOP) += nvme-loop.o obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o @@ -14,3 +16,4 @@ nvmet-rdma-y += rdma.o nvmet-fc-y += fc.o nvme-fcloop-y += fcloop.o nvmet-tcp-y += tcp.o +nvmet-$(CONFIG_TRACING) += trace.o diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 0587707b1a25..dad0243c7c96 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -10,6 +10,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + #include "nvmet.h" struct workqueue_struct *buffered_io_wq; @@ -691,6 +694,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status) if (unlikely(status)) nvmet_set_error(req, status); + + trace_nvmet_req_complete(req); + if (req->ns) nvmet_put_namespace(req->ns); req->ops->queue_response(req); @@ -850,6 +856,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->error_loc = NVMET_NO_ERROR_LOC; req->error_slba = 0; + trace_nvmet_req_init(req, req->cmd); + /* no support for fused commands yet */ if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { req->error_loc = offsetof(struct nvme_common_command, flags); diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c new file mode 100644 index 000000000000..cdcdd14c6408 --- /dev/null +++ b/drivers/nvme/target/trace.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVM Express target device driver tracepoints + * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH + */ + +#include +#include "trace.h" + +static const char *nvmet_trace_admin_identify(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 cns = cdw10[0]; + u16 ctrlid = get_unaligned_le16(cdw10 + 2); + + trace_seq_printf(p, "cns=%u, ctrlid=%u", cns, ctrlid); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvmet_trace_admin_get_features(struct trace_seq *p, + u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 fid = cdw10[0]; + u8 sel = cdw10[1] & 0x7; + u32 cdw11 = get_unaligned_le32(cdw10 + 4); + + trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u64 slba = get_unaligned_le64(cdw10); + u16 length = get_unaligned_le16(cdw10 + 8); + u16 control = get_unaligned_le16(cdw10 + 10); + u32 dsmgmt = get_unaligned_le32(cdw10 + 12); + u32 reftag = get_unaligned_le32(cdw10 + 16); + + trace_seq_printf(p, + "slba=%llu, len=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u", + slba, length, control, dsmgmt, reftag); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvmet_trace_dsm(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "nr=%u, attributes=%u", + get_unaligned_le32(cdw10), + get_unaligned_le32(cdw10 + 4)); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvmet_trace_common(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "cdw10=%*ph", 24, cdw10); + trace_seq_putc(p, 0); + + return ret; +} + +const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p, + u8 opcode, u8 *cdw10) +{ + switch (opcode) { + case nvme_admin_identify: + return nvmet_trace_admin_identify(p, cdw10); + case nvme_admin_get_features: + return nvmet_trace_admin_get_features(p, cdw10); + default: + return nvmet_trace_common(p, cdw10); + } +} + +const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p, + u8 opcode, u8 *cdw10) +{ + switch (opcode) { + case nvme_cmd_read: + case nvme_cmd_write: + case nvme_cmd_write_zeroes: + return nvmet_trace_read_write(p, cdw10); + case nvme_cmd_dsm: + return nvmet_trace_dsm(p, cdw10); + default: + return nvmet_trace_common(p, cdw10); + } +} + +static const char *nvmet_trace_fabrics_property_set(struct trace_seq *p, + u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 attrib = spc[0]; + u32 ofst = get_unaligned_le32(spc + 4); + u64 value = get_unaligned_le64(spc + 8); + + trace_seq_printf(p, "attrib=%u, ofst=0x%x, value=0x%llx", + attrib, ofst, value); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvmet_trace_fabrics_connect(struct trace_seq *p, + u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u16 recfmt = get_unaligned_le16(spc); + u16 qid = get_unaligned_le16(spc + 2); + u16 sqsize = get_unaligned_le16(spc + 4); + u8 cattr = spc[6]; + u32 kato = get_unaligned_le32(spc + 8); + + trace_seq_printf(p, "recfmt=%u, qid=%u, sqsize=%u, cattr=%u, kato=%u", + recfmt, qid, sqsize, cattr, kato); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvmet_trace_fabrics_property_get(struct trace_seq *p, + u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 attrib = spc[0]; + u32 ofst = get_unaligned_le32(spc + 4); + + trace_seq_printf(p, "attrib=%u, ofst=0x%x", attrib, ofst); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvmet_trace_fabrics_common(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "spcecific=%*ph", 24, spc); + trace_seq_putc(p, 0); + return ret; +} + +const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, + u8 fctype, u8 *spc) +{ + switch (fctype) { + case nvme_fabrics_type_property_set: + return nvmet_trace_fabrics_property_set(p, spc); + case nvme_fabrics_type_connect: + return nvmet_trace_fabrics_connect(p, spc); + case nvme_fabrics_type_property_get: + return nvmet_trace_fabrics_property_get(p, spc); + default: + return nvmet_trace_fabrics_common(p, spc); + } +} + +const char *nvmet_trace_disk_name(struct trace_seq *p, char *name) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (*name) + trace_seq_printf(p, "disk=%s, ", name); + trace_seq_putc(p, 0); + + return ret; +} + +const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl) +{ + const char *ret = trace_seq_buffer_ptr(p); + + /* + * XXX: We don't know the controller instance before executing the + * connect command itself because the connect command for the admin + * queue will not provide the cntlid which will be allocated in this + * command. In case of io queues, the controller instance will be + * mapped by the extra data of the connect command. + * If we can know the extra data of the connect command in this stage, + * we can update this print statement later. + */ + if (ctrl) + trace_seq_printf(p, "%d", ctrl->cntlid); + else + trace_seq_printf(p, "_"); + trace_seq_putc(p, 0); + + return ret; +} + diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h new file mode 100644 index 000000000000..e645caa882dd --- /dev/null +++ b/drivers/nvme/target/trace.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * NVM Express target device driver tracepoints + * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH + * + * This is entirely based on drivers/nvme/host/trace.h + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nvmet + +#if !defined(_TRACE_NVMET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NVMET_H + +#include +#include +#include + +#include "nvmet.h" + +const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, + u8 *cdw10); +const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, + u8 *cdw10); +const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype, + u8 *spc); + +#define parse_nvme_cmd(qid, opcode, fctype, cdw10) \ + ((opcode) == nvme_fabrics_command ? \ + nvmet_trace_parse_fabrics_cmd(p, fctype, cdw10) : \ + (qid ? \ + nvmet_trace_parse_nvm_cmd(p, opcode, cdw10) : \ + nvmet_trace_parse_admin_cmd(p, opcode, cdw10))) + +const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl); +#define __print_ctrl_name(ctrl) \ + nvmet_trace_ctrl_name(p, ctrl) + +const char *nvmet_trace_disk_name(struct trace_seq *p, char *name); +#define __print_disk_name(name) \ + nvmet_trace_disk_name(p, name) + +#ifndef TRACE_HEADER_MULTI_READ +static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req) +{ + return req->sq->ctrl; +} + +static inline void __assign_disk_name(char *name, struct nvmet_req *req, + bool init) +{ + struct nvmet_ctrl *ctrl = nvmet_req_to_ctrl(req); + struct nvmet_ns *ns; + + if ((init && req->sq->qid) || (!init && req->cq->qid)) { + ns = nvmet_find_namespace(ctrl, req->cmd->rw.nsid); + strncpy(name, ns->device_path, DISK_NAME_LEN); + return; + } + + memset(name, 0, DISK_NAME_LEN); +} +#endif + +TRACE_EVENT(nvmet_req_init, + TP_PROTO(struct nvmet_req *req, struct nvme_command *cmd), + TP_ARGS(req, cmd), + TP_STRUCT__entry( + __field(struct nvme_command *, cmd) + __field(struct nvmet_ctrl *, ctrl) + __array(char, disk, DISK_NAME_LEN) + __field(int, qid) + __field(u16, cid) + __field(u8, opcode) + __field(u8, fctype) + __field(u8, flags) + __field(u32, nsid) + __field(u64, metadata) + __array(u8, cdw10, 24) + ), + TP_fast_assign( + __entry->cmd = cmd; + __entry->ctrl = nvmet_req_to_ctrl(req); + __assign_disk_name(__entry->disk, req, true); + __entry->qid = req->sq->qid; + __entry->cid = cmd->common.command_id; + __entry->opcode = cmd->common.opcode; + __entry->fctype = cmd->fabrics.fctype; + __entry->flags = cmd->common.flags; + __entry->nsid = le32_to_cpu(cmd->common.nsid); + __entry->metadata = le64_to_cpu(cmd->common.metadata); + memcpy(__entry->cdw10, &cmd->common.cdw10, + sizeof(__entry->cdw10)); + ), + TP_printk("nvmet%s: %sqid=%d, cmdid=%u, nsid=%u, flags=%#x, " + "meta=%#llx, cmd=(%s, %s)", + __print_ctrl_name(__entry->ctrl), + __print_disk_name(__entry->disk), + __entry->qid, __entry->cid, __entry->nsid, + __entry->flags, __entry->metadata, + show_opcode_name(__entry->qid, __entry->opcode, + __entry->fctype), + parse_nvme_cmd(__entry->qid, __entry->opcode, + __entry->fctype, __entry->cdw10)) +); + +TRACE_EVENT(nvmet_req_complete, + TP_PROTO(struct nvmet_req *req), + TP_ARGS(req), + TP_STRUCT__entry( + __field(struct nvmet_ctrl *, ctrl) + __array(char, disk, DISK_NAME_LEN) + __field(int, qid) + __field(int, cid) + __field(u64, result) + __field(u16, status) + ), + TP_fast_assign( + __entry->ctrl = nvmet_req_to_ctrl(req); + __entry->qid = req->cq->qid; + __entry->cid = req->cqe->command_id; + __entry->result = le64_to_cpu(req->cqe->result.u64); + __entry->status = le16_to_cpu(req->cqe->status) >> 1; + __assign_disk_name(__entry->disk, req, false); + ), + TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x", + __print_ctrl_name(__entry->ctrl), + __print_disk_name(__entry->disk), + __entry->qid, __entry->cid, __entry->result, __entry->status) + +); + +#endif /* _TRACE_NVMET_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include From a3646451edd52ba238cbe4f618aaf6eb9bf9d60c Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 20 Jun 2019 08:49:02 +0200 Subject: [PATCH 24/26] nvme: prepare for fault injection into admin commands Currenlty fault injection support for nvme only enables to inject errors into the commands submitted to I/O queues. In preparation for fault injection into the admin commands, this makes the helper functions independent of struct nvme_ns. Signed-off-by: Akinobu Mita Reviewed-by: Minwoo Im Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/fault_inject.c | 36 ++++++++++++++++++-------------- drivers/nvme/host/nvme.h | 34 +++++++++++++++++------------- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3b3960e0c31f..625605f8a0b5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3336,7 +3336,7 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); nvme_mpath_add_disk(ns, id); - nvme_fault_inject_init(ns); + nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); kfree(id); return 0; @@ -3361,7 +3361,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; - nvme_fault_inject_fini(ns); + nvme_fault_inject_fini(&ns->fault_inject); mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c index 4cfd2c9222d4..e37b8c2fddea 100644 --- a/drivers/nvme/host/fault_inject.c +++ b/drivers/nvme/host/fault_inject.c @@ -15,11 +15,10 @@ static DECLARE_FAULT_ATTR(fail_default_attr); static char *fail_request; module_param(fail_request, charp, 0000); -void nvme_fault_inject_init(struct nvme_ns *ns) +void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj, + const char *dev_name) { struct dentry *dir, *parent; - char *name = ns->disk->disk_name; - struct nvme_fault_inject *fault_inj = &ns->fault_inject; struct fault_attr *attr = &fault_inj->attr; /* set default fault injection attribute */ @@ -27,20 +26,20 @@ void nvme_fault_inject_init(struct nvme_ns *ns) setup_fault_attr(&fail_default_attr, fail_request); /* create debugfs directory and attribute */ - parent = debugfs_create_dir(name, NULL); + parent = debugfs_create_dir(dev_name, NULL); if (!parent) { - pr_warn("%s: failed to create debugfs directory\n", name); + pr_warn("%s: failed to create debugfs directory\n", dev_name); return; } *attr = fail_default_attr; dir = fault_create_debugfs_attr("fault_inject", parent, attr); if (IS_ERR(dir)) { - pr_warn("%s: failed to create debugfs attr\n", name); + pr_warn("%s: failed to create debugfs attr\n", dev_name); debugfs_remove_recursive(parent); return; } - ns->fault_inject.parent = parent; + fault_inj->parent = parent; /* create debugfs for status code and dont_retry */ fault_inj->status = NVME_SC_INVALID_OPCODE; @@ -49,29 +48,34 @@ void nvme_fault_inject_init(struct nvme_ns *ns) debugfs_create_bool("dont_retry", 0600, dir, &fault_inj->dont_retry); } -void nvme_fault_inject_fini(struct nvme_ns *ns) +void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inject) { /* remove debugfs directories */ - debugfs_remove_recursive(ns->fault_inject.parent); + debugfs_remove_recursive(fault_inject->parent); } void nvme_should_fail(struct request *req) { struct gendisk *disk = req->rq_disk; - struct nvme_ns *ns = NULL; + struct nvme_fault_inject *fault_inject = NULL; u16 status; /* * make sure this request is coming from a valid namespace */ - if (!disk) - return; + if (disk) { + struct nvme_ns *ns = disk->private_data; - ns = disk->private_data; - if (ns && should_fail(&ns->fault_inject.attr, 1)) { + if (ns) + fault_inject = &ns->fault_inject; + else + WARN_ONCE(1, "No namespace found for request\n"); + } + + if (fault_inject && should_fail(&fault_inject->attr, 1)) { /* inject status code and DNR bit */ - status = ns->fault_inject.status; - if (ns->fault_inject.dont_retry) + status = fault_inject->status; + if (fault_inject->dont_retry) status |= NVME_SC_DNR; nvme_req(req)->status = status; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 038b8931d9e5..8f907576efb6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -146,6 +146,15 @@ enum nvme_ctrl_state { NVME_CTRL_DEAD, }; +struct nvme_fault_inject { +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + struct fault_attr attr; + struct dentry *parent; + bool dont_retry; /* DNR, do not retry */ + u16 status; /* status code */ +#endif +}; + struct nvme_ctrl { bool comp_seen; enum nvme_ctrl_state state; @@ -313,15 +322,6 @@ struct nvme_ns_head { #endif }; -#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS -struct nvme_fault_inject { - struct fault_attr attr; - struct dentry *parent; - bool dont_retry; /* DNR, do not retry */ - u16 status; /* status code */ -}; -#endif - struct nvme_ns { struct list_head list; @@ -349,9 +349,7 @@ struct nvme_ns { #define NVME_NS_ANA_PENDING 2 u16 noiob; -#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS struct nvme_fault_inject fault_inject; -#endif }; @@ -372,12 +370,18 @@ struct nvme_ctrl_ops { }; #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS -void nvme_fault_inject_init(struct nvme_ns *ns); -void nvme_fault_inject_fini(struct nvme_ns *ns); +void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj, + const char *dev_name); +void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inject); void nvme_should_fail(struct request *req); #else -static inline void nvme_fault_inject_init(struct nvme_ns *ns) {} -static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {} +static inline void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj, + const char *dev_name) +{ +} +static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj) +{ +} static inline void nvme_should_fail(struct request *req) {} #endif From f79d5fda4ea08c33a114087573d86f703149ee0e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 9 Jun 2019 23:17:01 +0900 Subject: [PATCH 25/26] nvme: enable to inject errors into admin commands This enables to inject errors into the commands submitted to the admin queue. It is useful to test error handling in the controller initialization. # echo 100 > /sys/kernel/debug/nvme0/fault_inject/probability # echo 1 > /sys/kernel/debug/nvme0/fault_inject/times # echo 10 > /sys/kernel/debug/nvme0/fault_inject/space # nvme reset /dev/nvme0 # dmesg ... nvme nvme0: Could not set queue count (16385) nvme nvme0: IO queues not created Signed-off-by: Akinobu Mita Reviewed-by: Minwoo Im Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 +++ drivers/nvme/host/fault_inject.c | 5 ++--- drivers/nvme/host/nvme.h | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 625605f8a0b5..b2dd4e391f5c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3722,6 +3722,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { + nvme_fault_inject_fini(&ctrl->fault_inject); dev_pm_qos_hide_latency_tolerance(ctrl->device); cdev_device_del(&ctrl->cdev, ctrl->device); } @@ -3817,6 +3818,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, dev_pm_qos_update_user_latency_tolerance(ctrl->device, min(default_ps_max_latency_us, (unsigned long)S32_MAX)); + nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device)); + return 0; out_free_name: kfree_const(ctrl->device->kobj.name); diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c index e37b8c2fddea..1352159733b0 100644 --- a/drivers/nvme/host/fault_inject.c +++ b/drivers/nvme/host/fault_inject.c @@ -60,9 +60,6 @@ void nvme_should_fail(struct request *req) struct nvme_fault_inject *fault_inject = NULL; u16 status; - /* - * make sure this request is coming from a valid namespace - */ if (disk) { struct nvme_ns *ns = disk->private_data; @@ -70,6 +67,8 @@ void nvme_should_fail(struct request *req) fault_inject = &ns->fault_inject; else WARN_ONCE(1, "No namespace found for request\n"); + } else { + fault_inject = &nvme_req(req)->ctrl->fault_inject; } if (fault_inject && should_fail(&fault_inject->attr, 1)) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 8f907576efb6..ea45d7d393ad 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -256,6 +256,8 @@ struct nvme_ctrl { struct page *discard_page; unsigned long discard_page_busy; + + struct nvme_fault_inject fault_inject; }; enum nvme_iopolicy { From 7e31d8215fd8cb1c13b47e23f1136545010e00de Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 9 Jun 2019 23:17:02 +0900 Subject: [PATCH 26/26] Documentation: nvme: add an example for nvme fault injection This adds an example of how to inject errors into admin commands. Suggested-by: Thomas Tai Signed-off-by: Akinobu Mita Reviewed-by: Chaitanya Kulkarni Reviewed-by: Minwoo Im Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- .../fault-injection/nvme-fault-injection.txt | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/Documentation/fault-injection/nvme-fault-injection.txt b/Documentation/fault-injection/nvme-fault-injection.txt index 8fbf3bf60b62..efcb339a3add 100644 --- a/Documentation/fault-injection/nvme-fault-injection.txt +++ b/Documentation/fault-injection/nvme-fault-injection.txt @@ -114,3 +114,59 @@ R13: ffff88011a3c9680 R14: 0000000000000000 R15: 0000000000000000 cpu_startup_entry+0x6f/0x80 start_secondary+0x187/0x1e0 secondary_startup_64+0xa5/0xb0 + +Example 3: Inject an error into the 10th admin command +------------------------------------------------------ + +echo 100 > /sys/kernel/debug/nvme0/fault_inject/probability +echo 10 > /sys/kernel/debug/nvme0/fault_inject/space +echo 1 > /sys/kernel/debug/nvme0/fault_inject/times +nvme reset /dev/nvme0 + +Expected Result: + +After NVMe controller reset, the reinitialization may or may not succeed. +It depends on which admin command is actually forced to fail. + +Message from dmesg: + +nvme nvme0: resetting controller +FAULT_INJECTION: forcing a failure. +name fault_inject, interval 1, probability 100, space 1, times 1 +CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.2.0-rc2+ #2 +Hardware name: MSI MS-7A45/B150M MORTAR ARCTIC (MS-7A45), BIOS 1.50 04/25/2017 +Call Trace: + + dump_stack+0x63/0x85 + should_fail+0x14a/0x170 + nvme_should_fail+0x38/0x80 [nvme_core] + nvme_irq+0x129/0x280 [nvme] + ? blk_mq_end_request+0xb3/0x120 + __handle_irq_event_percpu+0x84/0x1a0 + handle_irq_event_percpu+0x32/0x80 + handle_irq_event+0x3b/0x60 + handle_edge_irq+0x7f/0x1a0 + handle_irq+0x20/0x30 + do_IRQ+0x4e/0xe0 + common_interrupt+0xf/0xf + +RIP: 0010:cpuidle_enter_state+0xc5/0x460 +Code: ff e8 8f 5f 86 ff 80 7d c7 00 74 17 9c 58 0f 1f 44 00 00 f6 c4 02 0f 85 69 03 00 00 31 ff e8 62 aa 8c ff fb 66 0f 1f 44 00 00 <45> 85 ed 0f 88 37 03 00 00 4c 8b 45 d0 4c 2b 45 b8 48 ba cf f7 53 +RSP: 0018:ffffffff88c03dd0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdc +RAX: ffff9dac25a2ac80 RBX: ffffffff88d53760 RCX: 000000000000001f +RDX: 0000000000000000 RSI: 000000002d958403 RDI: 0000000000000000 +RBP: ffffffff88c03e18 R08: fffffff75e35ffb7 R09: 00000a49a56c0b48 +R10: ffffffff88c03da0 R11: 0000000000001b0c R12: ffff9dac25a34d00 +R13: 0000000000000006 R14: 0000000000000006 R15: ffffffff88d53760 + cpuidle_enter+0x2e/0x40 + call_cpuidle+0x23/0x40 + do_idle+0x201/0x280 + cpu_startup_entry+0x1d/0x20 + rest_init+0xaa/0xb0 + arch_call_rest_init+0xe/0x1b + start_kernel+0x51c/0x53b + x86_64_start_reservations+0x24/0x26 + x86_64_start_kernel+0x74/0x77 + secondary_startup_64+0xa4/0xb0 +nvme nvme0: Could not set queue count (16385) +nvme nvme0: IO queues not created