From b224f6134d72e3493a023b5bea917f9a6beea0c8 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 24 Nov 2017 16:30:53 +0100
Subject: nvme: set discard_alignment to zero

Similar to 7c084289795b ("rbd: set discard_alignment to zero"), NVMe
devices are currently incorrectly initialised with the block queue
discard_alignment set to the NVMe stream alignment.

As per Documentation/ABI/testing/sysfs-block:
  The discard_alignment parameter indicates how many bytes the beginning
  of the device is offset from the internal allocation unit's natural
  alignment.

Correcting the discard_alignment parameter to zero has no effect on how
discard requests are propagated through the block layer - @alignment in
__blkdev_issue_discard() remains zero. However, it does fix other
consumers, such as LIO's Block Limits VPD response.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f837d666cbd4..67f2f94cf86e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
-	queue->limits.discard_alignment = size;
+	queue->limits.discard_alignment = 0;
 	queue->limits.discard_granularity = size;
 
 	blk_queue_max_discard_sectors(queue, UINT_MAX);
-- 
cgit 


From bd9f5d65769b9fe5e72110d4cbc9097b53b01613 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 6 Dec 2017 18:30:09 +0800
Subject: nvme: call blk_integrity_unregister after queue is cleaned up

During IO complete path, bio_integrity_advance() is often called, and
blk_get_integrity() is called in this function. But in
blk_integrity_unregister, the buffer pointed by queue->integrity
is cleared, and blk_integrity->profile becomes NULL, then blk_get_integrity
returns NULL, and causes kernel oops[1] finally.

This patch fixes this issue by calling blk_integrity_unregister() after
blk_cleanup_queue().

[1] kernel oops log
[  122.068007] BUG: unable to handle kernel NULL pointer dereference at 000000000000000a
[  122.076760] IP: bio_integrity_advance+0x3d/0xf0
[  122.081815] PGD 0 P4D 0
[  122.084641] Oops: 0000 [#1] SMP
[  122.088142] Modules linked in: sunrpc ipmi_ssif intel_rapl vfat fat x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass mei_me ipmi_si crct10dif_pclmul crc32_pclmul sg mei ghash_clmulni_intel mxm_wmi ipmi_devintf iTCO_wdt intel_cstate intel_uncore pcspkr intel_rapl_perf iTCO_vendor_support dcdbas ipmi_msghandler lpc_ich acpi_power_meter shpchp wmi dm_multipath ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel ahci nvme tg3 libahci nvme_core i2c_core libata ptp megaraid_sas pps_core dm_mirror dm_region_hash dm_log dm_mod
[  122.149577] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.14.0-11.el7a.x86_64 #1
[  122.157635] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.5.5 08/16/2017
[  122.166179] task: ffff8802ff1e8000 task.stack: ffffc90000130000
[  122.172785] RIP: 0010:bio_integrity_advance+0x3d/0xf0
[  122.178419] RSP: 0018:ffff88047fc03d70 EFLAGS: 00010006
[  122.184248] RAX: ffff880473b08000 RBX: ffff880458c71a80 RCX: ffff880473b08248
[  122.192209] RDX: 0000000000000000 RSI: 000000000000003c RDI: ffffc900038d7ba0
[  122.200171] RBP: ffff88047fc03d78 R08: 0000000000000001 R09: ffffffffa01a78b5
[  122.208132] R10: ffff88047fc1eda0 R11: ffff880458c71ad0 R12: 0000000000007800
[  122.216094] R13: 0000000000000000 R14: 0000000000007800 R15: ffff880473a39b40
[  122.224056] FS:  0000000000000000(0000) GS:ffff88047fc00000(0000) knlGS:0000000000000000
[  122.233083] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  122.239494] CR2: 000000000000000a CR3: 0000000001c09002 CR4: 00000000001606e0
[  122.247455] Call Trace:
[  122.250183]  <IRQ>
[  122.252429]  bio_advance+0x28/0xf0
[  122.256217]  blk_update_request+0xa1/0x310
[  122.260778]  blk_mq_end_request+0x1e/0x70
[  122.265256]  nvme_complete_rq+0x1c/0xd0 [nvme_core]
[  122.270699]  nvme_pci_complete_rq+0x85/0x130 [nvme]
[  122.276140]  __blk_mq_complete_request+0x8d/0x140
[  122.281387]  blk_mq_complete_request+0x16/0x20
[  122.286345]  nvme_process_cq+0xdd/0x1c0 [nvme]
[  122.291301]  nvme_irq+0x23/0x50 [nvme]
[  122.295485]  __handle_irq_event_percpu+0x3c/0x190
[  122.300725]  handle_irq_event_percpu+0x32/0x80
[  122.305683]  handle_irq_event+0x3b/0x60
[  122.309964]  handle_edge_irq+0x8f/0x190
[  122.314247]  handle_irq+0xab/0x120
[  122.318043]  do_IRQ+0x48/0xd0
[  122.321355]  common_interrupt+0x9d/0x9d
[  122.325625]  </IRQ>
[  122.327967] RIP: 0010:cpuidle_enter_state+0xe9/0x280
[  122.333504] RSP: 0018:ffffc90000133e68 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff35
[  122.341952] RAX: ffff88047fc1b900 RBX: ffff88047fc24400 RCX: 000000000000001f
[  122.349913] RDX: 0000000000000000 RSI: fffffcf2e6007295 RDI: 0000000000000000
[  122.357874] RBP: ffffc90000133ea0 R08: 000000000000062e R09: 0000000000000253
[  122.365836] R10: 0000000000000225 R11: 0000000000000018 R12: 0000000000000002
[  122.373797] R13: 0000000000000001 R14: ffff88047fc24400 R15: 0000001c6bd1d263
[  122.381762]  ? cpuidle_enter_state+0xc5/0x280
[  122.386623]  cpuidle_enter+0x17/0x20
[  122.390611]  call_cpuidle+0x23/0x40
[  122.394501]  do_idle+0x17e/0x1f0
[  122.398101]  cpu_startup_entry+0x73/0x80
[  122.402478]  start_secondary+0x178/0x1c0
[  122.406854]  secondary_startup_64+0xa5/0xa5
[  122.411520] Code: 48 8b 5f 68 48 8b 47 08 31 d2 4c 8b 5b 48 48 8b 80 d0 03 00 00 48 83 b8 48 02 00 00 00 48 8d 88 48 02 00 00 48 0f 45 d1 c1 ee 09 <0f> b6 4a 0a 0f b6 52 09 89 f0 48 01 73 08 83 e9 09 d3 e8 0f af
[  122.432604] RIP: bio_integrity_advance+0x3d/0xf0 RSP: ffff88047fc03d70
[  122.439888] CR2: 000000000000000a

Reported-by: Zhang Yi <yizhan@redhat.com>
Tested-by: Zhang Yi <yizhan@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 67f2f94cf86e..2cc6192ef275 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2965,8 +2965,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		return;
 
 	if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-		if (blk_get_integrity(ns->disk))
-			blk_integrity_unregister(ns->disk);
 		nvme_mpath_remove_disk_links(ns);
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_id_attr_group);
@@ -2974,6 +2972,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 			nvme_nvm_unregister_sysfs(ns);
 		del_gendisk(ns->disk);
 		blk_cleanup_queue(ns->queue);
+		if (blk_get_integrity(ns->disk))
+			blk_integrity_unregister(ns->disk);
 	}
 
 	mutex_lock(&ns->ctrl->subsys->lock);
-- 
cgit 


From 249159c5f15812140fa216f9997d799ac0023a1f Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 14 Dec 2017 11:20:14 -0700
Subject: nvme: check hw sectors before setting chunk sectors

Some devices with IDs matching the "stripe" quirk don't actually have
this quirk, and don't have an MDTS value. When MDTS is not set, the
driver sets the max sectors to UINT_MAX, which is not a power of 2,
hitting a BUG_ON from blk_queue_chunk_sectors. This patch skips setting
chunk sectors for such devices.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2cc6192ef275..eab812dd2429 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
 		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
 	}
-	if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+	    is_power_of_2(ctrl->max_hw_sectors))
 		blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
 	blk_queue_virt_boundary(q, ctrl->page_size - 1);
 	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
-- 
cgit 


From 654b4a4acd8b52a4272114b95896e9a10d382cde Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 14 Dec 2017 11:20:32 -0700
Subject: nvme: setup streams after initializing namespace head

Fixes a NULL pointer dereference.

Reported-by: Arnav Dawn <a.dawn@samsung.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index eab812dd2429..1e46e60b8f10 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2870,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	nvme_set_queue_limits(ctrl, ns->queue);
-	nvme_setup_streams_ns(ctrl, ns);
 
 	id = nvme_identify_ns(ctrl, nsid);
 	if (!id)
@@ -2881,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	if (nvme_init_ns_head(ns, nsid, id, &new))
 		goto out_free_id;
+	nvme_setup_streams_ns(ctrl, ns);
 	
 #ifdef CONFIG_NVME_MULTIPATH
 	/*
-- 
cgit 


From cee160fd34b459ace029653436319557a643795a Mon Sep 17 00:00:00 2001
From: Jeff Lien <jeff.lien@wdc.com>
Date: Tue, 19 Dec 2017 13:24:15 -0600
Subject: nvme: fix sector units when going between formats

If you format a device with a 4k sector size back to 512 bytes, the queue
limit values for physical block size and minimum IO size were not getting
updated; only the logical block size was being updated.  This patch adds
code to update the physical block and IO minimum sizes.

Signed-off-by: Jeff Lien <jeff.lien@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1e46e60b8f10..961d6a4af19c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1335,6 +1335,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
 		struct nvme_ns *ns, struct nvme_id_ns *id)
 {
 	sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
+	unsigned short bs = 1 << ns->lba_shift;
 	unsigned stream_alignment = 0;
 
 	if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
@@ -1343,7 +1344,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_mq_freeze_queue(disk->queue);
 	blk_integrity_unregister(disk);
 
-	blk_queue_logical_block_size(disk->queue, 1 << ns->lba_shift);
+	blk_queue_logical_block_size(disk->queue, bs);
+	blk_queue_physical_block_size(disk->queue, bs);
+	blk_queue_io_min(disk->queue, bs);
+
 	if (ns->ms && !ns->ext &&
 	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
 		nvme_init_integrity(disk, ns->ms, ns->pi_type);
-- 
cgit 


From 479a322fb729d657d34706ccf8dd12916f36628f Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 21 Dec 2017 15:07:27 +0200
Subject: nvme-mpath: fix last path removal during traffic

In case our last path is removed during traffic, we can end up requeueing
the bio(s) but never schedule the actual requeue work as upper layers
still have open handles on the mpath device node.

Fix this by scheduling requeue work if the namespace being removed is
the last path in the ns_head path list.

Fixes: 32acab3181c7 ("nvme: implement multipath access to nvme subsystems")
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 961d6a4af19c..839650e0926a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2991,6 +2991,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	mutex_unlock(&ns->ctrl->namespaces_mutex);
 
 	synchronize_srcu(&ns->head->srcu);
+	nvme_mpath_check_last_path(ns);
 	nvme_put_ns(ns);
 }
 
-- 
cgit 


From 1a3838d732eaae47385490de88d978d4132d3d84 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Sun, 31 Dec 2017 15:33:27 +0200
Subject: nvme: modify the debug level for setting shutdown timeout

When an NVMe controller reports RTD3 Entry Latency larger than the value
of shutdown_timeout module parameter, we update the shutdown_timeout
accordingly to honor RTD3 Entry Latency. Use an informational debug level
instead of a warning level for it.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f837d666cbd4..2a69d735efbc 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2252,7 +2252,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 						 shutdown_timeout, 60);
 
 		if (ctrl->shutdown_timeout != shutdown_timeout)
-			dev_warn(ctrl->device,
+			dev_info(ctrl->device,
 				 "Shutdown timeout set to %u seconds\n",
 				 ctrl->shutdown_timeout);
 	} else
-- 
cgit 


From 2b1b7e784a63f5ded4dda804e05e3f34b3880b25 Mon Sep 17 00:00:00 2001
From: Jianchao Wang <jianchao.w.wang@oracle.com>
Date: Sat, 6 Jan 2018 08:01:58 +0800
Subject: nvme-pci: fix NULL pointer reference in nvme_alloc_ns

When the io queues setup or tagset allocation failed, ctrl.tagset is
NULL.  But the scan work will still be queued and executed, then panic
comes up due to NULL pointer reference of ctrl.tagset.

To fix this, add a new ctrl state NVME_CTRL_ADMIN_ONLY to inidcate only
admin queue is live. When non io queues or tagset allocation failed, ctrl
enters into this state, scan work will not be started.  But async event
work and nvme dev ioctl will be still available.  This will be helpful to
do further investigation and recovery.

Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2a69d735efbc..609307ca9e4d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -232,6 +232,15 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 
 	old_state = ctrl->state;
 	switch (new_state) {
+	case NVME_CTRL_ADMIN_ONLY:
+		switch (old_state) {
+		case NVME_CTRL_RESETTING:
+			changed = true;
+			/* FALLTHRU */
+		default:
+			break;
+		}
+		break;
 	case NVME_CTRL_LIVE:
 		switch (old_state) {
 		case NVME_CTRL_NEW:
@@ -247,6 +256,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		switch (old_state) {
 		case NVME_CTRL_NEW:
 		case NVME_CTRL_LIVE:
+		case NVME_CTRL_ADMIN_ONLY:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -266,6 +276,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	case NVME_CTRL_DELETING:
 		switch (old_state) {
 		case NVME_CTRL_LIVE:
+		case NVME_CTRL_ADMIN_ONLY:
 		case NVME_CTRL_RESETTING:
 		case NVME_CTRL_RECONNECTING:
 			changed = true;
@@ -2336,8 +2347,14 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
 	struct nvme_ctrl *ctrl =
 		container_of(inode->i_cdev, struct nvme_ctrl, cdev);
 
-	if (ctrl->state != NVME_CTRL_LIVE)
+	switch (ctrl->state) {
+	case NVME_CTRL_LIVE:
+	case NVME_CTRL_ADMIN_ONLY:
+		break;
+	default:
 		return -EWOULDBLOCK;
+	}
+
 	file->private_data = ctrl;
 	return 0;
 }
@@ -2601,6 +2618,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
 	static const char *const state_name[] = {
 		[NVME_CTRL_NEW]		= "new",
 		[NVME_CTRL_LIVE]	= "live",
+		[NVME_CTRL_ADMIN_ONLY]	= "only-admin",
 		[NVME_CTRL_RESETTING]	= "resetting",
 		[NVME_CTRL_RECONNECTING]= "reconnecting",
 		[NVME_CTRL_DELETING]	= "deleting",
@@ -3073,6 +3091,8 @@ static void nvme_scan_work(struct work_struct *work)
 	if (ctrl->state != NVME_CTRL_LIVE)
 		return;
 
+	WARN_ON_ONCE(!ctrl->tagset);
+
 	if (nvme_identify_ctrl(ctrl, &id))
 		return;
 
@@ -3093,8 +3113,7 @@ static void nvme_scan_work(struct work_struct *work)
 void nvme_queue_scan(struct nvme_ctrl *ctrl)
 {
 	/*
-	 * Do not queue new scan work when a controller is reset during
-	 * removal.
+	 * Only new queue scan work when admin and IO queues are both alive
 	 */
 	if (ctrl->state == NVME_CTRL_LIVE)
 		queue_work(nvme_wq, &ctrl->scan_work);
-- 
cgit 


From 85088c4a0f65f0be25a98164ec6bca02ac5cad04 Mon Sep 17 00:00:00 2001
From: Nitzan Carmi <nitzanc@mellanox.com>
Date: Thu, 4 Jan 2018 17:56:13 +0200
Subject: nvme: take refcount on transport module

The block device is backed by the transport so we must ensure that the
transport driver will not be removed until all references are released.
Otherwise, we might end up referencing freed memory.

Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Nitzan Carmi <nitzanc@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 609307ca9e4d..c8bcfe64e976 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1228,16 +1228,27 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
 #ifdef CONFIG_NVME_MULTIPATH
 	/* should never be called due to GENHD_FL_HIDDEN */
 	if (WARN_ON_ONCE(ns->head->disk))
-		return -ENXIO;
+		goto fail;
 #endif
 	if (!kref_get_unless_zero(&ns->kref))
-		return -ENXIO;
+		goto fail;
+	if (!try_module_get(ns->ctrl->ops->module))
+		goto fail_put_ns;
+
 	return 0;
+
+fail_put_ns:
+	nvme_put_ns(ns);
+fail:
+	return -ENXIO;
 }
 
 static void nvme_release(struct gendisk *disk, fmode_t mode)
 {
-	nvme_put_ns(disk->private_data);
+	struct nvme_ns *ns = disk->private_data;
+
+	module_put(ns->ctrl->ops->module);
+	nvme_put_ns(ns);
 }
 
 static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-- 
cgit 


From b837b28394fb76993c28bb242db7061ee0417da6 Mon Sep 17 00:00:00 2001
From: Israel Rukshin <israelr@mellanox.com>
Date: Thu, 4 Jan 2018 17:56:14 +0200
Subject: nvme: fix subsystem multiple controllers support check

There is a problem when another module (e.g. nvmet) takes a reference on
the nvme block device and the physical nvme drive is removed.  In that
case nvme_free_ctrl() will not be called and the controller state will be
"deleting" or "dead" unless nvmet module releases the block device.
Later on, the same nvme drive probes back and nvme_init_subsystem() will
be called and fail due to duplicate subnqn (if the nvme device doesn't
support subsystem with multiple controllers). This will cause a probe
failure.  This commit changes the check of multiple controllers support
at nvme_init_subsystem() by not counting all the controllers at "dead" or
"deleting" state (this is safe because controllers at this state will
never be active again).

Fixes: ab9e00cc72fa ("nvme: track subsystems")
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c8bcfe64e976..2bcd49584f71 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2069,6 +2069,22 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = {
 	NULL,
 };
 
+static int nvme_active_ctrls(struct nvme_subsystem *subsys)
+{
+	int count = 0;
+	struct nvme_ctrl *ctrl;
+
+	mutex_lock(&subsys->lock);
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		if (ctrl->state != NVME_CTRL_DELETING &&
+		    ctrl->state != NVME_CTRL_DEAD)
+			count++;
+	}
+	mutex_unlock(&subsys->lock);
+
+	return count;
+}
+
 static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 {
 	struct nvme_subsystem *subsys, *found;
@@ -2107,7 +2123,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 		 * Verify that the subsystem actually supports multiple
 		 * controllers, else bail out.
 		 */
-		if (!(id->cmic & (1 << 1))) {
+		if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
 			dev_err(ctrl->device,
 				"ignoring ctrl due to duplicate subnqn (%s).\n",
 				found->subnqn);
-- 
cgit 


From e96fef2c3fa396dda680e943dddaa4f2a06e7b1c Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 9 Jan 2018 12:04:14 -0700
Subject: nvme: Add more command status translation

This adds more NVMe status code translations to blk_status_t values,
and captures all the current status codes NVMe multipath uses.

Acked-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2bcd49584f71..d8956d94cbd8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -157,13 +157,20 @@ static blk_status_t nvme_error_status(struct request *req)
 		return BLK_STS_OK;
 	case NVME_SC_CAP_EXCEEDED:
 		return BLK_STS_NOSPC;
+	case NVME_SC_LBA_RANGE:
+		return BLK_STS_TARGET;
+	case NVME_SC_BAD_ATTRIBUTES:
 	case NVME_SC_ONCS_NOT_SUPPORTED:
+	case NVME_SC_INVALID_OPCODE:
+	case NVME_SC_INVALID_FIELD:
+	case NVME_SC_INVALID_NS:
 		return BLK_STS_NOTSUPP;
 	case NVME_SC_WRITE_FAULT:
 	case NVME_SC_READ_ERROR:
 	case NVME_SC_UNWRITTEN_BLOCK:
 	case NVME_SC_ACCESS_DENIED:
 	case NVME_SC_READ_ONLY:
+	case NVME_SC_COMPARE_FAILED:
 		return BLK_STS_MEDIUM;
 	case NVME_SC_GUARD_CHECK:
 	case NVME_SC_APPTAG_CHECK:
-- 
cgit 


From 908e45643d6450551bfbdbad3f088d4bd1f1c1fb Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 9 Jan 2018 12:04:15 -0700
Subject: nvme/multipath: Consult blk_status_t for failover

This removes nvme multipath's specific status decoding to see if failover
is needed, using the generic blk_status_t that was decoded earlier. This
abstraction from the raw NVMe status means all status decoding exists
in one place.

Acked-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d8956d94cbd8..2fe15351ac4e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -197,8 +197,10 @@ static inline bool nvme_req_needs_retry(struct request *req)
 
 void nvme_complete_rq(struct request *req)
 {
-	if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
-		if (nvme_req_needs_failover(req)) {
+	blk_status_t status = nvme_error_status(req);
+
+	if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
+		if (nvme_req_needs_failover(req, status)) {
 			nvme_failover_req(req);
 			return;
 		}
@@ -209,8 +211,7 @@ void nvme_complete_rq(struct request *req)
 			return;
 		}
 	}
-
-	blk_mq_end_request(req, nvme_error_status(req));
+	blk_mq_end_request(req, status);
 }
 EXPORT_SYMBOL_GPL(nvme_complete_rq);
 
-- 
cgit 


From 79c48ccf2fec7c10105bd635d3bb1128167b1258 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Sun, 14 Jan 2018 12:39:00 +0200
Subject: nvme-pci: serialize pci resets

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2fe15351ac4e..4d8f63b3c5b6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -95,7 +95,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
 
-static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
 {
 	int ret;
 
@@ -104,6 +104,7 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
 		flush_work(&ctrl->reset_work);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
 
 static void nvme_delete_ctrl_work(struct work_struct *work)
 {
-- 
cgit 


From b227c59b9b5b8ae52639c8980af853d2f654f90a Mon Sep 17 00:00:00 2001
From: Roy Shterman <roys@lightbitslabs.com>
Date: Sun, 14 Jan 2018 12:39:02 +0200
Subject: nvme: host delete_work and reset_work on separate workqueues

We need to ensure that delete_work will be hosted on a different
workqueue than all the works we flush or cancel from it.
Otherwise we may hit a circular dependency warning [1].

Also, given that delete_work flushes reset_work, host reset_work
on nvme_reset_wq and delete_work on nvme_delete_wq. In addition,
fix the flushing in the individual drivers to flush nvme_delete_wq
when draining queued deletes.

[1]:
[  178.491942] =============================================
[  178.492718] [ INFO: possible recursive locking detected ]
[  178.493495] 4.9.0-rc4-c844263313a8-lb #3 Tainted: G           OE
[  178.494382] ---------------------------------------------
[  178.495160] kworker/5:1/135 is trying to acquire lock:
[  178.495894]  (
[  178.496120] "nvme-wq"
[  178.496471] ){++++.+}
[  178.496599] , at:
[  178.496921] [<ffffffffa70ac206>] flush_work+0x1a6/0x2d0
[  178.497670]
               but task is already holding lock:
[  178.498499]  (
[  178.498724] "nvme-wq"
[  178.499074] ){++++.+}
[  178.499202] , at:
[  178.499520] [<ffffffffa70ad6c2>] process_one_work+0x162/0x6a0
[  178.500343]
               other info that might help us debug this:
[  178.501269]  Possible unsafe locking scenario:

[  178.502113]        CPU0
[  178.502472]        ----
[  178.502829]   lock(
[  178.503115] "nvme-wq"
[  178.503467] );
[  178.503716]   lock(
[  178.504001] "nvme-wq"
[  178.504353] );
[  178.504601]
                *** DEADLOCK ***

[  178.505441]  May be due to missing lock nesting notation

[  178.506453] 2 locks held by kworker/5:1/135:
[  178.507068]  #0:
[  178.507330]  (
[  178.507598] "nvme-wq"
[  178.507726] ){++++.+}
[  178.508079] , at:
[  178.508173] [<ffffffffa70ad6c2>] process_one_work+0x162/0x6a0
[  178.509004]  #1:
[  178.509265]  (
[  178.509532] (&ctrl->delete_work)
[  178.509795] ){+.+.+.}
[  178.510145] , at:
[  178.510239] [<ffffffffa70ad6c2>] process_one_work+0x162/0x6a0
[  178.511070]
               stack backtrace:
:
[  178.511693] CPU: 5 PID: 135 Comm: kworker/5:1 Tainted: G           OE   4.9.0-rc4-c844263313a8-lb #3
[  178.512974] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014
[  178.514247] Workqueue: nvme-wq nvme_del_ctrl_work [nvme_tcp]
[  178.515071]  ffffc2668175bae0 ffffffffa7450823 ffffffffa88abd80 ffffffffa88abd80
[  178.516195]  ffffc2668175bb98 ffffffffa70eb012 ffffffffa8d8d90d ffff9c472e9ea700
[  178.517318]  ffff9c472e9ea700 ffff9c4700000000 ffff9c4700007200 ab83be61bec0d50e
[  178.518443] Call Trace:
[  178.518807]  [<ffffffffa7450823>] dump_stack+0x85/0xc2
[  178.519542]  [<ffffffffa70eb012>] __lock_acquire+0x17d2/0x18f0
[  178.520377]  [<ffffffffa75839a7>] ? serial8250_console_putchar+0x27/0x30
[  178.521330]  [<ffffffffa7583980>] ? wait_for_xmitr+0xa0/0xa0
[  178.522174]  [<ffffffffa70ac1eb>] ? flush_work+0x18b/0x2d0
[  178.522975]  [<ffffffffa70eb7cb>] lock_acquire+0x11b/0x220
[  178.523753]  [<ffffffffa70ac206>] ? flush_work+0x1a6/0x2d0
[  178.524535]  [<ffffffffa70ac229>] flush_work+0x1c9/0x2d0
[  178.525291]  [<ffffffffa70ac206>] ? flush_work+0x1a6/0x2d0
[  178.526077]  [<ffffffffa70a9cf0>] ? flush_workqueue_prep_pwqs+0x220/0x220
[  178.527040]  [<ffffffffa70ae7cf>] __cancel_work_timer+0x10f/0x1d0
[  178.527907]  [<ffffffffa70fecb9>] ? vprintk_default+0x29/0x40
[  178.528726]  [<ffffffffa71cb507>] ? printk+0x48/0x50
[  178.529434]  [<ffffffffa70ae8c3>] cancel_delayed_work_sync+0x13/0x20
[  178.530381]  [<ffffffffc042100b>] nvme_stop_ctrl+0x5b/0x70 [nvme_core]
[  178.531314]  [<ffffffffc0403dcc>] nvme_del_ctrl_work+0x2c/0x50 [nvme_tcp]
[  178.532271]  [<ffffffffa70ad741>] process_one_work+0x1e1/0x6a0
[  178.533101]  [<ffffffffa70ad6c2>] ? process_one_work+0x162/0x6a0
[  178.533954]  [<ffffffffa70adc4e>] worker_thread+0x4e/0x490
[  178.534735]  [<ffffffffa70adc00>] ? process_one_work+0x6a0/0x6a0
[  178.535588]  [<ffffffffa70adc00>] ? process_one_work+0x6a0/0x6a0
[  178.536441]  [<ffffffffa70b48cf>] kthread+0xff/0x120
[  178.537149]  [<ffffffffa70b47d0>] ? kthread_park+0x60/0x60
[  178.538094]  [<ffffffffa70b47d0>] ? kthread_park+0x60/0x60
[  178.538900]  [<ffffffffa78e332a>] ret_from_fork+0x2a/0x40

Signed-off-by: Roy Shterman <roys@lightbitslabs.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 44 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 5 deletions(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4d8f63b3c5b6..fde6fd2e7eef 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -65,9 +65,26 @@ static bool streams;
 module_param(streams, bool, 0644);
 MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
 
+/*
+ * nvme_wq - hosts nvme related works that are not reset or delete
+ * nvme_reset_wq - hosts nvme reset works
+ * nvme_delete_wq - hosts nvme delete works
+ *
+ * nvme_wq will host works such are scan, aen handling, fw activation,
+ * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq
+ * runs reset works which also flush works hosted on nvme_wq for
+ * serialization purposes. nvme_delete_wq host controller deletion
+ * works which flush reset works for serialization.
+ */
 struct workqueue_struct *nvme_wq;
 EXPORT_SYMBOL_GPL(nvme_wq);
 
+struct workqueue_struct *nvme_reset_wq;
+EXPORT_SYMBOL_GPL(nvme_reset_wq);
+
+struct workqueue_struct *nvme_delete_wq;
+EXPORT_SYMBOL_GPL(nvme_delete_wq);
+
 static DEFINE_IDA(nvme_subsystems_ida);
 static LIST_HEAD(nvme_subsystems);
 static DEFINE_MUTEX(nvme_subsystems_lock);
@@ -89,7 +106,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
 		return -EBUSY;
-	if (!queue_work(nvme_wq, &ctrl->reset_work))
+	if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
 		return -EBUSY;
 	return 0;
 }
@@ -123,7 +140,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
 {
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
 		return -EBUSY;
-	if (!queue_work(nvme_wq, &ctrl->delete_work))
+	if (!queue_work(nvme_delete_wq, &ctrl->delete_work))
 		return -EBUSY;
 	return 0;
 }
@@ -3526,16 +3543,26 @@ EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
 
 int __init nvme_core_init(void)
 {
-	int result;
+	int result = -ENOMEM;
 
 	nvme_wq = alloc_workqueue("nvme-wq",
 			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
 	if (!nvme_wq)
-		return -ENOMEM;
+		goto out;
+
+	nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
+			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+	if (!nvme_reset_wq)
+		goto destroy_wq;
+
+	nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
+			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+	if (!nvme_delete_wq)
+		goto destroy_reset_wq;
 
 	result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
 	if (result < 0)
-		goto destroy_wq;
+		goto destroy_delete_wq;
 
 	nvme_class = class_create(THIS_MODULE, "nvme");
 	if (IS_ERR(nvme_class)) {
@@ -3554,8 +3581,13 @@ destroy_class:
 	class_destroy(nvme_class);
 unregister_chrdev:
 	unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+destroy_delete_wq:
+	destroy_workqueue(nvme_delete_wq);
+destroy_reset_wq:
+	destroy_workqueue(nvme_reset_wq);
 destroy_wq:
 	destroy_workqueue(nvme_wq);
+out:
 	return result;
 }
 
@@ -3565,6 +3597,8 @@ void nvme_core_exit(void)
 	class_destroy(nvme_subsys_class);
 	class_destroy(nvme_class);
 	unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+	destroy_workqueue(nvme_delete_wq);
+	destroy_workqueue(nvme_reset_wq);
 	destroy_workqueue(nvme_wq);
 }
 
-- 
cgit 


From ad70062cdb4002c74db4fbed4e2b34daffccacc2 Mon Sep 17 00:00:00 2001
From: Jianchao Wang <jianchao.w.wang@oracle.com>
Date: Mon, 22 Jan 2018 22:03:16 +0800
Subject: nvme-pci: introduce RECONNECTING state to mark initializing procedure

After Sagi's commit (nvme-rdma: fix concurrent reset and reconnect),
both nvme-fc/rdma have following pattern:
RESETTING    - quiesce blk-mq queues, teardown and delete queues/
               connections, clear out outstanding IO requests...
RECONNECTING - establish new queues/connections and some other
               initializing things.
Introduce RECONNECTING to nvme-pci transport to do the same mark.
Then we get a coherent state definition among nvme pci/rdma/fc
transports.

Suggested-by: James Smart <james.smart@broadcom.com>
Reviewed-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fde6fd2e7eef..63c2c469112d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -260,7 +260,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	switch (new_state) {
 	case NVME_CTRL_ADMIN_ONLY:
 		switch (old_state) {
-		case NVME_CTRL_RESETTING:
+		case NVME_CTRL_RECONNECTING:
 			changed = true;
 			/* FALLTHRU */
 		default:
-- 
cgit 


From 3d030e41d96f46c14faf79f19c3cf1b9961815c8 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Fri, 26 Jan 2018 11:21:37 +0100
Subject: nvme: add tracepoint for nvme_setup_cmd

Add tracepoints for nvme_setup_cmd() for tracing admin and/or nvm commands.

Examples of the two tracepoints are as follows for trace_nvme_setup_admin_cmd():
kworker/u8:0-5     [003] ....     2.998792: nvme_setup_admin_cmd: cmdid=14, flags=0x0, meta=0x0, cmd=(nvme_admin_create_cq cqid=1, qsize=1023, cq_flags=0x3, irq_vector=0)

and trace_nvme_setup_nvm_cmd():
dd-205   [001] ....     3.503929: nvme_setup_nvm_cmd: qid=1, nsid=1, cmdid=989, flags=0x0, meta=0x0, cmd=(nvme_cmd_read slba=4096, len=2047, ctrl=0x0, dsmgmt=0, reftag=0)

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 63c2c469112d..f1430ca79a5b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -29,6 +29,9 @@
 #include <linux/pm_qos.h>
 #include <asm/unaligned.h>
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 #include "nvme.h"
 #include "fabrics.h"
 
@@ -628,6 +631,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 	}
 
 	cmd->common.command_id = req->tag;
+	if (ns)
+		trace_nvme_setup_nvm_cmd(req->q->id, cmd);
+	else
+		trace_nvme_setup_admin_cmd(cmd);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_setup_cmd);
-- 
cgit 


From ca5554a696dce37852f6d6721520b4f13fc295c3 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Fri, 26 Jan 2018 11:21:38 +0100
Subject: nvme: add tracepoint for nvme_complete_rq

Add a tracepoint in nvme_complete_rq() for completions of NVMe commands. An
expmale output of the trace-point is as follows:

<idle>-0     [001] d.h.     3.505266: nvme_complete_rq: cmdid=989, qid=1, res=0, retries=0, flags=0x0, status=0

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/nvme/host/core.c')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f1430ca79a5b..b3af8e914570 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -220,6 +220,8 @@ void nvme_complete_rq(struct request *req)
 {
 	blk_status_t status = nvme_error_status(req);
 
+	trace_nvme_complete_rq(req);
+
 	if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
 		if (nvme_req_needs_failover(req, status)) {
 			nvme_failover_req(req);
-- 
cgit