summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/block/queue-sysfs.txt18
-rw-r--r--drivers/nvme/host/pci.c20
-rw-r--r--drivers/nvme/host/rdma.c83
-rw-r--r--drivers/nvme/target/admin-cmd.c6
-rw-r--r--drivers/nvme/target/core.c4
-rw-r--r--drivers/nvme/target/loop.c4
-rw-r--r--drivers/nvme/target/nvmet.h1
-rw-r--r--drivers/nvme/target/rdma.c100
-rw-r--r--fs/fs-writeback.c6
-rw-r--r--include/linux/bvec.h3
10 files changed, 160 insertions, 85 deletions
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index d515d58962b9..2a3904030dea 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -14,6 +14,12 @@ add_random (RW)
This file allows to turn off the disk entropy contribution. Default
value of this file is '1'(on).
+dax (RO)
+--------
+This file indicates whether the device supports Direct Access (DAX),
+used by CPU-addressable storage to bypass the pagecache. It shows '1'
+if true, '0' if not.
+
discard_granularity (RO)
-----------------------
This shows the size of internal allocation of the device in bytes, if
@@ -46,6 +52,12 @@ hw_sector_size (RO)
-------------------
This is the hardware sector size of the device, in bytes.
+io_poll (RW)
+------------
+When read, this file shows the total number of block IO polls and how
+many returned success. Writing '0' to this file will disable polling
+for this device. Writing any non-zero value will enable this feature.
+
iostats (RW)
-------------
This file is used to control (on/off) the iostats accounting of the
@@ -151,5 +163,11 @@ device state. This means that it might not be safe to toggle the
setting from "write back" to "write through", since that will also
eliminate cache flushes issued by the kernel.
+write_same_max_bytes (RO)
+-------------------------
+This is the number of bytes the device can write in a single write-same
+command. A value of '0' means write-same is not supported by this
+device.
+
Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d7c33f9361aa..8dcf5a960951 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1543,15 +1543,10 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
reinit_completion(&dev->ioq_wait);
retry:
timeout = ADMIN_TIMEOUT;
- for (; i > 0; i--) {
- struct nvme_queue *nvmeq = dev->queues[i];
-
- if (!pass)
- nvme_suspend_queue(nvmeq);
- if (nvme_delete_queue(nvmeq, opcode))
+ for (; i > 0; i--, sent++)
+ if (nvme_delete_queue(dev->queues[i], opcode))
break;
- ++sent;
- }
+
while (sent--) {
timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
if (timeout == 0)
@@ -1693,11 +1688,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_stop_queues(&dev->ctrl);
csts = readl(dev->bar + NVME_REG_CSTS);
}
+
+ for (i = dev->queue_count - 1; i > 0; i--)
+ nvme_suspend_queue(dev->queues[i]);
+
if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
- for (i = dev->queue_count - 1; i >= 0; i--) {
- struct nvme_queue *nvmeq = dev->queues[i];
- nvme_suspend_queue(nvmeq);
- }
+ nvme_suspend_queue(dev->queues[0]);
} else {
nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 3e3ce2b0424e..8d2875b4c56d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -12,13 +12,11 @@
* more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/delay.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
-#include <linux/jiffies.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/types.h>
@@ -26,7 +24,6 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/nvme.h>
-#include <linux/t10-pi.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
@@ -169,7 +166,6 @@ MODULE_PARM_DESC(register_always,
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
-static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl);
/* XXX: really should move to a generic header sooner or later.. */
static inline void put_unaligned_le24(u32 val, u8 *p)
@@ -687,11 +683,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
list_del(&ctrl->list);
mutex_unlock(&nvme_rdma_ctrl_mutex);
- if (ctrl->ctrl.tagset) {
- blk_cleanup_queue(ctrl->ctrl.connect_q);
- blk_mq_free_tag_set(&ctrl->tag_set);
- nvme_rdma_dev_put(ctrl->device);
- }
kfree(ctrl->queues);
nvmf_free_options(nctrl->opts);
free_ctrl:
@@ -748,8 +739,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- if (ctrl->queue_count > 1)
+ if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
+ nvme_queue_scan(&ctrl->ctrl);
+ nvme_queue_async_events(&ctrl->ctrl);
+ }
dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
@@ -1269,7 +1263,7 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
{
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
struct rdma_conn_param param = { };
- struct nvme_rdma_cm_req priv;
+ struct nvme_rdma_cm_req priv = { };
int ret;
param.qp_num = queue->qp->qp_num;
@@ -1318,37 +1312,39 @@ out_destroy_queue_ib:
* that caught the event. Since we hold the callout until the controller
* deletion is completed, we'll deadlock if the controller deletion will
* call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
- * of destroying this queue before-hand, destroy the queue resources
- * after the controller deletion completed with the exception of destroying
- * the cm_id implicitely by returning a non-zero rc to the callout.
+ * of destroying this queue before-hand, destroy the queue resources,
+ * then queue the controller deletion which won't destroy this queue and
+ * we destroy the cm_id implicitly by returning a non-zero rc to the callout.
*/
static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
{
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
- int ret, ctrl_deleted = 0;
+ int ret = 0; /* 0: cm_id is not destroyed implicitly unless we free the queue below */
- /* First disable the queue so ctrl delete won't free it */
- if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
- goto out;
+ /* Own the controller deletion */
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
+ return 0;
- /* delete the controller */
- ret = __nvme_rdma_del_ctrl(ctrl);
- if (!ret) {
- dev_warn(ctrl->ctrl.device,
- "Got rdma device removal event, deleting ctrl\n");
- flush_work(&ctrl->delete_work);
+ dev_warn(ctrl->ctrl.device,
+ "Got rdma device removal event, deleting ctrl\n");
- /* Return non-zero so the cm_id will destroy implicitly */
- ctrl_deleted = 1;
+ /* Get rid of reconnect work if it's running */
+ cancel_delayed_work_sync(&ctrl->reconnect_work);
+ /* Disable the queue so ctrl delete won't free it */
+ if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
/* Free this queue ourselves */
- rdma_disconnect(queue->cm_id);
- ib_drain_qp(queue->qp);
+ nvme_rdma_stop_queue(queue);
nvme_rdma_destroy_queue_ib(queue);
+
+ /* Return non-zero so the cm_id will destroy implicitly */
+ ret = 1;
}
-out:
- return ctrl_deleted;
+ /* Queue controller deletion */
+ queue_work(nvme_rdma_wq, &ctrl->delete_work);
+ flush_work(&ctrl->delete_work);
+ return ret;
}
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
@@ -1648,7 +1644,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_free_io_queues(ctrl);
}
- if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+ if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
nvme_shutdown_ctrl(&ctrl->ctrl);
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1657,15 +1653,27 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_destroy_admin_queue(ctrl);
}
+static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
+{
+ nvme_uninit_ctrl(&ctrl->ctrl);
+ if (shutdown)
+ nvme_rdma_shutdown_ctrl(ctrl);
+
+ if (ctrl->ctrl.tagset) {
+ blk_cleanup_queue(ctrl->ctrl.connect_q);
+ blk_mq_free_tag_set(&ctrl->tag_set);
+ nvme_rdma_dev_put(ctrl->device);
+ }
+
+ nvme_put_ctrl(&ctrl->ctrl);
+}
+
static void nvme_rdma_del_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_rdma_shutdown_ctrl(ctrl);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
+ __nvme_rdma_remove_ctrl(ctrl, true);
}
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@@ -1698,9 +1706,7 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
+ __nvme_rdma_remove_ctrl(ctrl, false);
}
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
@@ -1739,6 +1745,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
nvme_queue_scan(&ctrl->ctrl);
+ nvme_queue_async_events(&ctrl->ctrl);
}
return;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 2fac17a5ad53..47c564b5a289 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -13,7 +13,6 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
-#include <linux/random.h>
#include <generated/utsrelease.h>
#include "nvmet.h"
@@ -83,7 +82,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
- u64 serial;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -96,10 +94,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->vid = 0;
id->ssvid = 0;
- /* generate a random serial number as our controllers are ephemeral: */
- get_random_bytes(&serial, sizeof(serial));
memset(id->sn, ' ', sizeof(id->sn));
- snprintf(id->sn, sizeof(id->sn), "%llx", serial);
+ snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
memset(id->mn, ' ', sizeof(id->mn));
strncpy((char *)id->mn, "Linux", sizeof(id->mn));
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 8a891ca53367..6559d5afa7bf 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -13,6 +13,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
+#include <linux/random.h>
#include "nvmet.h"
static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@@ -728,6 +729,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
+ /* generate a random serial number as our controllers are ephemeral: */
+ get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
+
kref_init(&ctrl->ref);
ctrl->subsys = subsys;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 94e782987cc9..7affd40a6b33 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -414,9 +414,8 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
struct nvme_loop_ctrl *ctrl = container_of(work,
struct nvme_loop_ctrl, delete_work);
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_loop_shutdown_ctrl(ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
+ nvme_loop_shutdown_ctrl(ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
@@ -501,7 +500,6 @@ out_free_queues:
nvme_loop_destroy_admin_queue(ctrl);
out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- nvme_remove_namespaces(&ctrl->ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 57dd6d834c28..76b6eedccaf9 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -113,6 +113,7 @@ struct nvmet_ctrl {
struct mutex lock;
u64 cap;
+ u64 serial;
u32 cc;
u32 csts;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e06d504bdf0c..b4d648536c3e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state {
NVMET_RDMA_Q_CONNECTING,
NVMET_RDMA_Q_LIVE,
NVMET_RDMA_Q_DISCONNECTING,
+ NVMET_RDMA_IN_DEVICE_REMOVAL,
};
struct nvmet_rdma_queue {
@@ -615,15 +616,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
if (!len)
return 0;
- /* use the already allocated data buffer if possible */
- if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) {
- nvmet_rdma_use_inline_sg(rsp, len, 0);
- } else {
- status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
- len);
- if (status)
- return status;
- }
+ status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
+ len);
+ if (status)
+ return status;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@@ -984,7 +980,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
struct nvmet_rdma_device *dev = queue->dev;
nvmet_rdma_free_queue(queue);
- rdma_destroy_id(cm_id);
+
+ if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+ rdma_destroy_id(cm_id);
+
kref_put(&dev->ref, nvmet_rdma_free_dev);
}
@@ -1233,8 +1232,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
switch (queue->state) {
case NVMET_RDMA_Q_CONNECTING:
case NVMET_RDMA_Q_LIVE:
- disconnect = true;
queue->state = NVMET_RDMA_Q_DISCONNECTING;
+ case NVMET_RDMA_IN_DEVICE_REMOVAL:
+ disconnect = true;
break;
case NVMET_RDMA_Q_DISCONNECTING:
break;
@@ -1272,6 +1272,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
schedule_work(&queue->release_work);
}
+/**
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id: rdma cm id the event was raised on (context is the nvmet port for a listener)
+ * @queue: nvmet rdma queue (cm id qp_context)
+ *
+ * DEVICE_REMOVAL event notifies us that the RDMA device is about
+ * to unplug so we should take care of destroying our RDMA resources.
+ * This event will be generated for each allocated cm_id.
+ *
+ * Note that this event can be generated on a normal queue cm_id
+ * and/or a device bound listener cm_id (where in this case
+ * queue will be null).
+ *
+ * We claim ownership on destroying the cm_id. For queues we move
+ * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * we nullify the priv to prevent double cm_id destruction and destroying
+ * the cm_id implicitly by returning a non-zero rc to the callout.
+ */
+static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
+ struct nvmet_rdma_queue *queue)
+{
+ unsigned long flags;
+
+ if (!queue) {
+ struct nvmet_port *port = cm_id->context;
+
+ /*
+ * This is a listener cm_id. Make sure that
+ * future remove_port won't invoke a double
+ * cm_id destroy. use atomic xchg to make sure
+ * we don't compete with remove_port.
+ */
+ if (xchg(&port->priv, NULL) != cm_id)
+ return 0;
+ } else {
+ /*
+ * This is a queue cm_id. Make sure that
+ * release queue will not destroy the cm_id
+ * and schedule all ctrl queues removal (only
+ * if the queue is not disconnecting already).
+ */
+ spin_lock_irqsave(&queue->state_lock, flags);
+ if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
+ queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
+ spin_unlock_irqrestore(&queue->state_lock, flags);
+ nvmet_rdma_queue_disconnect(queue);
+ flush_scheduled_work();
+ }
+
+ /*
+ * We need to return 1 so that the core will destroy
+ * its own ID. What a great API design..
+ */
+ return 1;
+}
+
static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -1294,20 +1350,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_DISCONNECTED:
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- /*
- * We can get the device removal callback even for a
- * CM ID that we aren't actually using. In that case
- * the context pointer is NULL, so we shouldn't try
- * to disconnect a non-existing queue. But we also
- * need to return 1 so that the core will destroy
- * it's own ID. What a great API design..
- */
- if (queue)
- nvmet_rdma_queue_disconnect(queue);
- else
- ret = 1;
+ nvmet_rdma_queue_disconnect(queue);
+ break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ ret = nvmet_rdma_device_removal(cm_id, queue);
break;
case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1443,10 @@ out_destroy_id:
static void nvmet_rdma_remove_port(struct nvmet_port *port)
{
- struct rdma_cm_id *cm_id = port->priv;
+ struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
- rdma_destroy_id(cm_id);
+ if (cm_id)
+ rdma_destroy_id(cm_id);
}
static struct nvmet_fabrics_ops nvmet_rdma_ops = {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4d09d4441e3e..05713a5da083 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1949,6 +1949,12 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
{
struct backing_dev_info *bdi;
+ /*
+ * If we are expecting writeback progress we must submit plugged IO.
+ */
+ if (blk_needs_flush_plug(current))
+ blk_schedule_flush_plug(current);
+
if (!nr_pages)
nr_pages = get_nr_dirty_pages();
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 701b64a3b7c5..89b65b82d98f 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -74,7 +74,8 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
"Attempted to advance past end of bvec iter\n");
while (bytes) {
- unsigned len = min(bytes, bvec_iter_len(bv, *iter));
+ unsigned iter_len = bvec_iter_len(bv, *iter);
+ unsigned len = min(bytes, iter_len);
bytes -= len;
iter->bi_size -= len;