summaryrefslogtreecommitdiff
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/Makefile2
-rw-r--r--drivers/nvme/host/core.c149
-rw-r--r--drivers/nvme/host/nvme.h2
-rw-r--r--drivers/nvme/host/pr.c315
-rw-r--r--drivers/nvme/host/tcp.c49
-rw-r--r--drivers/nvme/target/tcp.c46
6 files changed, 375 insertions, 188 deletions
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index d3fc5063e4be..c7c3cf202d12 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_FC) += nvme-fc.o
obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
obj-$(CONFIG_NVME_APPLE) += nvme-apple.o
-nvme-core-y += core.o ioctl.o sysfs.o
+nvme-core-y += core.o ioctl.o sysfs.o pr.o
nvme-core-$(CONFIG_NVME_VERBOSE_ERRORS) += constants.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d81f530d4084..47d7ba2827ff 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -279,7 +279,7 @@ static blk_status_t nvme_error_status(u16 status)
case NVME_SC_INVALID_PI:
return BLK_STS_PROTECTION;
case NVME_SC_RESERVATION_CONFLICT:
- return BLK_STS_NEXUS;
+ return BLK_STS_RESV_CONFLICT;
case NVME_SC_HOST_PATH_ERROR:
return BLK_STS_TRANSPORT;
case NVME_SC_ZONE_TOO_MANY_ACTIVE:
@@ -2108,153 +2108,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
}
}
-static char nvme_pr_type(enum pr_type type)
-{
- switch (type) {
- case PR_WRITE_EXCLUSIVE:
- return 1;
- case PR_EXCLUSIVE_ACCESS:
- return 2;
- case PR_WRITE_EXCLUSIVE_REG_ONLY:
- return 3;
- case PR_EXCLUSIVE_ACCESS_REG_ONLY:
- return 4;
- case PR_WRITE_EXCLUSIVE_ALL_REGS:
- return 5;
- case PR_EXCLUSIVE_ACCESS_ALL_REGS:
- return 6;
- default:
- return 0;
- }
-}
-
-static int nvme_send_ns_head_pr_command(struct block_device *bdev,
- struct nvme_command *c, u8 data[16])
-{
- struct nvme_ns_head *head = bdev->bd_disk->private_data;
- int srcu_idx = srcu_read_lock(&head->srcu);
- struct nvme_ns *ns = nvme_find_path(head);
- int ret = -EWOULDBLOCK;
-
- if (ns) {
- c->common.nsid = cpu_to_le32(ns->head->ns_id);
- ret = nvme_submit_sync_cmd(ns->queue, c, data, 16);
- }
- srcu_read_unlock(&head->srcu, srcu_idx);
- return ret;
-}
-
-static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
- u8 data[16])
-{
- c->common.nsid = cpu_to_le32(ns->head->ns_id);
- return nvme_submit_sync_cmd(ns->queue, c, data, 16);
-}
-
-static int nvme_sc_to_pr_err(int nvme_sc)
-{
- if (nvme_is_path_error(nvme_sc))
- return PR_STS_PATH_FAILED;
-
- switch (nvme_sc) {
- case NVME_SC_SUCCESS:
- return PR_STS_SUCCESS;
- case NVME_SC_RESERVATION_CONFLICT:
- return PR_STS_RESERVATION_CONFLICT;
- case NVME_SC_ONCS_NOT_SUPPORTED:
- return -EOPNOTSUPP;
- case NVME_SC_BAD_ATTRIBUTES:
- case NVME_SC_INVALID_OPCODE:
- case NVME_SC_INVALID_FIELD:
- case NVME_SC_INVALID_NS:
- return -EINVAL;
- default:
- return PR_STS_IOERR;
- }
-}
-
-static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
- u64 key, u64 sa_key, u8 op)
-{
- struct nvme_command c = { };
- u8 data[16] = { 0, };
- int ret;
-
- put_unaligned_le64(key, &data[0]);
- put_unaligned_le64(sa_key, &data[8]);
-
- c.common.opcode = op;
- c.common.cdw10 = cpu_to_le32(cdw10);
-
- if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
- bdev->bd_disk->fops == &nvme_ns_head_ops)
- ret = nvme_send_ns_head_pr_command(bdev, &c, data);
- else
- ret = nvme_send_ns_pr_command(bdev->bd_disk->private_data, &c,
- data);
- if (ret < 0)
- return ret;
-
- return nvme_sc_to_pr_err(ret);
-}
-
-static int nvme_pr_register(struct block_device *bdev, u64 old,
- u64 new, unsigned flags)
-{
- u32 cdw10;
-
- if (flags & ~PR_FL_IGNORE_KEY)
- return -EOPNOTSUPP;
-
- cdw10 = old ? 2 : 0;
- cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
- cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_reserve(struct block_device *bdev, u64 key,
- enum pr_type type, unsigned flags)
-{
- u32 cdw10;
-
- if (flags & ~PR_FL_IGNORE_KEY)
- return -EOPNOTSUPP;
-
- cdw10 = nvme_pr_type(type) << 8;
- cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
- enum pr_type type, bool abort)
-{
- u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
-
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_clear(struct block_device *bdev, u64 key)
-{
- u32 cdw10 = 1 | (key ? 0 : 1 << 3);
-
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
-}
-
-static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
-{
- u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 0 : 1 << 3);
-
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
-}
-
-const struct pr_ops nvme_pr_ops = {
- .pr_register = nvme_pr_register,
- .pr_reserve = nvme_pr_reserve,
- .pr_release = nvme_pr_release,
- .pr_preempt = nvme_pr_preempt,
- .pr_clear = nvme_pr_clear,
-};
-
#ifdef CONFIG_BLK_SED_OPAL
static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
bool send)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 3ae2f1bf42d3..f35647c470af 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -19,6 +19,8 @@
#include <trace/events/block.h>
+extern const struct pr_ops nvme_pr_ops;
+
extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
new file mode 100644
index 000000000000..391b1465ebfd
--- /dev/null
+++ b/drivers/nvme/host/pr.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2015 Intel Corporation
+ * Keith Busch <kbusch@kernel.org>
+ */
+#include <linux/blkdev.h>
+#include <linux/pr.h>
+#include <asm/unaligned.h>
+
+#include "nvme.h"
+
+static enum nvme_pr_type nvme_pr_type_from_blk(enum pr_type type)
+{
+ switch (type) {
+ case PR_WRITE_EXCLUSIVE:
+ return NVME_PR_WRITE_EXCLUSIVE;
+ case PR_EXCLUSIVE_ACCESS:
+ return NVME_PR_EXCLUSIVE_ACCESS;
+ case PR_WRITE_EXCLUSIVE_REG_ONLY:
+ return NVME_PR_WRITE_EXCLUSIVE_REG_ONLY;
+ case PR_EXCLUSIVE_ACCESS_REG_ONLY:
+ return NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY;
+ case PR_WRITE_EXCLUSIVE_ALL_REGS:
+ return NVME_PR_WRITE_EXCLUSIVE_ALL_REGS;
+ case PR_EXCLUSIVE_ACCESS_ALL_REGS:
+ return NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS;
+ }
+
+ return 0;
+}
+
+static enum pr_type block_pr_type_from_nvme(enum nvme_pr_type type)
+{
+ switch (type) {
+ case NVME_PR_WRITE_EXCLUSIVE:
+ return PR_WRITE_EXCLUSIVE;
+ case NVME_PR_EXCLUSIVE_ACCESS:
+ return PR_EXCLUSIVE_ACCESS;
+ case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY:
+ return PR_WRITE_EXCLUSIVE_REG_ONLY;
+ case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY:
+ return PR_EXCLUSIVE_ACCESS_REG_ONLY;
+ case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS:
+ return PR_WRITE_EXCLUSIVE_ALL_REGS;
+ case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS:
+ return PR_EXCLUSIVE_ACCESS_ALL_REGS;
+ }
+
+ return 0;
+}
+
+static int nvme_send_ns_head_pr_command(struct block_device *bdev,
+ struct nvme_command *c, void *data, unsigned int data_len)
+{
+ struct nvme_ns_head *head = bdev->bd_disk->private_data;
+ int srcu_idx = srcu_read_lock(&head->srcu);
+ struct nvme_ns *ns = nvme_find_path(head);
+ int ret = -EWOULDBLOCK;
+
+ if (ns) {
+ c->common.nsid = cpu_to_le32(ns->head->ns_id);
+ ret = nvme_submit_sync_cmd(ns->queue, c, data, data_len);
+ }
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return ret;
+}
+
+static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
+ void *data, unsigned int data_len)
+{
+ c->common.nsid = cpu_to_le32(ns->head->ns_id);
+ return nvme_submit_sync_cmd(ns->queue, c, data, data_len);
+}
+
+static int nvme_sc_to_pr_err(int nvme_sc)
+{
+ if (nvme_is_path_error(nvme_sc))
+ return PR_STS_PATH_FAILED;
+
+ switch (nvme_sc) {
+ case NVME_SC_SUCCESS:
+ return PR_STS_SUCCESS;
+ case NVME_SC_RESERVATION_CONFLICT:
+ return PR_STS_RESERVATION_CONFLICT;
+ case NVME_SC_ONCS_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ case NVME_SC_BAD_ATTRIBUTES:
+ case NVME_SC_INVALID_OPCODE:
+ case NVME_SC_INVALID_FIELD:
+ case NVME_SC_INVALID_NS:
+ return -EINVAL;
+ default:
+ return PR_STS_IOERR;
+ }
+}
+
+static int nvme_send_pr_command(struct block_device *bdev,
+ struct nvme_command *c, void *data, unsigned int data_len)
+{
+ if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
+ bdev->bd_disk->fops == &nvme_ns_head_ops)
+ return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
+
+ return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data,
+ data_len);
+}
+
+static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
+ u64 key, u64 sa_key, u8 op)
+{
+ struct nvme_command c = { };
+ u8 data[16] = { 0, };
+ int ret;
+
+ put_unaligned_le64(key, &data[0]);
+ put_unaligned_le64(sa_key, &data[8]);
+
+ c.common.opcode = op;
+ c.common.cdw10 = cpu_to_le32(cdw10);
+
+ ret = nvme_send_pr_command(bdev, &c, data, sizeof(data));
+ if (ret < 0)
+ return ret;
+
+ return nvme_sc_to_pr_err(ret);
+}
+
+static int nvme_pr_register(struct block_device *bdev, u64 old,
+ u64 new, unsigned flags)
+{
+ u32 cdw10;
+
+ if (flags & ~PR_FL_IGNORE_KEY)
+ return -EOPNOTSUPP;
+
+ cdw10 = old ? 2 : 0;
+ cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
+ cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_reserve(struct block_device *bdev, u64 key,
+ enum pr_type type, unsigned flags)
+{
+ u32 cdw10;
+
+ if (flags & ~PR_FL_IGNORE_KEY)
+ return -EOPNOTSUPP;
+
+ cdw10 = nvme_pr_type_from_blk(type) << 8;
+ cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
+ enum pr_type type, bool abort)
+{
+ u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (abort ? 2 : 1);
+
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_clear(struct block_device *bdev, u64 key)
+{
+ u32 cdw10 = 1 | (key ? 0 : 1 << 3);
+
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+}
+
+static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+{
+ u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (key ? 0 : 1 << 3);
+
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+}
+
+static int nvme_pr_resv_report(struct block_device *bdev, void *data,
+ u32 data_len, bool *eds)
+{
+ struct nvme_command c = { };
+ int ret;
+
+ c.common.opcode = nvme_cmd_resv_report;
+ c.common.cdw10 = cpu_to_le32(nvme_bytes_to_numd(data_len));
+ c.common.cdw11 = cpu_to_le32(NVME_EXTENDED_DATA_STRUCT);
+ *eds = true;
+
+retry:
+ ret = nvme_send_pr_command(bdev, &c, data, data_len);
+ if (ret == NVME_SC_HOST_ID_INCONSIST &&
+ c.common.cdw11 == cpu_to_le32(NVME_EXTENDED_DATA_STRUCT)) {
+ c.common.cdw11 = 0;
+ *eds = false;
+ goto retry;
+ }
+
+ if (ret < 0)
+ return ret;
+
+ return nvme_sc_to_pr_err(ret);
+}
+
+static int nvme_pr_read_keys(struct block_device *bdev,
+ struct pr_keys *keys_info)
+{
+ u32 rse_len, num_keys = keys_info->num_keys;
+ struct nvme_reservation_status_ext *rse;
+ int ret, i;
+ bool eds;
+
+ /*
+ * Assume we are using 128-bit host IDs and allocate a buffer large
+ * enough to get enough keys to fill the return keys buffer.
+ */
+ rse_len = struct_size(rse, regctl_eds, num_keys);
+ rse = kzalloc(rse_len, GFP_KERNEL);
+ if (!rse)
+ return -ENOMEM;
+
+ ret = nvme_pr_resv_report(bdev, rse, rse_len, &eds);
+ if (ret)
+ goto free_rse;
+
+ keys_info->generation = le32_to_cpu(rse->gen);
+ keys_info->num_keys = get_unaligned_le16(&rse->regctl);
+
+ num_keys = min(num_keys, keys_info->num_keys);
+ for (i = 0; i < num_keys; i++) {
+ if (eds) {
+ keys_info->keys[i] =
+ le64_to_cpu(rse->regctl_eds[i].rkey);
+ } else {
+ struct nvme_reservation_status *rs;
+
+ rs = (struct nvme_reservation_status *)rse;
+ keys_info->keys[i] = le64_to_cpu(rs->regctl_ds[i].rkey);
+ }
+ }
+
+free_rse:
+ kfree(rse);
+ return ret;
+}
+
+static int nvme_pr_read_reservation(struct block_device *bdev,
+ struct pr_held_reservation *resv)
+{
+ struct nvme_reservation_status_ext tmp_rse, *rse;
+ int ret, i, num_regs;
+ u32 rse_len;
+ bool eds;
+
+get_num_regs:
+ /*
+ * Get the number of registrations so we know how big to allocate
+ * the response buffer.
+ */
+ ret = nvme_pr_resv_report(bdev, &tmp_rse, sizeof(tmp_rse), &eds);
+ if (ret)
+ return ret;
+
+ num_regs = get_unaligned_le16(&tmp_rse.regctl);
+ if (!num_regs) {
+ resv->generation = le32_to_cpu(tmp_rse.gen);
+ return 0;
+ }
+
+ rse_len = struct_size(rse, regctl_eds, num_regs);
+ rse = kzalloc(rse_len, GFP_KERNEL);
+ if (!rse)
+ return -ENOMEM;
+
+ ret = nvme_pr_resv_report(bdev, rse, rse_len, &eds);
+ if (ret)
+ goto free_rse;
+
+ if (num_regs != get_unaligned_le16(&rse->regctl)) {
+ kfree(rse);
+ goto get_num_regs;
+ }
+
+ resv->generation = le32_to_cpu(rse->gen);
+ resv->type = block_pr_type_from_nvme(rse->rtype);
+
+ for (i = 0; i < num_regs; i++) {
+ if (eds) {
+ if (rse->regctl_eds[i].rcsts) {
+ resv->key = le64_to_cpu(rse->regctl_eds[i].rkey);
+ break;
+ }
+ } else {
+ struct nvme_reservation_status *rs;
+
+ rs = (struct nvme_reservation_status *)rse;
+ if (rs->regctl_ds[i].rcsts) {
+ resv->key = le64_to_cpu(rs->regctl_ds[i].rkey);
+ break;
+ }
+ }
+ }
+
+free_rse:
+ kfree(rse);
+ return ret;
+}
+
+const struct pr_ops nvme_pr_ops = {
+ .pr_register = nvme_pr_register,
+ .pr_reserve = nvme_pr_reserve,
+ .pr_release = nvme_pr_release,
+ .pr_preempt = nvme_pr_preempt,
+ .pr_clear = nvme_pr_clear,
+ .pr_read_keys = nvme_pr_read_keys,
+ .pr_read_reservation = nvme_pr_read_reservation,
+};
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 260b3554d821..3e7dd6f91832 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -997,25 +997,28 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
u32 h2cdata_left = req->h2cdata_left;
while (true) {
+ struct bio_vec bvec;
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES,
+ };
struct page *page = nvme_tcp_req_cur_page(req);
size_t offset = nvme_tcp_req_cur_offset(req);
size_t len = nvme_tcp_req_cur_length(req);
bool last = nvme_tcp_pdu_last_send(req, len);
int req_data_sent = req->data_sent;
- int ret, flags = MSG_DONTWAIT;
+ int ret;
if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
- flags |= MSG_EOR;
+ msg.msg_flags |= MSG_EOR;
else
- flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ msg.msg_flags |= MSG_MORE;
- if (sendpage_ok(page)) {
- ret = kernel_sendpage(queue->sock, page, offset, len,
- flags);
- } else {
- ret = sock_no_sendpage(queue->sock, page, offset, len,
- flags);
- }
+ if (!sendpage_ok(page))
+ msg.msg_flags &= ~MSG_SPLICE_PAGES,
+
+ bvec_set_page(&bvec, page, len, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
+ ret = sock_sendmsg(queue->sock, &msg);
if (ret <= 0)
return ret;
@@ -1054,22 +1057,24 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, };
bool inline_data = nvme_tcp_has_inline_data(req);
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) + hdgst - req->offset;
- int flags = MSG_DONTWAIT;
int ret;
if (inline_data || nvme_tcp_queue_more(queue))
- flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ msg.msg_flags |= MSG_MORE;
else
- flags |= MSG_EOR;
+ msg.msg_flags |= MSG_EOR;
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
- ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
- offset_in_page(pdu) + req->offset, len, flags);
+ bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
+ ret = sock_sendmsg(queue->sock, &msg);
if (unlikely(ret <= 0))
return ret;
@@ -1093,6 +1098,8 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req);
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_MORE, };
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) - req->offset + hdgst;
int ret;
@@ -1101,13 +1108,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
if (!req->h2cdata_left)
- ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
- offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
- else
- ret = sock_no_sendpage(queue->sock, virt_to_page(pdu),
- offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE);
+ msg.msg_flags |= MSG_SPLICE_PAGES;
+
+ bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
+ ret = sock_sendmsg(queue->sock, &msg);
if (unlikely(ret <= 0))
return ret;
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index ed98df72c76b..868aa4de2e4c 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -576,13 +576,17 @@ static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd)
static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
{
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT | MSG_MORE | MSG_SPLICE_PAGES,
+ };
+ struct bio_vec bvec;
u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
int left = sizeof(*cmd->data_pdu) - cmd->offset + hdgst;
int ret;
- ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
- offset_in_page(cmd->data_pdu) + cmd->offset,
- left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+ bvec_set_virt(&bvec, (void *)cmd->data_pdu + cmd->offset, left);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left);
+ ret = sock_sendmsg(cmd->queue->sock, &msg);
if (ret <= 0)
return ret;
@@ -603,17 +607,21 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
int ret;
while (cmd->cur_sg) {
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES,
+ };
struct page *page = sg_page(cmd->cur_sg);
+ struct bio_vec bvec;
u32 left = cmd->cur_sg->length - cmd->offset;
- int flags = MSG_DONTWAIT;
if ((!last_in_batch && cmd->queue->send_list_len) ||
cmd->wbytes_done + left < cmd->req.transfer_len ||
queue->data_digest || !queue->nvme_sq.sqhd_disabled)
- flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ msg.msg_flags |= MSG_MORE;
- ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
- left, flags);
+ bvec_set_page(&bvec, page, left, cmd->offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left);
+ ret = sock_sendmsg(cmd->queue->sock, &msg);
if (ret <= 0)
return ret;
@@ -649,18 +657,20 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
bool last_in_batch)
{
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, };
+ struct bio_vec bvec;
u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
int left = sizeof(*cmd->rsp_pdu) - cmd->offset + hdgst;
- int flags = MSG_DONTWAIT;
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ msg.msg_flags |= MSG_MORE;
else
- flags |= MSG_EOR;
+ msg.msg_flags |= MSG_EOR;
- ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->rsp_pdu),
- offset_in_page(cmd->rsp_pdu) + cmd->offset, left, flags);
+ bvec_set_virt(&bvec, (void *)cmd->rsp_pdu + cmd->offset, left);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left);
+ ret = sock_sendmsg(cmd->queue->sock, &msg);
if (ret <= 0)
return ret;
cmd->offset += ret;
@@ -677,18 +687,20 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, };
+ struct bio_vec bvec;
u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
int left = sizeof(*cmd->r2t_pdu) - cmd->offset + hdgst;
- int flags = MSG_DONTWAIT;
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ msg.msg_flags |= MSG_MORE;
else
- flags |= MSG_EOR;
+ msg.msg_flags |= MSG_EOR;
- ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->r2t_pdu),
- offset_in_page(cmd->r2t_pdu) + cmd->offset, left, flags);
+ bvec_set_virt(&bvec, (void *)cmd->r2t_pdu + cmd->offset, left);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left);
+ ret = sock_sendmsg(cmd->queue->sock, &msg);
if (ret <= 0)
return ret;
cmd->offset += ret;