summaryrefslogtreecommitdiff
path: root/drivers/vhost/net.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost/net.c')
-rw-r--r--drivers/vhost/net.c596
1 files changed, 325 insertions, 271 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index bca86bf7189f..7f886d3dba7d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2009 Red Hat, Inc.
* Author: Michael S. Tsirkin <mst@redhat.com>
*
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
* virtio-net server in host kernel.
*/
@@ -36,7 +35,7 @@
#include "vhost.h"
-static int experimental_zcopytx = 1;
+static int experimental_zcopytx = 0;
module_param(experimental_zcopytx, int, 0444);
MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
" 1 -Enable; 0 - Disable");
@@ -70,11 +69,15 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
#define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN)
-enum {
- VHOST_NET_FEATURES = VHOST_FEATURES |
- (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
- (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_F_IOMMU_PLATFORM)
+static const int vhost_net_bits[] = {
+ VHOST_FEATURES,
+ VHOST_NET_F_VIRTIO_NET_HDR,
+ VIRTIO_NET_F_MRG_RXBUF,
+ VIRTIO_F_ACCESS_PLATFORM,
+ VIRTIO_F_RING_RESET,
+ VIRTIO_F_IN_ORDER,
+ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO,
+ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO
};
enum {
@@ -96,6 +99,7 @@ struct vhost_net_ubuf_ref {
atomic_t refcount;
wait_queue_head_t wait;
struct vhost_virtqueue *vq;
+ struct rcu_head rcu;
};
#define VHOST_NET_BATCH 64
@@ -119,7 +123,7 @@ struct vhost_net_virtqueue {
/* Number of XDP frames batched */
int batched_xdp;
/* an array of userspace buffers info */
- struct ubuf_info *ubuf_info;
+ struct ubuf_info_msgzc *ubuf_info;
/* Reference counting for outstanding ubufs.
* Protected by vq mutex. Writers must also take device mutex. */
struct vhost_net_ubuf_ref *ubufs;
@@ -141,10 +145,8 @@ struct vhost_net {
unsigned tx_zcopy_err;
/* Flush in progress. Protected by tx vq lock. */
bool tx_flush;
- /* Private page frag */
- struct page_frag page_frag;
- /* Refcount bias of page frag */
- int refcnt_bias;
+ /* Private page frag cache */
+ struct page_frag_cache pf_cache;
};
static unsigned vhost_net_zcopy_mask __read_mostly;
@@ -249,9 +251,13 @@ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs)
{
- int r = atomic_sub_return(1, &ubufs->refcount);
+ int r;
+
+ rcu_read_lock();
+ r = atomic_sub_return(1, &ubufs->refcount);
if (unlikely(!r))
wake_up(&ubufs->wait);
+ rcu_read_unlock();
return r;
}
@@ -264,7 +270,7 @@ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs)
static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs)
{
vhost_net_ubuf_put_and_wait(ubufs);
- kfree(ubufs);
+ kfree_rcu(ubufs, rcu);
}
static void vhost_net_clear_ubuf_info(struct vhost_net *n)
@@ -376,14 +382,17 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
while (j) {
add = min(UIO_MAXIOV - nvq->done_idx, j);
vhost_add_used_and_signal_n(vq->dev, vq,
- &vq->heads[nvq->done_idx], add);
+ &vq->heads[nvq->done_idx],
+ NULL, add);
nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
j -= add;
}
}
-static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
+static void vhost_zerocopy_complete(struct sk_buff *skb,
+ struct ubuf_info *ubuf_base, bool success)
{
+ struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
struct vhost_virtqueue *vq = ubufs->vq;
int cnt;
@@ -408,6 +417,10 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_unlock_bh();
}
+static const struct ubuf_info_ops vhost_ubuf_ops = {
+ .complete = vhost_zerocopy_complete,
+};
+
static inline unsigned long busy_clock(void)
{
return local_clock() >> 10;
@@ -425,7 +438,7 @@ static void vhost_net_disable_vq(struct vhost_net *n,
struct vhost_net_virtqueue *nvq =
container_of(vq, struct vhost_net_virtqueue, vq);
struct vhost_poll *poll = n->poll + (nvq - n->vqs);
- if (!vq->private_data)
+ if (!vhost_vq_get_backend(vq))
return;
vhost_poll_stop(poll);
}
@@ -438,14 +451,15 @@ static int vhost_net_enable_vq(struct vhost_net *n,
struct vhost_poll *poll = n->poll + (nvq - n->vqs);
struct socket *sock;
- sock = vq->private_data;
+ sock = vhost_vq_get_backend(vq);
if (!sock)
return 0;
return vhost_poll_start(poll, sock->file);
}
-static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
+static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
+ unsigned int count)
{
struct vhost_virtqueue *vq = &nvq->vq;
struct vhost_dev *dev = vq->dev;
@@ -453,7 +467,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
if (!nvq->done_idx)
return;
- vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+ vhost_add_used_and_signal_n(dev, vq, vq->heads,
+ vq->nheads, count);
nvq->done_idx = 0;
}
@@ -462,25 +477,42 @@ static void vhost_tx_batch(struct vhost_net *net,
struct socket *sock,
struct msghdr *msghdr)
{
+ struct vhost_virtqueue *vq = &nvq->vq;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
struct tun_msg_ctl ctl = {
.type = TUN_MSG_PTR,
.num = nvq->batched_xdp,
.ptr = nvq->xdp,
};
- int err;
+ int i, err;
+
+ if (in_order) {
+ vq->heads[0].len = 0;
+ vq->nheads[0] = nvq->done_idx;
+ }
if (nvq->batched_xdp == 0)
goto signal_used;
msghdr->msg_control = &ctl;
+ msghdr->msg_controllen = sizeof(ctl);
err = sock->ops->sendmsg(sock, msghdr, 0);
if (unlikely(err < 0)) {
vq_err(&nvq->vq, "Fail to batch sending packets\n");
+
+ /* free pages owned by XDP; since this is an unlikely error path,
+ * keep it simple and avoid more complex bulk update for the
+ * used pages
+ */
+ for (i = 0; i < nvq->batched_xdp; ++i)
+ put_page(virt_to_head_page(nvq->xdp[i].data));
+ nvq->batched_xdp = 0;
+ nvq->done_idx = 0;
return;
}
signal_used:
- vhost_net_signal_used(nvq);
+ vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
nvq->batched_xdp = 0;
}
@@ -525,7 +557,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
return;
vhost_disable_notify(&net->dev, vq);
- sock = rvq->private_data;
+ sock = vhost_vq_get_backend(rvq);
busyloop_timeout = poll_rx ? rvq->busyloop_timeout:
tvq->busyloop_timeout;
@@ -534,7 +566,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
endtime = busy_clock() + busyloop_timeout;
while (vhost_can_busy_poll(endtime)) {
- if (vhost_has_work(&net->dev)) {
+ if (vhost_vq_has_work(vq)) {
*busyloop_intr = true;
break;
}
@@ -560,24 +592,27 @@ static void vhost_net_busy_poll(struct vhost_net *net,
static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
struct vhost_net_virtqueue *tnvq,
unsigned int *out_num, unsigned int *in_num,
- struct msghdr *msghdr, bool *busyloop_intr)
+ struct msghdr *msghdr, bool *busyloop_intr,
+ unsigned int *ndesc)
{
struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_virtqueue *rvq = &rnvq->vq;
struct vhost_virtqueue *tvq = &tnvq->vq;
- int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
- out_num, in_num, NULL, NULL);
+ int r = vhost_get_vq_desc_n(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+ out_num, in_num, NULL, NULL, ndesc);
if (r == tvq->num && tvq->busyloop_timeout) {
/* Flush batched packets first */
- if (!vhost_sock_zcopy(tvq->private_data))
- vhost_tx_batch(net, tnvq, tvq->private_data, msghdr);
+ if (!vhost_sock_zcopy(vhost_vq_get_backend(tvq)))
+ vhost_tx_batch(net, tnvq,
+ vhost_vq_get_backend(tvq),
+ msghdr);
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false);
- r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
- out_num, in_num, NULL, NULL);
+ r = vhost_get_vq_desc_n(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+ out_num, in_num, NULL, NULL, ndesc);
}
return r;
@@ -598,28 +633,24 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
/* Skip header. TODO: support TSO. */
size_t len = iov_length(vq->iov, out);
- iov_iter_init(iter, WRITE, vq->iov, out, len);
+ iov_iter_init(iter, ITER_SOURCE, vq->iov, out, len);
iov_iter_advance(iter, hdr_size);
return iov_iter_count(iter);
}
-static bool vhost_exceeds_weight(int pkts, int total_len)
-{
- return total_len >= VHOST_NET_WEIGHT ||
- pkts >= VHOST_NET_PKT_WEIGHT;
-}
-
static int get_tx_bufs(struct vhost_net *net,
struct vhost_net_virtqueue *nvq,
struct msghdr *msg,
unsigned int *out, unsigned int *in,
- size_t *len, bool *busyloop_intr)
+ size_t *len, bool *busyloop_intr,
+ unsigned int *ndesc)
{
struct vhost_virtqueue *vq = &nvq->vq;
int ret;
- ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr);
+ ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg,
+ busyloop_intr, ndesc);
if (ret < 0 || ret == vq->num)
return ret;
@@ -647,43 +678,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len)
!vhost_vq_avail_empty(vq->dev, vq);
}
-#define SKB_FRAG_PAGE_ORDER get_order(32768)
-
-static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz,
- struct page_frag *pfrag, gfp_t gfp)
-{
- if (pfrag->page) {
- if (pfrag->offset + sz <= pfrag->size)
- return true;
- __page_frag_cache_drain(pfrag->page, net->refcnt_bias);
- }
-
- pfrag->offset = 0;
- net->refcnt_bias = 0;
- if (SKB_FRAG_PAGE_ORDER) {
- /* Avoid direct reclaim but allow kswapd to wake */
- pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
- __GFP_COMP | __GFP_NOWARN |
- __GFP_NORETRY,
- SKB_FRAG_PAGE_ORDER);
- if (likely(pfrag->page)) {
- pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
- goto done;
- }
- }
- pfrag->page = alloc_page(gfp);
- if (likely(pfrag->page)) {
- pfrag->size = PAGE_SIZE;
- goto done;
- }
- return false;
-
-done:
- net->refcnt_bias = USHRT_MAX;
- page_ref_add(pfrag->page, USHRT_MAX - 1);
- return true;
-}
-
#define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
@@ -692,11 +686,9 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
struct vhost_virtqueue *vq = &nvq->vq;
struct vhost_net *net = container_of(vq->dev, struct vhost_net,
dev);
- struct socket *sock = vq->private_data;
- struct page_frag *alloc_frag = &net->page_frag;
+ struct socket *sock = vhost_vq_get_backend(vq);
struct virtio_net_hdr *gso;
struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp];
- struct tun_xdp_hdr *hdr;
size_t len = iov_iter_count(from);
int headroom = vhost_sock_xdp(sock) ? XDP_PACKET_HEADROOM : 0;
int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -704,6 +696,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
int sock_hlen = nvq->sock_hlen;
void *buf;
int copied;
+ int ret;
if (unlikely(len < nvq->sock_hlen))
return -EFAULT;
@@ -713,21 +706,21 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
return -ENOSPC;
buflen += SKB_DATA_ALIGN(len + pad);
- alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
- if (unlikely(!vhost_net_page_frag_refill(net, buflen,
- alloc_frag, GFP_KERNEL)))
+ buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL,
+ SMP_CACHE_BYTES);
+ if (unlikely(!buf))
return -ENOMEM;
- buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
- copied = copy_page_from_iter(alloc_frag->page,
- alloc_frag->offset +
- offsetof(struct tun_xdp_hdr, gso),
- sock_hlen, from);
- if (copied != sock_hlen)
- return -EFAULT;
+ copied = copy_from_iter(buf + pad - sock_hlen, len, from);
+ if (copied != len) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ gso = buf + pad - sock_hlen;
- hdr = buf;
- gso = &hdr->gso;
+ if (!sock_hlen)
+ memset(buf, 0, pad);
if ((gso->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
vhost16_to_cpu(vq, gso->csum_start) +
@@ -737,28 +730,25 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
vhost16_to_cpu(vq, gso->csum_start) +
vhost16_to_cpu(vq, gso->csum_offset) + 2);
- if (vhost16_to_cpu(vq, gso->hdr_len) > len)
- return -EINVAL;
+ if (vhost16_to_cpu(vq, gso->hdr_len) > len) {
+ ret = -EINVAL;
+ goto err;
+ }
}
- len -= sock_hlen;
- copied = copy_page_from_iter(alloc_frag->page,
- alloc_frag->offset + pad,
- len, from);
- if (copied != len)
- return -EFAULT;
-
- xdp->data_hard_start = buf;
- xdp->data = buf + pad;
- xdp->data_end = xdp->data + len;
- hdr->buflen = buflen;
+ /* pad contains sock_hlen */
+ memcpy(buf, buf + pad - sock_hlen, sock_hlen);
- --net->refcnt_bias;
- alloc_frag->offset += buflen;
+ xdp_init_buff(xdp, buflen, NULL);
+ xdp_prepare_buff(xdp, buf, pad, len - sock_hlen, true);
++nvq->batched_xdp;
return 0;
+
+err:
+ page_frag_free(buf);
+ return ret;
}
static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
@@ -778,20 +768,27 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
int err;
int sent_pkts = 0;
bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+ unsigned int ndesc = 0;
- for (;;) {
+ do {
bool busyloop_intr = false;
if (nvq->done_idx == VHOST_NET_BATCH)
vhost_tx_batch(net, nvq, sock, &msg);
head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
- &busyloop_intr);
+ &busyloop_intr, &ndesc);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
/* Nothing new? Wait for eventfd to tell us they refilled. */
if (head == vq->num) {
+ /* Flush batched packets to handle pending RX
+ * work (if busyloop_intr is set) and to avoid
+ * unnecessary virtqueue kicks.
+ */
+ vhost_tx_batch(net, nvq, sock, &msg);
if (unlikely(busyloop_intr)) {
vhost_poll_queue(&vq->poll);
} else if (unlikely(vhost_enable_notify(&net->dev,
@@ -813,16 +810,18 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
goto done;
} else if (unlikely(err != -ENOSPC)) {
vhost_tx_batch(net, nvq, sock, &msg);
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_vq_desc(vq, 1, ndesc);
vhost_net_enable_vq(net, vq);
break;
}
- /* We can't build XDP buff, go for single
- * packet path but let's flush batched
- * packets.
- */
- vhost_tx_batch(net, nvq, sock, &msg);
+ if (nvq->batched_xdp) {
+ /* We can't build XDP buff, go for single
+ * packet path but let's flush batched
+ * packets.
+ */
+ vhost_tx_batch(net, nvq, sock, &msg);
+ }
msg.msg_control = NULL;
} else {
if (tx_can_batch(vq, total_len))
@@ -831,25 +830,26 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
msg.msg_flags &= ~MSG_MORE;
}
- /* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(sock, &msg, len);
if (unlikely(err < 0)) {
- vhost_discard_vq_desc(vq, 1);
- vhost_net_enable_vq(net, vq);
- break;
- }
- if (err != len)
+ if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) {
+ vhost_discard_vq_desc(vq, 1, ndesc);
+ vhost_net_enable_vq(net, vq);
+ break;
+ }
+ pr_debug("Fail to send packet: err %d", err);
+ } else if (unlikely(err != len))
pr_debug("Truncated TX packet: len %d != %zd\n",
err, len);
done:
- vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
- vq->heads[nvq->done_idx].len = 0;
- ++nvq->done_idx;
- if (vhost_exceeds_weight(++sent_pkts, total_len)) {
- vhost_poll_queue(&vq->poll);
- break;
+ if (in_order) {
+ vq->heads[0].id = cpu_to_vhost32(vq, head);
+ } else {
+ vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
+ vq->heads[nvq->done_idx].len = 0;
}
- }
+ ++nvq->done_idx;
+ } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
vhost_tx_batch(net, nvq, sock, &msg);
}
@@ -870,11 +870,13 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
struct tun_msg_ctl ctl;
size_t len, total_len = 0;
int err;
- struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
+ struct vhost_net_ubuf_ref *ubufs;
+ struct ubuf_info_msgzc *ubuf;
+ unsigned int ndesc = 0;
bool zcopy_used;
int sent_pkts = 0;
- for (;;) {
+ do {
bool busyloop_intr;
/* Release DMAs done buffers first */
@@ -882,7 +884,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
busyloop_intr = false;
head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
- &busyloop_intr);
+ &busyloop_intr, &ndesc);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -903,18 +905,17 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
/* use msg_control to pass vhost zerocopy ubuf info to skb */
if (zcopy_used) {
- struct ubuf_info *ubuf;
ubuf = nvq->ubuf_info + nvq->upend_idx;
-
vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
- ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx;
- refcount_set(&ubuf->refcnt, 1);
+ ubuf->ubuf.ops = &vhost_ubuf_ops;
+ ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG;
+ refcount_set(&ubuf->ubuf.refcnt, 1);
msg.msg_control = &ctl;
ctl.type = TUN_MSG_UBUF;
- ctl.ptr = ubuf;
+ ctl.ptr = &ubuf->ubuf;
msg.msg_controllen = sizeof(ctl);
ubufs = nvq->ubufs;
atomic_inc(&ubufs->refcount);
@@ -931,19 +932,26 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
msg.msg_flags &= ~MSG_MORE;
}
- /* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(sock, &msg, len);
if (unlikely(err < 0)) {
+ bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS;
+
if (zcopy_used) {
- vhost_net_ubuf_put(ubufs);
- nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
- % UIO_MAXIOV;
+ if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS)
+ vhost_net_ubuf_put(ubufs);
+ if (retry)
+ nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
+ % UIO_MAXIOV;
+ else
+ vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
}
- vhost_discard_vq_desc(vq, 1);
- vhost_net_enable_vq(net, vq);
- break;
- }
- if (err != len)
+ if (retry) {
+ vhost_discard_vq_desc(vq, 1, ndesc);
+ vhost_net_enable_vq(net, vq);
+ break;
+ }
+ pr_debug("Fail to send packet: err %d", err);
+ } else if (unlikely(err != len))
pr_debug("Truncated TX packet: "
" len %d != %zd\n", err, len);
if (!zcopy_used)
@@ -951,11 +959,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
else
vhost_zerocopy_signal_used(net, vq);
vhost_net_tx_packet(net);
- if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
- vhost_poll_queue(&vq->poll);
- break;
- }
- }
+ } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
}
/* Expects to be always run from workqueue - which acts as
@@ -967,11 +971,11 @@ static void handle_tx(struct vhost_net *net)
struct socket *sock;
mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
- sock = vq->private_data;
+ sock = vhost_vq_get_backend(vq);
if (!sock)
goto out;
- if (!vq_iotlb_prefetch(vq))
+ if (!vq_meta_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq);
@@ -1008,7 +1012,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
}
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
- bool *busyloop_intr)
+ bool *busyloop_intr, unsigned int *count)
{
struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -1018,7 +1022,8 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
if (!len && rvq->busyloop_timeout) {
/* Flush batched heads first */
- vhost_net_signal_used(rnvq);
+ vhost_net_signal_used(rnvq, *count);
+ *count = 0;
/* Both tx vq and rx socket were polled here */
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
@@ -1030,7 +1035,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
/* This is a multi-buffer version of vhost_get_desc, that works if
* vq has read descriptors only.
- * @vq - the relevant virtqueue
+ * @nvq - the relevant vhost_net virtqueue
* @datalen - data length we'll be reading
* @iovcount - returned count of io vectors we fill
* @log - vhost log
@@ -1038,15 +1043,19 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
* @quota - headcount quota, 1 for big buffer
* returns number of buffer heads allocated, negative on error
*/
-static int get_rx_bufs(struct vhost_virtqueue *vq,
+static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
struct vring_used_elem *heads,
+ u16 *nheads,
int datalen,
unsigned *iovcount,
struct vhost_log *log,
unsigned *log_num,
- unsigned int quota)
+ unsigned int quota,
+ unsigned int *ndesc)
{
- unsigned int out, in;
+ struct vhost_virtqueue *vq = &nvq->vq;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+ unsigned int out, in, desc_num, n = 0;
int seg = 0;
int headcount = 0;
unsigned d;
@@ -1054,16 +1063,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
/* len is always initialized before use since we are always called with
* datalen > 0.
*/
- u32 uninitialized_var(len);
+ u32 len;
while (datalen > 0 && headcount < quota) {
if (unlikely(seg >= UIO_MAXIOV)) {
r = -ENOBUFS;
goto err;
}
- r = vhost_get_vq_desc(vq, vq->iov + seg,
- ARRAY_SIZE(vq->iov) - seg, &out,
- &in, log, log_num);
+ r = vhost_get_vq_desc_n(vq, vq->iov + seg,
+ ARRAY_SIZE(vq->iov) - seg, &out,
+ &in, log, log_num, &desc_num);
if (unlikely(r < 0))
goto err;
@@ -1082,14 +1091,17 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
nlogs += *log_num;
log += *log_num;
}
- heads[headcount].id = cpu_to_vhost32(vq, d);
len = iov_length(vq->iov + seg, in);
- heads[headcount].len = cpu_to_vhost32(vq, len);
- datalen -= len;
+ if (!in_order) {
+ heads[headcount].id = cpu_to_vhost32(vq, d);
+ heads[headcount].len = cpu_to_vhost32(vq, len);
+ }
++headcount;
+ datalen -= len;
seg += in;
+ n += desc_num;
}
- heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+
*iovcount = seg;
if (unlikely(log))
*log_num = nlogs;
@@ -1099,9 +1111,20 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
r = UIO_MAXIOV + 1;
goto err;
}
+
+ if (!in_order)
+ heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+ else {
+ heads[0].len = cpu_to_vhost32(vq, len + datalen);
+ heads[0].id = cpu_to_vhost32(vq, d);
+ nheads[0] = headcount;
+ }
+
+ *ndesc = n;
+
return headcount;
err:
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_vq_desc(vq, headcount, n);
return r;
}
@@ -1111,7 +1134,9 @@ static void handle_rx(struct vhost_net *net)
{
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_virtqueue *vq = &nvq->vq;
- unsigned uninitialized_var(in), log;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+ unsigned int count = 0;
+ unsigned in, log;
struct vhost_log *vq_log;
struct msghdr msg = {
.msg_name = NULL,
@@ -1130,17 +1155,19 @@ static void handle_rx(struct vhost_net *net)
size_t vhost_hlen, sock_hlen;
size_t vhost_len, sock_len;
bool busyloop_intr = false;
+ bool set_num_buffers;
struct socket *sock;
struct iov_iter fixup;
__virtio16 num_buffers;
int recv_pkts = 0;
+ unsigned int ndesc;
mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
- sock = vq->private_data;
+ sock = vhost_vq_get_backend(vq);
if (!sock)
goto out;
- if (!vq_iotlb_prefetch(vq))
+ if (!vq_meta_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq);
@@ -1152,14 +1179,21 @@ static void handle_rx(struct vhost_net *net)
vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
+ set_num_buffers = mergeable ||
+ vhost_has_feature(vq, VIRTIO_F_VERSION_1);
- while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
- &busyloop_intr))) {
+ do {
+ sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
+ &busyloop_intr, &count);
+ if (!sock_len)
+ break;
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
- headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+ headcount = get_rx_bufs(nvq, vq->heads + count,
+ vq->nheads + count,
vhost_len, &in, vq_log, &log,
- likely(mergeable) ? UIO_MAXIOV : 1);
+ likely(mergeable) ? UIO_MAXIOV : 1,
+ &ndesc);
/* On error, stop handling until the next kick. */
if (unlikely(headcount < 0))
goto out;
@@ -1182,14 +1216,14 @@ static void handle_rx(struct vhost_net *net)
msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
/* On overrun, truncate and discard */
if (unlikely(headcount > UIO_MAXIOV)) {
- iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
+ iov_iter_init(&msg.msg_iter, ITER_DEST, vq->iov, 1, 1);
err = sock->ops->recvmsg(sock, &msg,
1, MSG_DONTWAIT | MSG_TRUNC);
pr_debug("Discarded rx packet: len %zd\n", sock_len);
continue;
}
/* We don't need to be notified again. */
- iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
+ iov_iter_init(&msg.msg_iter, ITER_DEST, vq->iov, in, vhost_len);
fixup = msg.msg_iter;
if (unlikely((vhost_hlen))) {
/* We will supply the header ourselves
@@ -1205,7 +1239,7 @@ static void handle_rx(struct vhost_net *net)
if (unlikely(err != sock_len)) {
pr_debug("Discarded rx packet: "
" len %d, expected %zd\n", err, sock_len);
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_vq_desc(vq, headcount, ndesc);
continue;
}
/* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
@@ -1225,31 +1259,31 @@ static void handle_rx(struct vhost_net *net)
/* TODO: Should check and handle checksum. */
num_buffers = cpu_to_vhost16(vq, headcount);
- if (likely(mergeable) &&
+ if (likely(set_num_buffers) &&
copy_to_iter(&num_buffers, sizeof num_buffers,
&fixup) != sizeof num_buffers) {
vq_err(vq, "Failed num_buffers write");
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_vq_desc(vq, headcount, ndesc);
goto out;
}
nvq->done_idx += headcount;
- if (nvq->done_idx > VHOST_NET_BATCH)
- vhost_net_signal_used(nvq);
+ count += in_order ? 1 : headcount;
+ if (nvq->done_idx > VHOST_NET_BATCH) {
+ vhost_net_signal_used(nvq, count);
+ count = 0;
+ }
if (unlikely(vq_log))
vhost_log_write(vq, vq_log, log, vhost_len,
vq->iov, in);
total_len += vhost_len;
- if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
- vhost_poll_queue(&vq->poll);
- goto out;
- }
- }
+ } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len)));
+
if (unlikely(busyloop_intr))
vhost_poll_queue(&vq->poll);
- else
+ else if (!sock_len)
vhost_net_enable_vq(net, vq);
out:
- vhost_net_signal_used(nvq);
+ vhost_net_signal_used(nvq, count);
mutex_unlock(&vq->mutex);
}
@@ -1337,14 +1371,18 @@ static int vhost_net_open(struct inode *inode, struct file *f)
n->vqs[i].rx_ring = NULL;
vhost_net_buf_init(&n->vqs[i].rxq);
}
- vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
+ vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
+ UIO_MAXIOV + VHOST_NET_BATCH,
+ VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true,
+ NULL);
- vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
- vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev,
+ vqs[VHOST_NET_VQ_TX]);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev,
+ vqs[VHOST_NET_VQ_RX]);
f->private_data = n;
- n->page_frag.page = NULL;
- n->refcnt_bias = 0;
+ page_frag_cache_init(&n->pf_cache);
return 0;
}
@@ -1357,9 +1395,9 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
container_of(vq, struct vhost_net_virtqueue, vq);
mutex_lock(&vq->mutex);
- sock = vq->private_data;
+ sock = vhost_vq_get_backend(vq);
vhost_net_disable_vq(n, vq);
- vq->private_data = NULL;
+ vhost_vq_set_backend(vq, NULL);
vhost_net_buf_unproduce(nvq);
nvq->rx_ring = NULL;
mutex_unlock(&vq->mutex);
@@ -1373,16 +1411,9 @@ static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
*rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq);
}
-static void vhost_net_flush_vq(struct vhost_net *n, int index)
-{
- vhost_poll_flush(n->poll + index);
- vhost_poll_flush(&n->vqs[index].vq.poll);
-}
-
static void vhost_net_flush(struct vhost_net *n)
{
- vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
- vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
+ vhost_dev_flush(&n->dev);
if (n->vqs[VHOST_NET_VQ_TX].ubufs) {
mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
n->tx_flush = true;
@@ -1419,18 +1450,13 @@ static int vhost_net_release(struct inode *inode, struct file *f)
kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue);
kfree(n->vqs[VHOST_NET_VQ_TX].xdp);
kfree(n->dev.vqs);
- if (n->page_frag.page)
- __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias);
+ page_frag_cache_drain(&n->pf_cache);
kvfree(n);
return 0;
}
static struct socket *get_raw_socket(int fd)
{
- struct {
- struct sockaddr_ll sa;
- char buf[MAX_ADDR_LEN];
- } uaddr;
int r;
struct socket *sock = sockfd_lookup(fd, &r);
@@ -1443,11 +1469,7 @@ static struct socket *get_raw_socket(int fd)
goto err;
}
- r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
- if (r < 0)
- goto err;
-
- if (uaddr.sa.sll_family != AF_PACKET) {
+ if (sock->sk->sk_family != AF_PACKET) {
r = -EPFNOSUPPORT;
goto err;
}
@@ -1457,13 +1479,9 @@ err:
return ERR_PTR(r);
}
-static struct ptr_ring *get_tap_ptr_ring(int fd)
+static struct ptr_ring *get_tap_ptr_ring(struct file *file)
{
struct ptr_ring *ring;
- struct file *file = fget(fd);
-
- if (!file)
- return NULL;
ring = tun_get_tx_ring(file);
if (!IS_ERR(ring))
goto out;
@@ -1472,7 +1490,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd)
goto out;
ring = NULL;
out:
- fput(file);
return ring;
}
@@ -1529,6 +1546,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
nvq = &n->vqs[index];
mutex_lock(&vq->mutex);
+ if (fd == -1)
+ vhost_clear_msg(&n->dev);
+
/* Verify that ring has been setup correctly. */
if (!vhost_vq_access_ok(vq)) {
r = -EFAULT;
@@ -1541,7 +1561,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
}
/* start polling new socket */
- oldsock = vq->private_data;
+ oldsock = vhost_vq_get_backend(vq);
if (sock != oldsock) {
ubufs = vhost_net_ubuf_alloc(vq,
sock && vhost_sock_zcopy(sock));
@@ -1551,7 +1571,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
}
vhost_net_disable_vq(n, vq);
- vq->private_data = sock;
+ vhost_vq_set_backend(vq, sock);
vhost_net_buf_unproduce(nvq);
r = vhost_vq_init_access(vq);
if (r)
@@ -1559,8 +1579,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
r = vhost_net_enable_vq(n, vq);
if (r)
goto err_used;
- if (index == VHOST_NET_VQ_RX)
- nvq->rx_ring = get_tap_ptr_ring(fd);
+ if (index == VHOST_NET_VQ_RX) {
+ if (sock)
+ nvq->rx_ring = get_tap_ptr_ring(sock->file);
+ else
+ nvq->rx_ring = NULL;
+ }
oldubufs = nvq->ubufs;
nvq->ubufs = ubufs;
@@ -1580,7 +1604,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
}
if (oldsock) {
- vhost_net_flush_vq(n, index);
+ vhost_dev_flush(&n->dev);
sockfd_put(oldsock);
}
@@ -1588,7 +1612,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
return 0;
err_used:
- vq->private_data = oldsock;
+ vhost_vq_set_backend(vq, oldsock);
vhost_net_enable_vq(n, vq);
if (ubufs)
vhost_net_ubuf_put_wait_and_free(ubufs);
@@ -1607,7 +1631,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
struct socket *tx_sock = NULL;
struct socket *rx_sock = NULL;
long err;
- struct vhost_umem *umem;
+ struct vhost_iotlb *umem;
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
@@ -1632,31 +1656,23 @@ done:
return err;
}
-static int vhost_net_set_backend_features(struct vhost_net *n, u64 features)
+static int vhost_net_set_features(struct vhost_net *n, const u64 *features)
{
+ size_t vhost_hlen, sock_hlen, hdr_len;
int i;
- mutex_lock(&n->dev.mutex);
- for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
- mutex_lock(&n->vqs[i].vq.mutex);
- n->vqs[i].vq.acked_backend_features = features;
- mutex_unlock(&n->vqs[i].vq.mutex);
- }
- mutex_unlock(&n->dev.mutex);
-
- return 0;
-}
+ hdr_len = virtio_features_test_bit(features, VIRTIO_NET_F_MRG_RXBUF) ||
+ virtio_features_test_bit(features, VIRTIO_F_VERSION_1) ?
+ sizeof(struct virtio_net_hdr_mrg_rxbuf) :
+ sizeof(struct virtio_net_hdr);
-static int vhost_net_set_features(struct vhost_net *n, u64 features)
-{
- size_t vhost_hlen, sock_hlen, hdr_len;
- int i;
+ if (virtio_features_test_bit(features,
+ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO) ||
+ virtio_features_test_bit(features,
+ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO))
+ hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel);
- hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_F_VERSION_1))) ?
- sizeof(struct virtio_net_hdr_mrg_rxbuf) :
- sizeof(struct virtio_net_hdr);
- if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
+ if (virtio_features_test_bit(features, VHOST_NET_F_VIRTIO_NET_HDR)) {
/* vhost provides vnet_hdr */
vhost_hlen = hdr_len;
sock_hlen = 0;
@@ -1666,18 +1682,19 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
sock_hlen = hdr_len;
}
mutex_lock(&n->dev.mutex);
- if ((features & (1 << VHOST_F_LOG_ALL)) &&
+ if (virtio_features_test_bit(features, VHOST_F_LOG_ALL) &&
!vhost_log_access_ok(&n->dev))
goto out_unlock;
- if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
- if (vhost_init_device_iotlb(&n->dev, true))
+ if (virtio_features_test_bit(features, VIRTIO_F_ACCESS_PLATFORM)) {
+ if (vhost_init_device_iotlb(&n->dev))
goto out_unlock;
}
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex);
- n->vqs[i].vq.acked_features = features;
+ virtio_features_copy(n->vqs[i].vq.acked_features_array,
+ features);
n->vqs[i].vhost_hlen = vhost_hlen;
n->vqs[i].sock_hlen = sock_hlen;
mutex_unlock(&n->vqs[i].vq.mutex);
@@ -1714,12 +1731,14 @@ out:
static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
unsigned long arg)
{
+ const DEFINE_VHOST_FEATURES_ARRAY(vhost_net_features, vhost_net_bits);
+ u64 all_features[VIRTIO_FEATURES_U64S];
struct vhost_net *n = f->private_data;
void __user *argp = (void __user *)arg;
u64 __user *featurep = argp;
struct vhost_vring_file backend;
- u64 features;
- int r;
+ u64 features, count, copied;
+ int r, i;
switch (ioctl) {
case VHOST_NET_SET_BACKEND:
@@ -1727,16 +1746,60 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
return -EFAULT;
return vhost_net_set_backend(n, backend.index, backend.fd);
case VHOST_GET_FEATURES:
- features = VHOST_NET_FEATURES;
+ features = vhost_net_features[0];
if (copy_to_user(featurep, &features, sizeof features))
return -EFAULT;
return 0;
case VHOST_SET_FEATURES:
if (copy_from_user(&features, featurep, sizeof features))
return -EFAULT;
- if (features & ~VHOST_NET_FEATURES)
+ if (features & ~vhost_net_features[0])
return -EOPNOTSUPP;
- return vhost_net_set_features(n, features);
+
+ virtio_features_from_u64(all_features, features);
+ return vhost_net_set_features(n, all_features);
+ case VHOST_GET_FEATURES_ARRAY:
+ if (copy_from_user(&count, featurep, sizeof(count)))
+ return -EFAULT;
+
+ /* Copy the net features, up to the user-provided buffer size */
+ argp += sizeof(u64);
+ copied = min(count, (u64)VIRTIO_FEATURES_U64S);
+ if (copy_to_user(argp, vhost_net_features,
+ copied * sizeof(u64)))
+ return -EFAULT;
+
+ /* Zero the trailing space provided by user-space, if any */
+ if (clear_user(argp, size_mul(count - copied, sizeof(u64))))
+ return -EFAULT;
+ return 0;
+ case VHOST_SET_FEATURES_ARRAY:
+ if (copy_from_user(&count, featurep, sizeof(count)))
+ return -EFAULT;
+
+ virtio_features_zero(all_features);
+ argp += sizeof(u64);
+ copied = min(count, (u64)VIRTIO_FEATURES_U64S);
+ if (copy_from_user(all_features, argp, copied * sizeof(u64)))
+ return -EFAULT;
+
+ /*
+ * Any feature specified by user-space above
+ * VIRTIO_FEATURES_BITS is not supported by definition.
+ */
+ for (i = copied; i < count; ++i) {
+ if (copy_from_user(&features, featurep + 1 + i,
+ sizeof(features)))
+ return -EFAULT;
+ if (features)
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < VIRTIO_FEATURES_U64S; i++)
+ if (all_features[i] & ~vhost_net_features[i])
+ return -EOPNOTSUPP;
+
+ return vhost_net_set_features(n, all_features);
case VHOST_GET_BACKEND_FEATURES:
features = VHOST_NET_BACKEND_FEATURES;
if (copy_to_user(featurep, &features, sizeof(features)))
@@ -1747,7 +1810,8 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
return -EFAULT;
if (features & ~VHOST_NET_BACKEND_FEATURES)
return -EOPNOTSUPP;
- return vhost_net_set_backend_features(n, features);
+ vhost_set_backend_features(&n->dev, features);
+ return 0;
case VHOST_RESET_OWNER:
return vhost_net_reset_owner(n);
case VHOST_SET_OWNER:
@@ -1764,14 +1828,6 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
}
}
-#ifdef CONFIG_COMPAT
-static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
- unsigned long arg)
-{
- return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
-}
-#endif
-
static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
@@ -1807,9 +1863,7 @@ static const struct file_operations vhost_net_fops = {
.write_iter = vhost_net_chr_write_iter,
.poll = vhost_net_chr_poll,
.unlocked_ioctl = vhost_net_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = vhost_net_compat_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.open = vhost_net_open,
.llseek = noop_llseek,
};
@@ -1820,7 +1874,7 @@ static struct miscdevice vhost_net_misc = {
.fops = &vhost_net_fops,
};
-static int vhost_net_init(void)
+static int __init vhost_net_init(void)
{
if (experimental_zcopytx)
vhost_net_enable_zcopy(VHOST_NET_VQ_TX);
@@ -1828,7 +1882,7 @@ static int vhost_net_init(void)
}
module_init(vhost_net_init);
-static void vhost_net_exit(void)
+static void __exit vhost_net_exit(void)
{
misc_deregister(&vhost_net_misc);
}