summary | refs | log | tree | commit | diff
path: root/drivers/vhost/net.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost/net.c')
-rw-r--r-- drivers/vhost/net.c | 201
1 files changed, 143 insertions, 58 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 7cbfc7d718b3..6edac0c1ba9b 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -69,12 +69,15 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
#define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN)
-enum {
- VHOST_NET_FEATURES = VHOST_FEATURES |
- (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
- (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
- (1ULL << VIRTIO_F_RING_RESET)
+static const u64 vhost_net_features[VIRTIO_FEATURES_DWORDS] = {
+ VHOST_FEATURES |
+ (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
+ (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
+ (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
+ (1ULL << VIRTIO_F_RING_RESET) |
+ (1ULL << VIRTIO_F_IN_ORDER),
+ VIRTIO_BIT(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) |
+ VIRTIO_BIT(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO),
};
enum {
@@ -374,7 +377,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
while (j) {
add = min(UIO_MAXIOV - nvq->done_idx, j);
vhost_add_used_and_signal_n(vq->dev, vq,
- &vq->heads[nvq->done_idx], add);
+ &vq->heads[nvq->done_idx],
+ NULL, add);
nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
j -= add;
}
@@ -449,7 +453,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
return vhost_poll_start(poll, sock->file);
}
-static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
+static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
+ unsigned int count)
{
struct vhost_virtqueue *vq = &nvq->vq;
struct vhost_dev *dev = vq->dev;
@@ -457,7 +462,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
if (!nvq->done_idx)
return;
- vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+ vhost_add_used_and_signal_n(dev, vq, vq->heads,
+ vq->nheads, count);
nvq->done_idx = 0;
}
@@ -466,6 +472,8 @@ static void vhost_tx_batch(struct vhost_net *net,
struct socket *sock,
struct msghdr *msghdr)
{
+ struct vhost_virtqueue *vq = &nvq->vq;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
struct tun_msg_ctl ctl = {
.type = TUN_MSG_PTR,
.num = nvq->batched_xdp,
@@ -473,6 +481,11 @@ static void vhost_tx_batch(struct vhost_net *net,
};
int i, err;
+ if (in_order) {
+ vq->heads[0].len = 0;
+ vq->nheads[0] = nvq->done_idx;
+ }
+
if (nvq->batched_xdp == 0)
goto signal_used;
@@ -494,7 +507,7 @@ static void vhost_tx_batch(struct vhost_net *net,
}
signal_used:
- vhost_net_signal_used(nvq);
+ vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
nvq->batched_xdp = 0;
}
@@ -668,7 +681,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
struct socket *sock = vhost_vq_get_backend(vq);
struct virtio_net_hdr *gso;
struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp];
- struct tun_xdp_hdr *hdr;
size_t len = iov_iter_count(from);
int headroom = vhost_sock_xdp(sock) ? XDP_PACKET_HEADROOM : 0;
int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -691,15 +703,13 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
if (unlikely(!buf))
return -ENOMEM;
- copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso),
- sock_hlen, from);
- if (copied != sock_hlen) {
+ copied = copy_from_iter(buf + pad - sock_hlen, len, from);
+ if (copied != len) {
ret = -EFAULT;
goto err;
}
- hdr = buf;
- gso = &hdr->gso;
+ gso = buf + pad - sock_hlen;
if (!sock_hlen)
memset(buf, 0, pad);
@@ -718,16 +728,11 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
}
}
- len -= sock_hlen;
- copied = copy_from_iter(buf + pad, len, from);
- if (copied != len) {
- ret = -EFAULT;
- goto err;
- }
+ /* pad contains sock_hlen */
+ memcpy(buf, buf + pad - sock_hlen, sock_hlen);
xdp_init_buff(xdp, buflen, NULL);
- xdp_prepare_buff(xdp, buf, pad, len, true);
- hdr->buflen = buflen;
+ xdp_prepare_buff(xdp, buf, pad, len - sock_hlen, true);
++nvq->batched_xdp;
@@ -756,6 +761,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
int sent_pkts = 0;
bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
bool busyloop_intr;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
do {
busyloop_intr = false;
@@ -792,11 +798,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
break;
}
- /* We can't build XDP buff, go for single
- * packet path but let's flush batched
- * packets.
- */
- vhost_tx_batch(net, nvq, sock, &msg);
+ if (nvq->batched_xdp) {
+ /* We can't build XDP buff, go for single
+ * packet path but let's flush batched
+ * packets.
+ */
+ vhost_tx_batch(net, nvq, sock, &msg);
+ }
msg.msg_control = NULL;
} else {
if (tx_can_batch(vq, total_len))
@@ -817,8 +825,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
pr_debug("Truncated TX packet: len %d != %zd\n",
err, len);
done:
- vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
- vq->heads[nvq->done_idx].len = 0;
+ if (in_order) {
+ vq->heads[0].id = cpu_to_vhost32(vq, head);
+ } else {
+ vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
+ vq->heads[nvq->done_idx].len = 0;
+ }
++nvq->done_idx;
} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
@@ -997,7 +1009,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
}
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
- bool *busyloop_intr)
+ bool *busyloop_intr, unsigned int count)
{
struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -1007,7 +1019,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
if (!len && rvq->busyloop_timeout) {
/* Flush batched heads first */
- vhost_net_signal_used(rnvq);
+ vhost_net_signal_used(rnvq, count);
/* Both tx vq and rx socket were polled here */
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
@@ -1019,7 +1031,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
/* This is a multi-buffer version of vhost_get_desc, that works if
* vq has read descriptors only.
- * @vq - the relevant virtqueue
+ * @nvq - the relevant vhost_net virtqueue
* @datalen - data length we'll be reading
* @iovcount - returned count of io vectors we fill
* @log - vhost log
@@ -1027,14 +1039,17 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
* @quota - headcount quota, 1 for big buffer
* returns number of buffer heads allocated, negative on error
*/
-static int get_rx_bufs(struct vhost_virtqueue *vq,
+static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
struct vring_used_elem *heads,
+ u16 *nheads,
int datalen,
unsigned *iovcount,
struct vhost_log *log,
unsigned *log_num,
unsigned int quota)
{
+ struct vhost_virtqueue *vq = &nvq->vq;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
unsigned int out, in;
int seg = 0;
int headcount = 0;
@@ -1071,14 +1086,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
nlogs += *log_num;
log += *log_num;
}
- heads[headcount].id = cpu_to_vhost32(vq, d);
len = iov_length(vq->iov + seg, in);
- heads[headcount].len = cpu_to_vhost32(vq, len);
- datalen -= len;
+ if (!in_order) {
+ heads[headcount].id = cpu_to_vhost32(vq, d);
+ heads[headcount].len = cpu_to_vhost32(vq, len);
+ }
++headcount;
+ datalen -= len;
seg += in;
}
- heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+
*iovcount = seg;
if (unlikely(log))
*log_num = nlogs;
@@ -1088,6 +1105,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
r = UIO_MAXIOV + 1;
goto err;
}
+
+ if (!in_order)
+ heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+ else {
+ heads[0].len = cpu_to_vhost32(vq, len + datalen);
+ heads[0].id = cpu_to_vhost32(vq, d);
+ nheads[0] = headcount;
+ }
+
return headcount;
err:
vhost_discard_vq_desc(vq, headcount);
@@ -1100,6 +1126,8 @@ static void handle_rx(struct vhost_net *net)
{
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_virtqueue *vq = &nvq->vq;
+ bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+ unsigned int count = 0;
unsigned in, log;
struct vhost_log *vq_log;
struct msghdr msg = {
@@ -1147,12 +1175,13 @@ static void handle_rx(struct vhost_net *net)
do {
sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
- &busyloop_intr);
+ &busyloop_intr, count);
if (!sock_len)
break;
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
- headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+ headcount = get_rx_bufs(nvq, vq->heads + count,
+ vq->nheads + count,
vhost_len, &in, vq_log, &log,
likely(mergeable) ? UIO_MAXIOV : 1);
/* On error, stop handling until the next kick. */
@@ -1228,8 +1257,11 @@ static void handle_rx(struct vhost_net *net)
goto out;
}
nvq->done_idx += headcount;
- if (nvq->done_idx > VHOST_NET_BATCH)
- vhost_net_signal_used(nvq);
+ count += in_order ? 1 : headcount;
+ if (nvq->done_idx > VHOST_NET_BATCH) {
+ vhost_net_signal_used(nvq, count);
+ count = 0;
+ }
if (unlikely(vq_log))
vhost_log_write(vq, vq_log, log, vhost_len,
vq->iov, in);
@@ -1241,7 +1273,7 @@ static void handle_rx(struct vhost_net *net)
else if (!sock_len)
vhost_net_enable_vq(net, vq);
out:
- vhost_net_signal_used(nvq);
+ vhost_net_signal_used(nvq, count);
mutex_unlock(&vq->mutex);
}
@@ -1614,16 +1646,23 @@ done:
return err;
}
-static int vhost_net_set_features(struct vhost_net *n, u64 features)
+static int vhost_net_set_features(struct vhost_net *n, const u64 *features)
{
size_t vhost_hlen, sock_hlen, hdr_len;
int i;
- hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_F_VERSION_1))) ?
- sizeof(struct virtio_net_hdr_mrg_rxbuf) :
- sizeof(struct virtio_net_hdr);
- if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
+ hdr_len = virtio_features_test_bit(features, VIRTIO_NET_F_MRG_RXBUF) ||
+ virtio_features_test_bit(features, VIRTIO_F_VERSION_1) ?
+ sizeof(struct virtio_net_hdr_mrg_rxbuf) :
+ sizeof(struct virtio_net_hdr);
+
+ if (virtio_features_test_bit(features,
+ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO) ||
+ virtio_features_test_bit(features,
+ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO))
+ hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel);
+
+ if (virtio_features_test_bit(features, VHOST_NET_F_VIRTIO_NET_HDR)) {
/* vhost provides vnet_hdr */
vhost_hlen = hdr_len;
sock_hlen = 0;
@@ -1633,18 +1672,19 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
sock_hlen = hdr_len;
}
mutex_lock(&n->dev.mutex);
- if ((features & (1 << VHOST_F_LOG_ALL)) &&
+ if (virtio_features_test_bit(features, VHOST_F_LOG_ALL) &&
!vhost_log_access_ok(&n->dev))
goto out_unlock;
- if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
+ if (virtio_features_test_bit(features, VIRTIO_F_ACCESS_PLATFORM)) {
if (vhost_init_device_iotlb(&n->dev))
goto out_unlock;
}
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex);
- n->vqs[i].vq.acked_features = features;
+ virtio_features_copy(n->vqs[i].vq.acked_features_array,
+ features);
n->vqs[i].vhost_hlen = vhost_hlen;
n->vqs[i].sock_hlen = sock_hlen;
mutex_unlock(&n->vqs[i].vq.mutex);
@@ -1681,12 +1721,13 @@ out:
static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
unsigned long arg)
{
+ u64 all_features[VIRTIO_FEATURES_DWORDS];
struct vhost_net *n = f->private_data;
void __user *argp = (void __user *)arg;
u64 __user *featurep = argp;
struct vhost_vring_file backend;
- u64 features;
- int r;
+ u64 features, count, copied;
+ int r, i;
switch (ioctl) {
case VHOST_NET_SET_BACKEND:
@@ -1694,16 +1735,60 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
return -EFAULT;
return vhost_net_set_backend(n, backend.index, backend.fd);
case VHOST_GET_FEATURES:
- features = VHOST_NET_FEATURES;
+ features = vhost_net_features[0];
if (copy_to_user(featurep, &features, sizeof features))
return -EFAULT;
return 0;
case VHOST_SET_FEATURES:
if (copy_from_user(&features, featurep, sizeof features))
return -EFAULT;
- if (features & ~VHOST_NET_FEATURES)
+ if (features & ~vhost_net_features[0])
return -EOPNOTSUPP;
- return vhost_net_set_features(n, features);
+
+ virtio_features_from_u64(all_features, features);
+ return vhost_net_set_features(n, all_features);
+ case VHOST_GET_FEATURES_ARRAY:
+ if (copy_from_user(&count, featurep, sizeof(count)))
+ return -EFAULT;
+
+ /* Copy the net features, up to the user-provided buffer size */
+ argp += sizeof(u64);
+ copied = min(count, VIRTIO_FEATURES_DWORDS);
+ if (copy_to_user(argp, vhost_net_features,
+ copied * sizeof(u64)))
+ return -EFAULT;
+
+ /* Zero the space past the copied features, if user-space provided any */
+ if (clear_user(argp + copied * sizeof(u64), size_mul(count - copied, sizeof(u64))))
+ return -EFAULT;
+ return 0;
+ case VHOST_SET_FEATURES_ARRAY:
+ if (copy_from_user(&count, featurep, sizeof(count)))
+ return -EFAULT;
+
+ virtio_features_zero(all_features);
+ argp += sizeof(u64);
+ copied = min(count, VIRTIO_FEATURES_DWORDS);
+ if (copy_from_user(all_features, argp, copied * sizeof(u64)))
+ return -EFAULT;
+
+ /*
+ * Any feature specified by user-space above
+ * VIRTIO_FEATURES_MAX is not supported by definition.
+ */
+ for (i = copied; i < count; ++i) {
+ if (copy_from_user(&features, featurep + 1 + i,
+ sizeof(features)))
+ return -EFAULT;
+ if (features)
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++)
+ if (all_features[i] & ~vhost_net_features[i])
+ return -EOPNOTSUPP;
+
+ return vhost_net_set_features(n, all_features);
case VHOST_GET_BACKEND_FEATURES:
features = VHOST_NET_BACKEND_FEATURES;
if (copy_to_user(featurep, &features, sizeof(features)))