summaryrefslogtreecommitdiff
path: root/drivers/vhost
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/net.c82
-rw-r--r--drivers/vhost/scsi.c9
-rw-r--r--drivers/vhost/test.c10
-rw-r--r--drivers/vhost/vhost.c80
-rw-r--r--drivers/vhost/vhost.h52
-rw-r--r--drivers/vhost/vsock.c10
6 files changed, 175 insertions, 68 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 35ded4330431..7f886d3dba7d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -69,15 +69,15 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
#define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN)
-static const u64 vhost_net_features[VIRTIO_FEATURES_DWORDS] = {
- VHOST_FEATURES |
- (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
- (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
- (1ULL << VIRTIO_F_RING_RESET) |
- (1ULL << VIRTIO_F_IN_ORDER),
- VIRTIO_BIT(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) |
- VIRTIO_BIT(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO),
+static const int vhost_net_bits[] = {
+ VHOST_FEATURES,
+ VHOST_NET_F_VIRTIO_NET_HDR,
+ VIRTIO_NET_F_MRG_RXBUF,
+ VIRTIO_F_ACCESS_PLATFORM,
+ VIRTIO_F_RING_RESET,
+ VIRTIO_F_IN_ORDER,
+ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO,
+ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO
};
enum {
@@ -592,14 +592,15 @@ static void vhost_net_busy_poll(struct vhost_net *net,
static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
struct vhost_net_virtqueue *tnvq,
unsigned int *out_num, unsigned int *in_num,
- struct msghdr *msghdr, bool *busyloop_intr)
+ struct msghdr *msghdr, bool *busyloop_intr,
+ unsigned int *ndesc)
{
struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_virtqueue *rvq = &rnvq->vq;
struct vhost_virtqueue *tvq = &tnvq->vq;
- int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
- out_num, in_num, NULL, NULL);
+ int r = vhost_get_vq_desc_n(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+ out_num, in_num, NULL, NULL, ndesc);
if (r == tvq->num && tvq->busyloop_timeout) {
/* Flush batched packets first */
@@ -610,8 +611,8 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false);
- r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
- out_num, in_num, NULL, NULL);
+ r = vhost_get_vq_desc_n(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+ out_num, in_num, NULL, NULL, ndesc);
}
return r;
@@ -642,12 +643,14 @@ static int get_tx_bufs(struct vhost_net *net,
struct vhost_net_virtqueue *nvq,
struct msghdr *msg,
unsigned int *out, unsigned int *in,
- size_t *len, bool *busyloop_intr)
+ size_t *len, bool *busyloop_intr,
+ unsigned int *ndesc)
{
struct vhost_virtqueue *vq = &nvq->vq;
int ret;
- ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr);
+ ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg,
+ busyloop_intr, ndesc);
if (ret < 0 || ret == vq->num)
return ret;
@@ -766,6 +769,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
int sent_pkts = 0;
bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+ unsigned int ndesc = 0;
do {
bool busyloop_intr = false;
@@ -774,7 +778,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
vhost_tx_batch(net, nvq, sock, &msg);
head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
- &busyloop_intr);
+ &busyloop_intr, &ndesc);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -806,7 +810,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
goto done;
} else if (unlikely(err != -ENOSPC)) {
vhost_tx_batch(net, nvq, sock, &msg);
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_vq_desc(vq, 1, ndesc);
vhost_net_enable_vq(net, vq);
break;
}
@@ -829,7 +833,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
err = sock->ops->sendmsg(sock, &msg, len);
if (unlikely(err < 0)) {
if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) {
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_vq_desc(vq, 1, ndesc);
vhost_net_enable_vq(net, vq);
break;
}
@@ -868,6 +872,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
int err;
struct vhost_net_ubuf_ref *ubufs;
struct ubuf_info_msgzc *ubuf;
+ unsigned int ndesc = 0;
bool zcopy_used;
int sent_pkts = 0;
@@ -879,7 +884,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
busyloop_intr = false;
head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
- &busyloop_intr);
+ &busyloop_intr, &ndesc);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -941,7 +946,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
}
if (retry) {
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_vq_desc(vq, 1, ndesc);
vhost_net_enable_vq(net, vq);
break;
}
@@ -1045,11 +1050,12 @@ static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
unsigned *iovcount,
struct vhost_log *log,
unsigned *log_num,
- unsigned int quota)
+ unsigned int quota,
+ unsigned int *ndesc)
{
struct vhost_virtqueue *vq = &nvq->vq;
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
- unsigned int out, in;
+ unsigned int out, in, desc_num, n = 0;
int seg = 0;
int headcount = 0;
unsigned d;
@@ -1064,9 +1070,9 @@ static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
r = -ENOBUFS;
goto err;
}
- r = vhost_get_vq_desc(vq, vq->iov + seg,
- ARRAY_SIZE(vq->iov) - seg, &out,
- &in, log, log_num);
+ r = vhost_get_vq_desc_n(vq, vq->iov + seg,
+ ARRAY_SIZE(vq->iov) - seg, &out,
+ &in, log, log_num, &desc_num);
if (unlikely(r < 0))
goto err;
@@ -1093,6 +1099,7 @@ static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
++headcount;
datalen -= len;
seg += in;
+ n += desc_num;
}
*iovcount = seg;
@@ -1113,9 +1120,11 @@ static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
nheads[0] = headcount;
}
+ *ndesc = n;
+
return headcount;
err:
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_vq_desc(vq, headcount, n);
return r;
}
@@ -1151,6 +1160,7 @@ static void handle_rx(struct vhost_net *net)
struct iov_iter fixup;
__virtio16 num_buffers;
int recv_pkts = 0;
+ unsigned int ndesc;
mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
sock = vhost_vq_get_backend(vq);
@@ -1182,7 +1192,8 @@ static void handle_rx(struct vhost_net *net)
headcount = get_rx_bufs(nvq, vq->heads + count,
vq->nheads + count,
vhost_len, &in, vq_log, &log,
- likely(mergeable) ? UIO_MAXIOV : 1);
+ likely(mergeable) ? UIO_MAXIOV : 1,
+ &ndesc);
/* On error, stop handling until the next kick. */
if (unlikely(headcount < 0))
goto out;
@@ -1228,7 +1239,7 @@ static void handle_rx(struct vhost_net *net)
if (unlikely(err != sock_len)) {
pr_debug("Discarded rx packet: "
" len %d, expected %zd\n", err, sock_len);
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_vq_desc(vq, headcount, ndesc);
continue;
}
/* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
@@ -1252,7 +1263,7 @@ static void handle_rx(struct vhost_net *net)
copy_to_iter(&num_buffers, sizeof num_buffers,
&fixup) != sizeof num_buffers) {
vq_err(vq, "Failed num_buffers write");
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_vq_desc(vq, headcount, ndesc);
goto out;
}
nvq->done_idx += headcount;
@@ -1720,7 +1731,8 @@ out:
static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
unsigned long arg)
{
- u64 all_features[VIRTIO_FEATURES_DWORDS];
+ const DEFINE_VHOST_FEATURES_ARRAY(vhost_net_features, vhost_net_bits);
+ u64 all_features[VIRTIO_FEATURES_U64S];
struct vhost_net *n = f->private_data;
void __user *argp = (void __user *)arg;
u64 __user *featurep = argp;
@@ -1752,7 +1764,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
/* Copy the net features, up to the user-provided buffer size */
argp += sizeof(u64);
- copied = min(count, VIRTIO_FEATURES_DWORDS);
+ copied = min(count, (u64)VIRTIO_FEATURES_U64S);
if (copy_to_user(argp, vhost_net_features,
copied * sizeof(u64)))
return -EFAULT;
@@ -1767,13 +1779,13 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
virtio_features_zero(all_features);
argp += sizeof(u64);
- copied = min(count, VIRTIO_FEATURES_DWORDS);
+ copied = min(count, (u64)VIRTIO_FEATURES_U64S);
if (copy_from_user(all_features, argp, copied * sizeof(u64)))
return -EFAULT;
/*
* Any feature specified by user-space above
- * VIRTIO_FEATURES_MAX is not supported by definition.
+ * VIRTIO_FEATURES_BITS is not supported by definition.
*/
for (i = copied; i < count; ++i) {
if (copy_from_user(&features, featurep + 1 + i,
@@ -1783,7 +1795,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
return -EOPNOTSUPP;
}
- for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++)
+ for (i = 0; i < VIRTIO_FEATURES_U64S; i++)
if (all_features[i] & ~vhost_net_features[i])
return -EOPNOTSUPP;
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 98e4f68f4e3c..f43c1fe9fad9 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -197,11 +197,14 @@ enum {
};
/* Note: can't set VIRTIO_F_VERSION_1 yet, since that implies ANY_LAYOUT. */
-enum {
- VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) |
- (1ULL << VIRTIO_SCSI_F_T10_PI)
+static const int vhost_scsi_bits[] = {
+ VHOST_FEATURES,
+ VIRTIO_SCSI_F_HOTPLUG,
+ VIRTIO_SCSI_F_T10_PI
};
+#define VHOST_SCSI_FEATURES VHOST_FEATURES_U64(vhost_scsi_bits, 0)
+
#define VHOST_SCSI_MAX_TARGET 256
#define VHOST_SCSI_MAX_IO_VQ 1024
#define VHOST_SCSI_MAX_EVENT 128
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 42c955a5b211..1e4e36edbcd2 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -28,6 +28,12 @@
*/
#define VHOST_TEST_PKT_WEIGHT 256
+static const int vhost_test_bits[] = {
+ VHOST_FEATURES
+};
+
+#define VHOST_TEST_FEATURES VHOST_FEATURES_U64(vhost_test_bits, 0)
+
enum {
VHOST_TEST_VQ = 0,
VHOST_TEST_VQ_MAX = 1,
@@ -328,14 +334,14 @@ static long vhost_test_ioctl(struct file *f, unsigned int ioctl,
return -EFAULT;
return vhost_test_set_backend(n, backend.index, backend.fd);
case VHOST_GET_FEATURES:
- features = VHOST_FEATURES;
+ features = VHOST_TEST_FEATURES;
if (copy_to_user(featurep, &features, sizeof features))
return -EFAULT;
return 0;
case VHOST_SET_FEATURES:
if (copy_from_user(&features, featurep, sizeof features))
return -EFAULT;
- if (features & ~VHOST_FEATURES)
+ if (features & ~VHOST_TEST_FEATURES)
return -EOPNOTSUPP;
return vhost_test_set_features(n, features);
case VHOST_RESET_OWNER:
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 8570fdf2e14a..bccdc9eab267 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -804,11 +804,13 @@ static int vhost_kthread_worker_create(struct vhost_worker *worker,
ret = vhost_attach_task_to_cgroups(worker);
if (ret)
- goto stop_worker;
+ goto free_id;
worker->id = id;
return 0;
+free_id:
+ xa_erase(&dev->worker_xa, id);
stop_worker:
vhost_kthread_do_stop(worker);
return ret;
@@ -2792,18 +2794,34 @@ static int get_indirect(struct vhost_virtqueue *vq,
return 0;
}
-/* This looks in the virtqueue and for the first available buffer, and converts
- * it to an iovec for convenient access. Since descriptors consist of some
- * number of output then some number of input descriptors, it's actually two
- * iovecs, but we pack them into one and note how many of each there were.
+/**
+ * vhost_get_vq_desc_n - Fetch the next available descriptor chain and build iovecs
+ * @vq: target virtqueue
+ * @iov: array that receives the scatter/gather segments
+ * @iov_size: capacity of @iov in elements
+ * @out_num: the number of output segments
+ * @in_num: the number of input segments
+ * @log: optional array to record addr/len for each writable segment; NULL if unused
+ * @log_num: optional output; number of entries written to @log when provided
+ * @ndesc: optional output; number of descriptors consumed from the available ring
+ * (useful for rollback via vhost_discard_vq_desc)
*
- * This function returns the descriptor number found, or vq->num (which is
- * never a valid descriptor number) if none was found. A negative code is
- * returned on error. */
-int vhost_get_vq_desc(struct vhost_virtqueue *vq,
- struct iovec iov[], unsigned int iov_size,
- unsigned int *out_num, unsigned int *in_num,
- struct vhost_log *log, unsigned int *log_num)
+ * Extracts one available descriptor chain from @vq and translates guest addresses
+ * into host iovecs.
+ *
+ * On success, advances @vq->last_avail_idx by 1 and @vq->next_avail_head by the
+ * number of descriptors consumed (also stored via @ndesc when non-NULL).
+ *
+ * Return:
+ * - head index in [0, @vq->num) on success;
+ * - @vq->num if no descriptor is currently available;
+ * - negative errno on failure
+ */
+int vhost_get_vq_desc_n(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num,
+ unsigned int *ndesc)
{
bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
struct vring_desc desc;
@@ -2921,17 +2939,49 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
vq->last_avail_idx++;
vq->next_avail_head += c;
+ if (ndesc)
+ *ndesc = c;
+
/* Assume notifications from guest are disabled at this point,
* if they aren't we would need to update avail_event index. */
BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
return head;
}
+EXPORT_SYMBOL_GPL(vhost_get_vq_desc_n);
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found. A negative code is
+ * returned on error.
+ */
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
+{
+ return vhost_get_vq_desc_n(vq, iov, iov_size, out_num, in_num,
+ log, log_num, NULL);
+}
EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
-/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
+/**
+ * vhost_discard_vq_desc - Reverse the effect of vhost_get_vq_desc_n()
+ * @vq: target virtqueue
+ * @nbufs: number of buffers to roll back
+ * @ndesc: number of descriptors to roll back
+ *
+ * Rewinds the internal consumer cursors after a failed attempt to use buffers
+ * returned by vhost_get_vq_desc_n().
+ */
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int nbufs,
+ unsigned int ndesc)
{
- vq->last_avail_idx -= n;
+ vq->next_avail_head -= ndesc;
+ vq->last_avail_idx -= nbufs;
}
EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 621a6d9a8791..4fe99765c5c7 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -14,6 +14,7 @@
#include <linux/atomic.h>
#include <linux/vhost_iotlb.h>
#include <linux/irqbypass.h>
+#include <linux/unroll.h>
struct vhost_work;
struct vhost_task;
@@ -230,7 +231,15 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
struct iovec iov[], unsigned int iov_size,
unsigned int *out_num, unsigned int *in_num,
struct vhost_log *log, unsigned int *log_num);
-void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
+
+int vhost_get_vq_desc_n(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num,
+ unsigned int *ndesc);
+
+void vhost_discard_vq_desc(struct vhost_virtqueue *, int nbuf,
+ unsigned int ndesc);
bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work);
bool vhost_vq_has_work(struct vhost_virtqueue *vq);
@@ -279,14 +288,39 @@ void vhost_iotlb_map_free(struct vhost_iotlb *iotlb,
eventfd_signal((vq)->error_ctx);\
} while (0)
-enum {
- VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
- (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
- (1ULL << VIRTIO_RING_F_EVENT_IDX) |
- (1ULL << VHOST_F_LOG_ALL) |
- (1ULL << VIRTIO_F_ANY_LAYOUT) |
- (1ULL << VIRTIO_F_VERSION_1)
-};
+#define VHOST_FEATURES \
+ VIRTIO_F_NOTIFY_ON_EMPTY, \
+ VIRTIO_RING_F_INDIRECT_DESC, \
+ VIRTIO_RING_F_EVENT_IDX, \
+ VHOST_F_LOG_ALL, \
+ VIRTIO_F_ANY_LAYOUT, \
+ VIRTIO_F_VERSION_1
+
+static inline u64 vhost_features_u64(const int *features, int size, int idx)
+{
+ u64 res = 0;
+
+ unrolled_count(VIRTIO_FEATURES_BITS)
+ for (int i = 0; i < size; ++i) {
+ int bit = features[i];
+
+ if (virtio_features_chk_bit(bit) && VIRTIO_U64(bit) == idx)
+ res |= VIRTIO_BIT(bit);
+ }
+ return res;
+}
+
+#define VHOST_FEATURES_U64(features, idx) \
+ vhost_features_u64(features, ARRAY_SIZE(features), idx)
+
+#define DEFINE_VHOST_FEATURES_ARRAY_ENTRY(idx, features) \
+ [idx] = VHOST_FEATURES_U64(features, idx),
+
+#define DEFINE_VHOST_FEATURES_ARRAY(array, features) \
+ u64 array[VIRTIO_FEATURES_U64S] = { \
+ UNROLL(VIRTIO_FEATURES_U64S, \
+ DEFINE_VHOST_FEATURES_ARRAY_ENTRY, features) \
+ }
/**
* vhost_vq_set_backend - Set backend.
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index ae01457ea2cd..0298ddc34824 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -29,12 +29,14 @@
*/
#define VHOST_VSOCK_PKT_WEIGHT 256
-enum {
- VHOST_VSOCK_FEATURES = VHOST_FEATURES |
- (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
- (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
+static const int vhost_vsock_bits[] = {
+ VHOST_FEATURES,
+ VIRTIO_F_ACCESS_PLATFORM,
+ VIRTIO_VSOCK_F_SEQPACKET
};
+#define VHOST_VSOCK_FEATURES VHOST_FEATURES_U64(vhost_vsock_bits, 0)
+
enum {
VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
};