summaryrefslogtreecommitdiff
path: root/drivers/vhost
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/net.c107
-rw-r--r--drivers/vhost/scsi.c99
-rw-r--r--drivers/vhost/vdpa.c58
-rw-r--r--drivers/vhost/vhost.c213
-rw-r--r--drivers/vhost/vhost.h3
-rw-r--r--drivers/vhost/vringh.c1
-rw-r--r--drivers/vhost/vsock.c8
7 files changed, 279 insertions, 210 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f2ed7167c848..b9b9e9d40951 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -141,10 +141,8 @@ struct vhost_net {
unsigned tx_zcopy_err;
/* Flush in progress. Protected by tx vq lock. */
bool tx_flush;
- /* Private page frag */
- struct page_frag page_frag;
- /* Refcount bias of page frag */
- int refcnt_bias;
+ /* Private page frag cache */
+ struct page_frag_cache pf_cache;
};
static unsigned vhost_net_zcopy_mask __read_mostly;
@@ -382,7 +380,7 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
}
}
-static void vhost_zerocopy_callback(struct sk_buff *skb,
+static void vhost_zerocopy_complete(struct sk_buff *skb,
struct ubuf_info *ubuf_base, bool success)
{
struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
@@ -410,6 +408,10 @@ static void vhost_zerocopy_callback(struct sk_buff *skb,
rcu_read_unlock_bh();
}
+static const struct ubuf_info_ops vhost_ubuf_ops = {
+ .complete = vhost_zerocopy_complete,
+};
+
static inline unsigned long busy_clock(void)
{
return local_clock() >> 10;
@@ -655,41 +657,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len)
!vhost_vq_avail_empty(vq->dev, vq);
}
-static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz,
- struct page_frag *pfrag, gfp_t gfp)
-{
- if (pfrag->page) {
- if (pfrag->offset + sz <= pfrag->size)
- return true;
- __page_frag_cache_drain(pfrag->page, net->refcnt_bias);
- }
-
- pfrag->offset = 0;
- net->refcnt_bias = 0;
- if (SKB_FRAG_PAGE_ORDER) {
- /* Avoid direct reclaim but allow kswapd to wake */
- pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
- __GFP_COMP | __GFP_NOWARN |
- __GFP_NORETRY,
- SKB_FRAG_PAGE_ORDER);
- if (likely(pfrag->page)) {
- pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
- goto done;
- }
- }
- pfrag->page = alloc_page(gfp);
- if (likely(pfrag->page)) {
- pfrag->size = PAGE_SIZE;
- goto done;
- }
- return false;
-
-done:
- net->refcnt_bias = USHRT_MAX;
- page_ref_add(pfrag->page, USHRT_MAX - 1);
- return true;
-}
-
#define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
@@ -699,7 +666,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
struct vhost_net *net = container_of(vq->dev, struct vhost_net,
dev);
struct socket *sock = vhost_vq_get_backend(vq);
- struct page_frag *alloc_frag = &net->page_frag;
struct virtio_net_hdr *gso;
struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp];
struct tun_xdp_hdr *hdr;
@@ -710,6 +676,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
int sock_hlen = nvq->sock_hlen;
void *buf;
int copied;
+ int ret;
if (unlikely(len < nvq->sock_hlen))
return -EFAULT;
@@ -719,22 +686,24 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
return -ENOSPC;
buflen += SKB_DATA_ALIGN(len + pad);
- alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
- if (unlikely(!vhost_net_page_frag_refill(net, buflen,
- alloc_frag, GFP_KERNEL)))
+ buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL,
+ SMP_CACHE_BYTES);
+ if (unlikely(!buf))
return -ENOMEM;
- buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
- copied = copy_page_from_iter(alloc_frag->page,
- alloc_frag->offset +
- offsetof(struct tun_xdp_hdr, gso),
- sock_hlen, from);
- if (copied != sock_hlen)
- return -EFAULT;
+ copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso),
+ sock_hlen, from);
+ if (copied != sock_hlen) {
+ ret = -EFAULT;
+ goto err;
+ }
hdr = buf;
gso = &hdr->gso;
+ if (!sock_hlen)
+ memset(buf, 0, pad);
+
if ((gso->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
vhost16_to_cpu(vq, gso->csum_start) +
vhost16_to_cpu(vq, gso->csum_offset) + 2 >
@@ -743,27 +712,30 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
vhost16_to_cpu(vq, gso->csum_start) +
vhost16_to_cpu(vq, gso->csum_offset) + 2);
- if (vhost16_to_cpu(vq, gso->hdr_len) > len)
- return -EINVAL;
+ if (vhost16_to_cpu(vq, gso->hdr_len) > len) {
+ ret = -EINVAL;
+ goto err;
+ }
}
len -= sock_hlen;
- copied = copy_page_from_iter(alloc_frag->page,
- alloc_frag->offset + pad,
- len, from);
- if (copied != len)
- return -EFAULT;
+ copied = copy_from_iter(buf + pad, len, from);
+ if (copied != len) {
+ ret = -EFAULT;
+ goto err;
+ }
xdp_init_buff(xdp, buflen, NULL);
xdp_prepare_buff(xdp, buf, pad, len, true);
hdr->buflen = buflen;
- --net->refcnt_bias;
- alloc_frag->offset += buflen;
-
++nvq->batched_xdp;
return 0;
+
+err:
+ page_frag_free(buf);
+ return ret;
}
static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
@@ -911,7 +883,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx;
- ubuf->ubuf.callback = vhost_zerocopy_callback;
+ ubuf->ubuf.ops = &vhost_ubuf_ops;
ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG;
refcount_set(&ubuf->ubuf.refcnt, 1);
msg.msg_control = &ctl;
@@ -1135,6 +1107,7 @@ static void handle_rx(struct vhost_net *net)
size_t vhost_hlen, sock_hlen;
size_t vhost_len, sock_len;
bool busyloop_intr = false;
+ bool set_num_buffers;
struct socket *sock;
struct iov_iter fixup;
__virtio16 num_buffers;
@@ -1157,6 +1130,8 @@ static void handle_rx(struct vhost_net *net)
vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
+ set_num_buffers = mergeable ||
+ vhost_has_feature(vq, VIRTIO_F_VERSION_1);
do {
sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
@@ -1233,7 +1208,7 @@ static void handle_rx(struct vhost_net *net)
/* TODO: Should check and handle checksum. */
num_buffers = cpu_to_vhost16(vq, headcount);
- if (likely(mergeable) &&
+ if (likely(set_num_buffers) &&
copy_to_iter(&num_buffers, sizeof num_buffers,
&fixup) != sizeof num_buffers) {
vq_err(vq, "Failed num_buffers write");
@@ -1353,8 +1328,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
vqs[VHOST_NET_VQ_RX]);
f->private_data = n;
- n->page_frag.page = NULL;
- n->refcnt_bias = 0;
+ page_frag_cache_init(&n->pf_cache);
return 0;
}
@@ -1422,8 +1396,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue);
kfree(n->vqs[VHOST_NET_VQ_TX].xdp);
kfree(n->dev.vqs);
- if (n->page_frag.page)
- __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias);
+ page_frag_cache_drain(&n->pf_cache);
kvfree(n);
return 0;
}
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 282aac45c690..718fa4e0b31e 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -27,7 +27,7 @@
#include <linux/miscdevice.h>
#include <linux/blk_types.h>
#include <linux/bio.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <scsi/scsi_common.h>
#include <scsi/scsi_proto.h>
#include <target/target_core_base.h>
@@ -210,6 +210,7 @@ struct vhost_scsi {
struct vhost_scsi_tmf {
struct vhost_work vwork;
+ struct work_struct flush_work;
struct vhost_scsi *vhost;
struct vhost_scsi_virtqueue *svq;
@@ -358,14 +359,23 @@ static void vhost_scsi_release_tmf_res(struct vhost_scsi_tmf *tmf)
vhost_scsi_put_inflight(inflight);
}
+static void vhost_scsi_drop_cmds(struct vhost_scsi_virtqueue *svq)
+{
+ struct vhost_scsi_cmd *cmd, *t;
+ struct llist_node *llnode;
+
+ llnode = llist_del_all(&svq->completion_list);
+ llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list)
+ vhost_scsi_release_cmd_res(&cmd->tvc_se_cmd);
+}
+
static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
{
if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) {
struct vhost_scsi_tmf *tmf = container_of(se_cmd,
struct vhost_scsi_tmf, se_cmd);
- struct vhost_virtqueue *vq = &tmf->svq->vq;
- vhost_vq_work_queue(vq, &tmf->vwork);
+ schedule_work(&tmf->flush_work);
} else {
struct vhost_scsi_cmd *cmd = container_of(se_cmd,
struct vhost_scsi_cmd, tvc_se_cmd);
@@ -373,7 +383,8 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
struct vhost_scsi_virtqueue, vq);
llist_add(&cmd->tvc_completion_list, &svq->completion_list);
- vhost_vq_work_queue(&svq->vq, &svq->completion_work);
+ if (!vhost_vq_work_queue(&svq->vq, &svq->completion_work))
+ vhost_scsi_drop_cmds(svq);
}
}
@@ -497,10 +508,8 @@ again:
vq_err(vq, "Faulted on vhost_scsi_send_event\n");
}
-static void vhost_scsi_evt_work(struct vhost_work *work)
+static void vhost_scsi_complete_events(struct vhost_scsi *vs, bool drop)
{
- struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
- vs_event_work);
struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
struct vhost_scsi_evt *evt, *t;
struct llist_node *llnode;
@@ -508,12 +517,20 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
mutex_lock(&vq->mutex);
llnode = llist_del_all(&vs->vs_event_list);
llist_for_each_entry_safe(evt, t, llnode, list) {
- vhost_scsi_do_evt_work(vs, evt);
+ if (!drop)
+ vhost_scsi_do_evt_work(vs, evt);
vhost_scsi_free_evt(vs, evt);
}
mutex_unlock(&vq->mutex);
}
+static void vhost_scsi_evt_work(struct vhost_work *work)
+{
+ struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
+ vs_event_work);
+ vhost_scsi_complete_events(vs, false);
+}
+
static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd)
{
struct iov_iter *iter = &cmd->saved_iter;
@@ -1012,20 +1029,23 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
/* virtio-scsi spec requires byte 0 of the lun to be 1 */
vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp);
} else {
- struct vhost_scsi_tpg **vs_tpg, *tpg;
-
- vs_tpg = vhost_vq_get_backend(vq); /* validated at handler entry */
-
- tpg = READ_ONCE(vs_tpg[*vc->target]);
- if (unlikely(!tpg)) {
- vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
- } else {
- if (tpgp)
- *tpgp = tpg;
- ret = 0;
+ struct vhost_scsi_tpg **vs_tpg, *tpg = NULL;
+
+ if (vc->target) {
+ /* validated at handler entry */
+ vs_tpg = vhost_vq_get_backend(vq);
+ tpg = READ_ONCE(vs_tpg[*vc->target]);
+ if (unlikely(!tpg)) {
+ vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
+ goto out;
+ }
}
- }
+ if (tpgp)
+ *tpgp = tpg;
+ ret = 0;
+ }
+out:
return ret;
}
@@ -1270,33 +1290,32 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work)
{
struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf,
vwork);
- struct vhost_virtqueue *ctl_vq, *vq;
- int resp_code, i;
-
- if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) {
- /*
- * Flush IO vqs that don't share a worker with the ctl to make
- * sure they have sent their responses before us.
- */
- ctl_vq = &tmf->vhost->vqs[VHOST_SCSI_VQ_CTL].vq;
- for (i = VHOST_SCSI_VQ_IO; i < tmf->vhost->dev.nvqs; i++) {
- vq = &tmf->vhost->vqs[i].vq;
-
- if (vhost_vq_is_setup(vq) &&
- vq->worker != ctl_vq->worker)
- vhost_vq_flush(vq);
- }
+ int resp_code;
+ if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE)
resp_code = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
- } else {
+ else
resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED;
- }
vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs,
tmf->vq_desc, &tmf->resp_iov, resp_code);
vhost_scsi_release_tmf_res(tmf);
}
+static void vhost_scsi_tmf_flush_work(struct work_struct *work)
+{
+ struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf,
+ flush_work);
+ struct vhost_virtqueue *vq = &tmf->svq->vq;
+ /*
+ * Make sure we have sent responses for other commands before we
+ * send our response.
+ */
+ vhost_dev_flush(vq->dev);
+ if (!vhost_vq_work_queue(vq, &tmf->vwork))
+ vhost_scsi_release_tmf_res(tmf);
+}
+
static void
vhost_scsi_handle_tmf(struct vhost_scsi *vs, struct vhost_scsi_tpg *tpg,
struct vhost_virtqueue *vq,
@@ -1320,6 +1339,7 @@ vhost_scsi_handle_tmf(struct vhost_scsi *vs, struct vhost_scsi_tpg *tpg,
if (!tmf)
goto send_reject;
+ INIT_WORK(&tmf->flush_work, vhost_scsi_tmf_flush_work);
vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work);
tmf->vhost = vs;
tmf->svq = svq;
@@ -1509,7 +1529,8 @@ vhost_scsi_send_evt(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
}
llist_add(&evt->list, &vs->vs_event_list);
- vhost_vq_work_queue(vq, &vs->vs_event_work);
+ if (!vhost_vq_work_queue(vq, &vs->vs_event_work))
+ vhost_scsi_complete_events(vs, true);
}
static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index bc4a51e4638b..5a49b5a6d496 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
if (irq < 0)
return;
- irq_bypass_unregister_producer(&vq->call_ctx.producer);
if (!vq->call_ctx.ctx)
return;
- vq->call_ctx.producer.token = vq->call_ctx.ctx;
vq->call_ctx.producer.irq = irq;
ret = irq_bypass_register_producer(&vq->call_ctx.producer);
if (unlikely(ret))
@@ -595,6 +593,9 @@ static long vhost_vdpa_suspend(struct vhost_vdpa *v)
const struct vdpa_config_ops *ops = vdpa->config;
int ret;
+ if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
+ return 0;
+
if (!ops->suspend)
return -EOPNOTSUPP;
@@ -615,6 +616,9 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
const struct vdpa_config_ops *ops = vdpa->config;
int ret;
+ if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
+ return 0;
+
if (!ops->resume)
return -EOPNOTSUPP;
@@ -681,6 +685,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
if (!ops->set_group_asid)
return -EOPNOTSUPP;
return ops->set_group_asid(vdpa, idx, s.num);
+ case VHOST_VDPA_GET_VRING_SIZE:
+ if (!ops->get_vq_size)
+ return -EOPNOTSUPP;
+ s.index = idx;
+ s.num = ops->get_vq_size(vdpa, idx);
+ if (copy_to_user(argp, &s, sizeof(s)))
+ return -EFAULT;
+ return 0;
case VHOST_GET_VRING_BASE:
r = ops->get_vq_state(v->vdpa, idx, &vq_state);
if (r)
@@ -695,6 +707,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
vq->last_avail_idx = vq_state.split.avail_index;
}
break;
+ case VHOST_SET_VRING_CALL:
+ if (vq->call_ctx.ctx) {
+ if (ops->get_status(vdpa) &
+ VIRTIO_CONFIG_S_DRIVER_OK)
+ vhost_vdpa_unsetup_vq_irq(v, idx);
+ vq->call_ctx.producer.token = NULL;
+ }
+ break;
}
r = vhost_vring_ioctl(&v->vdev, cmd, argp);
@@ -733,13 +753,16 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
cb.callback = vhost_vdpa_virtqueue_cb;
cb.private = vq;
cb.trigger = vq->call_ctx.ctx;
+ vq->call_ctx.producer.token = vq->call_ctx.ctx;
+ if (ops->get_status(vdpa) &
+ VIRTIO_CONFIG_S_DRIVER_OK)
+ vhost_vdpa_setup_vq_irq(v, idx);
} else {
cb.callback = NULL;
cb.private = NULL;
cb.trigger = NULL;
}
ops->set_vq_cb(vdpa, idx, &cb);
- vhost_vdpa_setup_vq_irq(v, idx);
break;
case VHOST_SET_VRING_NUM:
@@ -1298,26 +1321,24 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct device *dma_dev = vdpa_get_dma_dev(vdpa);
- const struct bus_type *bus;
int ret;
/* Device want to do DMA by itself */
if (ops->set_map || ops->dma_map)
return 0;
- bus = dma_dev->bus;
- if (!bus)
- return -EFAULT;
-
if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
dev_warn_once(&v->dev,
"Failed to allocate domain, device is not IOMMU cache coherent capable\n");
return -ENOTSUPP;
}
- v->domain = iommu_domain_alloc(bus);
- if (!v->domain)
- return -EIO;
+ v->domain = iommu_paging_domain_alloc(dma_dev);
+ if (IS_ERR(v->domain)) {
+ ret = PTR_ERR(v->domain);
+ v->domain = NULL;
+ return ret;
+ }
ret = iommu_attach_device(v->domain, dma_dev);
if (ret)
@@ -1407,6 +1428,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
for (i = 0; i < nvqs; i++) {
vqs[i] = &v->vqs[i];
vqs[i]->handle_kick = handle_vq_kick;
+ vqs[i]->call_ctx.ctx = NULL;
}
vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
vhost_vdpa_process_iotlb_msg);
@@ -1469,13 +1491,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
notify = ops->get_vq_notification(vdpa, index);
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
- PFN_DOWN(notify.addr), PAGE_SIZE,
- vma->vm_page_prot))
- return VM_FAULT_SIGBUS;
-
- return VM_FAULT_NOPAGE;
+ return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr));
}
static const struct vm_operations_struct vhost_vdpa_vm_ops = {
@@ -1534,7 +1550,7 @@ static void vhost_vdpa_release_dev(struct device *device)
struct vhost_vdpa *v =
container_of(device, struct vhost_vdpa, dev);
- ida_simple_remove(&vhost_vdpa_ida, v->minor);
+ ida_free(&vhost_vdpa_ida, v->minor);
kfree(v->vqs);
kfree(v);
}
@@ -1557,8 +1573,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
if (!v)
return -ENOMEM;
- minor = ida_simple_get(&vhost_vdpa_ida, 0,
- VHOST_VDPA_DEV_MAX, GFP_KERNEL);
+ minor = ida_alloc_max(&vhost_vdpa_ida, VHOST_VDPA_DEV_MAX - 1,
+ GFP_KERNEL);
if (minor < 0) {
kfree(v);
return minor;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 045f666b4f12..63612faeab72 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -263,34 +263,37 @@ bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
}
EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
-void vhost_vq_flush(struct vhost_virtqueue *vq)
-{
- struct vhost_flush_struct flush;
-
- init_completion(&flush.wait_event);
- vhost_work_init(&flush.work, vhost_flush_work);
-
- if (vhost_vq_work_queue(vq, &flush.work))
- wait_for_completion(&flush.wait_event);
-}
-EXPORT_SYMBOL_GPL(vhost_vq_flush);
-
/**
- * vhost_worker_flush - flush a worker
+ * __vhost_worker_flush - flush a worker
* @worker: worker to flush
*
- * This does not use RCU to protect the worker, so the device or worker
- * mutex must be held.
+ * The worker's flush_mutex must be held.
*/
-static void vhost_worker_flush(struct vhost_worker *worker)
+static void __vhost_worker_flush(struct vhost_worker *worker)
{
struct vhost_flush_struct flush;
+ if (!worker->attachment_cnt || worker->killed)
+ return;
+
init_completion(&flush.wait_event);
vhost_work_init(&flush.work, vhost_flush_work);
vhost_worker_queue(worker, &flush.work);
+ /*
+ * Drop mutex in case our worker is killed and it needs to take the
+ * mutex to force cleanup.
+ */
+ mutex_unlock(&worker->mutex);
wait_for_completion(&flush.wait_event);
+ mutex_lock(&worker->mutex);
+}
+
+static void vhost_worker_flush(struct vhost_worker *worker)
+{
+ mutex_lock(&worker->mutex);
+ __vhost_worker_flush(worker);
+ mutex_unlock(&worker->mutex);
}
void vhost_dev_flush(struct vhost_dev *dev)
@@ -298,15 +301,8 @@ void vhost_dev_flush(struct vhost_dev *dev)
struct vhost_worker *worker;
unsigned long i;
- xa_for_each(&dev->worker_xa, i, worker) {
- mutex_lock(&worker->mutex);
- if (!worker->attachment_cnt) {
- mutex_unlock(&worker->mutex);
- continue;
- }
+ xa_for_each(&dev->worker_xa, i, worker)
vhost_worker_flush(worker);
- mutex_unlock(&worker->mutex);
- }
}
EXPORT_SYMBOL_GPL(vhost_dev_flush);
@@ -392,7 +388,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
__vhost_vq_meta_reset(vq);
}
-static bool vhost_worker(void *data)
+static bool vhost_run_work_list(void *data)
{
struct vhost_worker *worker = data;
struct vhost_work *work, *work_next;
@@ -417,6 +413,40 @@ static bool vhost_worker(void *data)
return !!node;
}
+static void vhost_worker_killed(void *data)
+{
+ struct vhost_worker *worker = data;
+ struct vhost_dev *dev = worker->dev;
+ struct vhost_virtqueue *vq;
+ int i, attach_cnt = 0;
+
+ mutex_lock(&worker->mutex);
+ worker->killed = true;
+
+ for (i = 0; i < dev->nvqs; i++) {
+ vq = dev->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ if (worker ==
+ rcu_dereference_check(vq->worker,
+ lockdep_is_held(&vq->mutex))) {
+ rcu_assign_pointer(vq->worker, NULL);
+ attach_cnt++;
+ }
+ mutex_unlock(&vq->mutex);
+ }
+
+ worker->attachment_cnt -= attach_cnt;
+ if (attach_cnt)
+ synchronize_rcu();
+ /*
+ * Finish vhost_worker_flush calls and any other works that snuck in
+ * before the synchronize_rcu.
+ */
+ vhost_run_work_list(worker);
+ mutex_unlock(&worker->mutex);
+}
+
static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
{
kfree(vq->indirect);
@@ -631,10 +661,12 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
if (!worker)
return NULL;
+ worker->dev = dev;
snprintf(name, sizeof(name), "vhost-%d", current->pid);
- vtsk = vhost_task_create(vhost_worker, worker, name);
- if (!vtsk)
+ vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
+ worker, name);
+ if (IS_ERR(vtsk))
goto free_worker;
mutex_init(&worker->mutex);
@@ -664,22 +696,37 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
{
struct vhost_worker *old_worker;
- old_worker = rcu_dereference_check(vq->worker,
- lockdep_is_held(&vq->dev->mutex));
-
mutex_lock(&worker->mutex);
- worker->attachment_cnt++;
- mutex_unlock(&worker->mutex);
+ if (worker->killed) {
+ mutex_unlock(&worker->mutex);
+ return;
+ }
+
+ mutex_lock(&vq->mutex);
+
+ old_worker = rcu_dereference_check(vq->worker,
+ lockdep_is_held(&vq->mutex));
rcu_assign_pointer(vq->worker, worker);
+ worker->attachment_cnt++;
- if (!old_worker)
+ if (!old_worker) {
+ mutex_unlock(&vq->mutex);
+ mutex_unlock(&worker->mutex);
return;
+ }
+ mutex_unlock(&vq->mutex);
+ mutex_unlock(&worker->mutex);
+
/*
* Take the worker mutex to make sure we see the work queued from
* device wide flushes which doesn't use RCU for execution.
*/
mutex_lock(&old_worker->mutex);
- old_worker->attachment_cnt--;
+ if (old_worker->killed) {
+ mutex_unlock(&old_worker->mutex);
+ return;
+ }
+
/*
* We don't want to call synchronize_rcu for every vq during setup
* because it will slow down VM startup. If we haven't done
@@ -690,6 +737,8 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
mutex_lock(&vq->mutex);
if (!vhost_vq_get_backend(vq) && !vq->kick) {
mutex_unlock(&vq->mutex);
+
+ old_worker->attachment_cnt--;
mutex_unlock(&old_worker->mutex);
/*
* vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID.
@@ -705,7 +754,8 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
/* Make sure new vq queue/flush/poll calls see the new worker */
synchronize_rcu();
/* Make sure whatever was queued gets run */
- vhost_worker_flush(old_worker);
+ __vhost_worker_flush(old_worker);
+ old_worker->attachment_cnt--;
mutex_unlock(&old_worker->mutex);
}
@@ -754,10 +804,16 @@ static int vhost_free_worker(struct vhost_dev *dev,
return -ENODEV;
mutex_lock(&worker->mutex);
- if (worker->attachment_cnt) {
+ if (worker->attachment_cnt || worker->killed) {
mutex_unlock(&worker->mutex);
return -EBUSY;
}
+ /*
+ * A flush might have raced and snuck in before attachment_cnt was set
+ * to zero. Make sure flushes are flushed from the queue before
+ * freeing.
+ */
+ __vhost_worker_flush(worker);
mutex_unlock(&worker->mutex);
vhost_worker_destroy(dev, worker);
@@ -1290,10 +1346,36 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
mutex_unlock(&d->vqs[i]->mutex);
}
-static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
- __virtio16 *idx)
+static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq)
{
- return vhost_get_avail(vq, *idx, &vq->avail->idx);
+ __virtio16 idx;
+ int r;
+
+ r = vhost_get_avail(vq, idx, &vq->avail->idx);
+ if (unlikely(r < 0)) {
+ vq_err(vq, "Failed to access available index at %p (%d)\n",
+ &vq->avail->idx, r);
+ return r;
+ }
+
+ /* Check it isn't doing very strange thing with available indexes */
+ vq->avail_idx = vhost16_to_cpu(vq, idx);
+ if (unlikely((u16)(vq->avail_idx - vq->last_avail_idx) > vq->num)) {
+ vq_err(vq, "Invalid available index change from %u to %u",
+ vq->last_avail_idx, vq->avail_idx);
+ return -EINVAL;
+ }
+
+ /* We're done if there is nothing new */
+ if (vq->avail_idx == vq->last_avail_idx)
+ return 0;
+
+ /*
+ * We updated vq->avail_idx so we need a memory barrier between
+ * the index read above and the caller reading avail ring entries.
+ */
+ smp_rmb();
+ return 1;
}
static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
@@ -2498,38 +2580,17 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
{
struct vring_desc desc;
unsigned int i, head, found = 0;
- u16 last_avail_idx;
- __virtio16 avail_idx;
+ u16 last_avail_idx = vq->last_avail_idx;
__virtio16 ring_head;
int ret, access;
- /* Check it isn't doing very strange things with descriptor numbers. */
- last_avail_idx = vq->last_avail_idx;
-
if (vq->avail_idx == vq->last_avail_idx) {
- if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
- vq_err(vq, "Failed to access avail idx at %p\n",
- &vq->avail->idx);
- return -EFAULT;
- }
- vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
-
- if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
- vq_err(vq, "Guest moved used index from %u to %u",
- last_avail_idx, vq->avail_idx);
- return -EFAULT;
- }
+ ret = vhost_get_avail_idx(vq);
+ if (unlikely(ret < 0))
+ return ret;
- /* If there's nothing new since last we looked, return
- * invalid.
- */
- if (vq->avail_idx == last_avail_idx)
+ if (!ret)
return vq->num;
-
- /* Only get avail ring entries after they have been
- * exposed by guest.
- */
- smp_rmb();
}
/* Grab the next descriptor number they're advertising, and increment
@@ -2790,25 +2851,21 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
/* return true if we're sure that avaiable ring is empty */
bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
- __virtio16 avail_idx;
int r;
if (vq->avail_idx != vq->last_avail_idx)
return false;
- r = vhost_get_avail_idx(vq, &avail_idx);
- if (unlikely(r))
- return false;
- vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ r = vhost_get_avail_idx(vq);
- return vq->avail_idx == vq->last_avail_idx;
+ /* Note: we treat error as non-empty here */
+ return r == 0;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
/* OK, now we need to know about added descriptors. */
bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
- __virtio16 avail_idx;
int r;
if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
@@ -2832,15 +2889,13 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
/* They could have slipped one in as we were doing that: make
* sure it's written, then check again. */
smp_mb();
- r = vhost_get_avail_idx(vq, &avail_idx);
- if (r) {
- vq_err(vq, "Failed to check avail idx at %p: %d\n",
- &vq->avail->idx, r);
+
+ r = vhost_get_avail_idx(vq);
+ /* Note: we treat error as empty here */
+ if (unlikely(r < 0))
return false;
- }
- vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
- return vq->avail_idx != vq->last_avail_idx;
+ return r;
}
EXPORT_SYMBOL_GPL(vhost_enable_notify);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 9e942fcda5c3..bb75a292d50c 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -28,12 +28,14 @@ struct vhost_work {
struct vhost_worker {
struct vhost_task *vtsk;
+ struct vhost_dev *dev;
/* Used to serialize device wide flushing with worker swapping. */
struct mutex mutex;
struct llist_head work_list;
u64 kcov_handle;
u32 id;
int attachment_cnt;
+ bool killed;
};
/* Poll a file (eventfd or socket) */
@@ -205,7 +207,6 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
struct vhost_log *log, unsigned int *log_num);
void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
-void vhost_vq_flush(struct vhost_virtqueue *vq);
bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work);
bool vhost_vq_has_work(struct vhost_virtqueue *vq);
bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 7b8fd977f71c..73e153f9b449 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1614,4 +1614,5 @@ EXPORT_SYMBOL(vringh_need_notify_iotlb);
#endif
+MODULE_DESCRIPTION("host side of a virtio ring");
MODULE_LICENSE("GPL");
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index ec20ecff85c7..802153e23073 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -244,7 +244,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
restart_tx = true;
}
- consume_skb(skb);
+ virtio_transport_consume_skb_sent(skb, true);
}
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
if (added)
@@ -451,6 +451,8 @@ static struct virtio_transport vhost_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
+ .unsent_bytes = virtio_transport_unsent_bytes,
+
.read_skb = virtio_transport_read_skb,
},
@@ -667,6 +669,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
}
vsock->guest_cid = 0; /* no CID assigned yet */
+ vsock->seqpacket_allow = false;
atomic_set(&vsock->queued_replies, 0);
@@ -810,8 +813,7 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
goto err;
}
- if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
- vsock->seqpacket_allow = true;
+ vsock->seqpacket_allow = features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET);
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
vq = &vsock->vqs[i];