Diffstat (limited to 'drivers/vhost/vsock.c')
 -rw-r--r--  drivers/vhost/vsock.c | 572
1 file changed, 363 insertions(+), 209 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 3fbc068eaa9b..0298ddc34824 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -1,11 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * vhost transport for vsock
  *
  * Copyright (C) 2013-2015 Red Hat, Inc.
  * Author: Asias He <asias@redhat.com>
  *         Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
  */
 #include <linux/miscdevice.h>
 #include <linux/atomic.h>
@@ -21,9 +20,25 @@
 #include "vhost.h"
 
 #define VHOST_VSOCK_DEFAULT_HOST_CID	2
+/* Max number of bytes transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others. */
+#define VHOST_VSOCK_WEIGHT 0x80000
+/* Max number of packets transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others with
+ * small pkts.
+ */
+#define VHOST_VSOCK_PKT_WEIGHT 256
+
+static const int vhost_vsock_bits[] = {
+	VHOST_FEATURES,
+	VIRTIO_F_ACCESS_PLATFORM,
+	VIRTIO_VSOCK_F_SEQPACKET
+};
+
+#define VHOST_VSOCK_FEATURES VHOST_FEATURES_U64(vhost_vsock_bits, 0)
 
 enum {
-	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+	VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
 };
 
 /* Used to track all the vhost_vsock instances on the system. */
@@ -38,12 +53,12 @@ struct vhost_vsock {
 	struct hlist_node hash;
 
 	struct vhost_work send_pkt_work;
-	spinlock_t send_pkt_list_lock;
-	struct list_head send_pkt_list;	/* host->guest pending packets */
+	struct sk_buff_head send_pkt_queue; /* host->guest pending packets */
 
 	atomic_t queued_replies;
 
 	u32 guest_cid;
+	bool seqpacket_allow;
 };
 
 static u32 vhost_transport_get_local_cid(void)
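The two weights above bound how long the send worker may run before it yields. As a rough illustration (not part of the patch; the fixed packet size and the requeue mechanics are simplified stand-ins for what vhost_exceeds_weight() does in the kernel), a standalone sketch:

    #include <stdbool.h>
    #include <stdio.h>

    #define WEIGHT     0x80000          /* VHOST_VSOCK_WEIGHT: byte budget */
    #define PKT_WEIGHT 256              /* VHOST_VSOCK_PKT_WEIGHT: packet budget */

    /* Stand-in for vhost_exceeds_weight(): true once either budget is spent. */
    static bool exceeds_weight(int pkts, int total_len)
    {
            return pkts >= PKT_WEIGHT || total_len >= WEIGHT;
    }

    int main(void)
    {
            int pkts = 0, total_len = 0;

            do {
                    total_len += 4096;  /* pretend one 4 KiB packet was sent */
            } while (!exceeds_weight(++pkts, total_len));

            /* 128 packets * 4096 bytes hit the byte budget first; with tiny
             * packets the 256-packet budget would trip instead. */
            printf("worker requeued after %d packets, %d bytes\n", pkts, total_len);
            return 0;
    }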
@@ -78,51 +93,48 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 			    struct vhost_virtqueue *vq)
 {
 	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
+	int pkts = 0, total_len = 0;
 	bool added = false;
 	bool restart_tx = false;
 
 	mutex_lock(&vq->mutex);
 
-	if (!vq->private_data)
+	if (!vhost_vq_get_backend(vq))
+		goto out;
+
+	if (!vq_meta_prefetch(vq))
 		goto out;
 
 	/* Avoid further vmexits, we're already processing the virtqueue */
 	vhost_disable_notify(&vsock->dev, vq);
 
-	for (;;) {
-		struct virtio_vsock_pkt *pkt;
+	do {
+		struct virtio_vsock_hdr *hdr;
+		size_t iov_len, payload_len;
 		struct iov_iter iov_iter;
+		u32 flags_to_restore = 0;
+		struct sk_buff *skb;
 		unsigned out, in;
 		size_t nbytes;
-		size_t len;
+		u32 offset;
 		int head;
 
-		spin_lock_bh(&vsock->send_pkt_list_lock);
-		if (list_empty(&vsock->send_pkt_list)) {
-			spin_unlock_bh(&vsock->send_pkt_list_lock);
+		skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
+
+		if (!skb) {
 			vhost_enable_notify(&vsock->dev, vq);
 			break;
 		}
 
-		pkt = list_first_entry(&vsock->send_pkt_list,
-				       struct virtio_vsock_pkt, list);
-		list_del_init(&pkt->list);
-		spin_unlock_bh(&vsock->send_pkt_list_lock);
-
 		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 					 &out, &in, NULL, NULL);
 		if (head < 0) {
-			spin_lock_bh(&vsock->send_pkt_list_lock);
-			list_add(&pkt->list, &vsock->send_pkt_list);
-			spin_unlock_bh(&vsock->send_pkt_list_lock);
+			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
 			break;
 		}
 
 		if (head == vq->num) {
-			spin_lock_bh(&vsock->send_pkt_list_lock);
-			list_add(&pkt->list, &vsock->send_pkt_list);
-			spin_unlock_bh(&vsock->send_pkt_list_lock);
-
+			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
 			/* We cannot finish yet if more buffers snuck in while
 			 * re-enabling notify.
 			 */
@@ -134,48 +146,109 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		}
 
 		if (out) {
-			virtio_transport_free_pkt(pkt);
+			kfree_skb(skb);
 			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
 			break;
 		}
 
-		len = iov_length(&vq->iov[out], in);
-		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
+		iov_len = iov_length(&vq->iov[out], in);
+		if (iov_len < sizeof(*hdr)) {
+			kfree_skb(skb);
+			vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
+			break;
+		}
+
+		iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len);
+		offset = VIRTIO_VSOCK_SKB_CB(skb)->offset;
+		payload_len = skb->len - offset;
+		hdr = virtio_vsock_hdr(skb);
+
+		/* If the packet is greater than the space available in the
+		 * buffer, we split it using multiple buffers.
+		 */
+		if (payload_len > iov_len - sizeof(*hdr)) {
+			payload_len = iov_len - sizeof(*hdr);
+
+			/* As we are copying pieces of large packet's buffer to
+			 * small rx buffers, headers of packets in rx queue are
+			 * created dynamically and are initialized with header
+			 * of current packet(except length). But in case of
+			 * SOCK_SEQPACKET, we also must clear message delimeter
+			 * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit
+			 * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise,
+			 * there will be sequence of packets with these
+			 * bits set. After initialized header will be copied to
+			 * rx buffer, these required bits will be restored.
+			 */
+			if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
+				hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+				flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;
+
+				if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) {
+					hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+					flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
+				}
+			}
+		}
+
+		/* Set the correct length in the header */
+		hdr->len = cpu_to_le32(payload_len);
 
-		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
-		if (nbytes != sizeof(pkt->hdr)) {
-			virtio_transport_free_pkt(pkt);
+		nbytes = copy_to_iter(hdr, sizeof(*hdr), &iov_iter);
+		if (nbytes != sizeof(*hdr)) {
+			kfree_skb(skb);
 			vq_err(vq, "Faulted on copying pkt hdr\n");
 			break;
 		}
 
-		nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
-		if (nbytes != pkt->len) {
-			virtio_transport_free_pkt(pkt);
+		if (skb_copy_datagram_iter(skb,
+					   offset,
+					   &iov_iter,
+					   payload_len)) {
+			kfree_skb(skb);
 			vq_err(vq, "Faulted on copying pkt buf\n");
 			break;
 		}
 
-		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
-		added = true;
-
-		if (pkt->reply) {
-			int val;
+		/* Deliver to monitoring devices all packets that we
+		 * will transmit.
+		 */
+		virtio_transport_deliver_tap_pkt(skb);
 
-			val = atomic_dec_return(&vsock->queued_replies);
+		vhost_add_used(vq, head, sizeof(*hdr) + payload_len);
+		added = true;
 
-			/* Do we have resources to resume tx processing? */
-			if (val + 1 == tx_vq->num)
-				restart_tx = true;
-		}
+		VIRTIO_VSOCK_SKB_CB(skb)->offset += payload_len;
+		total_len += payload_len;
 
-		/* Deliver to monitoring devices all correctly transmitted
-		 * packets.
+		/* If we didn't send all the payload we can requeue the packet
+		 * to send it with the next available buffer.
 		 */
-		virtio_transport_deliver_tap_pkt(pkt);
+		if (VIRTIO_VSOCK_SKB_CB(skb)->offset < skb->len) {
+			hdr->flags |= cpu_to_le32(flags_to_restore);
 
-		virtio_transport_free_pkt(pkt);
-	}
+			/* We are queueing the same skb to handle
+			 * the remaining bytes, and we want to deliver it
+			 * to monitoring devices in the next iteration.
+			 */
+			virtio_vsock_skb_clear_tap_delivered(skb);
+			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
+		} else {
+			if (virtio_vsock_skb_reply(skb)) {
+				int val;
+
+				val = atomic_dec_return(&vsock->queued_replies);
+
+				/* Do we have resources to resume tx
+				 * processing?
+				 */
+				if (val + 1 == tx_vq->num)
+					restart_tx = true;
+			}
+
+			virtio_transport_consume_skb_sent(skb, true);
+		}
+	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
 
 	if (added)
 		vhost_signal(&vsock->dev, vq);
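The splitting logic above walks one skb across several guest rx buffers, advancing a per-skb offset and rewriting hdr->len on every pass; the EOM/EOR flags are masked out until the final chunk. A minimal user-space model of that walk (sizes are hypothetical; 44 bytes stands in for sizeof(struct virtio_vsock_hdr)):

    #include <stdio.h>

    int main(void)
    {
            const size_t hdr_len = 44;          /* sizeof(struct virtio_vsock_hdr) */
            const size_t iov_len = 4096;        /* one guest rx buffer */
            const size_t skb_len = 10 * 1024;   /* payload larger than one buffer */
            size_t offset = 0;
            int pass = 0;

            while (offset < skb_len) {
                    size_t payload = skb_len - offset;

                    if (payload > iov_len - hdr_len)
                            payload = iov_len - hdr_len; /* split across buffers */
                    printf("pass %d: hdr(len=%zu) + payload at offset %zu\n",
                           ++pass, payload, offset);
                    offset += payload;  /* skb requeued until offset == skb_len */
            }
            /* three passes here: 4052 + 4052 + 2136 bytes; EOM/EOR would be
             * restored only on the third header */
            return 0;
    }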
@@ -198,29 +271,27 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work)
 }
 
 static int
-vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
+vhost_transport_send_pkt(struct sk_buff *skb)
 {
+	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
 	struct vhost_vsock *vsock;
-	int len = pkt->len;
+	int len = skb->len;
 
 	rcu_read_lock();
 
 	/* Find the vhost_vsock according to guest context id */
-	vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
+	vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid));
 	if (!vsock) {
 		rcu_read_unlock();
-		virtio_transport_free_pkt(pkt);
+		kfree_skb(skb);
 		return -ENODEV;
 	}
 
-	if (pkt->reply)
+	if (virtio_vsock_skb_reply(skb))
 		atomic_inc(&vsock->queued_replies);
 
-	spin_lock_bh(&vsock->send_pkt_list_lock);
-	list_add_tail(&pkt->list, &vsock->send_pkt_list);
-	spin_unlock_bh(&vsock->send_pkt_list_lock);
-
-	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+	virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
+	vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
 
 	rcu_read_unlock();
 	return len;
@@ -230,10 +301,8 @@ static int
 vhost_transport_cancel_pkt(struct vsock_sock *vsk)
 {
 	struct vhost_vsock *vsock;
-	struct virtio_vsock_pkt *pkt, *n;
 	int cnt = 0;
 	int ret = -ENODEV;
-	LIST_HEAD(freeme);
 
 	rcu_read_lock();
 
@@ -242,20 +311,7 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
 	if (!vsock)
 		goto out;
 
-	spin_lock_bh(&vsock->send_pkt_list_lock);
-	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
-		if (pkt->vsk != vsk)
-			continue;
-		list_move(&pkt->list, &freeme);
-	}
-	spin_unlock_bh(&vsock->send_pkt_list_lock);
-
-	list_for_each_entry_safe(pkt, n, &freeme, list) {
-		if (pkt->reply)
-			cnt++;
-		list_del(&pkt->list);
-		virtio_transport_free_pkt(pkt);
-	}
+	cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue);
 
 	if (cnt) {
 		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
@@ -272,12 +328,14 @@ out:
 	return ret;
 }
 
-static struct virtio_vsock_pkt *
-vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
+static struct sk_buff *
+vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
 		      unsigned int out, unsigned int in)
 {
-	struct virtio_vsock_pkt *pkt;
+	struct virtio_vsock_hdr *hdr;
 	struct iov_iter iov_iter;
+	struct sk_buff *skb;
+	size_t payload_len;
 	size_t nbytes;
 	size_t len;
@@ -286,49 +344,49 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		return NULL;
 	}
 
-	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
-	if (!pkt)
+	len = iov_length(vq->iov, out);
+
+	if (len < VIRTIO_VSOCK_SKB_HEADROOM ||
+	    len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM)
 		return NULL;
 
-	len = iov_length(vq->iov, out);
-	iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
+	/* len contains both payload and hdr */
+	skb = virtio_vsock_alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	iov_iter_init(&iov_iter, ITER_SOURCE, vq->iov, out, len);
 
-	nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
-	if (nbytes != sizeof(pkt->hdr)) {
+	hdr = virtio_vsock_hdr(skb);
+	nbytes = copy_from_iter(hdr, sizeof(*hdr), &iov_iter);
+	if (nbytes != sizeof(*hdr)) {
 		vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
-		       sizeof(pkt->hdr), nbytes);
-		kfree(pkt);
+		       sizeof(*hdr), nbytes);
+		kfree_skb(skb);
 		return NULL;
 	}
 
-	if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
-		pkt->len = le32_to_cpu(pkt->hdr.len);
+	payload_len = le32_to_cpu(hdr->len);
 
 	/* No payload */
-	if (!pkt->len)
-		return pkt;
+	if (!payload_len)
+		return skb;
 
-	/* The pkt is too big */
-	if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
-		kfree(pkt);
+	/* The pkt is too big or the length in the header is invalid */
+	if (payload_len + sizeof(*hdr) > len) {
+		kfree_skb(skb);
 		return NULL;
 	}
 
-	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
-	if (!pkt->buf) {
-		kfree(pkt);
-		return NULL;
-	}
+	virtio_vsock_skb_put(skb, payload_len);
 
-	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
-	if (nbytes != pkt->len) {
-		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
-		       pkt->len, nbytes);
-		virtio_transport_free_pkt(pkt);
+	if (skb_copy_datagram_from_iter(skb, 0, &iov_iter, payload_len)) {
+		vq_err(vq, "Failed to copy %zu byte payload\n", payload_len);
+		kfree_skb(skb);
 		return NULL;
 	}
 
-	return pkt;
+	return skb;
 }
 
 /* Is there space left for replies to rx packets? */
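For clarity, the bounds vhost_vsock_alloc_skb() now enforces can be stated as one predicate. This sketch is illustrative only; the two constants mirror VIRTIO_VSOCK_SKB_HEADROOM and VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, whose exact values are assumptions here:

    #include <stdbool.h>
    #include <stddef.h>

    #define HDR_LEN     44              /* assumed sizeof(struct virtio_vsock_hdr) */
    #define HEADROOM    HDR_LEN         /* stands in for VIRTIO_VSOCK_SKB_HEADROOM */
    #define MAX_PKT_BUF (64 * 1024)     /* stands in for VIRTIO_VSOCK_MAX_PKT_BUF_SIZE */

    /* True iff a guest tx descriptor of iov_len bytes, whose header claims
     * payload_len bytes of payload, passes the checks above. */
    static bool guest_desc_ok(size_t iov_len, size_t payload_len)
    {
            if (iov_len < HEADROOM || iov_len > MAX_PKT_BUF + HEADROOM)
                    return false;       /* descriptor too small or too large */
            return payload_len + HDR_LEN <= iov_len; /* hdr->len must fit */
    }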
@@ -343,25 +401,105 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
 	return val < vq->num;
 }
 
+static bool vhost_transport_msgzerocopy_allow(void)
+{
+	return true;
+}
+
+static bool vhost_transport_seqpacket_allow(u32 remote_cid);
+
+static struct virtio_transport vhost_transport = {
+	.transport = {
+		.module = THIS_MODULE,
+
+		.get_local_cid = vhost_transport_get_local_cid,
+
+		.init = virtio_transport_do_socket_init,
+		.destruct = virtio_transport_destruct,
+		.release = virtio_transport_release,
+		.connect = virtio_transport_connect,
+		.shutdown = virtio_transport_shutdown,
+		.cancel_pkt = vhost_transport_cancel_pkt,
+
+		.dgram_enqueue = virtio_transport_dgram_enqueue,
+		.dgram_dequeue = virtio_transport_dgram_dequeue,
+		.dgram_bind = virtio_transport_dgram_bind,
+		.dgram_allow = virtio_transport_dgram_allow,
+
+		.stream_enqueue = virtio_transport_stream_enqueue,
+		.stream_dequeue = virtio_transport_stream_dequeue,
+		.stream_has_data = virtio_transport_stream_has_data,
+		.stream_has_space = virtio_transport_stream_has_space,
+		.stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+		.stream_is_active = virtio_transport_stream_is_active,
+		.stream_allow = virtio_transport_stream_allow,
+
+		.seqpacket_dequeue = virtio_transport_seqpacket_dequeue,
+		.seqpacket_enqueue = virtio_transport_seqpacket_enqueue,
+		.seqpacket_allow = vhost_transport_seqpacket_allow,
+		.seqpacket_has_data = virtio_transport_seqpacket_has_data,
+
+		.msgzerocopy_allow = vhost_transport_msgzerocopy_allow,
+
+		.notify_poll_in = virtio_transport_notify_poll_in,
+		.notify_poll_out = virtio_transport_notify_poll_out,
+		.notify_recv_init = virtio_transport_notify_recv_init,
+		.notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+		.notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+		.notify_send_init = virtio_transport_notify_send_init,
+		.notify_send_pre_block = virtio_transport_notify_send_pre_block,
+		.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+		.notify_buffer_size = virtio_transport_notify_buffer_size,
+		.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
+
+		.unsent_bytes = virtio_transport_unsent_bytes,
+
+		.read_skb = virtio_transport_read_skb,
+	},
+
+	.send_pkt = vhost_transport_send_pkt,
+};
+
+static bool vhost_transport_seqpacket_allow(u32 remote_cid)
+{
+	struct vhost_vsock *vsock;
+	bool seqpacket_allow = false;
+
+	rcu_read_lock();
+	vsock = vhost_vsock_get(remote_cid);
+
+	if (vsock)
+		seqpacket_allow = vsock->seqpacket_allow;
+
+	rcu_read_unlock();
+
+	return seqpacket_allow;
+}
+
 static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
 {
 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
 						  poll.work);
 	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
 						 dev);
-	struct virtio_vsock_pkt *pkt;
-	int head;
+	int head, pkts = 0, total_len = 0;
 	unsigned int out, in;
+	struct sk_buff *skb;
 	bool added = false;
 
 	mutex_lock(&vq->mutex);
 
-	if (!vq->private_data)
+	if (!vhost_vq_get_backend(vq))
+		goto out;
+
+	if (!vq_meta_prefetch(vq))
 		goto out;
 
 	vhost_disable_notify(&vsock->dev, vq);
 
-	for (;;) {
-		u32 len;
+	do {
+		struct virtio_vsock_hdr *hdr;
 
 		if (!vhost_vsock_more_replies(vsock)) {
 			/* Stop tx until the device processes already
@@ -384,26 +522,30 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
 			break;
 		}
 
-		pkt = vhost_vsock_alloc_pkt(vq, out, in);
-		if (!pkt) {
+		skb = vhost_vsock_alloc_skb(vq, out, in);
+		if (!skb) {
 			vq_err(vq, "Faulted on pkt\n");
 			continue;
 		}
 
-		len = pkt->len;
+		total_len += sizeof(*hdr) + skb->len;
 
 		/* Deliver to monitoring devices all received packets */
-		virtio_transport_deliver_tap_pkt(pkt);
+		virtio_transport_deliver_tap_pkt(skb);
+
+		hdr = virtio_vsock_hdr(skb);
 
 		/* Only accept correctly addressed packets */
-		if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
-			virtio_transport_recv_pkt(pkt);
+		if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid &&
+		    le64_to_cpu(hdr->dst_cid) ==
+		    vhost_transport_get_local_cid())
+			virtio_transport_recv_pkt(&vhost_transport, skb);
 		else
-			virtio_transport_free_pkt(pkt);
+			kfree_skb(skb);
 
-		vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
+		vhost_add_used(vq, head, 0);
 		added = true;
-	}
+	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
 
 no_more_replies:
 	if (added)
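The rx path above got stricter: besides requiring the source CID to match the guest's CID, it now also requires the destination CID to be the host CID, dropping anything else. Expressed on its own (illustrative only; the host CID constant is taken from the top of this file):

    #include <stdbool.h>
    #include <stdint.h>

    #define VHOST_VSOCK_DEFAULT_HOST_CID 2

    /* A guest->host packet is accepted only if it comes from the CID this
     * vhost_vsock instance was assigned and is addressed to the host. */
    static bool correctly_addressed(uint64_t src_cid, uint64_t dst_cid,
                                    uint64_t guest_cid)
    {
            return src_cid == guest_cid &&
                   dst_cid == VHOST_VSOCK_DEFAULT_HOST_CID;
    }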
@@ -445,8 +587,8 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
 			goto err_vq;
 		}
 
-		if (!vq->private_data) {
-			vq->private_data = vsock;
+		if (!vhost_vq_get_backend(vq)) {
+			vhost_vq_set_backend(vq, vsock);
 			ret = vhost_vq_init_access(vq);
 			if (ret)
 				goto err_vq;
@@ -455,18 +597,23 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
 		mutex_unlock(&vq->mutex);
 	}
 
+	/* Some packets may have been queued before the device was started,
+	 * let's kick the send worker to send them.
+	 */
+	vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
+
 	mutex_unlock(&vsock->dev.mutex);
 	return 0;
 
 err_vq:
-	vq->private_data = NULL;
+	vhost_vq_set_backend(vq, NULL);
 	mutex_unlock(&vq->mutex);
 
 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
 		vq = &vsock->vqs[i];
 
 		mutex_lock(&vq->mutex);
-		vq->private_data = NULL;
+		vhost_vq_set_backend(vq, NULL);
 		mutex_unlock(&vq->mutex);
 	}
 err:
@@ -474,22 +621,24 @@ err:
 	return ret;
 }
 
-static int vhost_vsock_stop(struct vhost_vsock *vsock)
+static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
 {
 	size_t i;
-	int ret;
+	int ret = 0;
 
 	mutex_lock(&vsock->dev.mutex);
 
-	ret = vhost_dev_check_owner(&vsock->dev);
-	if (ret)
-		goto err;
+	if (check_owner) {
+		ret = vhost_dev_check_owner(&vsock->dev);
+		if (ret)
+			goto err;
+	}
 
 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
 		struct vhost_virtqueue *vq = &vsock->vqs[i];
 
 		mutex_lock(&vq->mutex);
-		vq->private_data = NULL;
+		vhost_vq_set_backend(vq, NULL);
 		mutex_unlock(&vq->mutex);
 	}
@@ -523,6 +672,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
 	}
 
 	vsock->guest_cid = 0; /* no CID assigned yet */
+	vsock->seqpacket_allow = false;
 
 	atomic_set(&vsock->queued_replies, 0);
 
@@ -531,11 +681,12 @@
 	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
 	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
 
-	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));
+	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
+		       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
+		       VHOST_VSOCK_WEIGHT, true, NULL);
 
 	file->private_data = vsock;
-	spin_lock_init(&vsock->send_pkt_list_lock);
-	INIT_LIST_HEAD(&vsock->send_pkt_list);
+	skb_queue_head_init(&vsock->send_pkt_queue);
 	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
 	return 0;
 
@@ -546,12 +697,7 @@ out:
 
 static void vhost_vsock_flush(struct vhost_vsock *vsock)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
-		if (vsock->vqs[i].handle_kick)
-			vhost_poll_flush(&vsock->vqs[i].poll);
-	vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
+	vhost_dev_flush(&vsock->dev);
 }
 
 static void vhost_vsock_reset_orphans(struct sock *sk)
@@ -577,7 +723,7 @@
 	vsk->peer_shutdown = SHUTDOWN_MASK;
 	sk->sk_state = SS_UNCONNECTED;
 	sk->sk_err = ECONNRESET;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 }
 
 static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
@@ -594,22 +740,19 @@
 	/* Iterating over all connections for all CIDs to find orphans is
 	 * inefficient.  Room for improvement here. */
-	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
+	vsock_for_each_connected_socket(&vhost_transport.transport,
+					vhost_vsock_reset_orphans);
 
-	vhost_vsock_stop(vsock);
+	/* Don't check the owner, because we are in the release path, so we
+	 * need to stop the vsock device in any case.
+	 * vhost_vsock_stop() can not fail in this case, so we don't need to
+	 * check the return code.
+	 */
+	vhost_vsock_stop(vsock, false);
 	vhost_vsock_flush(vsock);
 	vhost_dev_stop(&vsock->dev);
 
-	spin_lock_bh(&vsock->send_pkt_list_lock);
-	while (!list_empty(&vsock->send_pkt_list)) {
-		struct virtio_vsock_pkt *pkt;
-
-		pkt = list_first_entry(&vsock->send_pkt_list,
-				       struct virtio_vsock_pkt, list);
-		list_del_init(&pkt->list);
-		virtio_transport_free_pkt(pkt);
-	}
-	spin_unlock_bh(&vsock->send_pkt_list_lock);
+	virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
 
 	vhost_dev_cleanup(&vsock->dev);
 	kfree(vsock->dev.vqs);
@@ -630,6 +773,12 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
 	if (guest_cid > U32_MAX)
 		return -EINVAL;
 
+	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
+	 * VM), to make the loopback work.
+	 */
+	if (vsock_find_cid(guest_cid))
+		return -EADDRINUSE;
+
 	/* Refuse if CID is already in use */
 	mutex_lock(&vhost_vsock_mutex);
 	other = vhost_vsock_get(guest_cid);
@@ -659,10 +808,16 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
 	mutex_lock(&vsock->dev.mutex);
 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
 	    !vhost_log_access_ok(&vsock->dev)) {
-		mutex_unlock(&vsock->dev.mutex);
-		return -EFAULT;
+		goto err;
+	}
+
+	if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
+		if (vhost_init_device_iotlb(&vsock->dev))
+			goto err;
 	}
 
+	vsock->seqpacket_allow = features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET);
+
 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
 		vq = &vsock->vqs[i];
 		mutex_lock(&vq->mutex);
@@ -671,6 +826,10 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
 	}
 	mutex_unlock(&vsock->dev.mutex);
 	return 0;
+
+err:
+	mutex_unlock(&vsock->dev.mutex);
+	return -EFAULT;
 }
 
 static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
@@ -694,7 +853,7 @@
 		if (start)
 			return vhost_vsock_start(vsock);
 		else
-			return vhost_vsock_stop(vsock);
+			return vhost_vsock_stop(vsock, true);
 	case VHOST_GET_FEATURES:
 		features = VHOST_VSOCK_FEATURES;
 		if (copy_to_user(argp, &features, sizeof(features)))
@@ -704,6 +863,18 @@
 		if (copy_from_user(&features, argp, sizeof(features)))
 			return -EFAULT;
 		return vhost_vsock_set_features(vsock, features);
+	case VHOST_GET_BACKEND_FEATURES:
+		features = VHOST_VSOCK_BACKEND_FEATURES;
+		if (copy_to_user(argp, &features, sizeof(features)))
+			return -EFAULT;
+		return 0;
+	case VHOST_SET_BACKEND_FEATURES:
+		if (copy_from_user(&features, argp, sizeof(features)))
+			return -EFAULT;
+		if (features & ~VHOST_VSOCK_BACKEND_FEATURES)
+			return -EOPNOTSUPP;
+		vhost_set_backend_features(&vsock->dev, features);
+		return 0;
 	default:
 		mutex_lock(&vsock->dev.mutex);
 		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
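The two new ioctls follow the usual vhost pattern: read the mask the kernel offers, clear anything you don't want, write it back. A user-space sketch (error handling trimmed; assumes the fd already went through VHOST_SET_OWNER):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/vhost.h>

    static int negotiate_backend_features(int vhost_fd)
    {
            uint64_t features;

            if (ioctl(vhost_fd, VHOST_GET_BACKEND_FEATURES, &features) < 0)
                    return -1;
            /* keep only what we understand; per the hunk above the device
             * rejects unknown bits with -EOPNOTSUPP anyway */
            features &= 1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2;
            return ioctl(vhost_fd, VHOST_SET_BACKEND_FEATURES, &features);
    }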
@@ -716,13 +887,33 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
 	}
 }
 
-#ifdef CONFIG_COMPAT
-static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl,
-					 unsigned long arg)
+static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	struct vhost_vsock *vsock = file->private_data;
+	struct vhost_dev *dev = &vsock->dev;
+	int noblock = file->f_flags & O_NONBLOCK;
+
+	return vhost_chr_read_iter(dev, to, noblock);
+}
+
+static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb,
+					  struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct vhost_vsock *vsock = file->private_data;
+	struct vhost_dev *dev = &vsock->dev;
+
+	return vhost_chr_write_iter(dev, from);
+}
+
+static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait)
 {
-	return vhost_vsock_dev_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
+	struct vhost_vsock *vsock = file->private_data;
+	struct vhost_dev *dev = &vsock->dev;
+
+	return vhost_chr_poll(file, dev, wait);
 }
-#endif
 
 static const struct file_operations vhost_vsock_fops = {
 	.owner		= THIS_MODULE,
@@ -730,9 +921,10 @@
 	.release        = vhost_vsock_dev_release,
 	.llseek		= noop_llseek,
 	.unlocked_ioctl = vhost_vsock_dev_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl   = vhost_vsock_dev_compat_ioctl,
-#endif
+	.compat_ioctl   = compat_ptr_ioctl,
+	.read_iter      = vhost_vsock_chr_read_iter,
+	.write_iter     = vhost_vsock_chr_write_iter,
+	.poll           = vhost_vsock_chr_poll,
 };
 
 static struct miscdevice vhost_vsock_misc = {
@@ -741,66 +933,28 @@
 	.fops = &vhost_vsock_fops,
 };
 
-static struct virtio_transport vhost_transport = {
-	.transport = {
-		.get_local_cid = vhost_transport_get_local_cid,
-
-		.init = virtio_transport_do_socket_init,
-		.destruct = virtio_transport_destruct,
-		.release = virtio_transport_release,
-		.connect = virtio_transport_connect,
-		.shutdown = virtio_transport_shutdown,
-		.cancel_pkt = vhost_transport_cancel_pkt,
-
-		.dgram_enqueue = virtio_transport_dgram_enqueue,
-		.dgram_dequeue = virtio_transport_dgram_dequeue,
-		.dgram_bind = virtio_transport_dgram_bind,
-		.dgram_allow = virtio_transport_dgram_allow,
-
-		.stream_enqueue = virtio_transport_stream_enqueue,
-		.stream_dequeue = virtio_transport_stream_dequeue,
-		.stream_has_data = virtio_transport_stream_has_data,
-		.stream_has_space = virtio_transport_stream_has_space,
-		.stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
-		.stream_is_active = virtio_transport_stream_is_active,
-		.stream_allow = virtio_transport_stream_allow,
-
-		.notify_poll_in = virtio_transport_notify_poll_in,
-		.notify_poll_out = virtio_transport_notify_poll_out,
-		.notify_recv_init = virtio_transport_notify_recv_init,
-		.notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
-		.notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
-		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
-		.notify_send_init = virtio_transport_notify_send_init,
-		.notify_send_pre_block = virtio_transport_notify_send_pre_block,
-		.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
-		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
-
-		.set_buffer_size = virtio_transport_set_buffer_size,
-		.set_min_buffer_size = virtio_transport_set_min_buffer_size,
-		.set_max_buffer_size = virtio_transport_set_max_buffer_size,
-		.get_buffer_size = virtio_transport_get_buffer_size,
-		.get_min_buffer_size = virtio_transport_get_min_buffer_size,
-		.get_max_buffer_size = virtio_transport_get_max_buffer_size,
-	},
-
-	.send_pkt = vhost_transport_send_pkt,
-};
-
 static int __init vhost_vsock_init(void)
 {
 	int ret;
 
-	ret = vsock_core_init(&vhost_transport.transport);
+	ret = vsock_core_register(&vhost_transport.transport,
+				  VSOCK_TRANSPORT_F_H2G);
 	if (ret < 0)
 		return ret;
-	return misc_register(&vhost_vsock_misc);
+
+	ret = misc_register(&vhost_vsock_misc);
+	if (ret) {
+		vsock_core_unregister(&vhost_transport.transport);
+		return ret;
+	}
+
+	return 0;
 };
 
 static void __exit vhost_vsock_exit(void)
 {
 	misc_deregister(&vhost_vsock_misc);
-	vsock_core_exit();
+	vsock_core_unregister(&vhost_transport.transport);
 };
 
 module_init(vhost_vsock_init);
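Taken together, a VMM drives this device roughly as follows. A minimal bring-up sketch (illustrative only; real setup also needs VHOST_SET_FEATURES, a memory table, and VHOST_SET_VRING_* calls plus eventfds for both queues):

    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/vhost.h>

    int vhost_vsock_bringup(uint64_t guest_cid)
    {
            int running = 1;
            int fd = open("/dev/vhost-vsock", O_RDWR);

            if (fd < 0)
                    return -1;
            if (ioctl(fd, VHOST_SET_OWNER, NULL) < 0 ||
                ioctl(fd, VHOST_VSOCK_SET_GUEST_CID, &guest_cid) < 0)
                    goto fail;
            /* ... feature negotiation and vring setup elided ... */
            if (ioctl(fd, VHOST_VSOCK_SET_RUNNING, &running) < 0)
                    goto fail;
            return fd;
    fail:
            close(fd);
            return -1;
    }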
