diff options
Diffstat (limited to 'net/vmw_vsock/virtio_transport.c')
| -rw-r--r-- | net/vmw_vsock/virtio_transport.c | 238 |
1 files changed, 197 insertions, 41 deletions
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index e95df847176b..8c867023a2e5 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -63,6 +63,17 @@ struct virtio_vsock { u32 guest_cid; bool seqpacket_allow; + + /* These fields are used only in tx path in function + * 'virtio_transport_send_pkt_work()', so to save + * stack space in it, place both of them here. Each + * pointer from 'out_sgs' points to the corresponding + * element in 'out_bufs' - this is initialized in + * 'virtio_vsock_probe()'. Both fields are protected + * by 'tx_lock'. +1 is needed for packet header. + */ + struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1]; + struct scatterlist out_bufs[MAX_SKB_FRAGS + 1]; }; static u32 virtio_transport_get_local_cid(void) @@ -83,6 +94,63 @@ out_rcu: return ret; } +/* Caller need to hold vsock->tx_lock on vq */ +static int virtio_transport_send_skb(struct sk_buff *skb, struct virtqueue *vq, + struct virtio_vsock *vsock, gfp_t gfp) +{ + int ret, in_sg = 0, out_sg = 0; + struct scatterlist **sgs; + + sgs = vsock->out_sgs; + sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), + sizeof(*virtio_vsock_hdr(skb))); + out_sg++; + + if (!skb_is_nonlinear(skb)) { + if (skb->len > 0) { + sg_init_one(sgs[out_sg], skb->data, skb->len); + out_sg++; + } + } else { + struct skb_shared_info *si; + int i; + + /* If skb is nonlinear, then its buffer must contain + * only header and nothing more. Data is stored in + * the fragged part. + */ + WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb))); + + si = skb_shinfo(skb); + + for (i = 0; i < si->nr_frags; i++) { + skb_frag_t *skb_frag = &si->frags[i]; + void *va; + + /* We will use 'page_to_virt()' for the userspace page + * here, because virtio or dma-mapping layers will call + * 'virt_to_phys()' later to fill the buffer descriptor. + * We don't touch memory at "virtual" address of this page. + */ + va = page_to_virt(skb_frag_page(skb_frag)); + sg_init_one(sgs[out_sg], + va + skb_frag_off(skb_frag), + skb_frag_size(skb_frag)); + out_sg++; + } + } + + ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, gfp); + /* Usually this means that there is no more space available in + * the vq + */ + if (ret < 0) + return ret; + + virtio_transport_deliver_tap_pkt(skb); + return 0; +} + static void virtio_transport_send_pkt_work(struct work_struct *work) { @@ -100,29 +168,17 @@ virtio_transport_send_pkt_work(struct work_struct *work) vq = vsock->vqs[VSOCK_VQ_TX]; for (;;) { - struct scatterlist hdr, buf, *sgs[2]; - int ret, in_sg = 0, out_sg = 0; struct sk_buff *skb; bool reply; + int ret; skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); if (!skb) break; - virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); - sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); - sgs[out_sg++] = &hdr; - if (skb->len > 0) { - sg_init_one(&buf, skb->data, skb->len); - sgs[out_sg++] = &buf; - } - - ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL); - /* Usually this means that there is no more space available in - * the vq - */ + ret = virtio_transport_send_skb(skb, vq, vsock, GFP_KERNEL); if (ret < 0) { virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb); break; @@ -152,6 +208,28 @@ out: queue_work(virtio_vsock_workqueue, &vsock->rx_work); } +/* Caller need to hold RCU for vsock. + * Returns 0 if the packet is successfully put on the vq. + */ +static int virtio_transport_send_skb_fast_path(struct virtio_vsock *vsock, struct sk_buff *skb) +{ + struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX]; + int ret; + + /* Inside RCU, can't sleep! */ + ret = mutex_trylock(&vsock->tx_lock); + if (unlikely(ret == 0)) + return -EBUSY; + + ret = virtio_transport_send_skb(skb, vq, vsock, GFP_ATOMIC); + if (ret == 0) + virtqueue_kick(vq); + + mutex_unlock(&vsock->tx_lock); + + return ret; +} + static int virtio_transport_send_pkt(struct sk_buff *skb) { @@ -175,11 +253,20 @@ virtio_transport_send_pkt(struct sk_buff *skb) goto out_rcu; } - if (virtio_vsock_skb_reply(skb)) - atomic_inc(&vsock->queued_replies); + /* If send_pkt_queue is empty, we can safely bypass this queue + * because packet order is maintained and (try) to put the packet + * on the virtqueue using virtio_transport_send_skb_fast_path. + * If this fails we simply put the packet on the intermediate + * queue and schedule the worker. + */ + if (!skb_queue_empty_lockless(&vsock->send_pkt_queue) || + virtio_transport_send_skb_fast_path(vsock, skb)) { + if (virtio_vsock_skb_reply(skb)) + atomic_inc(&vsock->queued_replies); - virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); - queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); + virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); + queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); + } out_rcu: rcu_read_unlock(); @@ -220,7 +307,7 @@ out_rcu: static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) { - int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM; + int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; struct scatterlist pkt, *p; struct virtqueue *vq; struct sk_buff *skb; @@ -229,7 +316,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) vq = vsock->vqs[VSOCK_VQ_RX]; do { - skb = virtio_vsock_alloc_skb(total_len, GFP_KERNEL); + skb = virtio_vsock_alloc_linear_skb(total_len, GFP_KERNEL); if (!skb) break; @@ -268,7 +355,7 @@ static void virtio_transport_tx_work(struct work_struct *work) virtqueue_disable_cb(vq); while ((skb = virtqueue_get_buf(vq, &len)) != NULL) { - consume_skb(skb); + virtio_transport_consume_skb_sent(skb, true); added = true; } } while (!virtqueue_enable_cb(vq)); @@ -413,6 +500,42 @@ static void virtio_vsock_rx_done(struct virtqueue *vq) queue_work(virtio_vsock_workqueue, &vsock->rx_work); } +static bool virtio_transport_can_msgzerocopy(int bufs_num) +{ + struct virtio_vsock *vsock; + bool res = false; + + rcu_read_lock(); + + vsock = rcu_dereference(the_virtio_vsock); + if (vsock) { + struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX]; + + /* Check that tx queue is large enough to keep whole + * data to send. This is needed, because when there is + * not enough free space in the queue, current skb to + * send will be reinserted to the head of tx list of + * the socket to retry transmission later, so if skb + * is bigger than whole queue, it will be reinserted + * again and again, thus blocking other skbs to be sent. + * Each page of the user provided buffer will be added + * as a single buffer to the tx virtqueue, so compare + * number of pages against maximum capacity of the queue. + */ + if (bufs_num <= vq->num_max) + res = true; + } + + rcu_read_unlock(); + + return res; +} + +static bool virtio_transport_msgzerocopy_allow(void) +{ + return true; +} + static bool virtio_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport virtio_transport = { @@ -446,6 +569,8 @@ static struct virtio_transport virtio_transport = { .seqpacket_allow = virtio_transport_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .msgzerocopy_allow = virtio_transport_msgzerocopy_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, @@ -457,11 +582,15 @@ static struct virtio_transport virtio_transport = { .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, + .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat, + + .unsent_bytes = virtio_transport_unsent_bytes, .read_skb = virtio_transport_read_skb, }, .send_pkt = virtio_transport_send_pkt, + .can_msgzerocopy = virtio_transport_can_msgzerocopy, }; static bool virtio_transport_seqpacket_allow(u32 remote_cid) @@ -495,8 +624,9 @@ static void virtio_transport_rx_work(struct work_struct *work) do { virtqueue_disable_cb(vq); for (;;) { + unsigned int len, payload_len; + struct virtio_vsock_hdr *hdr; struct sk_buff *skb; - unsigned int len; if (!virtio_transport_more_replies(vsock)) { /* Stop rx until the device processes already @@ -513,13 +643,22 @@ static void virtio_transport_rx_work(struct work_struct *work) vsock->rx_buf_nr--; /* Drop short/long packets */ - if (unlikely(len < sizeof(struct virtio_vsock_hdr) || + if (unlikely(len < sizeof(*hdr) || len > virtio_vsock_skb_len(skb))) { kfree_skb(skb); continue; } - virtio_vsock_skb_rx_put(skb); + hdr = virtio_vsock_hdr(skb); + payload_len = le32_to_cpu(hdr->len); + if (unlikely(payload_len > len - sizeof(*hdr))) { + kfree_skb(skb); + continue; + } + + if (payload_len) + virtio_vsock_skb_put(skb, payload_len); + virtio_transport_deliver_tap_pkt(skb); virtio_transport_recv_pkt(&virtio_transport, skb); } @@ -534,20 +673,21 @@ out: static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) { struct virtio_device *vdev = vsock->vdev; - static const char * const names[] = { - "rx", - "tx", - "event", - }; - vq_callback_t *callbacks[] = { - virtio_vsock_rx_done, - virtio_vsock_tx_done, - virtio_vsock_event_done, + struct virtqueue_info vqs_info[] = { + { "rx", virtio_vsock_rx_done }, + { "tx", virtio_vsock_tx_done }, + { "event", virtio_vsock_event_done }, }; int ret; - ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, callbacks, names, - NULL); + mutex_lock(&vsock->rx_lock); + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + mutex_unlock(&vsock->rx_lock); + + atomic_set(&vsock->queued_replies, 0); + + ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL); if (ret < 0) return ret; @@ -555,6 +695,11 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) virtio_device_ready(vdev); + return 0; +} + +static void virtio_vsock_vqs_start(struct virtio_vsock *vsock) +{ mutex_lock(&vsock->tx_lock); vsock->tx_run = true; mutex_unlock(&vsock->tx_lock); @@ -569,7 +714,16 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) vsock->event_run = true; mutex_unlock(&vsock->event_lock); - return 0; + /* virtio_transport_send_pkt() can queue packets once + * the_virtio_vsock is set, but they won't be processed until + * vsock->tx_run is set to true. We queue vsock->send_pkt_work + * when initialization finishes to send those packets queued + * earlier. + * We don't need to queue the other workers (rx, event) because + * as long as we don't fill the queues with empty buffers, the + * host can't send us any notification. + */ + queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); } static void virtio_vsock_vqs_del(struct virtio_vsock *vsock) @@ -621,6 +775,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev) { struct virtio_vsock *vsock = NULL; int ret; + int i; ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); if (ret) @@ -641,9 +796,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - atomic_set(&vsock->queued_replies, 0); mutex_init(&vsock->tx_lock); mutex_init(&vsock->rx_lock); @@ -663,7 +815,11 @@ static int virtio_vsock_probe(struct virtio_device *vdev) if (ret < 0) goto out; + for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++) + vsock->out_sgs[i] = &vsock->out_bufs[i]; + rcu_assign_pointer(the_virtio_vsock, vsock); + virtio_vsock_vqs_start(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -736,6 +892,7 @@ static int virtio_vsock_restore(struct virtio_device *vdev) goto out; rcu_assign_pointer(the_virtio_vsock, vsock); + virtio_vsock_vqs_start(vsock); out: mutex_unlock(&the_virtio_vsock_mutex); @@ -756,7 +913,6 @@ static struct virtio_driver virtio_vsock_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtio_vsock_probe, .remove = virtio_vsock_remove, @@ -770,7 +926,7 @@ static int __init virtio_vsock_init(void) { int ret; - virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); + virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", WQ_PERCPU, 0); if (!virtio_vsock_workqueue) return -ENOMEM; |
