Diffstat (limited to 'drivers/virtio/virtio_ring.c')
-rw-r--r--  drivers/virtio/virtio_ring.c | 1972
1 file changed, 1457 insertions(+), 515 deletions(-)
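One of the packed-ring changes in this diff is that last_used_idx now encodes both the used index (low bits) and the used wrap counter (folded into bit VRING_PACKED_EVENT_F_WRAP_CTR), replacing the separate used_wrap_counter field. The following is a minimal, standalone C sketch of that encoding, mirroring the packed_last_used()/packed_used_wrap_counter() helpers added in the patch; it assumes VRING_PACKED_EVENT_F_WRAP_CTR is 15 as in the virtio uapi header, and the queue size and chain length in main() are illustrative values only, not taken from the patch.

/*
 * Standalone sketch (not kernel code) of the packed-ring bookkeeping:
 * last_used_idx carries the used index in its low bits and the used
 * wrap counter in bit VRING_PACKED_EVENT_F_WRAP_CTR.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VRING_PACKED_EVENT_F_WRAP_CTR 15

static bool packed_used_wrap_counter(uint16_t last_used_idx)
{
	return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
}

static uint16_t packed_last_used(uint16_t last_used_idx)
{
	/* Everything below the wrap-counter bit is the used index. */
	return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
}

int main(void)
{
	uint16_t ring_num = 256;	/* illustrative queue size */
	uint16_t idx = 250 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);

	/* Consume a 10-descriptor chain, wrapping the way
	 * virtqueue_get_buf_ctx_packed() does in the patch. */
	uint16_t last_used = packed_last_used(idx);
	bool wrap = packed_used_wrap_counter(idx);

	last_used += 10;
	if (last_used >= ring_num) {
		last_used -= ring_num;
		wrap ^= 1;
	}
	idx = last_used | ((uint16_t)wrap << VRING_PACKED_EVENT_F_WRAP_CTR);

	printf("used index %u, wrap counter %d\n",
	       packed_last_used(idx), packed_used_wrap_counter(idx));
	return 0;
}

Keeping both values in one 16-bit word lets more_used_packed() and the event-suppression paths read them with a single READ_ONCE(), which is why the patch reworks every user of the old used_wrap_counter field.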
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 962f1477b1fa..ddab68959671 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/hrtimer.h> #include <linux/dma-mapping.h> +#include <linux/kmsan.h> #include <linux/spinlock.h> #include <xen/xen.h> @@ -68,12 +69,20 @@ struct vring_desc_state_split { void *data; /* Data for callback. */ - struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ + + /* Indirect desc table and extra table, if any. These two will be + * allocated together. So we won't stress more to the memory allocator. + */ + struct vring_desc *indir_desc; }; struct vring_desc_state_packed { void *data; /* Data for callback. */ - struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + + /* Indirect desc table and extra table, if any. These two will be + * allocated together. So we won't stress more to the memory allocator. + */ + struct vring_packed_desc *indir_desc; u16 num; /* Descriptor list length. */ u16 last; /* The last desc state in a list. */ }; @@ -85,6 +94,71 @@ struct vring_desc_extra { u16 next; /* The next desc state in a list. */ }; +struct vring_virtqueue_split { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + struct vring_desc_extra *desc_extra; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + + /* + * The parameters for creating vrings are reserved for creating new + * vring. + */ + u32 vring_align; + bool may_reduce_num; +}; + +struct vring_virtqueue_packed { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_extra *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; +}; + struct vring_virtqueue { struct virtqueue vq; @@ -92,7 +166,7 @@ struct vring_virtqueue { bool packed_ring; /* Is DMA API used? */ - bool use_dma_api; + bool use_map_api; /* Can we use weak barriers? */ bool weak_barriers; @@ -111,7 +185,12 @@ struct vring_virtqueue { /* Number we've added since last sync. */ unsigned int num_added; - /* Last used index we've seen. */ + /* Last used index we've seen. + * for split ring, it just contains last used index + * for packed ring: + * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. + * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. + */ u16 last_used_idx; /* Hint for event idx: already triggered no need to disable. */ @@ -119,67 +198,10 @@ struct vring_virtqueue { union { /* Available for split ring */ - struct { - /* Actual memory layout for this queue. 
*/ - struct vring vring; - - /* Last written value to avail->flags */ - u16 avail_flags_shadow; - - /* - * Last written value to avail->idx in - * guest byte order. - */ - u16 avail_idx_shadow; - - /* Per-descriptor state. */ - struct vring_desc_state_split *desc_state; - struct vring_desc_extra *desc_extra; - - /* DMA address and size information */ - dma_addr_t queue_dma_addr; - size_t queue_size_in_bytes; - } split; + struct vring_virtqueue_split split; /* Available for packed ring */ - struct { - /* Actual memory layout for this queue. */ - struct { - unsigned int num; - struct vring_packed_desc *desc; - struct vring_packed_desc_event *driver; - struct vring_packed_desc_event *device; - } vring; - - /* Driver ring wrap counter. */ - bool avail_wrap_counter; - - /* Device ring wrap counter. */ - bool used_wrap_counter; - - /* Avail used flags. */ - u16 avail_used_flags; - - /* Index of the next avail descriptor. */ - u16 next_avail_idx; - - /* - * Last written value to driver->flags in - * guest byte order. - */ - u16 event_flags_shadow; - - /* Per-descriptor state. */ - struct vring_desc_state_packed *desc_state; - struct vring_desc_extra *desc_extra; - - /* DMA address and size information */ - dma_addr_t ring_dma_addr; - dma_addr_t driver_event_dma_addr; - dma_addr_t device_event_dma_addr; - size_t ring_size_in_bytes; - size_t event_size_in_bytes; - } packed; + struct vring_virtqueue_packed packed; }; /* How to notify other side. FIXME: commonalize hcalls! */ @@ -188,6 +210,8 @@ struct vring_virtqueue { /* DMA, allocation, and size information */ bool we_own_ring; + union virtio_map map; + #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; @@ -198,18 +222,18 @@ struct vring_virtqueue { #endif }; +static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); +static void vring_free(struct virtqueue *_vq); /* * Helpers. */ -#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) +#define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq) -static inline bool virtqueue_use_indirect(struct virtqueue *_vq, - unsigned int total_sg) +static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, + unsigned int total_sg) { - struct vring_virtqueue *vq = to_vvq(_vq); - /* * If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. FIXME: tune this threshold @@ -243,7 +267,7 @@ static inline bool virtqueue_use_indirect(struct virtqueue *_vq, * unconditionally on data path. 
*/ -static bool vring_use_dma_api(struct virtio_device *vdev) +static bool vring_use_map_api(const struct virtio_device *vdev) { if (!virtio_has_dma_quirk(vdev)) return true; @@ -263,29 +287,42 @@ static bool vring_use_dma_api(struct virtio_device *vdev) return false; } -size_t virtio_max_dma_size(struct virtio_device *vdev) +static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring, + const struct vring_desc_extra *extra) +{ + return vring->use_map_api && (extra->addr != DMA_MAPPING_ERROR); +} + +size_t virtio_max_dma_size(const struct virtio_device *vdev) { size_t max_segment_size = SIZE_MAX; - if (vring_use_dma_api(vdev)) - max_segment_size = dma_max_mapping_size(vdev->dev.parent); + if (vring_use_map_api(vdev)) { + if (vdev->map) { + max_segment_size = + vdev->map->max_mapping_size(vdev->vmap); + } else + max_segment_size = + dma_max_mapping_size(vdev->dev.parent); + } return max_segment_size; } EXPORT_SYMBOL_GPL(virtio_max_dma_size); static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) + dma_addr_t *map_handle, gfp_t flag, + union virtio_map map) { - if (vring_use_dma_api(vdev)) { - return dma_alloc_coherent(vdev->dev.parent, size, - dma_handle, flag); + if (vring_use_map_api(vdev)) { + return virtqueue_map_alloc_coherent(vdev, map, size, + map_handle, flag); } else { void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); if (queue) { phys_addr_t phys_addr = virt_to_phys(queue); - *dma_handle = (dma_addr_t)phys_addr; + *map_handle = (dma_addr_t)phys_addr; /* * Sanity check: make sure we dind't truncate @@ -298,7 +335,7 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, * warning and abort if we end up with an * unrepresentable address. */ - if (WARN_ON_ONCE(*dma_handle != phys_addr)) { + if (WARN_ON_ONCE(*map_handle != phys_addr)) { free_pages_exact(queue, PAGE_ALIGN(size)); return NULL; } @@ -308,10 +345,12 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, } static void vring_free_queue(struct virtio_device *vdev, size_t size, - void *queue, dma_addr_t dma_handle) + void *queue, dma_addr_t map_handle, + union virtio_map map) { - if (vring_use_dma_api(vdev)) - dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); + if (vring_use_map_api(vdev)) + virtqueue_map_free_coherent(vdev, map, size, + queue, map_handle); else free_pages_exact(queue, PAGE_ALIGN(size)); } @@ -319,49 +358,93 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size, /* * The DMA ops on various arches are rather gnarly right now, and * making all of the arch DMA ops work on the vring device itself - * is a mess. For now, we use the parent device for DMA ops. + * is a mess. */ -static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) +static struct device *vring_dma_dev(const struct vring_virtqueue *vq) +{ + return vq->map.dma_dev; +} + +static int vring_mapping_error(const struct vring_virtqueue *vq, + dma_addr_t addr) { - return vq->vq.vdev->dev.parent; + struct virtio_device *vdev = vq->vq.vdev; + + if (!vq->use_map_api) + return 0; + + if (vdev->map) + return vdev->map->mapping_error(vq->map, addr); + else + return dma_mapping_error(vring_dma_dev(vq), addr); } /* Map one sg entry. 
*/ -static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, - struct scatterlist *sg, - enum dma_data_direction direction) +static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, + enum dma_data_direction direction, dma_addr_t *addr, + u32 *len, bool premapped) { - if (!vq->use_dma_api) - return (dma_addr_t)sg_phys(sg); + if (premapped) { + *addr = sg_dma_address(sg); + *len = sg_dma_len(sg); + return 0; + } + + *len = sg->length; + + if (!vq->use_map_api) { + /* + * If DMA is not used, KMSAN doesn't know that the scatterlist + * is initialized by the hardware. Explicitly check/unpoison it + * depending on the direction. + */ + kmsan_handle_dma(sg_phys(sg), sg->length, direction); + *addr = (dma_addr_t)sg_phys(sg); + return 0; + } /* * We can't use dma_map_sg, because we don't use scatterlists in * the way it expects (we don't guarantee that the scatterlist * will exist for the lifetime of the mapping). */ - return dma_map_page(vring_dma_dev(vq), - sg_page(sg), sg->offset, sg->length, - direction); + *addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg), + sg->offset, sg->length, + direction, 0); + + if (vring_mapping_error(vq, *addr)) + return -ENOMEM; + + return 0; } static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, void *cpu_addr, size_t size, enum dma_data_direction direction) { - if (!vq->use_dma_api) + if (!vq->use_map_api) return (dma_addr_t)virt_to_phys(cpu_addr); - return dma_map_single(vring_dma_dev(vq), - cpu_addr, size, direction); + return virtqueue_map_single_attrs(&vq->vq, cpu_addr, + size, direction, 0); } -static int vring_mapping_error(const struct vring_virtqueue *vq, - dma_addr_t addr) +static void virtqueue_init(struct vring_virtqueue *vq, u32 num) { - if (!vq->use_dma_api) - return 0; + vq->vq.num_free = num; + + if (vq->packed_ring) + vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); + else + vq->last_used_idx = 0; + + vq->event_triggered = false; + vq->num_added = 0; - return dma_mapping_error(vring_dma_dev(vq), addr); +#ifdef DEBUG + vq->in_use = false; + vq->last_add_time_valid = false; +#endif } @@ -369,66 +452,37 @@ static int vring_mapping_error(const struct vring_virtqueue *vq, * Split ring specific functions - *_split(). */ -static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, - struct vring_desc *desc) +static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, + struct vring_desc_extra *extra) { u16 flags; - if (!vq->use_dma_api) - return; - - flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); + flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { - dma_unmap_single(vring_dma_dev(vq), - virtio64_to_cpu(vq->vq.vdev, desc->addr), - virtio32_to_cpu(vq->vq.vdev, desc->len), - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } else { - dma_unmap_page(vring_dma_dev(vq), - virtio64_to_cpu(vq->vq.vdev, desc->addr), - virtio32_to_cpu(vq->vq.vdev, desc->len), - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } -} - -static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, - unsigned int i) -{ - struct vring_desc_extra *extra = vq->split.desc_extra; - u16 flags; - - if (!vq->use_dma_api) + if (!vq->use_map_api) + goto out; + } else if (!vring_need_unmap_buffer(vq, extra)) goto out; - flags = extra[i].flags; - - if (flags & VRING_DESC_F_INDIRECT) { - dma_unmap_single(vring_dma_dev(vq), - extra[i].addr, - extra[i].len, - (flags & VRING_DESC_F_WRITE) ? 
- DMA_FROM_DEVICE : DMA_TO_DEVICE); - } else { - dma_unmap_page(vring_dma_dev(vq), - extra[i].addr, - extra[i].len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } + virtqueue_unmap_page_attrs(&vq->vq, + extra->addr, + extra->len, + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE, + 0); out: - return extra[i].next; + return extra->next; } static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, unsigned int total_sg, gfp_t gfp) { + struct vring_desc_extra *extra; struct vring_desc *desc; - unsigned int i; + unsigned int i, size; /* * We require lowmem mappings for the descriptors because @@ -437,40 +491,41 @@ static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, */ gfp &= ~__GFP_HIGHMEM; - desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); + size = sizeof(*desc) * total_sg + sizeof(*extra) * total_sg; + + desc = kmalloc(size, gfp); if (!desc) return NULL; + extra = (struct vring_desc_extra *)&desc[total_sg]; + for (i = 0; i < total_sg; i++) - desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); + extra[i].next = i + 1; + return desc; } static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, struct vring_desc *desc, + struct vring_desc_extra *extra, unsigned int i, dma_addr_t addr, unsigned int len, - u16 flags, - bool indirect) + u16 flags, bool premapped) { - struct vring_virtqueue *vring = to_vvq(vq); - struct vring_desc_extra *extra = vring->split.desc_extra; u16 next; desc[i].flags = cpu_to_virtio16(vq->vdev, flags); desc[i].addr = cpu_to_virtio64(vq->vdev, addr); desc[i].len = cpu_to_virtio32(vq->vdev, len); - if (!indirect) { - next = extra[i].next; - desc[i].next = cpu_to_virtio16(vq->vdev, next); + extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; + extra[i].len = len; + extra[i].flags = flags; + + next = extra[i].next; - extra[i].addr = addr; - extra[i].len = len; - extra[i].flags = flags; - } else - next = virtio16_to_cpu(vq->vdev, desc[i].next); + desc[i].next = cpu_to_virtio16(vq->vdev, next); return next; } @@ -482,9 +537,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, unsigned int in_sgs, void *data, void *ctx, + bool premapped, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); + struct vring_desc_extra *extra; struct scatterlist *sg; struct vring_desc *desc; unsigned int i, n, avail, descs_used, prev, err_idx; @@ -507,7 +564,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, head = vq->free_head; - if (virtqueue_use_indirect(_vq, total_sg)) + if (virtqueue_use_indirect(vq, total_sg)) desc = alloc_indirect_split(_vq, total_sg, gfp); else { desc = NULL; @@ -520,14 +577,16 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, /* Set up rest to use this indirect table. 
*/ i = 0; descs_used = 1; + extra = (struct vring_desc_extra *)&desc[total_sg]; } else { indirect = false; desc = vq->split.vring.desc; + extra = vq->split.desc_extra; i = head; descs_used = total_sg; } - if (vq->vq.num_free < descs_used) { + if (unlikely(vq->vq.num_free < descs_used)) { pr_debug("Can't add buf len %i - avail = %i\n", descs_used, vq->vq.num_free); /* FIXME: for historical reasons, we force a notify here if @@ -543,39 +602,42 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + u32 len; + + if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr, &len, premapped)) goto unmap_release; prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. */ - i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, + i = virtqueue_add_desc_split(_vq, desc, extra, i, addr, len, VRING_DESC_F_NEXT, - indirect); + premapped); } } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + u32 len; + + if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr, &len, premapped)) goto unmap_release; prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. */ - i = virtqueue_add_desc_split(_vq, desc, i, addr, - sg->length, + i = virtqueue_add_desc_split(_vq, desc, extra, i, addr, len, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE, - indirect); + premapped); } } /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); - if (!indirect && vq->use_dma_api) + if (!indirect && vring_need_unmap_buffer(vq, &extra[prev])) vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; @@ -588,10 +650,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, goto unmap_release; virtqueue_add_desc_split(_vq, vq->split.vring.desc, + vq->split.desc_extra, head, addr, total_sg * sizeof(struct vring_desc), - VRING_DESC_F_INDIRECT, - false); + VRING_DESC_F_INDIRECT, false); } /* We're using some buffers from the free list. */ @@ -644,11 +706,8 @@ unmap_release: for (n = 0; n < total_sg; n++) { if (i == err_idx) break; - if (indirect) { - vring_unmap_one_split_indirect(vq, &desc[i]); - i = virtio16_to_cpu(_vq->vdev, desc[i].next); - } else - i = vring_unmap_one_split(vq, i); + + i = vring_unmap_one_split(vq, &extra[i]); } if (indirect) @@ -692,22 +751,25 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, void **ctx) { + struct vring_desc_extra *extra; unsigned int i, j; __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); /* Clear data ptr. 
*/ vq->split.desc_state[head].data = NULL; + extra = vq->split.desc_extra; + /* Put back on free list: unmap first-level descriptors and find end */ i = head; while (vq->split.vring.desc[i].flags & nextflag) { - vring_unmap_one_split(vq, i); + vring_unmap_one_split(vq, &extra[i]); i = vq->split.desc_extra[i].next; vq->vq.num_free++; } - vring_unmap_one_split(vq, i); + vring_unmap_one_split(vq, &extra[i]); vq->split.desc_extra[i].next = vq->free_head; vq->free_head = head; @@ -717,20 +779,25 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, if (vq->indirect) { struct vring_desc *indir_desc = vq->split.desc_state[head].indir_desc; - u32 len; + u32 len, num; /* Free the indirect table, if any, now that it's unmapped. */ if (!indir_desc) return; - len = vq->split.desc_extra[head].len; BUG_ON(!(vq->split.desc_extra[head].flags & VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); - for (j = 0; j < len / sizeof(struct vring_desc); j++) - vring_unmap_one_split_indirect(vq, &indir_desc[j]); + num = len / sizeof(struct vring_desc); + + extra = (struct vring_desc_extra *)&indir_desc[num]; + + if (vq->use_map_api) { + for (j = 0; j < num; j++) + vring_unmap_one_split(vq, &extra[j]); + } kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; @@ -739,7 +806,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, } } -static inline bool more_used_split(const struct vring_virtqueue *vq) +static bool more_used_split(const struct vring_virtqueue *vq) { return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->split.vring.used->idx); @@ -809,6 +876,14 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq) if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; + + /* + * If device triggered an event already it won't trigger one again: + * no need to disable. + */ + if (vq->event_triggered) + return; + if (vq->event) /* TODO: this is a hack. Figure out a cleaner value to write. */ vring_used_event(&vq->split.vring) = 0x0; @@ -819,7 +894,7 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq) } } -static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) +static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 last_used_idx; @@ -844,7 +919,7 @@ static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) return last_used_idx; } -static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) +static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -915,127 +990,313 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) return NULL; } -static struct virtqueue *vring_create_virtqueue_split( - unsigned int index, - unsigned int num, - unsigned int vring_align, - struct virtio_device *vdev, - bool weak_barriers, - bool may_reduce_num, - bool context, - bool (*notify)(struct virtqueue *), - void (*callback)(struct virtqueue *), - const char *name) +static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split, + struct vring_virtqueue *vq) +{ + struct virtio_device *vdev; + + vdev = vq->vq.vdev; + + vring_split->avail_flags_shadow = 0; + vring_split->avail_idx_shadow = 0; + + /* No callback? Tell other side not to bother us. 
*/ + if (!vq->vq.callback) { + vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; + if (!vq->event) + vring_split->vring.avail->flags = cpu_to_virtio16(vdev, + vring_split->avail_flags_shadow); + } +} + +static void virtqueue_reinit_split(struct vring_virtqueue *vq) +{ + int num; + + num = vq->split.vring.num; + + vq->split.vring.avail->flags = 0; + vq->split.vring.avail->idx = 0; + + /* reset avail event */ + vq->split.vring.avail->ring[num] = 0; + + vq->split.vring.used->flags = 0; + vq->split.vring.used->idx = 0; + + /* reset used event */ + *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0; + + virtqueue_init(vq, num); + + virtqueue_vring_init_split(&vq->split, vq); +} + +static void virtqueue_vring_attach_split(struct vring_virtqueue *vq, + struct vring_virtqueue_split *vring_split) +{ + vq->split = *vring_split; + + /* Put everything in free lists. */ + vq->free_head = 0; +} + +static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split) +{ + struct vring_desc_state_split *state; + struct vring_desc_extra *extra; + u32 num = vring_split->vring.num; + + state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL); + if (!state) + goto err_state; + + extra = vring_alloc_desc_extra(num); + if (!extra) + goto err_extra; + + memset(state, 0, num * sizeof(struct vring_desc_state_split)); + + vring_split->desc_state = state; + vring_split->desc_extra = extra; + return 0; + +err_extra: + kfree(state); +err_state: + return -ENOMEM; +} + +static void vring_free_split(struct vring_virtqueue_split *vring_split, + struct virtio_device *vdev, + union virtio_map map) +{ + vring_free_queue(vdev, vring_split->queue_size_in_bytes, + vring_split->vring.desc, + vring_split->queue_dma_addr, + map); + + kfree(vring_split->desc_state); + kfree(vring_split->desc_extra); +} + +static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, + struct virtio_device *vdev, + u32 num, + unsigned int vring_align, + bool may_reduce_num, + union virtio_map map) { - struct virtqueue *vq; void *queue = NULL; dma_addr_t dma_addr; - size_t queue_size_in_bytes; - struct vring vring; /* We assume num is a power of 2. */ - if (num & (num - 1)) { + if (!is_power_of_2(num)) { dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); - return NULL; + return -EINVAL; } /* TODO: allocate each queue chunk individually */ for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + map); if (queue) break; if (!may_reduce_num) - return NULL; + return -ENOMEM; } if (!num) - return NULL; + return -ENOMEM; if (!queue) { /* Try to get a single page. You are my only hope! 
*/ queue = vring_alloc_queue(vdev, vring_size(num, vring_align), - &dma_addr, GFP_KERNEL|__GFP_ZERO); + &dma_addr, GFP_KERNEL | __GFP_ZERO, + map); } if (!queue) + return -ENOMEM; + + vring_init(&vring_split->vring, num, queue, vring_align); + + vring_split->queue_dma_addr = dma_addr; + vring_split->queue_size_in_bytes = vring_size(num, vring_align); + + vring_split->vring_align = vring_align; + vring_split->may_reduce_num = may_reduce_num; + + return 0; +} + +static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, + struct vring_virtqueue_split *vring_split, + struct virtio_device *vdev, + bool weak_barriers, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name, + union virtio_map map) +{ + struct vring_virtqueue *vq; + int err; + + vq = kmalloc(sizeof(*vq), GFP_KERNEL); + if (!vq) return NULL; - queue_size_in_bytes = vring_size(num, vring_align); - vring_init(&vring, num, queue, vring_align); + vq->packed_ring = false; + vq->vq.callback = callback; + vq->vq.vdev = vdev; + vq->vq.name = name; + vq->vq.index = index; + vq->vq.reset = false; + vq->we_own_ring = false; + vq->notify = notify; + vq->weak_barriers = weak_barriers; +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION + vq->broken = true; +#else + vq->broken = false; +#endif + vq->map = map; + vq->use_map_api = vring_use_map_api(vdev); + + vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && + !context; + vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); + + if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) + vq->weak_barriers = false; + + err = vring_alloc_state_extra_split(vring_split); + if (err) { + kfree(vq); + return NULL; + } + + virtqueue_vring_init_split(vring_split, vq); + + virtqueue_init(vq, vring_split->vring.num); + virtqueue_vring_attach_split(vq, vring_split); + + spin_lock(&vdev->vqs_list_lock); + list_add_tail(&vq->vq.list, &vdev->vqs); + spin_unlock(&vdev->vqs_list_lock); + return &vq->vq; +} + +static struct virtqueue *vring_create_virtqueue_split( + unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name, + union virtio_map map) +{ + struct vring_virtqueue_split vring_split = {}; + struct virtqueue *vq; + int err; + + err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, + may_reduce_num, map); + if (err) + return NULL; - vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, - notify, callback, name); + vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, + context, notify, callback, name, map); if (!vq) { - vring_free_queue(vdev, queue_size_in_bytes, queue, - dma_addr); + vring_free_split(&vring_split, vdev, map); return NULL; } - to_vvq(vq)->split.queue_dma_addr = dma_addr; - to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; to_vvq(vq)->we_own_ring = true; return vq; } +static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) +{ + struct vring_virtqueue_split vring_split = {}; + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + int err; + + err = vring_alloc_queue_split(&vring_split, vdev, num, + vq->split.vring_align, + vq->split.may_reduce_num, + vq->map); + if (err) + goto err; + + err = vring_alloc_state_extra_split(&vring_split); + if (err) + goto err_state_extra; + + vring_free(&vq->vq); + + 
virtqueue_vring_init_split(&vring_split, vq); + + virtqueue_init(vq, vring_split.vring.num); + virtqueue_vring_attach_split(vq, &vring_split); + + return 0; + +err_state_extra: + vring_free_split(&vring_split, vdev, vq->map); +err: + virtqueue_reinit_split(vq); + return -ENOMEM; +} + /* * Packed ring specific functions - *_packed(). */ - -static void vring_unmap_state_packed(const struct vring_virtqueue *vq, - struct vring_desc_extra *state) +static bool packed_used_wrap_counter(u16 last_used_idx) { - u16 flags; - - if (!vq->use_dma_api) - return; - - flags = state->flags; + return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); +} - if (flags & VRING_DESC_F_INDIRECT) { - dma_unmap_single(vring_dma_dev(vq), - state->addr, state->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } else { - dma_unmap_page(vring_dma_dev(vq), - state->addr, state->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } +static u16 packed_last_used(u16 last_used_idx) +{ + return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); } -static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, - struct vring_packed_desc *desc) +static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, + const struct vring_desc_extra *extra) { u16 flags; - if (!vq->use_dma_api) - return; - - flags = le16_to_cpu(desc->flags); + flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { - dma_unmap_single(vring_dma_dev(vq), - le64_to_cpu(desc->addr), - le32_to_cpu(desc->len), - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } else { - dma_unmap_page(vring_dma_dev(vq), - le64_to_cpu(desc->addr), - le32_to_cpu(desc->len), - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } + if (!vq->use_map_api) + return; + } else if (!vring_need_unmap_buffer(vq, extra)) + return; + + virtqueue_unmap_page_attrs(&vq->vq, + extra->addr, extra->len, + (flags & VRING_DESC_F_WRITE) ? 
+ DMA_FROM_DEVICE : DMA_TO_DEVICE, + 0); } static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, gfp_t gfp) { + struct vring_desc_extra *extra; struct vring_packed_desc *desc; + int i, size; /* * We require lowmem mappings for the descriptors because @@ -1044,7 +1305,16 @@ static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, */ gfp &= ~__GFP_HIGHMEM; - desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); + size = (sizeof(*desc) + sizeof(*extra)) * total_sg; + + desc = kmalloc(size, gfp); + if (!desc) + return NULL; + + extra = (struct vring_desc_extra *)&desc[total_sg]; + + for (i = 0; i < total_sg; i++) + extra[i].next = i + 1; return desc; } @@ -1055,11 +1325,13 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, unsigned int out_sgs, unsigned int in_sgs, void *data, + bool premapped, gfp_t gfp) { + struct vring_desc_extra *extra; struct vring_packed_desc *desc; struct scatterlist *sg; - unsigned int i, n, err_idx; + unsigned int i, n, err_idx, len; u16 head, id; dma_addr_t addr; @@ -1068,6 +1340,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, if (!desc) return -ENOMEM; + extra = (struct vring_desc_extra *)&desc[total_sg]; + if (unlikely(vq->vq.num_free < 1)) { pr_debug("Can't add buf len 1 - avail = 0\n"); kfree(desc); @@ -1081,15 +1355,22 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + if (vring_map_one_sg(vq, sg, n < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE, + &addr, &len, premapped)) goto unmap_release; desc[i].flags = cpu_to_le16(n < out_sgs ? 0 : VRING_DESC_F_WRITE); desc[i].addr = cpu_to_le64(addr); - desc[i].len = cpu_to_le32(sg->length); + desc[i].len = cpu_to_le32(len); + + if (unlikely(vq->use_map_api)) { + extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; + extra[i].len = len; + extra[i].flags = n < out_sgs ? 
0 : VRING_DESC_F_WRITE; + } + i++; } } @@ -1106,7 +1387,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, sizeof(struct vring_packed_desc)); vq->packed.vring.desc[head].id = cpu_to_le16(id); - if (vq->use_dma_api) { + if (vq->use_map_api) { vq->packed.desc_extra[id].addr = addr; vq->packed.desc_extra[id].len = total_sg * sizeof(struct vring_packed_desc); @@ -1155,7 +1436,7 @@ unmap_release: err_idx = i; for (i = 0; i < err_idx; i++) - vring_unmap_desc_packed(vq, &desc[i]); + vring_unmap_extra_packed(vq, &extra[i]); kfree(desc); @@ -1170,12 +1451,13 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, unsigned int in_sgs, void *data, void *ctx, + bool premapped, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); struct vring_packed_desc *desc; struct scatterlist *sg; - unsigned int i, n, c, descs_used, err_idx; + unsigned int i, n, c, descs_used, err_idx, len; __le16 head_flags, flags; u16 head, id, prev, curr, avail_used_flags; int err; @@ -1194,9 +1476,9 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, BUG_ON(total_sg == 0); - if (virtqueue_use_indirect(_vq, total_sg)) { + if (virtqueue_use_indirect(vq, total_sg)) { err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, - in_sgs, data, gfp); + in_sgs, data, premapped, gfp); if (err != -ENOMEM) { END_USE(vq); return err; @@ -1228,9 +1510,11 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, c = 0; for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, n < out_sgs ? + DMA_TO_DEVICE : DMA_FROM_DEVICE, + &addr, &len, premapped)) goto unmap_release; flags = cpu_to_le16(vq->packed.avail_used_flags | @@ -1242,12 +1526,13 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, desc[i].flags = flags; desc[i].addr = cpu_to_le64(addr); - desc[i].len = cpu_to_le32(sg->length); + desc[i].len = cpu_to_le32(len); desc[i].id = cpu_to_le16(id); - if (unlikely(vq->use_dma_api)) { - vq->packed.desc_extra[curr].addr = addr; - vq->packed.desc_extra[curr].len = sg->length; + if (unlikely(vq->use_map_api)) { + vq->packed.desc_extra[curr].addr = premapped ? + DMA_MAPPING_ERROR : addr; + vq->packed.desc_extra[curr].len = len; vq->packed.desc_extra[curr].flags = le16_to_cpu(flags); } @@ -1263,7 +1548,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, } } - if (i < head) + if (i <= head) vq->packed.avail_wrap_counter ^= 1; /* We're using some buffers from the free list. */ @@ -1303,8 +1588,7 @@ unmap_release: for (n = 0; n < total_sg; n++) { if (i == err_idx) break; - vring_unmap_state_packed(vq, - &vq->packed.desc_extra[curr]); + vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); curr = vq->packed.desc_extra[curr].next; i++; if (i >= vq->packed.vring.num) @@ -1380,28 +1664,32 @@ static void detach_buf_packed(struct vring_virtqueue *vq, vq->free_head = id; vq->vq.num_free += state->num; - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->use_map_api)) { curr = id; for (i = 0; i < state->num; i++) { - vring_unmap_state_packed(vq, - &vq->packed.desc_extra[curr]); + vring_unmap_extra_packed(vq, + &vq->packed.desc_extra[curr]); curr = vq->packed.desc_extra[curr].next; } } if (vq->indirect) { - u32 len; + struct vring_desc_extra *extra; + u32 len, num; /* Free the indirect table, if any, now that it's unmapped. 
*/ desc = state->indir_desc; if (!desc) return; - if (vq->use_dma_api) { + if (vq->use_map_api) { len = vq->packed.desc_extra[id].len; - for (i = 0; i < len / sizeof(struct vring_packed_desc); - i++) - vring_unmap_desc_packed(vq, &desc[i]); + num = len / sizeof(struct vring_packed_desc); + + extra = (struct vring_desc_extra *)&desc[num]; + + for (i = 0; i < num; i++) + vring_unmap_extra_packed(vq, &extra[i]); } kfree(desc); state->indir_desc = NULL; @@ -1423,10 +1711,16 @@ static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, return avail == used && used == used_wrap_counter; } -static inline bool more_used_packed(const struct vring_virtqueue *vq) +static bool more_used_packed(const struct vring_virtqueue *vq) { - return is_used_desc_packed(vq, vq->last_used_idx, - vq->packed.used_wrap_counter); + u16 last_used; + u16 last_used_idx; + bool used_wrap_counter; + + last_used_idx = READ_ONCE(vq->last_used_idx); + last_used = packed_last_used(last_used_idx); + used_wrap_counter = packed_used_wrap_counter(last_used_idx); + return is_used_desc_packed(vq, last_used, used_wrap_counter); } static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, @@ -1434,7 +1728,8 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); - u16 last_used, id; + u16 last_used, id, last_used_idx; + bool used_wrap_counter; void *ret; START_USE(vq); @@ -1453,7 +1748,9 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, /* Only get used elements after they have been exposed by host. */ virtio_rmb(vq->weak_barriers); - last_used = vq->last_used_idx; + last_used_idx = READ_ONCE(vq->last_used_idx); + used_wrap_counter = packed_used_wrap_counter(last_used_idx); + last_used = packed_last_used(last_used_idx); id = le16_to_cpu(vq->packed.vring.desc[last_used].id); *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); @@ -1470,12 +1767,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, ret = vq->packed.desc_state[id].data; detach_buf_packed(vq, id, ctx); - vq->last_used_idx += vq->packed.desc_state[id].num; - if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { - vq->last_used_idx -= vq->packed.vring.num; - vq->packed.used_wrap_counter ^= 1; + last_used += vq->packed.desc_state[id].num; + if (unlikely(last_used >= vq->packed.vring.num)) { + last_used -= vq->packed.vring.num; + used_wrap_counter ^= 1; } + last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); + WRITE_ONCE(vq->last_used_idx, last_used); + /* * If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before @@ -1484,9 +1784,7 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) virtio_store_mb(vq->weak_barriers, &vq->packed.vring.driver->off_wrap, - cpu_to_le16(vq->last_used_idx | - (vq->packed.used_wrap_counter << - VRING_PACKED_EVENT_F_WRAP_CTR))); + cpu_to_le16(vq->last_used_idx)); LAST_ADD_TIME_INVALID(vq); @@ -1500,12 +1798,20 @@ static void virtqueue_disable_cb_packed(struct virtqueue *_vq) if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; + + /* + * If device triggered an event already it won't trigger one again: + * no need to disable. 
+ */ + if (vq->event_triggered) + return; + vq->packed.vring.driver->flags = cpu_to_le16(vq->packed.event_flags_shadow); } } -static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) +static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -1518,9 +1824,7 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) if (vq->event) { vq->packed.vring.driver->off_wrap = - cpu_to_le16(vq->last_used_idx | - (vq->packed.used_wrap_counter << - VRING_PACKED_EVENT_F_WRAP_CTR)); + cpu_to_le16(vq->last_used_idx); /* * We need to update event offset and event wrap * counter first before updating event flags. @@ -1537,8 +1841,7 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) } END_USE(vq); - return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << - VRING_PACKED_EVENT_F_WRAP_CTR); + return vq->last_used_idx; } static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) @@ -1556,7 +1859,7 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - u16 used_idx, wrap_counter; + u16 used_idx, wrap_counter, last_used_idx; u16 bufs; START_USE(vq); @@ -1569,9 +1872,10 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) if (vq->event) { /* TODO: tune this threshold */ bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; - wrap_counter = vq->packed.used_wrap_counter; + last_used_idx = READ_ONCE(vq->last_used_idx); + wrap_counter = packed_used_wrap_counter(last_used_idx); - used_idx = vq->last_used_idx + bufs; + used_idx = packed_last_used(last_used_idx) + bufs; if (used_idx >= vq->packed.vring.num) { used_idx -= vq->packed.vring.num; wrap_counter ^= 1; @@ -1601,9 +1905,10 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) */ virtio_mb(vq->weak_barriers); - if (is_used_desc_packed(vq, - vq->last_used_idx, - vq->packed.used_wrap_counter)) { + last_used_idx = READ_ONCE(vq->last_used_idx); + wrap_counter = packed_used_wrap_counter(last_used_idx); + used_idx = packed_last_used(last_used_idx); + if (is_used_desc_packed(vq, used_idx, wrap_counter)) { END_USE(vq); return false; } @@ -1636,8 +1941,7 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) return NULL; } -static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq, - unsigned int num) +static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) { struct vring_desc_extra *desc_extra; unsigned int i; @@ -1655,19 +1959,36 @@ static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *v return desc_extra; } -static struct virtqueue *vring_create_virtqueue_packed( - unsigned int index, - unsigned int num, - unsigned int vring_align, - struct virtio_device *vdev, - bool weak_barriers, - bool may_reduce_num, - bool context, - bool (*notify)(struct virtqueue *), - void (*callback)(struct virtqueue *), - const char *name) +static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, + struct virtio_device *vdev, + union virtio_map map) +{ + if (vring_packed->vring.desc) + vring_free_queue(vdev, vring_packed->ring_size_in_bytes, + vring_packed->vring.desc, + vring_packed->ring_dma_addr, + map); + + if (vring_packed->vring.driver) + vring_free_queue(vdev, vring_packed->event_size_in_bytes, + vring_packed->vring.driver, + vring_packed->driver_event_dma_addr, + 
map); + + if (vring_packed->vring.device) + vring_free_queue(vdev, vring_packed->event_size_in_bytes, + vring_packed->vring.device, + vring_packed->device_event_dma_addr, + map); + + kfree(vring_packed->desc_state); + kfree(vring_packed->desc_extra); +} + +static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, + struct virtio_device *vdev, + u32 num, union virtio_map map) { - struct vring_virtqueue *vq; struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; @@ -1677,46 +1998,144 @@ static struct virtqueue *vring_create_virtqueue_packed( ring = vring_alloc_queue(vdev, ring_size_in_bytes, &ring_dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + map); if (!ring) - goto err_ring; + goto err; + + vring_packed->vring.desc = ring; + vring_packed->ring_dma_addr = ring_dma_addr; + vring_packed->ring_size_in_bytes = ring_size_in_bytes; event_size_in_bytes = sizeof(struct vring_packed_desc_event); driver = vring_alloc_queue(vdev, event_size_in_bytes, &driver_event_dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + map); if (!driver) - goto err_driver; + goto err; + + vring_packed->vring.driver = driver; + vring_packed->event_size_in_bytes = event_size_in_bytes; + vring_packed->driver_event_dma_addr = driver_event_dma_addr; device = vring_alloc_queue(vdev, event_size_in_bytes, &device_event_dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + map); if (!device) - goto err_device; + goto err; + + vring_packed->vring.device = device; + vring_packed->device_event_dma_addr = device_event_dma_addr; + + vring_packed->vring.num = num; + + return 0; + +err: + vring_free_packed(vring_packed, vdev, map); + return -ENOMEM; +} + +static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed) +{ + struct vring_desc_state_packed *state; + struct vring_desc_extra *extra; + u32 num = vring_packed->vring.num; + + state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL); + if (!state) + goto err_desc_state; + + memset(state, 0, num * sizeof(struct vring_desc_state_packed)); + + extra = vring_alloc_desc_extra(num); + if (!extra) + goto err_desc_extra; + + vring_packed->desc_state = state; + vring_packed->desc_extra = extra; + + return 0; + +err_desc_extra: + kfree(state); +err_desc_state: + return -ENOMEM; +} + +static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed, + bool callback) +{ + vring_packed->next_avail_idx = 0; + vring_packed->avail_wrap_counter = 1; + vring_packed->event_flags_shadow = 0; + vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; + + /* No callback? Tell other side not to bother us. */ + if (!callback) { + vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; + vring_packed->vring.driver->flags = + cpu_to_le16(vring_packed->event_flags_shadow); + } +} + +static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq, + struct vring_virtqueue_packed *vring_packed) +{ + vq->packed = *vring_packed; + + /* Put everything in free lists. */ + vq->free_head = 0; +} + +static void virtqueue_reinit_packed(struct vring_virtqueue *vq) +{ + memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes); + memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes); + + /* we need to reset the desc.flags. 
For more, see is_used_desc_packed() */ + memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes); + + virtqueue_init(vq, vq->packed.vring.num); + virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback); +} + +static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, + struct vring_virtqueue_packed *vring_packed, + struct virtio_device *vdev, + bool weak_barriers, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name, + union virtio_map map) +{ + struct vring_virtqueue *vq; + int err; vq = kmalloc(sizeof(*vq), GFP_KERNEL); if (!vq) - goto err_vq; + return NULL; vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; - vq->vq.num_free = num; vq->vq.index = index; - vq->we_own_ring = true; + vq->vq.reset = false; + vq->we_own_ring = false; vq->notify = notify; vq->weak_barriers = weak_barriers; +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION + vq->broken = true; +#else vq->broken = false; - vq->last_used_idx = 0; - vq->event_triggered = false; - vq->num_added = 0; - vq->packed_ring = true; - vq->use_dma_api = vring_use_dma_api(vdev); -#ifdef DEBUG - vq->in_use = false; - vq->last_add_time_valid = false; #endif + vq->packed_ring = true; + vq->map = map; + vq->use_map_api = vring_use_map_api(vdev); vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -1725,66 +2144,121 @@ static struct virtqueue *vring_create_virtqueue_packed( if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) vq->weak_barriers = false; - vq->packed.ring_dma_addr = ring_dma_addr; - vq->packed.driver_event_dma_addr = driver_event_dma_addr; - vq->packed.device_event_dma_addr = device_event_dma_addr; + err = vring_alloc_state_extra_packed(vring_packed); + if (err) { + kfree(vq); + return NULL; + } - vq->packed.ring_size_in_bytes = ring_size_in_bytes; - vq->packed.event_size_in_bytes = event_size_in_bytes; + virtqueue_vring_init_packed(vring_packed, !!callback); - vq->packed.vring.num = num; - vq->packed.vring.desc = ring; - vq->packed.vring.driver = driver; - vq->packed.vring.device = device; + virtqueue_init(vq, vring_packed->vring.num); + virtqueue_vring_attach_packed(vq, vring_packed); - vq->packed.next_avail_idx = 0; - vq->packed.avail_wrap_counter = 1; - vq->packed.used_wrap_counter = 1; - vq->packed.event_flags_shadow = 0; - vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; + spin_lock(&vdev->vqs_list_lock); + list_add_tail(&vq->vq.list, &vdev->vqs); + spin_unlock(&vdev->vqs_list_lock); + return &vq->vq; +} - vq->packed.desc_state = kmalloc_array(num, - sizeof(struct vring_desc_state_packed), - GFP_KERNEL); - if (!vq->packed.desc_state) - goto err_desc_state; +static struct virtqueue *vring_create_virtqueue_packed( + unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name, + union virtio_map map) +{ + struct vring_virtqueue_packed vring_packed = {}; + struct virtqueue *vq; - memset(vq->packed.desc_state, 0, - num * sizeof(struct vring_desc_state_packed)); + if (vring_alloc_queue_packed(&vring_packed, vdev, num, map)) + return NULL; - /* Put everything in free lists. 
*/ - vq->free_head = 0; + vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers, + context, notify, callback, name, map); + if (!vq) { + vring_free_packed(&vring_packed, vdev, map); + return NULL; + } - vq->packed.desc_extra = vring_alloc_desc_extra(vq, num); - if (!vq->packed.desc_extra) - goto err_desc_extra; + to_vvq(vq)->we_own_ring = true; - /* No callback? Tell other side not to bother us. */ - if (!callback) { - vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; - vq->packed.vring.driver->flags = - cpu_to_le16(vq->packed.event_flags_shadow); - } + return vq; +} - spin_lock(&vdev->vqs_list_lock); - list_add_tail(&vq->vq.list, &vdev->vqs); - spin_unlock(&vdev->vqs_list_lock); - return &vq->vq; +static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) +{ + struct vring_virtqueue_packed vring_packed = {}; + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + int err; -err_desc_extra: - kfree(vq->packed.desc_state); -err_desc_state: - kfree(vq); -err_vq: - vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); -err_device: - vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); -err_driver: - vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); + if (vring_alloc_queue_packed(&vring_packed, vdev, num, vq->map)) + goto err_ring; + + err = vring_alloc_state_extra_packed(&vring_packed); + if (err) + goto err_state_extra; + + vring_free(&vq->vq); + + virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback); + + virtqueue_init(vq, vring_packed.vring.num); + virtqueue_vring_attach_packed(vq, &vring_packed); + + return 0; + +err_state_extra: + vring_free_packed(&vring_packed, vdev, vq->map); err_ring: - return NULL; + virtqueue_reinit_packed(vq); + return -ENOMEM; } +static int virtqueue_disable_and_recycle(struct virtqueue *_vq, + void (*recycle)(struct virtqueue *vq, void *buf)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + void *buf; + int err; + + if (!vq->we_own_ring) + return -EPERM; + + if (!vdev->config->disable_vq_and_reset) + return -ENOENT; + + if (!vdev->config->enable_vq_after_reset) + return -ENOENT; + + err = vdev->config->disable_vq_and_reset(_vq); + if (err) + return err; + + while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) + recycle(_vq, buf); + + return 0; +} + +static int virtqueue_enable_after_reset(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + + if (vdev->config->enable_vq_after_reset(_vq)) + return -EBUSY; + + return 0; +} /* * Generic functions and exported symbols. @@ -1797,14 +2271,15 @@ static inline int virtqueue_add(struct virtqueue *_vq, unsigned int in_sgs, void *data, void *ctx, + bool premapped, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, - out_sgs, in_sgs, data, ctx, gfp) : + out_sgs, in_sgs, data, ctx, premapped, gfp) : virtqueue_add_split(_vq, sgs, total_sg, - out_sgs, in_sgs, data, ctx, gfp); + out_sgs, in_sgs, data, ctx, premapped, gfp); } /** @@ -1820,6 +2295,10 @@ static inline int virtqueue_add(struct virtqueue *_vq, * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + * + * NB: ENOSPC is a special code that is only returned on an attempt to add a + * buffer to a full VQ. 
It indicates that some buffers are outstanding and that + * the operation can be retried after some buffers have been used. */ int virtqueue_add_sgs(struct virtqueue *_vq, struct scatterlist *sgs[], @@ -1838,7 +2317,7 @@ int virtqueue_add_sgs(struct virtqueue *_vq, total_sg++; } return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, - data, NULL, gfp); + data, NULL, false, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_sgs); @@ -1860,11 +2339,34 @@ int virtqueue_add_outbuf(struct virtqueue *vq, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); + return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, false, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); /** + * virtqueue_add_outbuf_premapped - expose output buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg readable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Return: + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_outbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, true, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped); + +/** * virtqueue_add_inbuf - expose input buffers to other end * @vq: the struct virtqueue we're talking about. * @sg: scatterlist (must be well-formed and terminated!) @@ -1882,7 +2384,7 @@ int virtqueue_add_inbuf(struct virtqueue *vq, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); + return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); @@ -1906,11 +2408,53 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, void *ctx, gfp_t gfp) { - return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); + return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, false, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); /** + * virtqueue_add_inbuf_premapped - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg writable by other side + * @data: the token identifying the buffer. + * @ctx: extra context for the token + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Return: + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_inbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + void *ctx, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, true, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped); + +/** + * virtqueue_dma_dev - get the dma dev + * @_vq: the struct virtqueue we're talking about. + * + * Returns the dma dev. That can been used for dma api. + */ +struct device *virtqueue_dma_dev(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (vq->use_map_api && !_vq->vdev->map) + return vq->map.dma_dev; + else + return NULL; +} +EXPORT_SYMBOL_GPL(virtqueue_dma_dev); + +/** * virtqueue_kick_prepare - first half of split virtqueue_kick call. 
* @_vq: the struct virtqueue * @@ -2019,12 +2563,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - /* If device triggered an event already it won't trigger one again: - * no need to disable. - */ - if (vq->event_triggered) - return; - if (vq->packed_ring) virtqueue_disable_cb_packed(_vq); else @@ -2044,7 +2582,7 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb); * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). */ -unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -2065,7 +2603,7 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); * * This does not need to be serialized. */ -bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) +bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -2091,7 +2629,7 @@ EXPORT_SYMBOL_GPL(virtqueue_poll); */ bool virtqueue_enable_cb(struct virtqueue *_vq) { - unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); + unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); return !virtqueue_poll(_vq, last_used_idx); } @@ -2115,7 +2653,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) struct vring_virtqueue *vq = to_vvq(_vq); if (vq->event_triggered) - vq->event_triggered = false; + data_race(vq->event_triggered = false); return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : virtqueue_enable_cb_delayed_split(_vq); @@ -2127,8 +2665,8 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); * @_vq: the struct virtqueue we're talking about. * * Returns NULL or the "data" token handed to virtqueue_add_*(). - * This is not valid on an active queue; it is useful only for device - * shutdown. + * This is not valid on an active queue; it is useful for device + * shutdown or the reset queue. */ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { @@ -2144,6 +2682,14 @@ static inline bool more_used(const struct vring_virtqueue *vq) return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); } +/** + * vring_interrupt - notify a virtqueue on an interrupt + * @irq: the IRQ number (ignored) + * @_vq: the struct virtqueue to notify + * + * Calls the callback function of @_vq to process the virtqueue + * notification. + */ irqreturn_t vring_interrupt(int irq, void *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -2153,12 +2699,19 @@ irqreturn_t vring_interrupt(int irq, void *_vq) return IRQ_NONE; } - if (unlikely(vq->broken)) + if (unlikely(vq->broken)) { +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION + dev_warn_once(&vq->vq.vdev->dev, + "virtio vring IRQ raised before DRIVER_OK"); + return IRQ_NONE; +#else return IRQ_HANDLED; +#endif + } /* Just a hint for performance: so it's ok that this can be racy! 
*/ if (vq->event) - vq->event_triggered = true; + data_race(vq->event_triggered = true); pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); if (vq->vq.callback) @@ -2168,94 +2721,32 @@ irqreturn_t vring_interrupt(int irq, void *_vq) } EXPORT_SYMBOL_GPL(vring_interrupt); -/* Only available for split ring */ -struct virtqueue *__vring_new_virtqueue(unsigned int index, - struct vring vring, - struct virtio_device *vdev, - bool weak_barriers, - bool context, - bool (*notify)(struct virtqueue *), - void (*callback)(struct virtqueue *), - const char *name) +struct virtqueue *vring_create_virtqueue( + unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name) { - struct vring_virtqueue *vq; + union virtio_map map = {.dma_dev = vdev->dev.parent}; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) - return NULL; - - vq = kmalloc(sizeof(*vq), GFP_KERNEL); - if (!vq) - return NULL; - - vq->packed_ring = false; - vq->vq.callback = callback; - vq->vq.vdev = vdev; - vq->vq.name = name; - vq->vq.num_free = vring.num; - vq->vq.index = index; - vq->we_own_ring = false; - vq->notify = notify; - vq->weak_barriers = weak_barriers; - vq->broken = false; - vq->last_used_idx = 0; - vq->event_triggered = false; - vq->num_added = 0; - vq->use_dma_api = vring_use_dma_api(vdev); -#ifdef DEBUG - vq->in_use = false; - vq->last_add_time_valid = false; -#endif - - vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && - !context; - vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); - - if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) - vq->weak_barriers = false; - - vq->split.queue_dma_addr = 0; - vq->split.queue_size_in_bytes = 0; - - vq->split.vring = vring; - vq->split.avail_flags_shadow = 0; - vq->split.avail_idx_shadow = 0; - - /* No callback? Tell other side not to bother us. */ - if (!callback) { - vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; - if (!vq->event) - vq->split.vring.avail->flags = cpu_to_virtio16(vdev, - vq->split.avail_flags_shadow); - } - - vq->split.desc_state = kmalloc_array(vring.num, - sizeof(struct vring_desc_state_split), GFP_KERNEL); - if (!vq->split.desc_state) - goto err_state; - - vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num); - if (!vq->split.desc_extra) - goto err_extra; - - /* Put everything in free lists. 
*/ - vq->free_head = 0; - memset(vq->split.desc_state, 0, vring.num * - sizeof(struct vring_desc_state_split)); - - spin_lock(&vdev->vqs_list_lock); - list_add_tail(&vq->vq.list, &vdev->vqs); - spin_unlock(&vdev->vqs_list_lock); - return &vq->vq; + return vring_create_virtqueue_packed(index, num, vring_align, + vdev, weak_barriers, may_reduce_num, + context, notify, callback, name, map); -err_extra: - kfree(vq->split.desc_state); -err_state: - kfree(vq); - return NULL; + return vring_create_virtqueue_split(index, num, vring_align, + vdev, weak_barriers, may_reduce_num, + context, notify, callback, name, map); } -EXPORT_SYMBOL_GPL(__vring_new_virtqueue); +EXPORT_SYMBOL_GPL(vring_create_virtqueue); -struct virtqueue *vring_create_virtqueue( +struct virtqueue *vring_create_virtqueue_map( unsigned int index, unsigned int num, unsigned int vring_align, @@ -2265,21 +2756,118 @@ struct virtqueue *vring_create_virtqueue( bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), - const char *name) + const char *name, + union virtio_map map) { if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name); + context, notify, callback, name, map); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name); + context, notify, callback, name, map); } -EXPORT_SYMBOL_GPL(vring_create_virtqueue); +EXPORT_SYMBOL_GPL(vring_create_virtqueue_map); + +/** + * virtqueue_resize - resize the vring of vq + * @_vq: the struct virtqueue we're talking about. + * @num: new ring num + * @recycle: callback to recycle unused buffers + * @recycle_done: callback to be invoked when recycle for all unused buffers done + * + * When it is really necessary to create a new vring, it will set the current vq + * into the reset state. Then call the passed callback to recycle the buffer + * that is no longer used. Only after the new vring is successfully created, the + * old vring will be released. + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error. + * 0: success. + * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size. + * vq can still work normally + * -EBUSY: Failed to sync with device, vq may not work properly + * -ENOENT: Transport or device not supported + * -E2BIG/-EINVAL: num error + * -EPERM: Operation not permitted + * + */ +int virtqueue_resize(struct virtqueue *_vq, u32 num, + void (*recycle)(struct virtqueue *vq, void *buf), + void (*recycle_done)(struct virtqueue *vq)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + int err, err_reset; + + if (num > vq->vq.num_max) + return -E2BIG; + + if (!num) + return -EINVAL; + + if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num) + return 0; + + err = virtqueue_disable_and_recycle(_vq, recycle); + if (err) + return err; + if (recycle_done) + recycle_done(_vq); + + if (vq->packed_ring) + err = virtqueue_resize_packed(_vq, num); + else + err = virtqueue_resize_split(_vq, num); + + err_reset = virtqueue_enable_after_reset(_vq); + if (err_reset) + return err_reset; + + return err; +} +EXPORT_SYMBOL_GPL(virtqueue_resize); + +/** + * virtqueue_reset - detach and recycle all unused buffers + * @_vq: the struct virtqueue we're talking about. 
+ * @recycle: callback to recycle unused buffers + * @recycle_done: callback to be invoked when recycle for all unused buffers done + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error. + * 0: success. + * -EBUSY: Failed to sync with device, vq may not work properly + * -ENOENT: Transport or device not supported + * -EPERM: Operation not permitted + */ +int virtqueue_reset(struct virtqueue *_vq, + void (*recycle)(struct virtqueue *vq, void *buf), + void (*recycle_done)(struct virtqueue *vq)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + int err; + + err = virtqueue_disable_and_recycle(_vq, recycle); + if (err) + return err; + if (recycle_done) + recycle_done(_vq); + + if (vq->packed_ring) + virtqueue_reinit_packed(vq); + else + virtqueue_reinit_split(vq); + + return virtqueue_enable_after_reset(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_reset); -/* Only available for split ring */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, unsigned int vring_align, @@ -2291,41 +2879,50 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, void (*callback)(struct virtqueue *vq), const char *name) { - struct vring vring; + struct vring_virtqueue_split vring_split = {}; + union virtio_map map = {.dma_dev = vdev->dev.parent}; - if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) - return NULL; + if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + struct vring_virtqueue_packed vring_packed = {}; - vring_init(&vring, num, pages, vring_align); - return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, - notify, callback, name); + vring_packed.vring.num = num; + vring_packed.vring.desc = pages; + return __vring_new_virtqueue_packed(index, &vring_packed, + vdev, weak_barriers, + context, notify, callback, + name, map); + } + + vring_init(&vring_split.vring, num, pages, vring_align); + return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, + context, notify, callback, name, + map); } EXPORT_SYMBOL_GPL(vring_new_virtqueue); -void vring_del_virtqueue(struct virtqueue *_vq) +static void vring_free(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - spin_lock(&vq->vq.vdev->vqs_list_lock); - list_del(&_vq->list); - spin_unlock(&vq->vq.vdev->vqs_list_lock); - if (vq->we_own_ring) { if (vq->packed_ring) { vring_free_queue(vq->vq.vdev, vq->packed.ring_size_in_bytes, vq->packed.vring.desc, - vq->packed.ring_dma_addr); + vq->packed.ring_dma_addr, + vq->map); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.driver, - vq->packed.driver_event_dma_addr); + vq->packed.driver_event_dma_addr, + vq->map); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.device, - vq->packed.device_event_dma_addr); + vq->packed.device_event_dma_addr, + vq->map); kfree(vq->packed.desc_state); kfree(vq->packed.desc_extra); @@ -2333,17 +2930,47 @@ void vring_del_virtqueue(struct virtqueue *_vq) vring_free_queue(vq->vq.vdev, vq->split.queue_size_in_bytes, vq->split.vring.desc, - vq->split.queue_dma_addr); + vq->split.queue_dma_addr, + vq->map); } } if (!vq->packed_ring) { kfree(vq->split.desc_state); kfree(vq->split.desc_extra); } +} + +void vring_del_virtqueue(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + spin_lock(&vq->vq.vdev->vqs_list_lock); + list_del(&_vq->list); + spin_unlock(&vq->vq.vdev->vqs_list_lock); + + vring_free(_vq); + kfree(vq); } 
EXPORT_SYMBOL_GPL(vring_del_virtqueue); +u32 vring_notification_data(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 next; + + if (vq->packed_ring) + next = (vq->packed.next_avail_idx & + ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | + vq->packed.avail_wrap_counter << + VRING_PACKED_EVENT_F_WRAP_CTR; + else + next = vq->split.avail_idx_shadow; + + return next << 16 | _vq->index; +} +EXPORT_SYMBOL_GPL(vring_notification_data); + /* Manipulates transport-specific feature bits. */ void vring_transport_features(struct virtio_device *vdev) { @@ -2363,6 +2990,8 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_ORDER_PLATFORM: break; + case VIRTIO_F_NOTIFICATION_DATA: + break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); @@ -2378,19 +3007,43 @@ EXPORT_SYMBOL_GPL(vring_transport_features); * Returns the size of the vring. This is mainly used for boasting to * userspace. Unlike other operations, this need not be serialized. */ -unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) +unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; } EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); -bool virtqueue_is_broken(struct virtqueue *_vq) +/* + * This function should only be called by the core, not directly by the driver. + */ +void __virtqueue_break(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ + WRITE_ONCE(vq->broken, true); +} +EXPORT_SYMBOL_GPL(__virtqueue_break); + +/* + * This function should only be called by the core, not directly by the driver. + */ +void __virtqueue_unbreak(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ + WRITE_ONCE(vq->broken, false); +} +EXPORT_SYMBOL_GPL(__virtqueue_unbreak); + +bool virtqueue_is_broken(const struct virtqueue *_vq) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + return READ_ONCE(vq->broken); } EXPORT_SYMBOL_GPL(virtqueue_is_broken); @@ -2414,9 +3067,31 @@ void virtio_break_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(virtio_break_device); -dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) +/* + * This should allow the device to be used by the driver. You may + * need to grab appropriate locks to flush the write to + * vq->broken. This should only be used in some specific case e.g + * (probing and restoring). This function should only be called by the + * core, not directly by the driver. + */ +void __virtio_unbreak_device(struct virtio_device *dev) { - struct vring_virtqueue *vq = to_vvq(_vq); + struct virtqueue *_vq; + + spin_lock(&dev->vqs_list_lock); + list_for_each_entry(_vq, &dev->vqs, list) { + struct vring_virtqueue *vq = to_vvq(_vq); + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). 
*/ + WRITE_ONCE(vq->broken, false); + } + spin_unlock(&dev->vqs_list_lock); +} +EXPORT_SYMBOL_GPL(__virtio_unbreak_device); + +dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); BUG_ON(!vq->we_own_ring); @@ -2427,9 +3102,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); -dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) +dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); BUG_ON(!vq->we_own_ring); @@ -2441,9 +3116,9 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); -dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) +dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); BUG_ON(!vq->we_own_ring); @@ -2456,10 +3131,277 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); /* Only available for split ring */ -const struct vring *virtqueue_get_vring(struct virtqueue *vq) +const struct vring *virtqueue_get_vring(const struct virtqueue *vq) { return &to_vvq(vq)->split.vring; } EXPORT_SYMBOL_GPL(virtqueue_get_vring); +/** + * virtqueue_map_alloc_coherent - alloc coherent mapping + * @vdev: the virtio device we are talking to + * @map: metadata for performing mapping + * @size: the size of the buffer + * @map_handle: the pointer to the mapped address + * @gfp: allocation flag (GFP_XXX) + * + * return virtual address or NULL on error + */ +void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, + union virtio_map map, + size_t size, dma_addr_t *map_handle, + gfp_t gfp) +{ + if (vdev->map) + return vdev->map->alloc(map, size, + map_handle, gfp); + else + return dma_alloc_coherent(map.dma_dev, size, + map_handle, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent); + +/** + * virtqueue_map_free_coherent - free coherent mapping + * @vdev: the virtio device we are talking to + * @map: metadata for performing mapping + * @size: the size of the buffer + * @vaddr: the virtual address that needs to be freed + * @map_handle: the mapped address that needs to be freed + * + */ +void virtqueue_map_free_coherent(struct virtio_device *vdev, + union virtio_map map, size_t size, void *vaddr, + dma_addr_t map_handle) +{ + if (vdev->map) + vdev->map->free(map, size, vaddr, + map_handle, 0); + else + dma_free_coherent(map.dma_dev, size, vaddr, map_handle); +} +EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent); + +/** + * virtqueue_map_page_attrs - map a page to the device + * @_vq: the virtqueue we are talking to + * @page: the page that will be mapped by the device + * @offset: the offset in the page for a buffer + * @size: the buffer size + * @dir: mapping direction + * @attrs: mapping attributes + * + * Returns mapped address. Caller should check that by virtqueue_map_mapping_error(). 
+ */ +dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + + if (vdev->map) + return vdev->map->map_page(vq->map, + page, offset, size, + dir, attrs); + + return dma_map_page_attrs(vring_dma_dev(vq), + page, offset, size, + dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs); + +/** + * virtqueue_unmap_page_attrs - map a page to the device + * @_vq: the virtqueue we are talking to + * @map_handle: the mapped address + * @size: the buffer size + * @dir: mapping direction + * @attrs: unmapping attributes + */ +void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, + dma_addr_t map_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + + if (vdev->map) + vdev->map->unmap_page(vq->map, + map_handle, size, dir, attrs); + else + dma_unmap_page_attrs(vring_dma_dev(vq), map_handle, + size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs); + +/** + * virtqueue_map_single_attrs - map DMA for _vq + * @_vq: the struct virtqueue we're talking about. + * @ptr: the pointer of the buffer to do dma + * @size: the size of the buffer to do dma + * @dir: DMA direction + * @attrs: DMA Attrs + * + * The caller calls this to do dma mapping in advance. The DMA address can be + * passed to this _vq when it is in pre-mapped mode. + * + * return mapped address. Caller should check that by virtqueue_map_mapping_error(). + */ +dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, + size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_map_api) { + kmsan_handle_dma(virt_to_phys(ptr), size, dir); + return (dma_addr_t)virt_to_phys(ptr); + } + + /* DMA must never operate on areas that might be remapped. */ + if (dev_WARN_ONCE(&_vq->vdev->dev, is_vmalloc_addr(ptr), + "rejecting DMA map of vmalloc memory\n")) + return DMA_MAPPING_ERROR; + + return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr), + offset_in_page(ptr), size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs); + +/** + * virtqueue_unmap_single_attrs - unmap map for _vq + * @_vq: the struct virtqueue we're talking about. + * @addr: the dma address to unmap + * @size: the size of the buffer + * @dir: DMA direction + * @attrs: DMA Attrs + * + * Unmap the address that is mapped by the virtqueue_map_* APIs. + * + */ +void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, + dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_map_api) + return; + + virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs); + +/** + * virtqueue_map_mapping_error - check dma address + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * + * Returns 0 means dma valid. Other means invalid dma address. 
+ */ +int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + + return vring_mapping_error(vq, addr); +} +EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error); + +/** + * virtqueue_map_need_sync - check a dma address needs sync + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * + * Check if the dma address mapped by the virtqueue_map_* APIs needs to be + * synchronized + * + * return bool + */ +bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + + if (!vq->use_map_api) + return false; + + if (vdev->map) + return vdev->map->need_sync(vq->map, addr); + else + return dma_need_sync(vring_dma_dev(vq), addr); +} +EXPORT_SYMBOL_GPL(virtqueue_map_need_sync); + +/** + * virtqueue_map_sync_single_range_for_cpu - map sync for cpu + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * @offset: DMA address offset + * @size: buf size for sync + * @dir: DMA direction + * + * Before calling this function, use virtqueue_map_need_sync() to confirm that + * the DMA address really needs to be synchronized + * + */ +void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, + dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + + if (!vq->use_map_api) + return; + + if (vdev->map) + vdev->map->sync_single_for_cpu(vq->map, + addr + offset, size, dir); + else + dma_sync_single_range_for_cpu(vring_dma_dev(vq), + addr, offset, size, dir); +} +EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu); + +/** + * virtqueue_map_sync_single_range_for_device - map sync for device + * @_vq: the struct virtqueue we're talking about. + * @addr: DMA address + * @offset: DMA address offset + * @size: buf size for sync + * @dir: DMA direction + * + * Before calling this function, use virtqueue_map_need_sync() to confirm that + * the DMA address really needs to be synchronized + */ +void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, + dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + + if (!vq->use_map_api) + return; + + if (vdev->map) + vdev->map->sync_single_for_device(vq->map, + addr + offset, + size, dir); + else + dma_sync_single_range_for_device(vring_dma_dev(vq), addr, + offset, size, dir); +} +EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device); + +MODULE_DESCRIPTION("Virtio ring implementation"); MODULE_LICENSE("GPL"); |
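The mapping helpers exported above are meant to pair with the *_premapped add variants introduced earlier in this patch: the driver performs and checks the mapping itself, places the handle in the scatterlist, and the core is expected to take the address straight from the scatterlist for premapped entries. The sketch below is illustrative only and is not code from this patch; post_premapped_inbuf() and sync_premapped_inbuf() are invented names, the single-entry scatterlist handling is an assumption about how a driver would feed a premapped address, and a real driver would keep the dma handle in its own per-buffer state for the completion path.

#include <linux/virtio.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>

/* Illustrative sketch only; not part of this patch. */
static int post_premapped_inbuf(struct virtqueue *vq, void *buf, size_t len)
{
	struct scatterlist sg;
	dma_addr_t addr;
	int err;

	/* The driver maps the buffer itself instead of letting the core do it. */
	addr = virtqueue_map_single_attrs(vq, buf, len, DMA_FROM_DEVICE, 0);
	if (virtqueue_map_mapping_error(vq, addr))
		return -ENOMEM;

	/* Hand the already-mapped address (assumed to be read from the sg) to the ring. */
	sg_init_table(&sg, 1);
	sg.dma_address = addr;
	sg.length = len;

	err = virtqueue_add_inbuf_premapped(vq, &sg, 1, buf, NULL, GFP_ATOMIC);
	if (err)
		virtqueue_unmap_single_attrs(vq, addr, len, DMA_FROM_DEVICE, 0);
	return err;
}

/* Completion side: make the device's writes visible to the CPU only if needed. */
static void sync_premapped_inbuf(struct virtqueue *vq, dma_addr_t addr,
				 size_t len)
{
	if (virtqueue_map_need_sync(vq, addr))
		virtqueue_map_sync_single_range_for_cpu(vq, addr, 0, len,
							DMA_FROM_DEVICE);
}

The same driver would call virtqueue_unmap_single_attrs() once it is finished with the buffer; when the ring is not using the mapping API the helpers degrade to no-ops or physical addresses, so the sketch works unchanged in both modes.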

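For the new virtqueue_resize()/virtqueue_reset() entry points, the expected driver pattern is to supply a recycle callback that releases every still-outstanding buffer token while the queue is held in reset. The following is a minimal, hypothetical sketch based on the kernel-doc above, not code from this patch: my_recycle() and my_change_ring_size() are invented names, and freeing the token with kfree() stands in for whatever per-buffer cleanup a real driver performs. virtqueue_reset() follows the same pattern but keeps the current ring size.

#include <linux/virtio.h>
#include <linux/slab.h>

/* Illustrative sketch only; not part of this patch. */
static void my_recycle(struct virtqueue *vq, void *buf)
{
	/*
	 * Called once for each buffer that was still queued when the vq was
	 * reset; 'buf' is the token originally passed to virtqueue_add_*().
	 */
	kfree(buf);
}

static int my_change_ring_size(struct virtqueue *vq, u32 new_num)
{
	int err;

	/*
	 * virtqueue_resize() resets the queue through the transport, hands
	 * every outstanding token back via my_recycle(), swaps in a ring of
	 * new_num entries (keeping the old ring on -ENOMEM), and re-enables
	 * the queue.
	 */
	err = virtqueue_resize(vq, new_num, my_recycle, NULL);
	if (err == -ENOMEM)
		dev_warn(&vq->vdev->dev, "fell back to the old ring size\n");
	return err;
}

Passing NULL for recycle_done is allowed by the code above (it is only invoked when non-NULL), while the recycle callback itself must be provided because the core calls it unconditionally for each detached buffer.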