From d7b31359ecef8d32540266f39d99892f61d17c4b Mon Sep 17 00:00:00 2001 From: Lénaïc Huard Date: Thu, 22 Mar 2018 23:53:05 +0100 Subject: kvm_config: add CONFIG_VIRTIO_MENU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that make kvmconfig enables all the virtio drivers even if it is preceded by a make allnoconfig. Signed-off-by: Lénaïc Huard Signed-off-by: Michael S. Tsirkin --- kernel/configs/kvm_guest.config | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/configs/kvm_guest.config b/kernel/configs/kvm_guest.config index 108fecc20fc1..208481d91090 100644 --- a/kernel/configs/kvm_guest.config +++ b/kernel/configs/kvm_guest.config @@ -20,6 +20,7 @@ CONFIG_PARAVIRT=y CONFIG_KVM_GUEST=y CONFIG_S390_GUEST=y CONFIG_VIRTIO=y +CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BLK=y CONFIG_VIRTIO_CONSOLE=y -- cgit From 86a559787e6f5cf662c081363f64a20cad654195 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 27 Aug 2018 09:32:17 +0800 Subject: virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_HINT feature indicates the support of reporting hints of guest free pages to host via virtio-balloon. Currenlty, only free page blocks of MAX_ORDER - 1 are reported. They are obtained one by one from the mm free list via the regular allocation function. Host requests the guest to report free page hints by sending a new cmd id to the guest via the free_page_report_cmd_id configuration register. When the guest starts to report, it first sends a start cmd to host via the free page vq, which acks to host the cmd id received. When the guest finishes reporting free pages, a stop cmd is sent to host via the vq. Host may also send a stop cmd id to the guest to stop the reporting. VIRTIO_BALLOON_CMD_ID_STOP: Host sends this cmd to stop the guest reporting. VIRTIO_BALLOON_CMD_ID_DONE: Host sends this cmd to tell the guest that the reported pages are ready to be freed. Why does the guest free the reported pages when host tells it is ready to free? This is because freeing pages appears to be expensive for live migration. free_pages() dirties memory very quickly and makes the live migraion not converge in some cases. So it is good to delay the free_page operation when the migration is done, and host sends a command to guest about that. Why do we need the new VIRTIO_BALLOON_CMD_ID_DONE, instead of reusing VIRTIO_BALLOON_CMD_ID_STOP? This is because live migration is usually done in several rounds. At the end of each round, host needs to send a VIRTIO_BALLOON_CMD_ID_STOP cmd to the guest to stop (or say pause) the reporting. The guest resumes the reporting when it receives a new command id at the beginning of the next round. So we need a new cmd id to distinguish between "stop reporting" and "ready to free the reported pages". TODO: - Add a batch page allocation API to amortize the allocation overhead. Signed-off-by: Wei Wang Signed-off-by: Liang Li Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 364 ++++++++++++++++++++++++++++++++---- include/uapi/linux/virtio_balloon.h | 5 + 2 files changed, 336 insertions(+), 33 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index d1c1f6283729..a18567889289 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -41,13 +41,34 @@ #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 +#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \ + __GFP_NOMEMALLOC) +/* The order of free page blocks to report to host */ +#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1) +/* The size of a free page block in bytes */ +#define VIRTIO_BALLOON_FREE_PAGE_SIZE \ + (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT)) + #ifdef CONFIG_BALLOON_COMPACTION static struct vfsmount *balloon_mnt; #endif +enum virtio_balloon_vq { + VIRTIO_BALLOON_VQ_INFLATE, + VIRTIO_BALLOON_VQ_DEFLATE, + VIRTIO_BALLOON_VQ_STATS, + VIRTIO_BALLOON_VQ_FREE_PAGE, + VIRTIO_BALLOON_VQ_MAX +}; + struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; + + /* Balloon's own wq for cpu-intensive work items */ + struct workqueue_struct *balloon_wq; + /* The free page reporting work item submitted to the balloon wq */ + struct work_struct report_free_page_work; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; @@ -57,6 +78,18 @@ struct virtio_balloon { spinlock_t stop_update_lock; bool stop_update; + /* The list of allocated free pages, waiting to be given back to mm */ + struct list_head free_page_list; + spinlock_t free_page_list_lock; + /* The number of free page blocks on the above list */ + unsigned long num_free_page_blocks; + /* The cmd id received from host */ + u32 cmd_id_received; + /* The cmd id that is actively in use */ + __virtio32 cmd_id_active; + /* Buffer to store the stop sign */ + __virtio32 cmd_id_stop; + /* Waiting for host to ack the pages we released. */ wait_queue_head_t acked; @@ -320,17 +353,6 @@ static void stats_handle_request(struct virtio_balloon *vb) virtqueue_kick(vq); } -static void virtballoon_changed(struct virtio_device *vdev) -{ - struct virtio_balloon *vb = vdev->priv; - unsigned long flags; - - spin_lock_irqsave(&vb->stop_update_lock, flags); - if (!vb->stop_update) - queue_work(system_freezable_wq, &vb->update_balloon_size_work); - spin_unlock_irqrestore(&vb->stop_update_lock, flags); -} - static inline s64 towards_target(struct virtio_balloon *vb) { s64 target; @@ -347,6 +369,60 @@ static inline s64 towards_target(struct virtio_balloon *vb) return target - vb->num_pages; } +/* Gives back @num_to_return blocks of free pages to mm. */ +static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb, + unsigned long num_to_return) +{ + struct page *page; + unsigned long num_returned; + + spin_lock_irq(&vb->free_page_list_lock); + for (num_returned = 0; num_returned < num_to_return; num_returned++) { + page = balloon_page_pop(&vb->free_page_list); + if (!page) + break; + free_pages((unsigned long)page_address(page), + VIRTIO_BALLOON_FREE_PAGE_ORDER); + } + vb->num_free_page_blocks -= num_returned; + spin_unlock_irq(&vb->free_page_list_lock); + + return num_returned; +} + +static void virtballoon_changed(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + unsigned long flags; + s64 diff = towards_target(vb); + + if (diff) { + spin_lock_irqsave(&vb->stop_update_lock, flags); + if (!vb->stop_update) + queue_work(system_freezable_wq, + &vb->update_balloon_size_work); + spin_unlock_irqrestore(&vb->stop_update_lock, flags); + } + + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + virtio_cread(vdev, struct virtio_balloon_config, + free_page_report_cmd_id, &vb->cmd_id_received); + if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) { + /* Pass ULONG_MAX to give back all the free pages */ + return_free_pages_to_mm(vb, ULONG_MAX); + } else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP && + vb->cmd_id_received != + virtio32_to_cpu(vdev, vb->cmd_id_active)) { + spin_lock_irqsave(&vb->stop_update_lock, flags); + if (!vb->stop_update) { + queue_work(vb->balloon_wq, + &vb->report_free_page_work); + } + spin_unlock_irqrestore(&vb->stop_update_lock, flags); + } + } +} + static void update_balloon_size(struct virtio_balloon *vb) { u32 actual = vb->num_pages; @@ -389,26 +465,44 @@ static void update_balloon_size_func(struct work_struct *work) static int init_vqs(struct virtio_balloon *vb) { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; - static const char * const names[] = { "inflate", "deflate", "stats" }; - int err, nvqs; + struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX]; + vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX]; + const char *names[VIRTIO_BALLOON_VQ_MAX]; + int err; /* - * We expect two virtqueues: inflate and deflate, and - * optionally stat. + * Inflateq and deflateq are used unconditionally. The names[] + * will be NULL if the related feature is not enabled, which will + * cause no allocation for the corresponding virtqueue in find_vqs. */ - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); + callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack; + names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; + callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; + names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; + names[VIRTIO_BALLOON_VQ_STATS] = NULL; + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + names[VIRTIO_BALLOON_VQ_STATS] = "stats"; + callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request; + } + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq"; + callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; + } + + err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, + vqs, callbacks, names, NULL, NULL); if (err) return err; - vb->inflate_vq = vqs[0]; - vb->deflate_vq = vqs[1]; + vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE]; + vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE]; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { struct scatterlist sg; unsigned int num_stats; - vb->stats_vq = vqs[2]; + vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS]; /* * Prime this virtqueue with one buffer so the hypervisor can @@ -426,9 +520,145 @@ static int init_vqs(struct virtio_balloon *vb) } virtqueue_kick(vb->stats_vq); } + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE]; + + return 0; +} + +static int send_cmd_id_start(struct virtio_balloon *vb) +{ + struct scatterlist sg; + struct virtqueue *vq = vb->free_page_vq; + int err, unused; + + /* Detach all the used buffers from the vq */ + while (virtqueue_get_buf(vq, &unused)) + ; + + vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received); + sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active)); + err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL); + if (!err) + virtqueue_kick(vq); + return err; +} + +static int send_cmd_id_stop(struct virtio_balloon *vb) +{ + struct scatterlist sg; + struct virtqueue *vq = vb->free_page_vq; + int err, unused; + + /* Detach all the used buffers from the vq */ + while (virtqueue_get_buf(vq, &unused)) + ; + + sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop)); + err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL); + if (!err) + virtqueue_kick(vq); + return err; +} + +static int get_free_page_and_send(struct virtio_balloon *vb) +{ + struct virtqueue *vq = vb->free_page_vq; + struct page *page; + struct scatterlist sg; + int err, unused; + void *p; + + /* Detach all the used buffers from the vq */ + while (virtqueue_get_buf(vq, &unused)) + ; + + page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG, + VIRTIO_BALLOON_FREE_PAGE_ORDER); + /* + * When the allocation returns NULL, it indicates that we have got all + * the possible free pages, so return -EINTR to stop. + */ + if (!page) + return -EINTR; + + p = page_address(page); + sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE); + /* There is always 1 entry reserved for the cmd id to use. */ + if (vq->num_free > 1) { + err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL); + if (unlikely(err)) { + free_pages((unsigned long)p, + VIRTIO_BALLOON_FREE_PAGE_ORDER); + return err; + } + virtqueue_kick(vq); + spin_lock_irq(&vb->free_page_list_lock); + balloon_page_push(&vb->free_page_list, page); + vb->num_free_page_blocks++; + spin_unlock_irq(&vb->free_page_list_lock); + } else { + /* + * The vq has no available entry to add this page block, so + * just free it. + */ + free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER); + } + return 0; } +static int send_free_pages(struct virtio_balloon *vb) +{ + int err; + u32 cmd_id_active; + + while (1) { + /* + * If a stop id or a new cmd id was just received from host, + * stop the reporting. + */ + cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active); + if (cmd_id_active != vb->cmd_id_received) + break; + + /* + * The free page blocks are allocated and sent to host one by + * one. + */ + err = get_free_page_and_send(vb); + if (err == -EINTR) + break; + else if (unlikely(err)) + return err; + } + + return 0; +} + +static void report_free_page_func(struct work_struct *work) +{ + int err; + struct virtio_balloon *vb = container_of(work, struct virtio_balloon, + report_free_page_work); + struct device *dev = &vb->vdev->dev; + + /* Start by sending the received cmd id to host with an outbuf. */ + err = send_cmd_id_start(vb); + if (unlikely(err)) + dev_err(dev, "Failed to send a start id, err = %d\n", err); + + err = send_free_pages(vb); + if (unlikely(err)) + dev_err(dev, "Failed to send a free page, err = %d\n", err); + + /* End by sending a stop id to host with an outbuf. */ + err = send_cmd_id_stop(vb); + if (unlikely(err)) + dev_err(dev, "Failed to send a stop id, err = %d\n", err); +} + #ifdef CONFIG_BALLOON_COMPACTION /* * virtballoon_migratepage - perform the balloon page migration on behalf of @@ -512,14 +742,23 @@ static struct file_system_type balloon_fs = { #endif /* CONFIG_BALLOON_COMPACTION */ -static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, - struct shrink_control *sc) +static unsigned long shrink_free_pages(struct virtio_balloon *vb, + unsigned long pages_to_free) { - unsigned long pages_to_free, pages_freed = 0; - struct virtio_balloon *vb = container_of(shrinker, - struct virtio_balloon, shrinker); + unsigned long blocks_to_free, blocks_freed; - pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE; + pages_to_free = round_up(pages_to_free, + 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER); + blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER; + blocks_freed = return_free_pages_to_mm(vb, blocks_to_free); + + return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER; +} + +static unsigned long shrink_balloon_pages(struct virtio_balloon *vb, + unsigned long pages_to_free) +{ + unsigned long pages_freed = 0; /* * One invocation of leak_balloon can deflate at most @@ -527,12 +766,33 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, * multiple times to deflate pages till reaching pages_to_free. */ while (vb->num_pages && pages_to_free) { + pages_freed += leak_balloon(vb, pages_to_free) / + VIRTIO_BALLOON_PAGES_PER_PAGE; pages_to_free -= pages_freed; - pages_freed += leak_balloon(vb, pages_to_free); } update_balloon_size(vb); - return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE; + return pages_freed; +} + +static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, + struct shrink_control *sc) +{ + unsigned long pages_to_free, pages_freed = 0; + struct virtio_balloon *vb = container_of(shrinker, + struct virtio_balloon, shrinker); + + pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE; + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + pages_freed = shrink_free_pages(vb, pages_to_free); + + if (pages_freed >= pages_to_free) + return pages_freed; + + pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed); + + return pages_freed; } static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, @@ -540,8 +800,12 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, { struct virtio_balloon *vb = container_of(shrinker, struct virtio_balloon, shrinker); + unsigned long count; - return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; + count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; + count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER; + + return count; } static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) @@ -604,6 +868,31 @@ static int virtballoon_probe(struct virtio_device *vdev) } vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + /* + * There is always one entry reserved for cmd id, so the ring + * size needs to be at least two to report free page hints. + */ + if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { + err = -ENOSPC; + goto out_del_vqs; + } + vb->balloon_wq = alloc_workqueue("balloon-wq", + WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); + if (!vb->balloon_wq) { + err = -ENOMEM; + goto out_del_vqs; + } + INIT_WORK(&vb->report_free_page_work, report_free_page_func); + vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP; + vb->cmd_id_active = cpu_to_virtio32(vb->vdev, + VIRTIO_BALLOON_CMD_ID_STOP); + vb->cmd_id_stop = cpu_to_virtio32(vb->vdev, + VIRTIO_BALLOON_CMD_ID_STOP); + vb->num_free_page_blocks = 0; + spin_lock_init(&vb->free_page_list_lock); + INIT_LIST_HEAD(&vb->free_page_list); + } /* * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a * shrinker needs to be registered to relieve memory pressure. @@ -611,7 +900,7 @@ static int virtballoon_probe(struct virtio_device *vdev) if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { err = virtio_balloon_register_shrinker(vb); if (err) - goto out_del_vqs; + goto out_del_balloon_wq; } virtio_device_ready(vdev); @@ -619,6 +908,9 @@ static int virtballoon_probe(struct virtio_device *vdev) virtballoon_changed(vdev); return 0; +out_del_balloon_wq: + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + destroy_workqueue(vb->balloon_wq); out_del_vqs: vdev->config->del_vqs(vdev); out_free_vb: @@ -652,6 +944,11 @@ static void virtballoon_remove(struct virtio_device *vdev) cancel_work_sync(&vb->update_balloon_size_work); cancel_work_sync(&vb->update_balloon_stats_work); + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + cancel_work_sync(&vb->report_free_page_work); + destroy_workqueue(vb->balloon_wq); + } + remove_common(vb); #ifdef CONFIG_BALLOON_COMPACTION if (vb->vb_dev_info.inode) @@ -703,6 +1000,7 @@ static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, + VIRTIO_BALLOON_F_FREE_PAGE_HINT, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 13b8cb563892..47c9eb401c08 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -34,15 +34,20 @@ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ +#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 +#define VIRTIO_BALLOON_CMD_ID_STOP 0 +#define VIRTIO_BALLOON_CMD_ID_DONE 1 struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ __u32 num_pages; /* Number of pages we've actually got in balloon. */ __u32 actual; + /* Free page report command id, readonly by guest */ + __u32 free_page_report_cmd_id; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ -- cgit From d95f58f4a6ca002126c36c530fb096519c94baef Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 27 Aug 2018 09:32:18 +0800 Subject: mm/page_poison: expose page_poisoning_enabled to kernel modules In some usages, e.g. virtio-balloon, a kernel module needs to know if page poisoning is in use. This patch exposes the page_poisoning_enabled function to kernel modules. Signed-off-by: Wei Wang Cc: Andrew Morton Cc: Michal Hocko Cc: Michael S. Tsirkin Acked-by: Andrew Morton Signed-off-by: Michael S. Tsirkin --- mm/page_poison.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/page_poison.c b/mm/page_poison.c index aa2b3d34e8ea..830f60489b14 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf) } early_param("page_poison", early_page_poison_param); +/** + * page_poisoning_enabled - check if page poisoning is enabled + * + * Return true if page poisoning is enabled, or false if not. + */ bool page_poisoning_enabled(void) { /* @@ -29,6 +34,7 @@ bool page_poisoning_enabled(void) (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && debug_pagealloc_enabled())); } +EXPORT_SYMBOL_GPL(page_poisoning_enabled); static void poison_page(struct page *page) { -- cgit From 2e991629bcf55a43681aec1ee096eeb03cf81709 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 27 Aug 2018 09:32:19 +0800 Subject: virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the guest is using page poisoning. Guest writes to the poison_val config field to tell host about the page poisoning value that is in use. Suggested-by: Michael S. Tsirkin Signed-off-by: Wei Wang Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Andrew Morton Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 10 ++++++++++ include/uapi/linux/virtio_balloon.h | 3 +++ 2 files changed, 13 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index a18567889289..728ecd1eea30 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -825,6 +825,7 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; + __u32 poison_val; int err; if (!vdev->config->get) { @@ -892,6 +893,11 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->num_free_page_blocks = 0; spin_lock_init(&vb->free_page_list_lock); INIT_LIST_HEAD(&vb->free_page_list); + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { + memset(&poison_val, PAGE_POISON, sizeof(poison_val)); + virtio_cwrite(vb->vdev, struct virtio_balloon_config, + poison_val, &poison_val); + } } /* * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a @@ -992,6 +998,9 @@ static int virtballoon_restore(struct virtio_device *vdev) static int virtballoon_validate(struct virtio_device *vdev) { + if (!page_poisoning_enabled()) + __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON); + __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); return 0; } @@ -1001,6 +1010,7 @@ static unsigned int features[] = { VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, VIRTIO_BALLOON_F_FREE_PAGE_HINT, + VIRTIO_BALLOON_F_PAGE_POISON, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 47c9eb401c08..a1966cd7b677 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -35,6 +35,7 @@ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ #define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ +#define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -48,6 +49,8 @@ struct virtio_balloon_config { __u32 actual; /* Free page report command id, readonly by guest */ __u32 free_page_report_cmd_id; + /* Stores PAGE_POISON if page poisoning is in use */ + __u32 poison_val; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ -- cgit From 4542d623c7134bc1738f8a68ccb6dd546f1c264f Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Wed, 22 Aug 2018 13:21:53 -0600 Subject: vhost/scsi: truncate T10 PI iov_iter to prot_bytes Commands with protection information included were not truncating the protection iov_iter to the number of protection bytes in the command. This resulted in vhost_scsi mis-calculating the size of the protection SGL in vhost_scsi_calc_sgls(), and including both the protection and data SG entries in the protection SGL. Fixes: 09b13fa8c1a1 ("vhost/scsi: Add ANY_LAYOUT support in vhost_scsi_handle_vq") Signed-off-by: Greg Edwards Signed-off-by: Michael S. Tsirkin Fixes: 09b13fa8c1a1093e9458549ac8bb203a7c65c62a Cc: stable@vger.kernel.org Reviewed-by: Paolo Bonzini --- drivers/vhost/scsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c24bb690680b..e7e3ae13516d 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -964,7 +964,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); } /* - * Set prot_iter to data_iter, and advance past any + * Set prot_iter to data_iter and truncate it to + * prot_bytes, and advance data_iter past any * preceeding prot_bytes that may be present. * * Also fix up the exp_data_len to reflect only the @@ -973,6 +974,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (prot_bytes) { exp_data_len -= prot_bytes; prot_iter = data_iter; + iov_iter_truncate(&prot_iter, prot_bytes); iov_iter_advance(&data_iter, prot_bytes); } tag = vhost64_to_cpu(vq, v_req_pi.tag); -- cgit From 0d02dbd68c47b66367130b696baef7246720791c Mon Sep 17 00:00:00 2001 From: Bijan Mottahedeh Date: Mon, 17 Sep 2018 17:09:47 -0700 Subject: vhost/scsi: Respond to control queue operations The vhost-scsi driver currently does not handle any control queue operations. In particular, vhost_scsi_ctl_handle_kick, merely prints out a debug message but does nothing else. This can cause guest VMs to hang. As part of SCSI recovery from an error, e.g., an I/O timeout, the SCSI midlayer attempts to abort the failed operation. The SCSI virtio driver translates the abort to a SCSI TMF request that gets put on the control queue (virtscsi_abort -> virtscsi_tmf). The SCSI virtio driver then waits indefinitely for this request to be completed, but it never will because vhost-scsi never responds to that request. To avoid a hang, always respond to control queue operations; explicitly reject TMF requests, and return a no-op response to event requests. Signed-off-by: Bijan Mottahedeh Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e7e3ae13516d..1c33d6e39152 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1050,9 +1050,199 @@ out: mutex_unlock(&vq->mutex); } +static void +vhost_scsi_send_tmf_resp(struct vhost_scsi *vs, + struct vhost_virtqueue *vq, + int head, unsigned int out) +{ + struct virtio_scsi_ctrl_tmf_resp __user *resp; + struct virtio_scsi_ctrl_tmf_resp rsp; + int ret; + + pr_debug("%s\n", __func__); + memset(&rsp, 0, sizeof(rsp)); + rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; + resp = vq->iov[out].iov_base; + ret = __copy_to_user(resp, &rsp, sizeof(rsp)); + if (!ret) + vhost_add_used_and_signal(&vs->dev, vq, head, 0); + else + pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n"); +} + +static void +vhost_scsi_send_an_resp(struct vhost_scsi *vs, + struct vhost_virtqueue *vq, + int head, unsigned int out) +{ + struct virtio_scsi_ctrl_an_resp __user *resp; + struct virtio_scsi_ctrl_an_resp rsp; + int ret; + + pr_debug("%s\n", __func__); + memset(&rsp, 0, sizeof(rsp)); /* event_actual = 0 */ + rsp.response = VIRTIO_SCSI_S_OK; + resp = vq->iov[out].iov_base; + ret = __copy_to_user(resp, &rsp, sizeof(rsp)); + if (!ret) + vhost_add_used_and_signal(&vs->dev, vq, head, 0); + else + pr_err("Faulted on virtio_scsi_ctrl_an_resp\n"); +} + +static void +vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) +{ + union { + __virtio32 type; + struct virtio_scsi_ctrl_an_req an; + struct virtio_scsi_ctrl_tmf_req tmf; + } v_req; + struct iov_iter out_iter; + unsigned int out = 0, in = 0; + int head; + size_t req_size, rsp_size, typ_size; + size_t out_size, in_size; + u8 *lunp; + void *req; + + mutex_lock(&vq->mutex); + /* + * We can handle the vq only after the endpoint is setup by calling the + * VHOST_SCSI_SET_ENDPOINT ioctl. + */ + if (!vq->private_data) + goto out; + + vhost_disable_notify(&vs->dev, vq); + + for (;;) { + head = vhost_get_vq_desc(vq, vq->iov, + ARRAY_SIZE(vq->iov), &out, &in, + NULL, NULL); + pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n", + head, out, in); + /* On error, stop handling until the next kick. */ + if (unlikely(head < 0)) + break; + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (head == vq->num) { + if (unlikely(vhost_enable_notify(&vs->dev, vq))) { + vhost_disable_notify(&vs->dev, vq); + continue; + } + break; + } + + /* + * Get the size of request and response buffers. + */ + out_size = iov_length(vq->iov, out); + in_size = iov_length(&vq->iov[out], in); + + /* + * Copy over the virtio-scsi request header, which for a + * ANY_LAYOUT enabled guest may span multiple iovecs, or a + * single iovec may contain both the header + outgoing + * WRITE payloads. + * + * copy_from_iter() will advance out_iter, so that it will + * point at the start of the outgoing WRITE payload, if + * DMA_TO_DEVICE is set. + */ + iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size); + + req = &v_req.type; + typ_size = sizeof(v_req.type); + + if (unlikely(!copy_from_iter_full(req, typ_size, &out_iter))) { + vq_err(vq, "Faulted on copy_from_iter tmf type\n"); + /* + * The size of the response buffer varies based on + * the request type and must be validated against it. + * Since the request type is not known, don't send + * a response. + */ + continue; + } + + switch (v_req.type) { + case VIRTIO_SCSI_T_TMF: + req = &v_req.tmf; + lunp = &v_req.tmf.lun[0]; + req_size = sizeof(struct virtio_scsi_ctrl_tmf_req); + rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp); + break; + case VIRTIO_SCSI_T_AN_QUERY: + case VIRTIO_SCSI_T_AN_SUBSCRIBE: + req = &v_req.an; + lunp = &v_req.an.lun[0]; + req_size = sizeof(struct virtio_scsi_ctrl_an_req); + rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp); + break; + default: + vq_err(vq, "Unknown control request %d", v_req.type); + continue; + } + + /* + * Check for a sane response buffer so we can report early + * errors back to the guest. + */ + if (unlikely(in_size < rsp_size)) { + vq_err(vq, + "Resp buf too small, need min %zu bytes got %zu", + rsp_size, in_size); + /* + * Notifications are disabled at this point; + * continue so they can be eventually enabled + * when processing terminates. + */ + continue; + } + + if (unlikely(out_size < req_size)) { + vq_err(vq, + "Req buf too small, need min %zu bytes got %zu", + req_size, out_size); + vhost_scsi_send_bad_target(vs, vq, head, out); + continue; + } + + req += typ_size; + req_size -= typ_size; + + if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) { + vq_err(vq, "Faulted on copy_from_iter\n"); + vhost_scsi_send_bad_target(vs, vq, head, out); + continue; + } + + /* virtio-scsi spec requires byte 0 of the lun to be 1 */ + if (unlikely(*lunp != 1)) { + vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp); + vhost_scsi_send_bad_target(vs, vq, head, out); + continue; + } + + if (v_req.type == VIRTIO_SCSI_T_TMF) { + pr_debug("%s tmf %d\n", __func__, v_req.tmf.subtype); + vhost_scsi_send_tmf_resp(vs, vq, head, out); + } else + vhost_scsi_send_an_resp(vs, vq, head, out); + } +out: + mutex_unlock(&vq->mutex); +} + static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) { + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev); + pr_debug("%s: The handling func for control queue.\n", __func__); + vhost_scsi_ctl_handle_vq(vs, vq); } static void -- cgit From 3f8ca2e115e55af4c15d97dda635e948d2e380be Mon Sep 17 00:00:00 2001 From: Bijan Mottahedeh Date: Mon, 17 Sep 2018 17:09:48 -0700 Subject: vhost/scsi: Extract common handling code from control queue handler Prepare to change the request queue handler to use common handling routines. Signed-off-by: Bijan Mottahedeh Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 271 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 172 insertions(+), 99 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 1c33d6e39152..4cd03a1d7f21 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -203,6 +203,19 @@ struct vhost_scsi { int vs_events_nr; /* num of pending events, protected by vq->mutex */ }; +/* + * Context for processing request and control queue operations. + */ +struct vhost_scsi_ctx { + int head; + unsigned int out, in; + size_t req_size, rsp_size; + size_t out_size, in_size; + u8 *target, *lunp; + void *req; + struct iov_iter out_iter; +}; + static struct workqueue_struct *vhost_scsi_workqueue; /* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */ @@ -1050,10 +1063,107 @@ out: mutex_unlock(&vq->mutex); } +static int +vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq, + struct vhost_scsi_ctx *vc) +{ + int ret = -ENXIO; + + vc->head = vhost_get_vq_desc(vq, vq->iov, + ARRAY_SIZE(vq->iov), &vc->out, &vc->in, + NULL, NULL); + + pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n", + vc->head, vc->out, vc->in); + + /* On error, stop handling until the next kick. */ + if (unlikely(vc->head < 0)) + goto done; + + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (vc->head == vq->num) { + if (unlikely(vhost_enable_notify(&vs->dev, vq))) { + vhost_disable_notify(&vs->dev, vq); + ret = -EAGAIN; + } + goto done; + } + + /* + * Get the size of request and response buffers. + */ + vc->out_size = iov_length(vq->iov, vc->out); + vc->in_size = iov_length(&vq->iov[vc->out], vc->in); + + /* + * Copy over the virtio-scsi request header, which for a + * ANY_LAYOUT enabled guest may span multiple iovecs, or a + * single iovec may contain both the header + outgoing + * WRITE payloads. + * + * copy_from_iter() will advance out_iter, so that it will + * point at the start of the outgoing WRITE payload, if + * DMA_TO_DEVICE is set. + */ + iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size); + ret = 0; + +done: + return ret; +} + +static int +vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc) +{ + if (unlikely(vc->in_size < vc->rsp_size)) { + vq_err(vq, + "Response buf too small, need min %zu bytes got %zu", + vc->rsp_size, vc->in_size); + return -EINVAL; + } else if (unlikely(vc->out_size < vc->req_size)) { + vq_err(vq, + "Request buf too small, need min %zu bytes got %zu", + vc->req_size, vc->out_size); + return -EIO; + } + + return 0; +} + +static int +vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc, + struct vhost_scsi_tpg **tpgp) +{ + int ret = -EIO; + + if (unlikely(!copy_from_iter_full(vc->req, vc->req_size, + &vc->out_iter))) + vq_err(vq, "Faulted on copy_from_iter\n"); + else if (unlikely(*vc->lunp != 1)) + /* virtio-scsi spec requires byte 0 of the lun to be 1 */ + vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp); + else { + struct vhost_scsi_tpg **vs_tpg, *tpg; + + vs_tpg = vq->private_data; /* validated at handler entry */ + + tpg = READ_ONCE(vs_tpg[*vc->target]); + if (unlikely(!tpg)) + vq_err(vq, "Target 0x%x does not exist\n", *vc->target); + else { + if (tpgp) + *tpgp = tpg; + ret = 0; + } + } + + return ret; +} + static void vhost_scsi_send_tmf_resp(struct vhost_scsi *vs, - struct vhost_virtqueue *vq, - int head, unsigned int out) + struct vhost_virtqueue *vq, + struct vhost_scsi_ctx *vc) { struct virtio_scsi_ctrl_tmf_resp __user *resp; struct virtio_scsi_ctrl_tmf_resp rsp; @@ -1062,18 +1172,18 @@ vhost_scsi_send_tmf_resp(struct vhost_scsi *vs, pr_debug("%s\n", __func__); memset(&rsp, 0, sizeof(rsp)); rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; - resp = vq->iov[out].iov_base; + resp = vq->iov[vc->out].iov_base; ret = __copy_to_user(resp, &rsp, sizeof(rsp)); if (!ret) - vhost_add_used_and_signal(&vs->dev, vq, head, 0); + vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); else pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n"); } static void vhost_scsi_send_an_resp(struct vhost_scsi *vs, - struct vhost_virtqueue *vq, - int head, unsigned int out) + struct vhost_virtqueue *vq, + struct vhost_scsi_ctx *vc) { struct virtio_scsi_ctrl_an_resp __user *resp; struct virtio_scsi_ctrl_an_resp rsp; @@ -1082,10 +1192,10 @@ vhost_scsi_send_an_resp(struct vhost_scsi *vs, pr_debug("%s\n", __func__); memset(&rsp, 0, sizeof(rsp)); /* event_actual = 0 */ rsp.response = VIRTIO_SCSI_S_OK; - resp = vq->iov[out].iov_base; + resp = vq->iov[vc->out].iov_base; ret = __copy_to_user(resp, &rsp, sizeof(rsp)); if (!ret) - vhost_add_used_and_signal(&vs->dev, vq, head, 0); + vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); else pr_err("Faulted on virtio_scsi_ctrl_an_resp\n"); } @@ -1098,13 +1208,9 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) struct virtio_scsi_ctrl_an_req an; struct virtio_scsi_ctrl_tmf_req tmf; } v_req; - struct iov_iter out_iter; - unsigned int out = 0, in = 0; - int head; - size_t req_size, rsp_size, typ_size; - size_t out_size, in_size; - u8 *lunp; - void *req; + struct vhost_scsi_ctx vc; + size_t typ_size; + int ret; mutex_lock(&vq->mutex); /* @@ -1114,52 +1220,28 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (!vq->private_data) goto out; + memset(&vc, 0, sizeof(vc)); + vhost_disable_notify(&vs->dev, vq); for (;;) { - head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), &out, &in, - NULL, NULL); - pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n", - head, out, in); - /* On error, stop handling until the next kick. */ - if (unlikely(head < 0)) - break; - /* Nothing new? Wait for eventfd to tell us they refilled. */ - if (head == vq->num) { - if (unlikely(vhost_enable_notify(&vs->dev, vq))) { - vhost_disable_notify(&vs->dev, vq); - continue; - } - break; - } + ret = vhost_scsi_get_desc(vs, vq, &vc); + if (ret) + goto err; /* - * Get the size of request and response buffers. + * Get the request type first in order to setup + * other parameters dependent on the type. */ - out_size = iov_length(vq->iov, out); - in_size = iov_length(&vq->iov[out], in); - - /* - * Copy over the virtio-scsi request header, which for a - * ANY_LAYOUT enabled guest may span multiple iovecs, or a - * single iovec may contain both the header + outgoing - * WRITE payloads. - * - * copy_from_iter() will advance out_iter, so that it will - * point at the start of the outgoing WRITE payload, if - * DMA_TO_DEVICE is set. - */ - iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size); - - req = &v_req.type; + vc.req = &v_req.type; typ_size = sizeof(v_req.type); - if (unlikely(!copy_from_iter_full(req, typ_size, &out_iter))) { + if (unlikely(!copy_from_iter_full(vc.req, typ_size, + &vc.out_iter))) { vq_err(vq, "Faulted on copy_from_iter tmf type\n"); /* - * The size of the response buffer varies based on - * the request type and must be validated against it. + * The size of the response buffer depends on the + * request type and must be validated against it. * Since the request type is not known, don't send * a response. */ @@ -1168,17 +1250,19 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) switch (v_req.type) { case VIRTIO_SCSI_T_TMF: - req = &v_req.tmf; - lunp = &v_req.tmf.lun[0]; - req_size = sizeof(struct virtio_scsi_ctrl_tmf_req); - rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp); + vc.req = &v_req.tmf; + vc.req_size = sizeof(struct virtio_scsi_ctrl_tmf_req); + vc.rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp); + vc.lunp = &v_req.tmf.lun[0]; + vc.target = &v_req.tmf.lun[1]; break; case VIRTIO_SCSI_T_AN_QUERY: case VIRTIO_SCSI_T_AN_SUBSCRIBE: - req = &v_req.an; - lunp = &v_req.an.lun[0]; - req_size = sizeof(struct virtio_scsi_ctrl_an_req); - rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp); + vc.req = &v_req.an; + vc.req_size = sizeof(struct virtio_scsi_ctrl_an_req); + vc.rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp); + vc.lunp = &v_req.an.lun[0]; + vc.target = NULL; break; default: vq_err(vq, "Unknown control request %d", v_req.type); @@ -1186,50 +1270,39 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) } /* - * Check for a sane response buffer so we can report early - * errors back to the guest. + * Validate the size of request and response buffers. + * Check for a sane response buffer so we can report + * early errors back to the guest. */ - if (unlikely(in_size < rsp_size)) { - vq_err(vq, - "Resp buf too small, need min %zu bytes got %zu", - rsp_size, in_size); - /* - * Notifications are disabled at this point; - * continue so they can be eventually enabled - * when processing terminates. - */ - continue; - } + ret = vhost_scsi_chk_size(vq, &vc); + if (ret) + goto err; - if (unlikely(out_size < req_size)) { - vq_err(vq, - "Req buf too small, need min %zu bytes got %zu", - req_size, out_size); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; - } - - req += typ_size; - req_size -= typ_size; - - if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) { - vq_err(vq, "Faulted on copy_from_iter\n"); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; - } + /* + * Get the rest of the request now that its size is known. + */ + vc.req += typ_size; + vc.req_size -= typ_size; - /* virtio-scsi spec requires byte 0 of the lun to be 1 */ - if (unlikely(*lunp != 1)) { - vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; - } + ret = vhost_scsi_get_req(vq, &vc, NULL); + if (ret) + goto err; - if (v_req.type == VIRTIO_SCSI_T_TMF) { - pr_debug("%s tmf %d\n", __func__, v_req.tmf.subtype); - vhost_scsi_send_tmf_resp(vs, vq, head, out); - } else - vhost_scsi_send_an_resp(vs, vq, head, out); + if (v_req.type == VIRTIO_SCSI_T_TMF) + vhost_scsi_send_tmf_resp(vs, vq, &vc); + else + vhost_scsi_send_an_resp(vs, vq, &vc); +err: + /* + * ENXIO: No more requests, or read error, wait for next kick + * EINVAL: Invalid response buffer, drop the request + * EIO: Respond with bad target + * EAGAIN: Pending request + */ + if (ret == -ENXIO) + break; + else if (ret == -EIO) + vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); } out: mutex_unlock(&vq->mutex); -- cgit From 09d7583294aada625349b6f80f1e1c730b5a5208 Mon Sep 17 00:00:00 2001 From: Bijan Mottahedeh Date: Mon, 17 Sep 2018 17:09:49 -0700 Subject: vhost/scsi: Use common handling code in request queue handler Change the request queue handler to use common handling routines same as the control queue handler. Signed-off-by: Bijan Mottahedeh Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 361 +++++++++++++++++++++++---------------------------- 1 file changed, 164 insertions(+), 197 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 4cd03a1d7f21..50dffe83714c 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -813,24 +813,120 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs, pr_err("Faulted on virtio_scsi_cmd_resp\n"); } +static int +vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq, + struct vhost_scsi_ctx *vc) +{ + int ret = -ENXIO; + + vc->head = vhost_get_vq_desc(vq, vq->iov, + ARRAY_SIZE(vq->iov), &vc->out, &vc->in, + NULL, NULL); + + pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n", + vc->head, vc->out, vc->in); + + /* On error, stop handling until the next kick. */ + if (unlikely(vc->head < 0)) + goto done; + + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (vc->head == vq->num) { + if (unlikely(vhost_enable_notify(&vs->dev, vq))) { + vhost_disable_notify(&vs->dev, vq); + ret = -EAGAIN; + } + goto done; + } + + /* + * Get the size of request and response buffers. + * FIXME: Not correct for BIDI operation + */ + vc->out_size = iov_length(vq->iov, vc->out); + vc->in_size = iov_length(&vq->iov[vc->out], vc->in); + + /* + * Copy over the virtio-scsi request header, which for a + * ANY_LAYOUT enabled guest may span multiple iovecs, or a + * single iovec may contain both the header + outgoing + * WRITE payloads. + * + * copy_from_iter() will advance out_iter, so that it will + * point at the start of the outgoing WRITE payload, if + * DMA_TO_DEVICE is set. + */ + iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size); + ret = 0; + +done: + return ret; +} + +static int +vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc) +{ + if (unlikely(vc->in_size < vc->rsp_size)) { + vq_err(vq, + "Response buf too small, need min %zu bytes got %zu", + vc->rsp_size, vc->in_size); + return -EINVAL; + } else if (unlikely(vc->out_size < vc->req_size)) { + vq_err(vq, + "Request buf too small, need min %zu bytes got %zu", + vc->req_size, vc->out_size); + return -EIO; + } + + return 0; +} + +static int +vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc, + struct vhost_scsi_tpg **tpgp) +{ + int ret = -EIO; + + if (unlikely(!copy_from_iter_full(vc->req, vc->req_size, + &vc->out_iter))) { + vq_err(vq, "Faulted on copy_from_iter\n"); + } else if (unlikely(*vc->lunp != 1)) { + /* virtio-scsi spec requires byte 0 of the lun to be 1 */ + vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp); + } else { + struct vhost_scsi_tpg **vs_tpg, *tpg; + + vs_tpg = vq->private_data; /* validated at handler entry */ + + tpg = READ_ONCE(vs_tpg[*vc->target]); + if (unlikely(!tpg)) { + vq_err(vq, "Target 0x%x does not exist\n", *vc->target); + } else { + if (tpgp) + *tpgp = tpg; + ret = 0; + } + } + + return ret; +} + static void vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) { struct vhost_scsi_tpg **vs_tpg, *tpg; struct virtio_scsi_cmd_req v_req; struct virtio_scsi_cmd_req_pi v_req_pi; + struct vhost_scsi_ctx vc; struct vhost_scsi_cmd *cmd; - struct iov_iter out_iter, in_iter, prot_iter, data_iter; + struct iov_iter in_iter, prot_iter, data_iter; u64 tag; u32 exp_data_len, data_direction; - unsigned int out = 0, in = 0; - int head, ret, prot_bytes; - size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp); - size_t out_size, in_size; + int ret, prot_bytes; u16 lun; - u8 *target, *lunp, task_attr; + u8 task_attr; bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI); - void *req, *cdb; + void *cdb; mutex_lock(&vq->mutex); /* @@ -841,85 +937,47 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (!vs_tpg) goto out; + memset(&vc, 0, sizeof(vc)); + vc.rsp_size = sizeof(struct virtio_scsi_cmd_resp); + vhost_disable_notify(&vs->dev, vq); for (;;) { - head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), &out, &in, - NULL, NULL); - pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n", - head, out, in); - /* On error, stop handling until the next kick. */ - if (unlikely(head < 0)) - break; - /* Nothing new? Wait for eventfd to tell us they refilled. */ - if (head == vq->num) { - if (unlikely(vhost_enable_notify(&vs->dev, vq))) { - vhost_disable_notify(&vs->dev, vq); - continue; - } - break; - } - /* - * Check for a sane response buffer so we can report early - * errors back to the guest. - */ - if (unlikely(vq->iov[out].iov_len < rsp_size)) { - vq_err(vq, "Expecting at least virtio_scsi_cmd_resp" - " size, got %zu bytes\n", vq->iov[out].iov_len); - break; - } + ret = vhost_scsi_get_desc(vs, vq, &vc); + if (ret) + goto err; + /* * Setup pointers and values based upon different virtio-scsi * request header if T10_PI is enabled in KVM guest. */ if (t10_pi) { - req = &v_req_pi; - req_size = sizeof(v_req_pi); - lunp = &v_req_pi.lun[0]; - target = &v_req_pi.lun[1]; + vc.req = &v_req_pi; + vc.req_size = sizeof(v_req_pi); + vc.lunp = &v_req_pi.lun[0]; + vc.target = &v_req_pi.lun[1]; } else { - req = &v_req; - req_size = sizeof(v_req); - lunp = &v_req.lun[0]; - target = &v_req.lun[1]; + vc.req = &v_req; + vc.req_size = sizeof(v_req); + vc.lunp = &v_req.lun[0]; + vc.target = &v_req.lun[1]; } - /* - * FIXME: Not correct for BIDI operation - */ - out_size = iov_length(vq->iov, out); - in_size = iov_length(&vq->iov[out], in); /* - * Copy over the virtio-scsi request header, which for a - * ANY_LAYOUT enabled guest may span multiple iovecs, or a - * single iovec may contain both the header + outgoing - * WRITE payloads. - * - * copy_from_iter() will advance out_iter, so that it will - * point at the start of the outgoing WRITE payload, if - * DMA_TO_DEVICE is set. + * Validate the size of request and response buffers. + * Check for a sane response buffer so we can report + * early errors back to the guest. */ - iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size); + ret = vhost_scsi_chk_size(vq, &vc); + if (ret) + goto err; - if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) { - vq_err(vq, "Faulted on copy_from_iter\n"); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; - } - /* virtio-scsi spec requires byte 0 of the lun to be 1 */ - if (unlikely(*lunp != 1)) { - vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; - } + ret = vhost_scsi_get_req(vq, &vc, &tpg); + if (ret) + goto err; + + ret = -EIO; /* bad target on any error from here on */ - tpg = READ_ONCE(vs_tpg[*target]); - if (unlikely(!tpg)) { - /* Target does not exist, fail the request */ - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; - } /* * Determine data_direction by calculating the total outgoing * iovec sizes + incoming iovec sizes vs. virtio-scsi request + @@ -937,17 +995,17 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) */ prot_bytes = 0; - if (out_size > req_size) { + if (vc.out_size > vc.req_size) { data_direction = DMA_TO_DEVICE; - exp_data_len = out_size - req_size; - data_iter = out_iter; - } else if (in_size > rsp_size) { + exp_data_len = vc.out_size - vc.req_size; + data_iter = vc.out_iter; + } else if (vc.in_size > vc.rsp_size) { data_direction = DMA_FROM_DEVICE; - exp_data_len = in_size - rsp_size; + exp_data_len = vc.in_size - vc.rsp_size; - iov_iter_init(&in_iter, READ, &vq->iov[out], in, - rsp_size + exp_data_len); - iov_iter_advance(&in_iter, rsp_size); + iov_iter_init(&in_iter, READ, &vq->iov[vc.out], vc.in, + vc.rsp_size + exp_data_len); + iov_iter_advance(&in_iter, vc.rsp_size); data_iter = in_iter; } else { data_direction = DMA_NONE; @@ -963,16 +1021,14 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (data_direction != DMA_TO_DEVICE) { vq_err(vq, "Received non zero pi_bytesout," " but wrong data_direction\n"); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; + goto err; } prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout); } else if (v_req_pi.pi_bytesin) { if (data_direction != DMA_FROM_DEVICE) { vq_err(vq, "Received non zero pi_bytesin," " but wrong data_direction\n"); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; + goto err; } prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); } @@ -1011,8 +1067,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) vq_err(vq, "Received SCSI CDB with command_size: %d that" " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n", scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; + goto err; } cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr, exp_data_len + prot_bytes, @@ -1020,13 +1075,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (IS_ERR(cmd)) { vq_err(vq, "vhost_scsi_get_tag failed %ld\n", PTR_ERR(cmd)); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; + goto err; } cmd->tvc_vhost = vs; cmd->tvc_vq = vq; - cmd->tvc_resp_iov = vq->iov[out]; - cmd->tvc_in_iovs = in; + cmd->tvc_resp_iov = vq->iov[vc.out]; + cmd->tvc_in_iovs = vc.in; pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n", cmd->tvc_cdb[0], cmd->tvc_lun); @@ -1034,14 +1088,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) " %d\n", cmd, exp_data_len, prot_bytes, data_direction); if (data_direction != DMA_NONE) { - ret = vhost_scsi_mapal(cmd, - prot_bytes, &prot_iter, - exp_data_len, &data_iter); - if (unlikely(ret)) { + if (unlikely(vhost_scsi_mapal(cmd, prot_bytes, + &prot_iter, exp_data_len, + &data_iter))) { vq_err(vq, "Failed to map iov to sgl\n"); vhost_scsi_release_cmd(&cmd->tvc_se_cmd); - vhost_scsi_send_bad_target(vs, vq, head, out); - continue; + goto err; } } /* @@ -1049,7 +1101,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) * complete the virtio-scsi request in TCM callback context via * vhost_scsi_queue_data_in() and vhost_scsi_queue_status() */ - cmd->tvc_vq_desc = head; + cmd->tvc_vq_desc = vc.head; /* * Dispatch cmd descriptor for cmwq execution in process * context provided by vhost_scsi_workqueue. This also ensures @@ -1058,112 +1110,27 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) */ INIT_WORK(&cmd->work, vhost_scsi_submission_work); queue_work(vhost_scsi_workqueue, &cmd->work); + ret = 0; +err: + /* + * ENXIO: No more requests, or read error, wait for next kick + * EINVAL: Invalid response buffer, drop the request + * EIO: Respond with bad target + * EAGAIN: Pending request + */ + if (ret == -ENXIO) + break; + else if (ret == -EIO) + vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); } out: mutex_unlock(&vq->mutex); } -static int -vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq, - struct vhost_scsi_ctx *vc) -{ - int ret = -ENXIO; - - vc->head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), &vc->out, &vc->in, - NULL, NULL); - - pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n", - vc->head, vc->out, vc->in); - - /* On error, stop handling until the next kick. */ - if (unlikely(vc->head < 0)) - goto done; - - /* Nothing new? Wait for eventfd to tell us they refilled. */ - if (vc->head == vq->num) { - if (unlikely(vhost_enable_notify(&vs->dev, vq))) { - vhost_disable_notify(&vs->dev, vq); - ret = -EAGAIN; - } - goto done; - } - - /* - * Get the size of request and response buffers. - */ - vc->out_size = iov_length(vq->iov, vc->out); - vc->in_size = iov_length(&vq->iov[vc->out], vc->in); - - /* - * Copy over the virtio-scsi request header, which for a - * ANY_LAYOUT enabled guest may span multiple iovecs, or a - * single iovec may contain both the header + outgoing - * WRITE payloads. - * - * copy_from_iter() will advance out_iter, so that it will - * point at the start of the outgoing WRITE payload, if - * DMA_TO_DEVICE is set. - */ - iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size); - ret = 0; - -done: - return ret; -} - -static int -vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc) -{ - if (unlikely(vc->in_size < vc->rsp_size)) { - vq_err(vq, - "Response buf too small, need min %zu bytes got %zu", - vc->rsp_size, vc->in_size); - return -EINVAL; - } else if (unlikely(vc->out_size < vc->req_size)) { - vq_err(vq, - "Request buf too small, need min %zu bytes got %zu", - vc->req_size, vc->out_size); - return -EIO; - } - - return 0; -} - -static int -vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc, - struct vhost_scsi_tpg **tpgp) -{ - int ret = -EIO; - - if (unlikely(!copy_from_iter_full(vc->req, vc->req_size, - &vc->out_iter))) - vq_err(vq, "Faulted on copy_from_iter\n"); - else if (unlikely(*vc->lunp != 1)) - /* virtio-scsi spec requires byte 0 of the lun to be 1 */ - vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp); - else { - struct vhost_scsi_tpg **vs_tpg, *tpg; - - vs_tpg = vq->private_data; /* validated at handler entry */ - - tpg = READ_ONCE(vs_tpg[*vc->target]); - if (unlikely(!tpg)) - vq_err(vq, "Target 0x%x does not exist\n", *vc->target); - else { - if (tpgp) - *tpgp = tpg; - ret = 0; - } - } - - return ret; -} - static void -vhost_scsi_send_tmf_resp(struct vhost_scsi *vs, - struct vhost_virtqueue *vq, - struct vhost_scsi_ctx *vc) +vhost_scsi_send_tmf_reject(struct vhost_scsi *vs, + struct vhost_virtqueue *vq, + struct vhost_scsi_ctx *vc) { struct virtio_scsi_ctrl_tmf_resp __user *resp; struct virtio_scsi_ctrl_tmf_resp rsp; @@ -1289,7 +1256,7 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) goto err; if (v_req.type == VIRTIO_SCSI_T_TMF) - vhost_scsi_send_tmf_resp(vs, vq, &vc); + vhost_scsi_send_tmf_reject(vs, vq, &vc); else vhost_scsi_send_an_resp(vs, vq, &vc); err: -- cgit From 79f800b2e76923cd8ce0aa659cb5c019d9643bc9 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 12 Oct 2018 16:46:37 +0100 Subject: MAINTAINERS: remove reference to bogus vsock file The file drivers/vhost/vsock.h never existed. Remove the bogus MAINTAINERS reference. Fixes: 433fc58e6bf2c8bd97e57153ed28e64fd78207b8 ("VSOCK: Introduce vhost_vsock.ko") Reported-by: Joe Perches Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b2f710eee67a..3b776daf4a85 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15542,7 +15542,6 @@ F: net/vmw_vsock/virtio_transport_common.c F: net/vmw_vsock/virtio_transport.c F: drivers/net/vsockmon.c F: drivers/vhost/vsock.c -F: drivers/vhost/vsock.h F: tools/testing/vsock/ VIRTIO CONSOLE DRIVER -- cgit