From c7cdff0e864713a089d7cb3a2b1136ba9a54881a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 13 Oct 2017 16:11:48 +0300 Subject: virtio_balloon: fix deadlock on OOM fill_balloon doing memory allocations under balloon_lock can cause a deadlock when leak_balloon is called from virtballoon_oom_notify and tries to take same lock. To fix, split page allocation and enqueue and do allocations outside the lock. Here's a detailed analysis of the deadlock by Tetsuo Handa: In leak_balloon(), mutex_lock(&vb->balloon_lock) is called in order to serialize against fill_balloon(). But in fill_balloon(), alloc_page(GFP_HIGHUSER[_MOVABLE] | __GFP_NOMEMALLOC | __GFP_NORETRY) is called with vb->balloon_lock mutex held. Since GFP_HIGHUSER[_MOVABLE] implies __GFP_DIRECT_RECLAIM | __GFP_IO | __GFP_FS, despite __GFP_NORETRY is specified, this allocation attempt might indirectly depend on somebody else's __GFP_DIRECT_RECLAIM memory allocation. And such indirect __GFP_DIRECT_RECLAIM memory allocation might call leak_balloon() via virtballoon_oom_notify() via blocking_notifier_call_chain() callback via out_of_memory() when it reached __alloc_pages_may_oom() and held oom_lock mutex. Since vb->balloon_lock mutex is already held by fill_balloon(), it will cause OOM lockup. Thread1 Thread2 fill_balloon() takes a balloon_lock balloon_page_enqueue() alloc_page(GFP_HIGHUSER_MOVABLE) direct reclaim (__GFP_FS context) takes a fs lock waits for that fs lock alloc_page(GFP_NOFS) __alloc_pages_may_oom() takes the oom_lock out_of_memory() blocking_notifier_call_chain() leak_balloon() tries to take that balloon_lock and deadlocks Reported-by: Tetsuo Handa Cc: Michal Hocko Cc: Wei Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 24 +++++++++++++++++++----- include/linux/balloon_compaction.h | 35 ++++++++++++++++++++++++++++++++++- mm/balloon_compaction.c | 28 +++++++++++++++++++++------- 3 files changed, 74 insertions(+), 13 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index f0b3a0b9d42f..7960746f7597 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -143,16 +143,17 @@ static void set_page_pfns(struct virtio_balloon *vb, static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) { - struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; unsigned num_allocated_pages; + unsigned num_pfns; + struct page *page; + LIST_HEAD(pages); /* We can only do one array worth at a time. */ num = min(num, ARRAY_SIZE(vb->pfns)); - mutex_lock(&vb->balloon_lock); - for (vb->num_pfns = 0; vb->num_pfns < num; - vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { - struct page *page = balloon_page_enqueue(vb_dev_info); + for (num_pfns = 0; num_pfns < num; + num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { + struct page *page = balloon_page_alloc(); if (!page) { dev_info_ratelimited(&vb->vdev->dev, @@ -162,6 +163,19 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) msleep(200); break; } + + balloon_page_push(&pages, page); + } + + mutex_lock(&vb->balloon_lock); + + vb->num_pfns = 0; + + while ((page = balloon_page_pop(&pages))) { + balloon_page_enqueue(&vb->vb_dev_info, page); + + vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; + set_page_pfns(vb, vb->pfns + vb->num_pfns, page); vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; if (!virtio_has_feature(vb->vdev, diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index fbbe6da40fed..53051f3d8f25 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -50,6 +50,7 @@ #include #include #include +#include /* * Balloon device information descriptor. @@ -67,7 +68,9 @@ struct balloon_dev_info { struct inode *inode; }; -extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info); +extern struct page *balloon_page_alloc(void); +extern void balloon_page_enqueue(struct balloon_dev_info *b_dev_info, + struct page *page); extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info); static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) @@ -193,4 +196,34 @@ static inline gfp_t balloon_mapping_gfp_mask(void) } #endif /* CONFIG_BALLOON_COMPACTION */ + +/* + * balloon_page_push - insert a page into a page list. + * @head : pointer to list + * @page : page to be added + * + * Caller must ensure the page is private and protect the list. + */ +static inline void balloon_page_push(struct list_head *pages, struct page *page) +{ + list_add(&page->lru, pages); +} + +/* + * balloon_page_pop - remove a page from a page list. + * @head : pointer to list + * @page : page to be added + * + * Caller must ensure the page is private and protect the list. + */ +static inline struct page *balloon_page_pop(struct list_head *pages) +{ + struct page *page = list_first_entry_or_null(pages, struct page, lru); + + if (!page) + return NULL; + + list_del(&page->lru); + return page; +} #endif /* _LINUX_BALLOON_COMPACTION_H */ diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 68d28924ba79..ef858d547e2d 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -10,23 +10,38 @@ #include #include +/* + * balloon_page_alloc - allocates a new page for insertion into the balloon + * page list. + * + * Driver must call it to properly allocate a new enlisted balloon page. + * Driver must call balloon_page_enqueue before definitively removing it from + * the guest system. This function returns the page address for the recently + * allocated page or NULL in the case we fail to allocate a new page this turn. + */ +struct page *balloon_page_alloc(void) +{ + struct page *page = alloc_page(balloon_mapping_gfp_mask() | + __GFP_NOMEMALLOC | __GFP_NORETRY); + return page; +} +EXPORT_SYMBOL_GPL(balloon_page_alloc); + /* * balloon_page_enqueue - allocates a new page and inserts it into the balloon * page list. * @b_dev_info: balloon device descriptor where we will insert a new page to + * @page: new page to enqueue - allocated using balloon_page_alloc. * - * Driver must call it to properly allocate a new enlisted balloon page + * Driver must call it to properly enqueue a new allocated balloon page * before definitively removing it from the guest system. * This function returns the page address for the recently enqueued page or * NULL in the case we fail to allocate a new page this turn. */ -struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info) +void balloon_page_enqueue(struct balloon_dev_info *b_dev_info, + struct page *page) { unsigned long flags; - struct page *page = alloc_page(balloon_mapping_gfp_mask() | - __GFP_NOMEMALLOC | __GFP_NORETRY); - if (!page) - return NULL; /* * Block others from accessing the 'page' when we get around to @@ -39,7 +54,6 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info) __count_vm_event(BALLOON_INFLATE); spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); unlock_page(page); - return page; } EXPORT_SYMBOL_GPL(balloon_page_enqueue); -- cgit From 816e85edff0f31bc8e4614eaaf9a0364406d7a8f Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Thu, 9 Nov 2017 09:00:21 +0900 Subject: vhost/scsi: Use safe iteration in vhost_scsi_complete_cmd_work() The following patch changed the behavior which originally did safe iteration. Make it safe as it was. 12bdcbd539c6327c09da0503c674733cb2d82cb5 vhost/scsi: Don't reinvent the wheel but use existing llist API Signed-off-by: Byungchul Park Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 046f6d280af5..46539ca6ca86 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -519,7 +519,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vs_completion_work); DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ); struct virtio_scsi_cmd_resp v_rsp; - struct vhost_scsi_cmd *cmd; + struct vhost_scsi_cmd *cmd, *t; struct llist_node *llnode; struct se_cmd *se_cmd; struct iov_iter iov_iter; @@ -527,7 +527,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) bitmap_zero(signal, VHOST_SCSI_MAX_VQ); llnode = llist_del_all(&vs->vs_completion_list); - llist_for_each_entry(cmd, llnode, tvc_completion_list) { + llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { se_cmd = &cmd->tvc_se_cmd; pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__, -- cgit From ca2c5b33a285723f517fa296b76f7fd36c383dad Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 21 Aug 2017 22:33:33 +0300 Subject: vhost: fix end of range for access_ok During access_ok checks, addr increases as we iterate over the data structure, thus addr + len - 1 will point beyond the end of region we are translating. Harmless since we then verify that the region covers addr, but let's not waste cpu cycles. Reported-by: Koichiro Den Signed-off-by: Michael S. Tsirkin Acked-by: Koichiro Den --- drivers/vhost/vhost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d6dbb28245e6..33ac2b186b85 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1175,7 +1175,7 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq, { const struct vhost_umem_node *node; struct vhost_umem *umem = vq->iotlb; - u64 s = 0, size, orig_addr = addr; + u64 s = 0, size, orig_addr = addr, last = addr + len - 1; if (vhost_vq_meta_fetch(vq, addr, len, type)) return true; @@ -1183,7 +1183,7 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq, while (len > s) { node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, addr, - addr + len - 1); + last); if (node == NULL || node->start > addr) { vhost_iotlb_miss(vq, addr, access); return false; -- cgit From a72b69dc083a931422cc8a5e33841aff7d5312f2 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 9 Nov 2017 13:29:10 +0000 Subject: vhost/vsock: fix uninitialized vhost_vsock->guest_cid The vhost_vsock->guest_cid field is uninitialized when /dev/vhost-vsock is opened until the VHOST_VSOCK_SET_GUEST_CID ioctl is called. kvmalloc(..., GFP_KERNEL | __GFP_RETRY_MAYFAIL) does not zero memory. All other vhost_vsock fields are initialized explicitly so just initialize this field too. Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vsock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index c9de9c41aa97..5a5e981bd8e4 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -518,6 +518,8 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) goto out; } + vsock->guest_cid = 0; /* no CID assigned yet */ + atomic_set(&vsock->queued_replies, 0); vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; -- cgit From c1d0c3f623ada808904dec676da0126f5b800630 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Mon, 13 Nov 2017 20:29:54 +0100 Subject: fw_cfg: fix the command line module name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau Acked-by: Gabriel Somlo Signed-off-by: Michael S. Tsirkin --- drivers/firmware/qemu_fw_cfg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 0e2011636fbb..5cfe39f7a45f 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -10,9 +10,9 @@ * and select subsets of aarch64), a Device Tree node (on arm), or using * a kernel module (or command line) parameter with the following syntax: * - * [fw_cfg.]ioport=@[::] + * [qemu_fw_cfg.]ioport=@[::] * or - * [fw_cfg.]mmio=@[::] + * [qemu_fw_cfg.]mmio=@[::] * * where: * := size of ioport or mmio range @@ -21,9 +21,9 @@ * := (optional) offset of data register * * e.g.: - * fw_cfg.ioport=2@0x510:0:1 (the default on x86) + * qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86) * or - * fw_cfg.mmio=0xA@0x9020000:8:0 (the default on arm) + * qemu_fw_cfg.mmio=0xA@0x9020000:8:0 (the default on arm) */ #include -- cgit