From 87dffe86d406bee8782cac2db035acb9a28620a7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 6 Sep 2018 13:26:08 +0200 Subject: xen/manage: don't complain about an empty value in control/sysrq node When a guest receives a sysrq request from the host it acknowledges it by writing '\0' to the control/sysrq xenstore node. This, however, makes the xenstore watch fire again, but xenbus_scanf() fails to parse the empty value with the "%c" format string: sysrq: SysRq : Emergency Sync Emergency Sync complete xen:manage: Error -34 reading sysrq code in control/sysrq Ignore -ERANGE the same way we already ignore -ENOENT, empty value in control/sysrq is totally legal. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Wei Liu Signed-off-by: Boris Ostrovsky --- drivers/xen/manage.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index c93d8ef8df34..5bb01a62f214 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -280,9 +280,11 @@ static void sysrq_handler(struct xenbus_watch *watch, const char *path, /* * The Xenstore watch fires directly after registering it and * after a suspend/resume cycle. So ENOENT is no error but - * might happen in those cases. + * might happen in those cases. ERANGE is observed when we get + * an empty value (''), this happens when we acknowledge the + * request by writing '\0' below. 
*/ - if (err != -ENOENT) + if (err != -ENOENT && err != -ERANGE) pr_err("Error %d reading sysrq code in control/sysrq\n", err); xenbus_transaction_end(xbt, 1); -- cgit From 197ecb3802c04499d8ff4f8cb28f6efa008067db Mon Sep 17 00:00:00 2001 From: Marek Marczykowski-Górecki Date: Fri, 7 Sep 2018 18:49:08 +0200 Subject: xen/balloon: add runtime control for scrubbing ballooned out pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scrubbing pages on initial balloon down can take some time, especially in nested virtualization case (nested EPT is slow). When HVM/PVH guest is started with memory= significantly lower than maxmem=, all the extra pages will be scrubbed before returning to Xen. But since most of them weren't used at all at that point, Xen needs to populate them first (from populate-on-demand pool). In nested virt case (Xen inside KVM) this slows down the guest boot by 15-30s with just 1.5GB needed to be returned to Xen. Add runtime parameter to enable/disable it, to allow initially disabling scrubbing, then enable it back during boot (for example in initramfs). Such usage relies on assumption that a) most pages ballooned out during initial boot weren't used at all, and b) even if they were, very few secrets are in the guest at that time (before any serious userspace kicks in). Convert CONFIG_XEN_SCRUB_PAGES to CONFIG_XEN_SCRUB_PAGES_DEFAULT (also enabled by default), controlling default value for the new runtime switch. 
Signed-off-by: Marek Marczykowski-Górecki Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/Kconfig | 10 +++++++--- drivers/xen/mem-reservation.c | 4 ++++ drivers/xen/xen-balloon.c | 3 +++ 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index b459edfacff3..90d387b50ab7 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -79,15 +79,19 @@ config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT This value is used to allocate enough space in internal tables needed for physical memory administration. -config XEN_SCRUB_PAGES - bool "Scrub pages before returning them to system" +config XEN_SCRUB_PAGES_DEFAULT + bool "Scrub pages before returning them to system by default" depends on XEN_BALLOON default y help Scrub pages before returning them to the system for reuse by other domains. This makes sure that any confidential data is not accidentally visible to other domains. Is it more - secure, but slightly less efficient. + secure, but slightly less efficient. This can be controlled with + xen_scrub_pages=0 parameter and + /sys/devices/system/xen_memory/xen_memory0/scrub_pages. + This option only sets the default value. + If in doubt, say yes. 
config XEN_DEV_EVTCHN diff --git a/drivers/xen/mem-reservation.c b/drivers/xen/mem-reservation.c index 084799c6180e..3782cf070338 100644 --- a/drivers/xen/mem-reservation.c +++ b/drivers/xen/mem-reservation.c @@ -14,6 +14,10 @@ #include #include +#include + +bool __read_mostly xen_scrub_pages = IS_ENABLED(CONFIG_XEN_SCRUB_PAGES_DEFAULT); +core_param(xen_scrub_pages, xen_scrub_pages, bool, 0); /* * Use one extent per PAGE_SIZE to avoid to break down the page into diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 294f35ce9e46..63c1494a8d73 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -44,6 +44,7 @@ #include #include #include +#include #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) @@ -137,6 +138,7 @@ static DEVICE_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay); static DEVICE_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay); static DEVICE_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count); static DEVICE_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count); +static DEVICE_BOOL_ATTR(scrub_pages, 0644, xen_scrub_pages); static ssize_t show_target_kb(struct device *dev, struct device_attribute *attr, char *buf) @@ -203,6 +205,7 @@ static struct attribute *balloon_attrs[] = { &dev_attr_max_schedule_delay.attr.attr, &dev_attr_retry_count.attr.attr, &dev_attr_max_retry_count.attr.attr, + &dev_attr_scrub_pages.attr.attr, NULL }; -- cgit From 3366cdb6d350d95466ee430ac50f3c8415ca8f46 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Fri, 7 Sep 2018 16:31:35 +0200 Subject: xen: avoid crash in disable_hotplug_cpu The command 'xl vcpu-set 0 0', issued in dom0, will crash dom0: BUG: unable to handle kernel NULL pointer dereference at 00000000000002d8 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 7 PID: 65 Comm: xenwatch Not tainted 4.19.0-rc2-1.ga9462db-default #1 openSUSE Tumbleweed (unreleased) Hardware name: Intel Corporation S5520UR/S5520UR, BIOS 
S5500.86B.01.00.0050.050620101605 05/06/2010 RIP: e030:device_offline+0x9/0xb0 Code: 77 24 00 e9 ce fe ff ff 48 8b 13 e9 68 ff ff ff 48 8b 13 e9 29 ff ff ff 48 8b 13 e9 ea fe ff ff 90 66 66 66 66 90 41 54 55 53 87 d8 02 00 00 01 0f 85 88 00 00 00 48 c7 c2 20 09 60 81 31 f6 RSP: e02b:ffffc90040f27e80 EFLAGS: 00010203 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff8801f3800000 RSI: ffffc90040f27e70 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffffffff820e47b3 R09: 0000000000000000 R10: 0000000000007ff0 R11: 0000000000000000 R12: ffffffff822e6d30 R13: dead000000000200 R14: dead000000000100 R15: ffffffff8158b4e0 FS: 00007ffa595158c0(0000) GS:ffff8801f39c0000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000002d8 CR3: 00000001d9602000 CR4: 0000000000002660 Call Trace: handle_vcpu_hotplug_event+0xb5/0xc0 xenwatch_thread+0x80/0x140 ? wait_woken+0x80/0x80 kthread+0x112/0x130 ? kthread_create_worker_on_cpu+0x40/0x40 ret_from_fork+0x3a/0x50 This happens because handle_vcpu_hotplug_event is called twice. In the first iteration cpu_present is still true, in the second iteration cpu_present is false which causes get_cpu_device to return NULL. In case of cpu#0, cpu_online is apparently always true. Fix this crash by checking if the cpu can be hotplugged, which is false for a cpu that was just removed. Also check if the cpu was actually offlined by device_offline, otherwise leave the cpu_present state as it is. Rearrange the code to do all work with device_hotplug_lock held. 
Signed-off-by: Olaf Hering Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/cpu_hotplug.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index d4265c8ebb22..b1357aa4bc55 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -19,15 +19,16 @@ static void enable_hotplug_cpu(int cpu) static void disable_hotplug_cpu(int cpu) { - if (cpu_online(cpu)) { - lock_device_hotplug(); + if (!cpu_is_hotpluggable(cpu)) + return; + lock_device_hotplug(); + if (cpu_online(cpu)) device_offline(get_cpu_device(cpu)); - unlock_device_hotplug(); - } - if (cpu_present(cpu)) + if (!cpu_online(cpu) && cpu_present(cpu)) { xen_arch_unregister_cpu(cpu); - - set_cpu_present(cpu, false); + set_cpu_present(cpu, false); + } + unlock_device_hotplug(); } static int vcpu_online(unsigned int cpu) -- cgit From 4dca864b59dd150a221730775e2f21f49779c135 Mon Sep 17 00:00:00 2001 From: Josh Abraham Date: Wed, 12 Sep 2018 15:13:54 -1000 Subject: xen: fix GCC warning and remove duplicate EVTCHN_ROW/EVTCHN_COL usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch removes duplicate macro usage in events_base.c. 
It also fixes gcc warning: variable ‘col’ set but not used [-Wunused-but-set-variable] Signed-off-by: Joshua Abraham Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/events/events_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen') diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 08e4af04d6f2..e6c1934734b7 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -138,7 +138,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) clear_evtchn_to_irq_row(row); } - evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq; + evtchn_to_irq[row][col] = irq; return 0; } -- cgit From 58a57569904039d9ac38c0ff2a88396a43899689 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 5 Sep 2018 09:21:39 +1000 Subject: xen/gntdev: fix up blockable calls to mn_invl_range_start Patch series "mmu_notifiers follow ups". Tetsuo has noticed some fallouts from 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers"). One of them has been fixed and picked up by AMD/DRM maintainer [1]. XEN issue is fixed by patch 1. I have also clarified expectations about blockable semantic of invalidate_range_end. Finally the last patch removes MMU_INVALIDATE_DOES_NOT_BLOCK which is no longer used nor needed. [1] http://lkml.kernel.org/r/20180824135257.GU29735@dhcp22.suse.cz This patch (of 3): 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers") has introduced blockable parameter to all mmu_notifiers and the notifier has to back off when called in !blockable case and it could block down the road. The above commit implemented that for mn_invl_range_start but both in_range checks are done unconditionally regardless of the blockable mode and as such they would fail all the time for regular calls. Fix this by checking blockable parameter as well. Once we are there we can remove the stale TODO. 
The lock has to be sleepable because we wait for completion down in gnttab_unmap_refs_sync. Link: http://lkml.kernel.org/r/20180827112623.8992-2-mhocko@kernel.org Fixes: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers") Signed-off-by: Michal Hocko Cc: Boris Ostrovsky Cc: Juergen Gross Cc: David Rientjes Cc: Jerome Glisse Cc: Tetsuo Handa Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 57390c7666e5..b0b02a501167 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -492,12 +492,19 @@ static bool in_range(struct gntdev_grant_map *map, return true; } -static void unmap_if_in_range(struct gntdev_grant_map *map, - unsigned long start, unsigned long end) +static int unmap_if_in_range(struct gntdev_grant_map *map, + unsigned long start, unsigned long end, + bool blockable) { unsigned long mstart, mend; int err; + if (!in_range(map, start, end)) + return 0; + + if (!blockable) + return -EAGAIN; + mstart = max(start, map->vma->vm_start); mend = min(end, map->vma->vm_end); pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", @@ -508,6 +515,8 @@ static void unmap_if_in_range(struct gntdev_grant_map *map, (mstart - map->vma->vm_start) >> PAGE_SHIFT, (mend - mstart) >> PAGE_SHIFT); WARN_ON(err); + + return 0; } static int mn_invl_range_start(struct mmu_notifier *mn, @@ -519,25 +528,20 @@ static int mn_invl_range_start(struct mmu_notifier *mn, struct gntdev_grant_map *map; int ret = 0; - /* TODO do we really need a mutex here? 
*/ if (blockable) mutex_lock(&priv->lock); else if (!mutex_trylock(&priv->lock)) return -EAGAIN; list_for_each_entry(map, &priv->maps, next) { - if (in_range(map, start, end)) { - ret = -EAGAIN; + ret = unmap_if_in_range(map, start, end, blockable); + if (ret) goto out_unlock; - } - unmap_if_in_range(map, start, end); } list_for_each_entry(map, &priv->freeable_maps, next) { - if (in_range(map, start, end)) { - ret = -EAGAIN; + ret = unmap_if_in_range(map, start, end, blockable); + if (ret) goto out_unlock; - } - unmap_if_in_range(map, start, end); } out_unlock: -- cgit From d59f532480f5231bf62615a9287e05b78225fb05 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 19 Sep 2018 15:42:33 +0200 Subject: xen: issue warning message when out of grant maptrack entries When a driver domain (e.g. dom0) is running out of maptrack entries it can't map any more foreign domain pages. Instead of silently stalling the affected domUs issue a rate limited warning in this case in order to make it easier to detect that situation. 
Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/grant-table.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 7bafa703a992..84575baceebc 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -1040,18 +1040,33 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, return ret; for (i = 0; i < count; i++) { - /* Retry eagain maps */ - if (map_ops[i].status == GNTST_eagain) - gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, - &map_ops[i].status, __func__); - - if (map_ops[i].status == GNTST_okay) { + switch (map_ops[i].status) { + case GNTST_okay: + { struct xen_page_foreign *foreign; SetPageForeign(pages[i]); foreign = xen_page_foreign(pages[i]); foreign->domid = map_ops[i].dom; foreign->gref = map_ops[i].ref; + break; + } + + case GNTST_no_device_space: + pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n"); + break; + + case GNTST_eagain: + /* Retry eagain maps */ + gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, + map_ops + i, + &map_ops[i].status, __func__); + /* Test status in next loop iteration. */ + i--; + break; + + default: + break; } } -- cgit