From 87dffe86d406bee8782cac2db035acb9a28620a7 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 6 Sep 2018 13:26:08 +0200
Subject: xen/manage: don't complain about an empty value in control/sysrq node

When guest receives a sysrq request from the host it acknowledges it by
writing '\0' to control/sysrq xenstore node. This, however, make xenstore
watch fire again but xenbus_scanf() fails to parse empty value with "%c"
format string:

 sysrq: SysRq : Emergency Sync
 Emergency Sync complete
 xen:manage: Error -34 reading sysrq code in control/sysrq

Ignore -ERANGE the same way we already ignore -ENOENT, empty value in
control/sysrq is totally legal.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/manage.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index c93d8ef8df34..5bb01a62f214 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -280,9 +280,11 @@ static void sysrq_handler(struct xenbus_watch *watch, const char *path,
 		/*
 		 * The Xenstore watch fires directly after registering it and
 		 * after a suspend/resume cycle. So ENOENT is no error but
-		 * might happen in those cases.
+		 * might happen in those cases. ERANGE is observed when we get
+		 * an empty value (''), this happens when we acknowledge the
+		 * request by writing '\0' below.
 		 */
-		if (err != -ENOENT)
+		if (err != -ENOENT && err != -ERANGE)
 			pr_err("Error %d reading sysrq code in control/sysrq\n",
 			       err);
 		xenbus_transaction_end(xbt, 1);
-- 
cgit 


From 197ecb3802c04499d8ff4f8cb28f6efa008067db Mon Sep 17 00:00:00 2001
From: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Date: Fri, 7 Sep 2018 18:49:08 +0200
Subject: xen/balloon: add runtime control for scrubbing ballooned out pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Scrubbing pages on initial balloon down can take some time, especially
in nested virtualization case (nested EPT is slow). When HVM/PVH guest is
started with memory= significantly lower than maxmem=, all the extra
pages will be scrubbed before returning to Xen. But since most of them
weren't used at all at that point, Xen needs to populate them first
(from populate-on-demand pool). In nested virt case (Xen inside KVM)
this slows down the guest boot by 15-30s with just 1.5GB needed to be
returned to Xen.

Add runtime parameter to enable/disable it, to allow initially disabling
scrubbing, then enable it back during boot (for example in initramfs).
Such usage relies on assumption that a) most pages ballooned out during
initial boot weren't used at all, and b) even if they were, very few
secrets are in the guest at that time (before any serious userspace
kicks in).
Convert CONFIG_XEN_SCRUB_PAGES to CONFIG_XEN_SCRUB_PAGES_DEFAULT (also
enabled by default), controlling default value for the new runtime
switch.

Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/Kconfig           | 10 +++++++---
 drivers/xen/mem-reservation.c |  4 ++++
 drivers/xen/xen-balloon.c     |  3 +++
 3 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index b459edfacff3..90d387b50ab7 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -79,15 +79,19 @@ config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
 	  This value is used to allocate enough space in internal
 	  tables needed for physical memory administration.
 
-config XEN_SCRUB_PAGES
-	bool "Scrub pages before returning them to system"
+config XEN_SCRUB_PAGES_DEFAULT
+	bool "Scrub pages before returning them to system by default"
 	depends on XEN_BALLOON
 	default y
 	help
 	  Scrub pages before returning them to the system for reuse by
 	  other domains.  This makes sure that any confidential data
 	  is not accidentally visible to other domains.  Is it more
-	  secure, but slightly less efficient.
+	  secure, but slightly less efficient. This can be controlled with
+	  xen_scrub_pages=0 parameter and
+	  /sys/devices/system/xen_memory/xen_memory0/scrub_pages.
+	  This option only sets the default value.
+
 	  If in doubt, say yes.
 
 config XEN_DEV_EVTCHN
diff --git a/drivers/xen/mem-reservation.c b/drivers/xen/mem-reservation.c
index 084799c6180e..3782cf070338 100644
--- a/drivers/xen/mem-reservation.c
+++ b/drivers/xen/mem-reservation.c
@@ -14,6 +14,10 @@
 
 #include <xen/interface/memory.h>
 #include <xen/mem-reservation.h>
+#include <linux/moduleparam.h>
+
+bool __read_mostly xen_scrub_pages = IS_ENABLED(CONFIG_XEN_SCRUB_PAGES_DEFAULT);
+core_param(xen_scrub_pages, xen_scrub_pages, bool, 0);
 
 /*
  * Use one extent per PAGE_SIZE to avoid to break down the page into
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 294f35ce9e46..63c1494a8d73 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -44,6 +44,7 @@
 #include <xen/xenbus.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/mem-reservation.h>
 
 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
 
@@ -137,6 +138,7 @@ static DEVICE_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay);
 static DEVICE_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay);
 static DEVICE_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count);
 static DEVICE_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count);
+static DEVICE_BOOL_ATTR(scrub_pages, 0644, xen_scrub_pages);
 
 static ssize_t show_target_kb(struct device *dev, struct device_attribute *attr,
 			      char *buf)
@@ -203,6 +205,7 @@ static struct attribute *balloon_attrs[] = {
 	&dev_attr_max_schedule_delay.attr.attr,
 	&dev_attr_retry_count.attr.attr,
 	&dev_attr_max_retry_count.attr.attr,
+	&dev_attr_scrub_pages.attr.attr,
 	NULL
 };
 
-- 
cgit 


From 3366cdb6d350d95466ee430ac50f3c8415ca8f46 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Fri, 7 Sep 2018 16:31:35 +0200
Subject: xen: avoid crash in disable_hotplug_cpu

The command 'xl vcpu-set 0 0', issued in dom0, will crash dom0:

BUG: unable to handle kernel NULL pointer dereference at 00000000000002d8
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 7 PID: 65 Comm: xenwatch Not tainted 4.19.0-rc2-1.ga9462db-default #1 openSUSE Tumbleweed (unreleased)
Hardware name: Intel Corporation S5520UR/S5520UR, BIOS S5500.86B.01.00.0050.050620101605 05/06/2010
RIP: e030:device_offline+0x9/0xb0
Code: 77 24 00 e9 ce fe ff ff 48 8b 13 e9 68 ff ff ff 48 8b 13 e9 29 ff ff ff 48 8b 13 e9 ea fe ff ff 90 66 66 66 66 90 41 54 55 53 <f6> 87 d8 02 00 00 01 0f 85 88 00 00 00 48 c7 c2 20 09 60 81 31 f6
RSP: e02b:ffffc90040f27e80 EFLAGS: 00010203
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: ffff8801f3800000 RSI: ffffc90040f27e70 RDI: 0000000000000000
RBP: 0000000000000000 R08: ffffffff820e47b3 R09: 0000000000000000
R10: 0000000000007ff0 R11: 0000000000000000 R12: ffffffff822e6d30
R13: dead000000000200 R14: dead000000000100 R15: ffffffff8158b4e0
FS:  00007ffa595158c0(0000) GS:ffff8801f39c0000(0000) knlGS:0000000000000000
CS:  e033 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000000002d8 CR3: 00000001d9602000 CR4: 0000000000002660
Call Trace:
 handle_vcpu_hotplug_event+0xb5/0xc0
 xenwatch_thread+0x80/0x140
 ? wait_woken+0x80/0x80
 kthread+0x112/0x130
 ? kthread_create_worker_on_cpu+0x40/0x40
 ret_from_fork+0x3a/0x50

This happens because handle_vcpu_hotplug_event is called twice. In the
first iteration cpu_present is still true, in the second iteration
cpu_present is false which causes get_cpu_device to return NULL.
In case of cpu#0, cpu_online is apparently always true.

Fix this crash by checking if the cpu can be hotplugged, which is false
for a cpu that was just removed.

Also check if the cpu was actually offlined by device_remove, otherwise
leave the cpu_present state as it is.

Rearrange to code to do all work with device_hotplug_lock held.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/cpu_hotplug.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index d4265c8ebb22..b1357aa4bc55 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -19,15 +19,16 @@ static void enable_hotplug_cpu(int cpu)
 
 static void disable_hotplug_cpu(int cpu)
 {
-	if (cpu_online(cpu)) {
-		lock_device_hotplug();
+	if (!cpu_is_hotpluggable(cpu))
+		return;
+	lock_device_hotplug();
+	if (cpu_online(cpu))
 		device_offline(get_cpu_device(cpu));
-		unlock_device_hotplug();
-	}
-	if (cpu_present(cpu))
+	if (!cpu_online(cpu) && cpu_present(cpu)) {
 		xen_arch_unregister_cpu(cpu);
-
-	set_cpu_present(cpu, false);
+		set_cpu_present(cpu, false);
+	}
+	unlock_device_hotplug();
 }
 
 static int vcpu_online(unsigned int cpu)
-- 
cgit 


From 4dca864b59dd150a221730775e2f21f49779c135 Mon Sep 17 00:00:00 2001
From: Josh Abraham <j.abraham1776@gmail.com>
Date: Wed, 12 Sep 2018 15:13:54 -1000
Subject: xen: fix GCC warning and remove duplicate EVTCHN_ROW/EVTCHN_COL usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch removes duplicate macro useage in events_base.c.

It also fixes gcc warning:
variable ‘col’ set but not used [-Wunused-but-set-variable]

Signed-off-by: Joshua Abraham <j.abraham1776@gmail.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/events/events_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 08e4af04d6f2..e6c1934734b7 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -138,7 +138,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
 		clear_evtchn_to_irq_row(row);
 	}
 
-	evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq;
+	evtchn_to_irq[row][col] = irq;
 	return 0;
 }
 
-- 
cgit 


From 58a57569904039d9ac38c0ff2a88396a43899689 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 5 Sep 2018 09:21:39 +1000
Subject: xen/gntdev: fix up blockable calls to mn_invl_range_start

Patch series "mmu_notifiers follow ups".

Tetsuo has noticed some fallouts from 93065ac753e4 ("mm, oom: distinguish
blockable mode for mmu notifiers").  One of them has been fixed and picked
up by AMD/DRM maintainer [1].  XEN issue is fixed by patch 1.  I have also
clarified expectations about blockable semantic of invalidate_range_end.
Finally the last patch removes MMU_INVALIDATE_DOES_NOT_BLOCK which is no
longer used nor needed.

[1] http://lkml.kernel.org/r/20180824135257.GU29735@dhcp22.suse.cz

This patch (of 3):

93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers") has
introduced blockable parameter to all mmu_notifiers and the notifier has
to back off when called in !blockable case and it could block down the
road.

The above commit implemented that for mn_invl_range_start but both
in_range checks are done unconditionally regardless of the blockable mode
and as such they would fail all the time for regular calls.  Fix this by
checking blockable parameter as well.

Once we are there we can remove the stale TODO.  The lock has to be
sleepable because we wait for completion down in gnttab_unmap_refs_sync.

Link: http://lkml.kernel.org/r/20180827112623.8992-2-mhocko@kernel.org
Fixes: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/gntdev.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 57390c7666e5..b0b02a501167 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -492,12 +492,19 @@ static bool in_range(struct gntdev_grant_map *map,
 	return true;
 }
 
-static void unmap_if_in_range(struct gntdev_grant_map *map,
-			      unsigned long start, unsigned long end)
+static int unmap_if_in_range(struct gntdev_grant_map *map,
+			      unsigned long start, unsigned long end,
+			      bool blockable)
 {
 	unsigned long mstart, mend;
 	int err;
 
+	if (!in_range(map, start, end))
+		return 0;
+
+	if (!blockable)
+		return -EAGAIN;
+
 	mstart = max(start, map->vma->vm_start);
 	mend   = min(end,   map->vma->vm_end);
 	pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
@@ -508,6 +515,8 @@ static void unmap_if_in_range(struct gntdev_grant_map *map,
 				(mstart - map->vma->vm_start) >> PAGE_SHIFT,
 				(mend - mstart) >> PAGE_SHIFT);
 	WARN_ON(err);
+
+	return 0;
 }
 
 static int mn_invl_range_start(struct mmu_notifier *mn,
@@ -519,25 +528,20 @@ static int mn_invl_range_start(struct mmu_notifier *mn,
 	struct gntdev_grant_map *map;
 	int ret = 0;
 
-	/* TODO do we really need a mutex here? */
 	if (blockable)
 		mutex_lock(&priv->lock);
 	else if (!mutex_trylock(&priv->lock))
 		return -EAGAIN;
 
 	list_for_each_entry(map, &priv->maps, next) {
-		if (in_range(map, start, end)) {
-			ret = -EAGAIN;
+		ret = unmap_if_in_range(map, start, end, blockable);
+		if (ret)
 			goto out_unlock;
-		}
-		unmap_if_in_range(map, start, end);
 	}
 	list_for_each_entry(map, &priv->freeable_maps, next) {
-		if (in_range(map, start, end)) {
-			ret = -EAGAIN;
+		ret = unmap_if_in_range(map, start, end, blockable);
+		if (ret)
 			goto out_unlock;
-		}
-		unmap_if_in_range(map, start, end);
 	}
 
 out_unlock:
-- 
cgit 


From d59f532480f5231bf62615a9287e05b78225fb05 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 19 Sep 2018 15:42:33 +0200
Subject: xen: issue warning message when out of grant maptrack entries

When a driver domain (e.g. dom0) is running out of maptrack entries it
can't map any more foreign domain pages. Instead of silently stalling
the affected domUs issue a rate limited warning in this case in order
to make it easier to detect that situation.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/grant-table.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7bafa703a992..84575baceebc 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1040,18 +1040,33 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		return ret;
 
 	for (i = 0; i < count; i++) {
-		/* Retry eagain maps */
-		if (map_ops[i].status == GNTST_eagain)
-			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
-						&map_ops[i].status, __func__);
-
-		if (map_ops[i].status == GNTST_okay) {
+		switch (map_ops[i].status) {
+		case GNTST_okay:
+		{
 			struct xen_page_foreign *foreign;
 
 			SetPageForeign(pages[i]);
 			foreign = xen_page_foreign(pages[i]);
 			foreign->domid = map_ops[i].dom;
 			foreign->gref = map_ops[i].ref;
+			break;
+		}
+
+		case GNTST_no_device_space:
+			pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n");
+			break;
+
+		case GNTST_eagain:
+			/* Retry eagain maps */
+			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref,
+						map_ops + i,
+						&map_ops[i].status, __func__);
+			/* Test status in next loop iteration. */
+			i--;
+			break;
+
+		default:
+			break;
 		}
 	}
 
-- 
cgit