author	Linus Torvalds <torvalds@linux-foundation.org>	2020-04-07 14:11:54 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-04-07 14:11:54 -0700
commit	63bef48fd6c9d3f1ba4f0e23b4da1e007db6a3c0 (patch)
tree	f27c1ea7686b2ee30eea6b973a430f1c102bb03f /drivers
parent	04de788e61a576820baf03ff8accc246ca146cb3 (diff)
parent	1cd377baa91844b9f87a2b72eabf7ff783946b5e (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:

 - a lot more of MM, quite a bit more yet to come: (memcg, pagemap,
   vmalloc, pagealloc, migration, thp, ksm, madvise, virtio,
   userfaultfd, memory-hotplug, shmem, rmap, zswap, zsmalloc, cleanups)

 - various other subsystems (procfs, misc, MAINTAINERS, bitops, lib,
   checkpatch, epoll, binfmt, kallsyms, reiserfs, kmod, gcov, kconfig,
   ubsan, fault-injection, ipc)

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (158 commits)
  ipc/shm.c: make compat_ksys_shmctl() static
  ipc/mqueue.c: fix a brace coding style issue
  lib/Kconfig.debug: fix a typo "capabilitiy" -> "capability"
  ubsan: include bug type in report header
  kasan: unset panic_on_warn before calling panic()
  ubsan: check panic_on_warn
  drivers/misc/lkdtm/bugs.c: add arithmetic overflow and array bounds checks
  ubsan: split "bounds" checker from other options
  ubsan: add trap instrumentation option
  init/Kconfig: clean up ANON_INODES and old IO schedulers options
  kernel/gcov/fs.c: replace zero-length array with flexible-array member
  gcov: gcc_3_4: replace zero-length array with flexible-array member
  gcov: gcc_4_7: replace zero-length array with flexible-array member
  kernel/kmod.c: fix a typo "assuems" -> "assumes"
  reiserfs: clean up several indentation issues
  kallsyms: unexport kallsyms_lookup_name() and kallsyms_on_each_symbol()
  samples/hw_breakpoint: drop use of kallsyms_lookup_name()
  samples/hw_breakpoint: drop HW_BREAKPOINT_R when reporting writes
  fs/binfmt_elf.c: don't free interpreter's ELF pheaders on common path
  fs/binfmt_elf.c: allocate less for static executable
  ...
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/base/memory.c	130
-rw-r--r--	drivers/hv/hv_balloon.c	25
-rw-r--r--	drivers/misc/lkdtm/bugs.c	75
-rw-r--r--	drivers/misc/lkdtm/core.c	3
-rw-r--r--	drivers/misc/lkdtm/lkdtm.h	3
-rw-r--r--	drivers/virtio/Kconfig	1
-rw-r--r--	drivers/virtio/virtio_balloon.c	180
7 files changed, 255 insertions, 162 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 4086718f6876..dbec3a05590a 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -27,6 +27,24 @@
#define MEMORY_CLASS_NAME "memory"
+static const char *const online_type_to_str[] = {
+ [MMOP_OFFLINE] = "offline",
+ [MMOP_ONLINE] = "online",
+ [MMOP_ONLINE_KERNEL] = "online_kernel",
+ [MMOP_ONLINE_MOVABLE] = "online_movable",
+};
+
+int memhp_online_type_from_str(const char *str)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
+ if (sysfs_streq(str, online_type_to_str[i]))
+ return i;
+ }
+ return -EINVAL;
+}
+
#define to_memory_block(dev) container_of(dev, struct memory_block, dev)
static int sections_per_block;
@@ -145,45 +163,6 @@ int memory_notify(unsigned long val, void *v)
}
/*
- * The probe routines leave the pages uninitialized, just as the bootmem code
- * does. Make sure we do not access them, but instead use only information from
- * within sections.
- */
-static bool pages_correctly_probed(unsigned long start_pfn)
-{
- unsigned long section_nr = pfn_to_section_nr(start_pfn);
- unsigned long section_nr_end = section_nr + sections_per_block;
- unsigned long pfn = start_pfn;
-
- /*
- * memmap between sections is not contiguous except with
- * SPARSEMEM_VMEMMAP. We lookup the page once per section
- * and assume memmap is contiguous within each section
- */
- for (; section_nr < section_nr_end; section_nr++) {
- if (WARN_ON_ONCE(!pfn_valid(pfn)))
- return false;
-
- if (!present_section_nr(section_nr)) {
- pr_warn("section %ld pfn[%lx, %lx) not present\n",
- section_nr, pfn, pfn + PAGES_PER_SECTION);
- return false;
- } else if (!valid_section_nr(section_nr)) {
- pr_warn("section %ld pfn[%lx, %lx) no valid memmap\n",
- section_nr, pfn, pfn + PAGES_PER_SECTION);
- return false;
- } else if (online_section_nr(section_nr)) {
- pr_warn("section %ld pfn[%lx, %lx) is already online\n",
- section_nr, pfn, pfn + PAGES_PER_SECTION);
- return false;
- }
- pfn += PAGES_PER_SECTION;
- }
-
- return true;
-}
-
-/*
* MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
* OK to have direct references to sparsemem variables in here.
*/
@@ -199,9 +178,6 @@ memory_block_action(unsigned long start_section_nr, unsigned long action,
switch (action) {
case MEM_ONLINE:
- if (!pages_correctly_probed(start_pfn))
- return -EBUSY;
-
ret = online_pages(start_pfn, nr_pages, online_type, nid);
break;
case MEM_OFFLINE:
@@ -245,17 +221,14 @@ static int memory_subsys_online(struct device *dev)
return 0;
/*
- * If we are called from state_store(), online_type will be
- * set >= 0 Otherwise we were called from the device online
- * attribute and need to set the online_type.
+ * When called via device_online() without configuring the online_type,
+ * we want to default to MMOP_ONLINE.
*/
- if (mem->online_type < 0)
- mem->online_type = MMOP_ONLINE_KEEP;
+ if (mem->online_type == MMOP_OFFLINE)
+ mem->online_type = MMOP_ONLINE;
ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
-
- /* clear online_type */
- mem->online_type = -1;
+ mem->online_type = MMOP_OFFLINE;
return ret;
}
@@ -267,40 +240,27 @@ static int memory_subsys_offline(struct device *dev)
if (mem->state == MEM_OFFLINE)
return 0;
- /* Can't offline block with non-present sections */
- if (mem->section_count != sections_per_block)
- return -EINVAL;
-
return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
}
static ssize_t state_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
+ const int online_type = memhp_online_type_from_str(buf);
struct memory_block *mem = to_memory_block(dev);
- int ret, online_type;
+ int ret;
+
+ if (online_type < 0)
+ return -EINVAL;
ret = lock_device_hotplug_sysfs();
if (ret)
return ret;
- if (sysfs_streq(buf, "online_kernel"))
- online_type = MMOP_ONLINE_KERNEL;
- else if (sysfs_streq(buf, "online_movable"))
- online_type = MMOP_ONLINE_MOVABLE;
- else if (sysfs_streq(buf, "online"))
- online_type = MMOP_ONLINE_KEEP;
- else if (sysfs_streq(buf, "offline"))
- online_type = MMOP_OFFLINE;
- else {
- ret = -EINVAL;
- goto err;
- }
-
switch (online_type) {
case MMOP_ONLINE_KERNEL:
case MMOP_ONLINE_MOVABLE:
- case MMOP_ONLINE_KEEP:
+ case MMOP_ONLINE:
/* mem->online_type is protected by device_hotplug_lock */
mem->online_type = online_type;
ret = device_online(&mem->dev);
@@ -312,7 +272,6 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
ret = -EINVAL; /* should never happen */
}
-err:
unlock_device_hotplug();
if (ret < 0)
@@ -380,7 +339,8 @@ static ssize_t valid_zones_show(struct device *dev,
}
nid = mem->nid;
- default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages);
+ default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn,
+ nr_pages);
strcat(buf, default_zone->name);
print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL,
@@ -418,23 +378,20 @@ static DEVICE_ATTR_RO(block_size_bytes);
static ssize_t auto_online_blocks_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- if (memhp_auto_online)
- return sprintf(buf, "online\n");
- else
- return sprintf(buf, "offline\n");
+ return sprintf(buf, "%s\n",
+ online_type_to_str[memhp_default_online_type]);
}
static ssize_t auto_online_blocks_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- if (sysfs_streq(buf, "online"))
- memhp_auto_online = true;
- else if (sysfs_streq(buf, "offline"))
- memhp_auto_online = false;
- else
+ const int online_type = memhp_online_type_from_str(buf);
+
+ if (online_type < 0)
return -EINVAL;
+ memhp_default_online_type = online_type;
return count;
}
@@ -627,7 +584,7 @@ static int init_memory_block(struct memory_block **memory,
static int add_memory_block(unsigned long base_section_nr)
{
- int ret, section_count = 0;
+ int section_count = 0;
struct memory_block *mem;
unsigned long nr;
@@ -638,12 +595,8 @@ static int add_memory_block(unsigned long base_section_nr)
if (section_count == 0)
return 0;
- ret = init_memory_block(&mem, base_memory_block_id(base_section_nr),
- MEM_ONLINE);
- if (ret)
- return ret;
- mem->section_count = section_count;
- return 0;
+ return init_memory_block(&mem, base_memory_block_id(base_section_nr),
+ MEM_ONLINE);
}
static void unregister_memory(struct memory_block *memory)
@@ -679,7 +632,6 @@ int create_memory_block_devices(unsigned long start, unsigned long size)
ret = init_memory_block(&mem, block_id, MEM_OFFLINE);
if (ret)
break;
- mem->section_count = sections_per_block;
}
if (ret) {
end_block_id = block_id;
@@ -688,7 +640,6 @@ int create_memory_block_devices(unsigned long start, unsigned long size)
mem = find_memory_block_by_id(block_id);
if (WARN_ON_ONCE(!mem))
continue;
- mem->section_count = 0;
unregister_memory(mem);
}
}
@@ -717,7 +668,6 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
mem = find_memory_block_by_id(block_id);
if (WARN_ON_ONCE(!mem))
continue;
- mem->section_count = 0;
unregister_memory_block_under_nodes(mem);
unregister_memory(mem);
}
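
The memory.c changes above centralize parsing of the sysfs online-type strings in memhp_online_type_from_str(), so auto_online_blocks now accepts any of "offline", "online", "online_kernel" or "online_movable" rather than just the first two. Below is a minimal userspace sketch of exercising that interface; it is not part of the patch, it needs root and a kernel with memory hotplug, and writing the file really does change the default policy.

/* Read and set the default online policy for hotplugged memory blocks. */
#include <stdio.h>

#define AUTO_ONLINE "/sys/devices/system/memory/auto_online_blocks"

int main(void)
{
	char cur[32] = "";
	FILE *f = fopen(AUTO_ONLINE, "r");

	if (!f) {
		perror(AUTO_ONLINE);
		return 1;
	}
	if (fgets(cur, sizeof(cur), f))
		printf("current policy: %s", cur);
	fclose(f);

	/* Any string accepted by memhp_online_type_from_str() works here. */
	f = fopen(AUTO_ONLINE, "w");
	if (!f) {
		perror(AUTO_ONLINE);
		return 1;
	}
	if (fputs("online_movable", f) == EOF || fflush(f) == EOF)
		perror("write");
	fclose(f);
	return 0;
}
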
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index a02ce43d778d..32e3bc0aa665 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -533,7 +533,6 @@ struct hv_dynmem_device {
* State to synchronize hot-add.
*/
struct completion ol_waitevent;
- bool ha_waiting;
/*
* This thread handles hot-add
* requests from the host as well as notifying
@@ -634,10 +633,7 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
switch (val) {
case MEM_ONLINE:
case MEM_CANCEL_ONLINE:
- if (dm_device.ha_waiting) {
- dm_device.ha_waiting = false;
- complete(&dm_device.ol_waitevent);
- }
+ complete(&dm_device.ol_waitevent);
break;
case MEM_OFFLINE:
@@ -726,8 +722,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
has->covered_end_pfn += processed_pfn;
spin_unlock_irqrestore(&dm_device.ha_lock, flags);
- init_completion(&dm_device.ol_waitevent);
- dm_device.ha_waiting = !memhp_auto_online;
+ reinit_completion(&dm_device.ol_waitevent);
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
ret = add_memory(nid, PFN_PHYS((start_pfn)),
@@ -753,15 +748,14 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
}
/*
- * Wait for the memory block to be onlined when memory onlining
- * is done outside of kernel (memhp_auto_online). Since the hot
- * add has succeeded, it is ok to proceed even if the pages in
- * the hot added region have not been "onlined" within the
- * allowed time.
+ * Wait for memory to get onlined. If the kernel onlined the
+ * memory when adding it, this will return directly. Otherwise,
+ * it will wait for user space to online the memory. This helps
+ * to avoid adding memory faster than it is getting onlined. As
+ * adding succeeded, it is ok to proceed even if the memory was
+ * not onlined in time.
*/
- if (dm_device.ha_waiting)
- wait_for_completion_timeout(&dm_device.ol_waitevent,
- 5*HZ);
+ wait_for_completion_timeout(&dm_device.ol_waitevent, 5 * HZ);
post_status(&dm_device);
}
}
@@ -1706,6 +1700,7 @@ static int balloon_probe(struct hv_device *dev,
#ifdef CONFIG_MEMORY_HOTPLUG
set_online_page_callback(&hv_online_page);
+ init_completion(&dm_device.ol_waitevent);
register_memory_notifier(&hv_memory_nb);
#endif
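
The hv_balloon change above replaces the ha_waiting flag with a single long-lived completion: initialized once in balloon_probe(), re-armed with reinit_completion() before every add_memory() call, completed from the memory notifier, and waited on with a 5 second timeout. A minimal sketch of that pattern in isolation follows; the function and variable names are illustrative, only the completion API calls are taken from the diff.

#include <linux/completion.h>
#include <linux/jiffies.h>

static struct completion ol_waitevent;

static void hotadd_init(void)			/* once, at probe time */
{
	init_completion(&ol_waitevent);
}

static void hotadd_one_block(void)		/* per hot-add request */
{
	reinit_completion(&ol_waitevent);
	/* ... add_memory(nid, start, size) ... */

	/* Proceed once the block is onlined, or after 5s if it is not. */
	wait_for_completion_timeout(&ol_waitevent, 5 * HZ);
}

static void block_went_online(void)		/* from the memory notifier */
{
	complete(&ol_waitevent);
}
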
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index cc92bc3ed820..886459e0ddd9 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -11,6 +11,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/uaccess.h>
+#include <linux/slab.h>
#ifdef CONFIG_X86_32
#include <asm/desc.h>
@@ -175,6 +176,80 @@ void lkdtm_HUNG_TASK(void)
schedule();
}
+volatile unsigned int huge = INT_MAX - 2;
+volatile unsigned int ignored;
+
+void lkdtm_OVERFLOW_SIGNED(void)
+{
+ int value;
+
+ value = huge;
+ pr_info("Normal signed addition ...\n");
+ value += 1;
+ ignored = value;
+
+ pr_info("Overflowing signed addition ...\n");
+ value += 4;
+ ignored = value;
+}
+
+
+void lkdtm_OVERFLOW_UNSIGNED(void)
+{
+ unsigned int value;
+
+ value = huge;
+ pr_info("Normal unsigned addition ...\n");
+ value += 1;
+ ignored = value;
+
+ pr_info("Overflowing unsigned addition ...\n");
+ value += 4;
+ ignored = value;
+}
+
+/* Intentionally using old-style flex array definition of 1 byte. */
+struct array_bounds_flex_array {
+ int one;
+ int two;
+ char data[1];
+};
+
+struct array_bounds {
+ int one;
+ int two;
+ char data[8];
+ int three;
+};
+
+void lkdtm_ARRAY_BOUNDS(void)
+{
+ struct array_bounds_flex_array *not_checked;
+ struct array_bounds *checked;
+ volatile int i;
+
+ not_checked = kmalloc(sizeof(*not_checked) * 2, GFP_KERNEL);
+ checked = kmalloc(sizeof(*checked) * 2, GFP_KERNEL);
+
+ pr_info("Array access within bounds ...\n");
+ /* For both, touch all bytes in the actual member size. */
+ for (i = 0; i < sizeof(checked->data); i++)
+ checked->data[i] = 'A';
+ /*
+ * For the uninstrumented flex array member, also touch 1 byte
+ * beyond to verify it is correctly uninstrumented.
+ */
+ for (i = 0; i < sizeof(not_checked->data) + 1; i++)
+ not_checked->data[i] = 'A';
+
+ pr_info("Array access beyond bounds ...\n");
+ for (i = 0; i < sizeof(checked->data) + 1; i++)
+ checked->data[i] = 'B';
+
+ kfree(not_checked);
+ kfree(checked);
+}
+
void lkdtm_CORRUPT_LIST_ADD(void)
{
/*
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 5ce4ac8c06fc..a5e344df9166 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -130,6 +130,9 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(HARDLOCKUP),
CRASHTYPE(SPINLOCKUP),
CRASHTYPE(HUNG_TASK),
+ CRASHTYPE(OVERFLOW_SIGNED),
+ CRASHTYPE(OVERFLOW_UNSIGNED),
+ CRASHTYPE(ARRAY_BOUNDS),
CRASHTYPE(EXEC_DATA),
CRASHTYPE(EXEC_STACK),
CRASHTYPE(EXEC_KMALLOC),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 8d13d0176624..601a2156a0d4 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -22,6 +22,9 @@ void lkdtm_SOFTLOCKUP(void);
void lkdtm_HARDLOCKUP(void);
void lkdtm_SPINLOCKUP(void);
void lkdtm_HUNG_TASK(void);
+void lkdtm_OVERFLOW_SIGNED(void);
+void lkdtm_OVERFLOW_UNSIGNED(void);
+void lkdtm_ARRAY_BOUNDS(void);
void lkdtm_CORRUPT_LIST_ADD(void);
void lkdtm_CORRUPT_LIST_DEL(void);
void lkdtm_CORRUPT_USER_DS(void);
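
The two OVERFLOW handlers added to bugs.c above only produce a report when the kernel is built with the new UBSAN arithmetic-overflow instrumentation, and ARRAY_BOUNDS likewise relies on the split-out "bounds" checker. The same arithmetic behaviour can be reproduced in userspace; a minimal sketch, assuming clang (gcc supports -fsanitize=signed-integer-overflow but not the unsigned variant):

/* Build with:
 *   clang -fsanitize=signed-integer-overflow,unsigned-integer-overflow ovf.c
 */
#include <limits.h>
#include <stdio.h>

int main(void)
{
	volatile int s = INT_MAX - 2;
	volatile unsigned int u = UINT_MAX - 2;

	s += 4;	/* signed overflow: undefined behaviour, UBSAN reports it */
	u += 4;	/* unsigned wrap-around: well defined in C, reported only by
		 * the clang-specific unsigned-integer-overflow check */

	printf("%d %u\n", s, u);
	return 0;
}
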
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 078615cf2afc..4b2dd8259ff5 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -58,6 +58,7 @@ config VIRTIO_BALLOON
tristate "Virtio balloon driver"
depends on VIRTIO
select MEMORY_BALLOON
+ select PAGE_REPORTING
---help---
This driver supports increasing and decreasing the amount
of memory within a KVM guest.
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 341458fd95ca..0ef16566c3f3 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -14,11 +14,13 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/balloon_compaction.h>
+#include <linux/oom.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/magic.h>
#include <linux/pseudo_fs.h>
+#include <linux/page_reporting.h>
/*
* Balloon device works in 4K page units. So each page is pointed to by
@@ -27,7 +29,9 @@
*/
#define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
-#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+/* Maximum number of (4k) pages to deflate on OOM notifications. */
+#define VIRTIO_BALLOON_OOM_NR_PAGES 256
+#define VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY 80
#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
__GFP_NOMEMALLOC)
@@ -47,6 +51,7 @@ enum virtio_balloon_vq {
VIRTIO_BALLOON_VQ_DEFLATE,
VIRTIO_BALLOON_VQ_STATS,
VIRTIO_BALLOON_VQ_FREE_PAGE,
+ VIRTIO_BALLOON_VQ_REPORTING,
VIRTIO_BALLOON_VQ_MAX
};
@@ -112,8 +117,15 @@ struct virtio_balloon {
/* Memory statistics */
struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
- /* To register a shrinker to shrink memory upon memory pressure */
+ /* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */
struct shrinker shrinker;
+
+ /* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */
+ struct notifier_block oom_nb;
+
+ /* Free page reporting device */
+ struct virtqueue *reporting_vq;
+ struct page_reporting_dev_info pr_dev_info;
};
static struct virtio_device_id id_table[] = {
@@ -153,6 +165,33 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
}
+int virtballoon_free_page_report(struct page_reporting_dev_info *pr_dev_info,
+ struct scatterlist *sg, unsigned int nents)
+{
+ struct virtio_balloon *vb =
+ container_of(pr_dev_info, struct virtio_balloon, pr_dev_info);
+ struct virtqueue *vq = vb->reporting_vq;
+ unsigned int unused, err;
+
+ /* We should always be able to add these buffers to an empty queue. */
+ err = virtqueue_add_inbuf(vq, sg, nents, vb, GFP_NOWAIT | __GFP_NOWARN);
+
+ /*
+ * In the extremely unlikely case that something has occurred and we
+ * are able to trigger an error we will simply display a warning
+ * and exit without actually processing the pages.
+ */
+ if (WARN_ON_ONCE(err))
+ return err;
+
+ virtqueue_kick(vq);
+
+ /* When host has read buffer, this completes via balloon_ack */
+ wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
+
+ return 0;
+}
+
static void set_page_pfns(struct virtio_balloon *vb,
__virtio32 pfns[], struct page *page)
{
@@ -481,6 +520,7 @@ static int init_vqs(struct virtio_balloon *vb)
names[VIRTIO_BALLOON_VQ_STATS] = NULL;
callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+ names[VIRTIO_BALLOON_VQ_REPORTING] = NULL;
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
names[VIRTIO_BALLOON_VQ_STATS] = "stats";
@@ -492,6 +532,11 @@ static int init_vqs(struct virtio_balloon *vb)
callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
}
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
+ names[VIRTIO_BALLOON_VQ_REPORTING] = "reporting_vq";
+ callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
+ }
+
err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
vqs, callbacks, names, NULL, NULL);
if (err)
@@ -524,6 +569,9 @@ static int init_vqs(struct virtio_balloon *vb)
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
+ vb->reporting_vq = vqs[VIRTIO_BALLOON_VQ_REPORTING];
+
return 0;
}
@@ -788,50 +836,13 @@ static unsigned long shrink_free_pages(struct virtio_balloon *vb,
return blocks_freed * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
}
-static unsigned long leak_balloon_pages(struct virtio_balloon *vb,
- unsigned long pages_to_free)
-{
- return leak_balloon(vb, pages_to_free * VIRTIO_BALLOON_PAGES_PER_PAGE) /
- VIRTIO_BALLOON_PAGES_PER_PAGE;
-}
-
-static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
- unsigned long pages_to_free)
-{
- unsigned long pages_freed = 0;
-
- /*
- * One invocation of leak_balloon can deflate at most
- * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
- * multiple times to deflate pages till reaching pages_to_free.
- */
- while (vb->num_pages && pages_freed < pages_to_free)
- pages_freed += leak_balloon_pages(vb,
- pages_to_free - pages_freed);
-
- update_balloon_size(vb);
-
- return pages_freed;
-}
-
static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
struct shrink_control *sc)
{
- unsigned long pages_to_free, pages_freed = 0;
struct virtio_balloon *vb = container_of(shrinker,
struct virtio_balloon, shrinker);
- pages_to_free = sc->nr_to_scan;
-
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
- pages_freed = shrink_free_pages(vb, pages_to_free);
-
- if (pages_freed >= pages_to_free)
- return pages_freed;
-
- pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
-
- return pages_freed;
+ return shrink_free_pages(vb, sc->nr_to_scan);
}
static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
@@ -839,12 +850,22 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
{
struct virtio_balloon *vb = container_of(shrinker,
struct virtio_balloon, shrinker);
- unsigned long count;
- count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
- count += vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
+ return vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
+}
+
+static int virtio_balloon_oom_notify(struct notifier_block *nb,
+ unsigned long dummy, void *parm)
+{
+ struct virtio_balloon *vb = container_of(nb,
+ struct virtio_balloon, oom_nb);
+ unsigned long *freed = parm;
+
+ *freed += leak_balloon(vb, VIRTIO_BALLOON_OOM_NR_PAGES) /
+ VIRTIO_BALLOON_PAGES_PER_PAGE;
+ update_balloon_size(vb);
- return count;
+ return NOTIFY_OK;
}
static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -864,7 +885,6 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
static int virtballoon_probe(struct virtio_device *vdev)
{
struct virtio_balloon *vb;
- __u32 poison_val;
int err;
if (!vdev->config->get) {
@@ -930,27 +950,65 @@ static int virtballoon_probe(struct virtio_device *vdev)
VIRTIO_BALLOON_CMD_ID_STOP);
spin_lock_init(&vb->free_page_list_lock);
INIT_LIST_HEAD(&vb->free_page_list);
- if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
- memset(&poison_val, PAGE_POISON, sizeof(poison_val));
- virtio_cwrite(vb->vdev, struct virtio_balloon_config,
- poison_val, &poison_val);
- }
- }
- /*
- * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
- * shrinker needs to be registered to relieve memory pressure.
- */
- if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
+ /*
+ * We're allowed to reuse any free pages, even if they are
+ * still to be processed by the host.
+ */
err = virtio_balloon_register_shrinker(vb);
if (err)
goto out_del_balloon_wq;
}
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
+ vb->oom_nb.notifier_call = virtio_balloon_oom_notify;
+ vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY;
+ err = register_oom_notifier(&vb->oom_nb);
+ if (err < 0)
+ goto out_unregister_shrinker;
+ }
+
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+ /* Start with poison val of 0 representing general init */
+ __u32 poison_val = 0;
+
+ /*
+ * Let the hypervisor know that we are expecting a
+ * specific value to be written back in balloon pages.
+ */
+ if (!want_init_on_free())
+ memset(&poison_val, PAGE_POISON, sizeof(poison_val));
+
+ virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+ poison_val, &poison_val);
+ }
+
+ vb->pr_dev_info.report = virtballoon_free_page_report;
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
+ unsigned int capacity;
+
+ capacity = virtqueue_get_vring_size(vb->reporting_vq);
+ if (capacity < PAGE_REPORTING_CAPACITY) {
+ err = -ENOSPC;
+ goto out_unregister_oom;
+ }
+
+ err = page_reporting_register(&vb->pr_dev_info);
+ if (err)
+ goto out_unregister_oom;
+ }
+
virtio_device_ready(vdev);
if (towards_target(vb))
virtballoon_changed(vdev);
return 0;
+out_unregister_oom:
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+ unregister_oom_notifier(&vb->oom_nb);
+out_unregister_shrinker:
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+ virtio_balloon_unregister_shrinker(vb);
out_del_balloon_wq:
if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
destroy_workqueue(vb->balloon_wq);
@@ -989,7 +1047,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
{
struct virtio_balloon *vb = vdev->priv;
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
+ page_reporting_unregister(&vb->pr_dev_info);
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+ unregister_oom_notifier(&vb->oom_nb);
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
virtio_balloon_unregister_shrinker(vb);
spin_lock_irq(&vb->stop_update_lock);
vb->stop_update = true;
@@ -1045,7 +1107,10 @@ static int virtballoon_restore(struct virtio_device *vdev)
static int virtballoon_validate(struct virtio_device *vdev)
{
- if (!page_poisoning_enabled())
+ /* Tell the host whether we care about poisoned pages. */
+ if (!want_init_on_free() &&
+ (IS_ENABLED(CONFIG_PAGE_POISONING_NO_SANITY) ||
+ !page_poisoning_enabled()))
__virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
@@ -1058,6 +1123,7 @@ static unsigned int features[] = {
VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
VIRTIO_BALLOON_F_FREE_PAGE_HINT,
VIRTIO_BALLOON_F_PAGE_POISON,
+ VIRTIO_BALLOON_F_REPORTING,
};
static struct virtio_driver virtio_balloon_driver = {
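
For reference, the registration flow that the virtio_balloon changes plug into: the driver fills in a page_reporting_dev_info with a report() callback that hands a scatterlist of free pages to the device, checks that its queue can hold at least PAGE_REPORTING_CAPACITY entries, and then calls page_reporting_register() (with page_reporting_unregister() on removal). A minimal sketch of that contract, with the "my_" names as illustrative placeholders rather than anything from the patch:

#include <linux/page_reporting.h>
#include <linux/scatterlist.h>

static int my_report(struct page_reporting_dev_info *prdev,
		     struct scatterlist *sgl, unsigned int nents)
{
	/*
	 * Hand the scatterlist of free pages to the hypervisor or device,
	 * wait until it has processed them, then return 0 on success or a
	 * negative errno.
	 */
	return 0;
}

static struct page_reporting_dev_info my_pr_dev_info = {
	.report = my_report,
};

/*
 * At probe, after verifying the queue holds at least
 * PAGE_REPORTING_CAPACITY entries:
 *	err = page_reporting_register(&my_pr_dev_info);
 * At remove:
 *	page_reporting_unregister(&my_pr_dev_info);
 */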