summary | refs | log | tree | commit | diff
path: root/drivers/virtio/virtio_mem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/virtio/virtio_mem.c')
-rw-r--r-- drivers/virtio/virtio_mem.c 206
1 files changed, 177 insertions, 29 deletions
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 8e3223294442..56d0dbe62163 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -21,6 +21,8 @@
#include <linux/bitmap.h>
#include <linux/lockdep.h>
#include <linux/log2.h>
+#include <linux/vmalloc.h>
+#include <linux/suspend.h>
#include <acpi/acpi_numa.h>
@@ -131,6 +133,8 @@ struct virtio_mem {
uint64_t addr;
/* Maximum region size in bytes. */
uint64_t region_size;
+ /* Usable region size in bytes. */
+ uint64_t usable_region_size;
/* The parent resource for all memory added via this device. */
struct resource *parent_resource;
@@ -252,6 +256,9 @@ struct virtio_mem {
/* Memory notifier (online/offline events). */
struct notifier_block memory_notifier;
+ /* Notifier to block hibernation image storing/reloading. */
+ struct notifier_block pm_notifier;
+
#ifdef CONFIG_PROC_VMCORE
/* vmcore callback for /proc/vmcore handling in kdump mode */
struct vmcore_cb vmcore_cb;
@@ -1111,6 +1118,25 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
return rc;
}
+/*
+ * PM notifier callback: veto hibernation and restoring from an image.
+ *
+ * Returns NOTIFY_BAD for PM_HIBERNATION_PREPARE and PM_RESTORE_PREPARE,
+ * and NOTIFY_OK for every other action.
+ */
+static int virtio_mem_pm_notifier_cb(struct notifier_block *nb,
+				     unsigned long action, void *arg)
+{
+	struct virtio_mem *vm;
+
+	/* Everything but the two hibernation-related actions may proceed. */
+	if (action != PM_HIBERNATION_PREPARE && action != PM_RESTORE_PREPARE)
+		return NOTIFY_OK;
+
+	/*
+	 * When restarting the VM, all memory is unplugged. Don't
+	 * allow to hibernate and restore from an image.
+	 */
+	vm = container_of(nb, struct virtio_mem, pm_notifier);
+	dev_err(&vm->vdev->dev, "hibernation is not supported.\n");
+	return NOTIFY_BAD;
+}
+
/*
* Set a range of pages PG_offline. Remember pages that were never onlined
* (via generic_online_page()) using PageDirty().
@@ -1122,12 +1148,16 @@ static void virtio_mem_set_fake_offline(unsigned long pfn,
for (; nr_pages--; pfn++) {
struct page *page = pfn_to_page(pfn);
- __SetPageOffline(page);
- if (!onlined) {
+ if (!onlined)
+ /*
+ * Pages that have not been onlined yet were initialized
+ * to PageOffline(). Remember that we have to route them
+ * through generic_online_page().
+ */
SetPageDirty(page);
- /* FIXME: remove after cleanups */
- ClearPageReserved(page);
- }
+ else
+ __SetPageOffline(page);
+ VM_WARN_ON_ONCE(!PageOffline(page));
}
page_offline_end();
}
@@ -1142,9 +1172,11 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn,
for (; nr_pages--; pfn++) {
struct page *page = pfn_to_page(pfn);
- __ClearPageOffline(page);
if (!onlined)
+ /* generic_online_page() will clear PageOffline(). */
ClearPageDirty(page);
+ else
+ __ClearPageOffline(page);
}
}
@@ -1239,12 +1271,6 @@ static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
struct page *page;
unsigned long i;
- /*
- * Drop our reference to the pages so the memory can get offlined
- * and add the unplugged pages to the managed page counters (so
- * offlining code can correctly subtract them again).
- */
- adjust_managed_page_count(pfn_to_page(pfn), nr_pages);
/* Drop our reference to the pages so the memory can get offlined. */
for (i = 0; i < nr_pages; i++) {
page = pfn_to_page(pfn + i);
@@ -1263,10 +1289,9 @@ static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
unsigned long i;
/*
- * Get the reference we dropped when going offline and subtract the
- * unplugged pages from the managed page counters.
+ * Get the reference again that we dropped via page_ref_dec_and_test()
+ * when going offline.
*/
- adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
for (i = 0; i < nr_pages; i++)
page_ref_inc(pfn_to_page(pfn + i));
}
@@ -2345,7 +2370,7 @@ static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm)
static void virtio_mem_refresh_config(struct virtio_mem *vm)
{
const struct range pluggable_range = mhp_get_pluggable_range(true);
- uint64_t new_plugged_size, usable_region_size, end_addr;
+ uint64_t new_plugged_size, end_addr;
/* the plugged_size is just a reflection of what _we_ did previously */
virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
@@ -2355,8 +2380,8 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm)
/* calculate the last usable memory block id */
virtio_cread_le(vm->vdev, struct virtio_mem_config,
- usable_region_size, &usable_region_size);
- end_addr = min(vm->addr + usable_region_size - 1,
+ usable_region_size, &vm->usable_region_size);
+ end_addr = min(vm->addr + vm->usable_region_size - 1,
pluggable_range.end);
if (vm->in_sbm) {
@@ -2615,11 +2640,20 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm)
rc = register_memory_notifier(&vm->memory_notifier);
if (rc)
goto out_unreg_group;
- rc = register_virtio_mem_device(vm);
+ /* Block hibernation as early as possible. */
+ vm->pm_notifier.priority = INT_MAX;
+ vm->pm_notifier.notifier_call = virtio_mem_pm_notifier_cb;
+ rc = register_pm_notifier(&vm->pm_notifier);
if (rc)
goto out_unreg_mem;
+ rc = register_virtio_mem_device(vm);
+ if (rc)
+ goto out_unreg_pm;
+ virtio_device_ready(vm->vdev);
return 0;
+out_unreg_pm:
+ unregister_pm_notifier(&vm->pm_notifier);
out_unreg_mem:
unregister_memory_notifier(&vm->memory_notifier);
out_unreg_group:
@@ -2694,13 +2728,103 @@ static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb,
mutex_unlock(&vm->hotplug_mutex);
return is_ram;
}
+
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+/*
+ * Add the range [start, end) to @list via vmcore_alloc_add_range().
+ *
+ * Logs an error on the virtio device and returns the non-zero result of
+ * vmcore_alloc_add_range() on failure; returns 0 on success.
+ */
+static int virtio_mem_vmcore_add_device_ram(struct virtio_mem *vm,
+		struct list_head *list, uint64_t start, uint64_t end)
+{
+	const int err = vmcore_alloc_add_range(list, start, end - start);
+
+	if (!err)
+		return 0;
+
+	dev_err(&vm->vdev->dev,
+		"Error adding device RAM range: %d\n", err);
+	return err;
+}
+
+/*
+ * vmcore callback: collect the device memory ranges to report as RAM.
+ *
+ * Walks [vm->addr, vm->addr + vm->usable_region_size) in chunks, queries the
+ * state of each chunk from the device and appends one range to @list per run
+ * of consecutive chunks that were not reported as completely unplugged.
+ *
+ * Ranges are gathered on a private list first and only spliced onto @list
+ * once everything succeeded, so @list is left untouched on failure.
+ *
+ * Returns 0 on success (including when nothing is plugged) or a negative
+ * error code from the state request or from adding a range.
+ */
+static int virtio_mem_vmcore_get_device_ram(struct vmcore_cb *cb,
+		struct list_head *list)
+{
+	struct virtio_mem *vm = container_of(cb, struct virtio_mem,
+					     vmcore_cb);
+	const uint64_t device_start = vm->addr;
+	const uint64_t device_end = vm->addr + vm->usable_region_size;
+	/* plugged_range_start == 0 doubles as "no plugged range pending". */
+	uint64_t chunk_size, cur_start, cur_end, plugged_range_start = 0;
+	LIST_HEAD(tmp_list);
+	/*
+	 * Must start out as 0: if the loop below never runs (empty usable
+	 * region), rc is still evaluated after the "out" label.
+	 */
+	int rc = 0;
+
+	if (!vm->plugged_size)
+		return 0;
+
+	/* Process memory sections, unless the device block size is bigger. */
+	chunk_size = max_t(uint64_t, PFN_PHYS(PAGES_PER_SECTION),
+			   vm->device_block_size);
+
+	mutex_lock(&vm->hotplug_mutex);
+
+	/*
+	 * We process larger chunks and indicate the complete chunk if any
+	 * block in there is plugged. This reduces the number of pfn_is_ram()
+	 * callbacks and mimics what is effectively being done when the old
+	 * kernel would add complete memory sections/blocks to the elfcore hdr.
+	 */
+	for (cur_start = device_start; cur_start < device_end; cur_start = cur_end) {
+		/* Chunks end on chunk_size boundaries, clamped to the device end. */
+		cur_end = ALIGN_DOWN(cur_start + chunk_size, chunk_size);
+		cur_end = min_t(uint64_t, cur_end, device_end);
+
+		rc = virtio_mem_send_state_request(vm, cur_start,
+						   cur_end - cur_start);
+
+		if (rc < 0) {
+			dev_err(&vm->vdev->dev,
+				"Error querying block states: %d\n", rc);
+			goto out;
+		} else if (rc != VIRTIO_MEM_STATE_UNPLUGGED) {
+			/* Merge ranges with plugged memory. */
+			if (!plugged_range_start)
+				plugged_range_start = cur_start;
+			continue;
+		}
+
+		/* Flush any plugged range. */
+		if (plugged_range_start) {
+			rc = virtio_mem_vmcore_add_device_ram(vm, &tmp_list,
+							      plugged_range_start,
+							      cur_start);
+			if (rc)
+				goto out;
+			plugged_range_start = 0;
+		}
+	}
+
+	/* Flush any plugged range that extends up to the end of the walk. */
+	if (plugged_range_start)
+		rc = virtio_mem_vmcore_add_device_ram(vm, &tmp_list,
+						      plugged_range_start,
+						      cur_start);
+out:
+	mutex_unlock(&vm->hotplug_mutex);
+	if (rc < 0) {
+		vmcore_free_ranges(&tmp_list);
+		return rc;
+	}
+	list_splice_tail(&tmp_list, list);
+	return 0;
+}
+#endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */
#endif /* CONFIG_PROC_VMCORE */
static int virtio_mem_init_kdump(struct virtio_mem *vm)
{
+ /* We must be prepared to receive a callback immediately. */
+ virtio_device_ready(vm->vdev);
#ifdef CONFIG_PROC_VMCORE
dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n");
vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram;
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+ vm->vmcore_cb.get_device_ram = virtio_mem_vmcore_get_device_ram;
+#endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */
register_vmcore_cb(&vm->vmcore_cb);
return 0;
#else /* CONFIG_PROC_VMCORE */
@@ -2729,6 +2853,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr);
virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
&vm->region_size);
+ virtio_cread_le(vm->vdev, struct virtio_mem_config, usable_region_size,
+ &vm->usable_region_size);
/* Determine the nid for the device based on the lowest address. */
if (vm->nid == NUMA_NO_NODE)
@@ -2824,8 +2950,8 @@ static int virtio_mem_probe(struct virtio_device *vdev)
mutex_init(&vm->hotplug_mutex);
INIT_LIST_HEAD(&vm->next);
spin_lock_init(&vm->removal_lock);
- hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- vm->retry_timer.function = virtio_mem_timer_expired;
+ hrtimer_setup(&vm->retry_timer, virtio_mem_timer_expired, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
vm->in_kdump = is_kdump_kernel();
@@ -2839,8 +2965,6 @@ static int virtio_mem_probe(struct virtio_device *vdev)
if (rc)
goto out_del_vq;
- virtio_device_ready(vdev);
-
/* trigger a config update to start processing the requested_size */
if (!vm->in_kdump) {
atomic_set(&vm->config_changed, 1);
@@ -2897,6 +3021,7 @@ static void virtio_mem_deinit_hotplug(struct virtio_mem *vm)
/* unregister callbacks */
unregister_virtio_mem_device(vm);
+ unregister_pm_notifier(&vm->pm_notifier);
unregister_memory_notifier(&vm->memory_notifier);
/*
@@ -2960,17 +3085,40 @@ static void virtio_mem_config_changed(struct virtio_device *vdev)
#ifdef CONFIG_PM_SLEEP
static int virtio_mem_freeze(struct virtio_device *vdev)
{
+ struct virtio_mem *vm = vdev->priv;
+
/*
- * When restarting the VM, all memory is usually unplugged. Don't
- * allow to suspend/hibernate.
+ * We block hibernation using the PM notifier completely. The workqueue
+ * is already frozen by the PM core at this point, so we simply
+ * reset the device and clean up the queues.
+ *
+ * Suspending is refused with -EPERM when the target state is not
+ * PM_SUSPEND_TO_IDLE, memory is plugged and the
+ * VIRTIO_MEM_F_PERSISTENT_SUSPEND feature is not set. Otherwise the
+ * device is reset, its virtqueues are deleted and 0 is returned.
*/
- dev_err(&vdev->dev, "save/restore not supported.\n");
- return -EPERM;
+ if (pm_suspend_target_state != PM_SUSPEND_TO_IDLE &&
+ vm->plugged_size &&
+ !virtio_has_feature(vm->vdev, VIRTIO_MEM_F_PERSISTENT_SUSPEND)) {
+ dev_err(&vm->vdev->dev,
+ "suspending with plugged memory is not supported\n");
+ return -EPERM;
+ }
+
+ virtio_reset_device(vdev);
+ vdev->config->del_vqs(vdev);
+ vm->vq = NULL;
+ return 0;
}
static int virtio_mem_restore(struct virtio_device *vdev)
{
- return -EPERM;
+ struct virtio_mem *vm = vdev->priv;
+ int ret;
+
+ ret = virtio_mem_init_vq(vm);
+ if (ret)
+ return ret;
+ virtio_device_ready(vdev);
+
+ /*
+ * virtio_mem_init_vq() succeeded and the device is marked ready:
+ * run the config-changed handler to check if anything changed.
+ */
+ virtio_mem_config_changed(vdev);
+ return 0;
}
#endif
@@ -2979,6 +3127,7 @@ static unsigned int virtio_mem_features[] = {
VIRTIO_MEM_F_ACPI_PXM,
#endif
VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE,
+ VIRTIO_MEM_F_PERSISTENT_SUSPEND,
};
static const struct virtio_device_id virtio_mem_id_table[] = {
@@ -2990,7 +3139,6 @@ static struct virtio_driver virtio_mem_driver = {
.feature_table = virtio_mem_features,
.feature_table_size = ARRAY_SIZE(virtio_mem_features),
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = virtio_mem_id_table,
.probe = virtio_mem_probe,
.remove = virtio_mem_remove,