From 02b42d58f3898134b900ff3030561099e38adb32 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:13 +0200 Subject: PM: hibernate: factor out a helper to find the resume device Split the logic to find the resume device out software_resume and into a separate helper to start unwindig the convoluted goto logic. Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230531125535.676098-3-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 72 +++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 35 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 30d1274f03f6..072795063662 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -910,6 +910,41 @@ unlock: } EXPORT_SYMBOL_GPL(hibernate_quiet_exec); +static int find_resume_device(void) +{ + if (!strlen(resume_file)) + return -ENOENT; + + pm_pr_dbg("Checking hibernation image partition %s\n", resume_file); + + if (resume_delay) { + pr_info("Waiting %dsec before reading resume device ...\n", + resume_delay); + ssleep(resume_delay); + } + + /* Check if the device is there */ + swsusp_resume_device = name_to_dev_t(resume_file); + if (swsusp_resume_device) + return 0; + + /* + * Some device discovery might still be in progress; we need to wait for + * this to finish. + */ + wait_for_device_probe(); + if (resume_wait) { + while (!(swsusp_resume_device = name_to_dev_t(resume_file))) + msleep(10); + async_synchronize_full(); + } + + swsusp_resume_device = name_to_dev_t(resume_file); + if (!swsusp_resume_device) + return -ENODEV; + return 0; +} + /** * software_resume - Resume from a saved hibernation image. * @@ -949,45 +984,12 @@ static int software_resume(void) snapshot_test = false; - if (swsusp_resume_device) - goto Check_image; - - if (!strlen(resume_file)) { - error = -ENOENT; - goto Unlock; - } - - pm_pr_dbg("Checking hibernation image partition %s\n", resume_file); - - if (resume_delay) { - pr_info("Waiting %dsec before reading resume device ...\n", - resume_delay); - ssleep(resume_delay); - } - - /* Check if the device is there */ - swsusp_resume_device = name_to_dev_t(resume_file); if (!swsusp_resume_device) { - /* - * Some device discovery might still be in progress; we need - * to wait for this to finish. - */ - wait_for_device_probe(); - - if (resume_wait) { - while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0) - msleep(10); - async_synchronize_full(); - } - - swsusp_resume_device = name_to_dev_t(resume_file); - if (!swsusp_resume_device) { - error = -ENODEV; + error = find_resume_device(); + if (error) goto Unlock; - } } - Check_image: pm_pr_dbg("Hibernation image partition %d:%d present\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); -- cgit From d6545e687271ab27472eebff770f2de6a5f1a464 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:14 +0200 Subject: PM: hibernate: remove the global snapshot_test variable Passing call dependent variable in global variables is a huge antipattern. Fix it up. Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230531125535.676098-4-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 072795063662..78696aa04f5c 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -64,7 +64,6 @@ enum { static int hibernation_mode = HIBERNATION_SHUTDOWN; bool freezer_test_done; -bool snapshot_test; static const struct platform_hibernation_ops *hibernation_ops; @@ -684,7 +683,7 @@ static void power_down(void) cpu_relax(); } -static int load_image_and_restore(void) +static int load_image_and_restore(bool snapshot_test) { int error; unsigned int flags; @@ -721,6 +720,7 @@ static int load_image_and_restore(void) */ int hibernate(void) { + bool snapshot_test = false; unsigned int sleep_flags; int error; @@ -748,9 +748,6 @@ int hibernate(void) if (error) goto Exit; - /* protected by system_transition_mutex */ - snapshot_test = false; - lock_device_hotplug(); /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); @@ -792,9 +789,9 @@ int hibernate(void) unlock_device_hotplug(); if (snapshot_test) { pm_pr_dbg("Checking hibernation image\n"); - error = swsusp_check(); + error = swsusp_check(snapshot_test); if (!error) - error = load_image_and_restore(); + error = load_image_and_restore(snapshot_test); } thaw_processes(); @@ -982,8 +979,6 @@ static int software_resume(void) */ mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING); - snapshot_test = false; - if (!swsusp_resume_device) { error = find_resume_device(); if (error) @@ -994,7 +989,7 @@ static int software_resume(void) MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); pm_pr_dbg("Looking for hibernation image.\n"); - error = swsusp_check(); + error = swsusp_check(false); if (error) goto Unlock; @@ -1022,7 +1017,7 @@ static int software_resume(void) goto Close_Finish; } - error = load_image_and_restore(); + error = load_image_and_restore(false); thaw_processes(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); -- cgit From cc89c63e2fe37d476357c82390dfb12edcd41cdd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:15 +0200 Subject: PM: hibernate: move finding the resume device out of software_resume software_resume can be called either from an init call in the boot code, or from sysfs once the system has finished booting, and the two invocation methods this can't race with each other. For the latter case we did just parse the suspend device manually, while the former might not have one. Split software_resume so that the search only happens for the boot case, which also means the special lockdep nesting annotation can go away as the system transition mutex can be taken a little later and doesn't have the sysfs locking nest inside it. Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230531125535.676098-5-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 80 +++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 41 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 78696aa04f5c..45e24b02cd50 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -907,7 +907,7 @@ unlock: } EXPORT_SYMBOL_GPL(hibernate_quiet_exec); -static int find_resume_device(void) +static int __init find_resume_device(void) { if (!strlen(resume_file)) return -ENOENT; @@ -942,53 +942,16 @@ static int find_resume_device(void) return 0; } -/** - * software_resume - Resume from a saved hibernation image. - * - * This routine is called as a late initcall, when all devices have been - * discovered and initialized already. - * - * The image reading code is called to see if there is a hibernation image - * available for reading. If that is the case, devices are quiesced and the - * contents of memory is restored from the saved image. - * - * If this is successful, control reappears in the restored target kernel in - * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine - * attempts to recover gracefully and make the kernel return to the normal mode - * of operation. - */ static int software_resume(void) { int error; - /* - * If the user said "noresume".. bail out early. - */ - if (noresume || !hibernation_available()) - return 0; - - /* - * name_to_dev_t() below takes a sysfs buffer mutex when sysfs - * is configured into the kernel. Since the regular hibernate - * trigger path is via sysfs which takes a buffer mutex before - * calling hibernate functions (which take system_transition_mutex) - * this can cause lockdep to complain about a possible ABBA deadlock - * which cannot happen since we're in the boot code here and - * sysfs can't be invoked yet. Therefore, we use a subclass - * here to avoid lockdep complaining. - */ - mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING); - - if (!swsusp_resume_device) { - error = find_resume_device(); - if (error) - goto Unlock; - } - pm_pr_dbg("Hibernation image partition %d:%d present\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); pm_pr_dbg("Looking for hibernation image.\n"); + + mutex_lock(&system_transition_mutex); error = swsusp_check(false); if (error) goto Unlock; @@ -1035,7 +998,39 @@ static int software_resume(void) goto Finish; } -late_initcall_sync(software_resume); +/** + * software_resume_initcall - Resume from a saved hibernation image. + * + * This routine is called as a late initcall, when all devices have been + * discovered and initialized already. + * + * The image reading code is called to see if there is a hibernation image + * available for reading. If that is the case, devices are quiesced and the + * contents of memory is restored from the saved image. + * + * If this is successful, control reappears in the restored target kernel in + * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine + * attempts to recover gracefully and make the kernel return to the normal mode + * of operation. + */ +static int __init software_resume_initcall(void) +{ + /* + * If the user said "noresume".. bail out early. + */ + if (noresume || !hibernation_available()) + return 0; + + if (!swsusp_resume_device) { + int error = find_resume_device(); + + if (error) + return error; + } + + return software_resume(); +} +late_initcall_sync(software_resume_initcall); static const char * const hibernation_modes[] = { @@ -1176,6 +1171,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, char *name; dev_t res; + if (!hibernation_available()) + return 0; + if (len && buf[len-1] == '\n') len--; name = kstrndup(buf, len, GFP_KERNEL); -- cgit From cf056a43121559d3642419917d405c3237ded90a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:24 +0200 Subject: init: improve the name_to_dev_t interface name_to_dev_t has a very misleading name, that doesn't make clear it should only be used by the early init code, and also has a bad calling convention that doesn't allow returning different kinds of errors. Rename it to early_lookup_bdev to make the use case clear, and return an errno, where -EINVAL means the string could not be parsed, and -ENODEV means it the string was valid, but there was no device found for it. Also stub out the whole call for !CONFIG_BLOCK as all the non-block root cases are always covered in the caller. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230531125535.676098-14-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 45e24b02cd50..c52dedb9f7c8 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) "PM: hibernation: " fmt +#include #include #include #include @@ -921,8 +922,7 @@ static int __init find_resume_device(void) } /* Check if the device is there */ - swsusp_resume_device = name_to_dev_t(resume_file); - if (swsusp_resume_device) + if (!early_lookup_bdev(resume_file, &swsusp_resume_device)) return 0; /* @@ -931,15 +931,12 @@ static int __init find_resume_device(void) */ wait_for_device_probe(); if (resume_wait) { - while (!(swsusp_resume_device = name_to_dev_t(resume_file))) + while (early_lookup_bdev(resume_file, &swsusp_resume_device)) msleep(10); async_synchronize_full(); } - swsusp_resume_device = name_to_dev_t(resume_file); - if (!swsusp_resume_device) - return -ENODEV; - return 0; + return early_lookup_bdev(resume_file, &swsusp_resume_device); } static int software_resume(void) @@ -1169,7 +1166,8 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, unsigned int sleep_flags; int len = n; char *name; - dev_t res; + dev_t dev; + int error; if (!hibernation_available()) return 0; @@ -1180,13 +1178,13 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, if (!name) return -ENOMEM; - res = name_to_dev_t(name); + error = early_lookup_bdev(name, &dev); kfree(name); - if (!res) - return -EINVAL; + if (error) + return error; sleep_flags = lock_system_sleep(); - swsusp_resume_device = res; + swsusp_resume_device = dev; unlock_system_sleep(sleep_flags); pm_pr_dbg("Configured hibernation resume from disk to %u\n", -- cgit From 1e8c813b083c4122dfeaa5c3b11028331026e85d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:55:32 +0200 Subject: PM: hibernate: don't use early_lookup_bdev in resume_store resume_store is a sysfs attribute written during normal kernel runtime, and it should not use the early_lookup_bdev API that bypasses all normal path based permission checking, and might cause problems with certain container environments renaming devices. Switch to lookup_bdev, which does a normal path lookup instead, and fall back to trying to parse a numeric dev_t just like early_lookup_bdev did. Note that this strictly speaking changes the kernel ABI as the PARTUUID= and PARTLABEL= style syntax is now not available during a running systems. They never were intended for that, but this breaks things we'll have to figure out a way to make them available again. But if avoidable in any way I'd rather avoid that. Fixes: 421a5fa1a6cf ("PM / hibernate: use name_to_dev_t to parse resume") Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230531125535.676098-22-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c52dedb9f7c8..7ae95ec72f99 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1178,7 +1178,23 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, if (!name) return -ENOMEM; - error = early_lookup_bdev(name, &dev); + error = lookup_bdev(name, &dev); + if (error) { + unsigned maj, min, offset; + char *p, dummy; + + if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || + sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, + &dummy) == 3) { + dev = MKDEV(maj, min); + if (maj != MAJOR(dev) || min != MINOR(dev)) + error = -EINVAL; + } else { + dev = new_decode_dev(simple_strtoul(name, &p, 16)); + if (*p) + error = -EINVAL; + } + } kfree(name); if (error) return error; -- cgit From 2736e8eeb0ccdc71d1f4256c9c9a28f58cc43307 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jun 2023 13:02:43 +0200 Subject: block: use the holder as indication for exclusive opens The current interface for exclusive opens is rather confusing as it requires both the FMODE_EXCL flag and a holder. Remove the need to pass FMODE_EXCL and just key off the exclusive open off a non-NULL holder. For blkdev_put this requires adding the holder argument, which provides better debug checking that only the holder actually releases the hold, but at the same time allows removing the now superfluous mode argument. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Christian Brauner Acked-by: David Sterba [btrfs] Acked-by: Jack Wang [rnbd] Link: https://lore.kernel.org/r/20230608110258.189493-16-hch@lst.de Signed-off-by: Jens Axboe --- kernel/power/hibernate.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 7ae95ec72f99..f62e89d0d906 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -688,22 +688,18 @@ static int load_image_and_restore(bool snapshot_test) { int error; unsigned int flags; - fmode_t mode = FMODE_READ; - - if (snapshot_test) - mode |= FMODE_EXCL; pm_pr_dbg("Loading hibernation image.\n"); lock_device_hotplug(); error = create_basic_memory_bitmaps(); if (error) { - swsusp_close(mode); + swsusp_close(snapshot_test); goto Unlock; } error = swsusp_read(&flags); - swsusp_close(mode); + swsusp_close(snapshot_test); if (!error) error = hibernation_restore(flags & SF_PLATFORM_MODE); @@ -956,7 +952,7 @@ static int software_resume(void) /* The snapshot device should not be opened while we're running */ if (!hibernate_acquire()) { error = -EBUSY; - swsusp_close(FMODE_READ | FMODE_EXCL); + swsusp_close(false); goto Unlock; } @@ -991,7 +987,7 @@ static int software_resume(void) pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: - swsusp_close(FMODE_READ | FMODE_EXCL); + swsusp_close(false); goto Finish; } -- cgit From c9e4bf607d8c431452ef362c00e62ce999bbae93 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 11 Jul 2023 13:48:12 +0200 Subject: PM: hibernate: Fix writing maj:min to /sys/power/resume resume_store() first calls lookup_bdev() and after tries to handle maj:min, but it does not reset the error before, hence if you will write maj:min you will get ENOENT: # echo 259:2 >| /sys/power/resume bash: echo: write error: No such file or directory This also should fix hiberation via systemd, since it uses this way. Fixes: 1e8c813b083c4 ("PM: hibernate: don't use early_lookup_bdev in resume_store") Signed-off-by: Azat Khuzhin Reviewed-by: Christoph Hellwig [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f62e89d0d906..e1b4bfa938dd 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1179,6 +1179,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, unsigned maj, min, offset; char *p, dummy; + error = 0; if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) { -- cgit